1import matplotlib.pyplot as plt
 2import numpy as np
 3import pandas as pd
 4import seaborn as sns
 5from src.code.simulation.galton_watson import SimulateurGaltonWatson
 6from src.code.simulation.probability_distributions import (
 7    create_distributions,
 8    create_distributions_df,
 9)
10from src.code.simulation.utils import test_loi_exponentielle
11from src.code.simulation.yaglom import simulation_yaglom_toutes_lois
12from src.config.config import seed
13from src.utils.utils import init_notebook
1init_notebook(seed)

Théorème de Yaglom#

Mise au propre Yaglom#

1distributions = create_distributions()
1alpha = 0.05
2nb_processus = 30_000
3taille_pas = 2
4nb_repetitions = 100
1p_value_dict, ks_dict, lambda_dict = simulation_yaglom_toutes_lois(
2    distributions,
3    nb_processus,
4    taille_pas,
5    nb_repetitions,
6)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[5], line 1
----> 1 p_value_dict, ks_dict, lambda_dict = simulation_yaglom_toutes_lois(
      2     distributions,
      3     nb_processus,
      4     taille_pas,
      5     nb_repetitions,
      6 )

File ~/work/epidemic-simulation/epidemic-simulation/src/code/simulation/yaglom.py:85, in simulation_yaglom_toutes_lois(distributions, nb_processus, taille_pas, nb_repetitions)
     82 lambda_dict: dict[str, list[float]] = {}
     84 for nom_loi, loi in distributions.items():
---> 85     p_value, ks, lambda_ = simulation_yaglom(
     86         loi,
     87         nb_processus=nb_processus,
     88         taille_pas=taille_pas,
     89         nb_repetitions=nb_repetitions,
     90     )
     92     p_value_dict[nom_loi] = p_value
     93     ks_dict[nom_loi] = ks

File ~/work/epidemic-simulation/epidemic-simulation/src/code/simulation/yaglom.py:48, in simulation_yaglom(loi, nb_processus, taille_pas, nb_repetitions, taille_echantillon, affichage)
     45 sim = SimulateurGaltonWatson(loi, nb_processus=nb_processus)
     47 for i in range(nb_repetitions):
---> 48     sim.simule(nb_epoques=taille_pas)
     49     sim.retire_processus_eteints()
     51     zn_sur_n = sim.get_zn_sur_n()

File ~/work/epidemic-simulation/epidemic-simulation/src/code/simulation/galton_watson.py:207, in SimulateurGaltonWatson.simule(self, nb_epoques)
    205 def simule(self, nb_epoques: int = 10) -> None:
    206     for i in range(self.nb_processus):
--> 207         self.simulations[i].simule(nb_epoques)

File ~/work/epidemic-simulation/epidemic-simulation/src/code/simulation/galton_watson.py:60, in GaltonWatson.simule(self, nb_epoques)
     57 epoque_actuelle = 0
     59 while epoque_actuelle < nb_epoques and self.nb_descendants > 0:
---> 60     liste_descendants = self.loi.rvs(size=self.nb_descendants)
     61     self.liste_descendants.append(liste_descendants)
     63     self.nb_descendants = np.sum(liste_descendants)

File ~/.cache/pypoetry/virtualenvs/epidemic-simulation-XpKh5KBY-py3.10/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:491, in rv_frozen.rvs(self, size, random_state)
    489 kwds = self.kwds.copy()
    490 kwds.update({'size': size, 'random_state': random_state})
--> 491 return self.dist.rvs(*self.args, **kwds)

File ~/.cache/pypoetry/virtualenvs/epidemic-simulation-XpKh5KBY-py3.10/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:3343, in rv_discrete.rvs(self, *args, **kwargs)
   3314 """Random variates of given type.
   3315 
   3316 Parameters
   (...)
   3340 
   3341 """
   3342 kwargs['discrete'] = True
-> 3343 return super().rvs(*args, **kwargs)

File ~/.cache/pypoetry/virtualenvs/epidemic-simulation-XpKh5KBY-py3.10/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:1049, in rv_generic.rvs(self, *args, **kwds)
   1047 args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
   1048 cond = logical_and(self._argcheck(*args), (scale >= 0))
-> 1049 if not np.all(cond):
   1050     message = ("Domain error in arguments. The `scale` parameter must "
   1051                "be positive for all distributions, and many "
   1052                "distributions have restrictions on shape parameters. "
   1053                f"Please see the `scipy.stats.{self.name}` "
   1054                "documentation for details.")
   1055     raise ValueError(message)

KeyboardInterrupt: 
1p_value_df = pd.DataFrame(p_value_dict)
2ks_df = pd.DataFrame(ks_dict)
3lambda_df = pd.DataFrame(lambda_dict)
1p_value_df.to_csv("data/results/p-value-evolution.csv", index=False)
2ks_df.to_csv("data/results/ks-evolution.csv", index=False)
3lambda_df.to_csv("data/results/lambda-evolution.csv", index=False)
1p_value_df = pd.read_csv("data/results/p-value-evolution.csv")
2ks_df = pd.read_csv("data/results/ks-evolution.csv")
3lambda_df = pd.read_csv("data/results/lambda-evolution.csv")
1distributions_df = create_distributions_df()
1lambda_array = np.array(distributions_df["Lambda théorique loi exponentielle Z_n / n"])

Graphiques#

p-values#

Toutes les lois de reproduction#

 1periode_lissage = 15
 2
 3p_value_df.rolling(window=periode_lissage).mean().plot(figsize=(15, 10))
 4plt.title(
 5    "Évolution des p-values (test KS loi exponentielle) pour $Z_n / n$ par loi"
 6    f"\n(lissé avec une fenêtre de taille {periode_lissage})",
 7)
 8plt.plot(
 9    list(range(periode_lissage, nb_repetitions)),
10    [alpha for _ in range(periode_lissage, nb_repetitions)],
11    label=r"$\alpha = 0.05$",
12    color="black",
13    linestyle="dashed",
14)
15plt.legend()
16plt.savefig("assets/img/p-values-evolution-all-laws.png")
17plt.savefig("assets/img/p-values-evolution-all-laws.svg")
../_images/a34a850f7d093090a5b998b878cb2d8c1bd8259a847fb870e5ebf70365e59851.png

Lois de reproduction groupées#

 1groupe1 = [
 2    "Poisson (λ = 1)",
 3    "Uniforme {0, 1, 2}",
 4    "Hyper-Géométrique (N=10, n=2, p=0.5)",
 5    "Hyper-Géométrique (N=100, n=10, p=0.1)",
 6]
 7
 8groupe2 = [
 9    "Binomiale (n=2, p=1/2)",
10    "Binomiale (n=10, p=1/10)",
11    "Binomiale (n=50, p=1/50)",
12    "Négative Binomiale (n=1, p=0.5)",
13    "Négative Binomiale (n=10, p=10/11)",
14]
15
16groupe3 = [
17    "Bêta-Binomiale (n=2, α=3, β=3)",
18    "Bêta-Binomiale (n=5, α=5, β=20)",
19    "Bêta-Binomiale (n=3, α=5, β=10)",
20    "Bêta-Binomiale (n=10, α=5, β=45)",
21]
 1periode_lissage = 15
 2
 3p_value_df["Hyper-Géométrique (N=100, n=10, p=0.1)"].rolling(
 4    window=periode_lissage,
 5).mean().plot(figsize=(10, 10))
 6
 7plt.title(
 8    "Évolution de la p-value (test KS loi exponentielle) pour $Z_n / n$"
 9    f"\n(lissé avec une fenêtre de taille {periode_lissage})",
10)
11plt.plot(
12    list(range(periode_lissage, nb_repetitions)),
13    [alpha for _ in range(periode_lissage, nb_repetitions)],
14    label=r"$\alpha = 0.05$",
15    color="black",
16    linestyle="dashed",
17)
18plt.legend()
19plt.savefig("data/plots/evolution/p-value-hypergeom.png")
../_images/cc7ce444990481408126e38e3dc8740e99cf52a81de633560e1c8f3fc46aeee6.png
 1periode_lissage = 15
 2
 3p_value_df[groupe1].rolling(window=periode_lissage).mean().plot(figsize=(10, 10))
 4
 5plt.title(
 6    "Évolution des p-values (test KS loi exponentielle) pour $Z_n / n$ par loi"
 7    f"\n(lissé avec une fenêtre de taille {periode_lissage})",
 8)
 9plt.plot(
10    list(range(periode_lissage, nb_repetitions)),
11    [alpha for _ in range(periode_lissage, nb_repetitions)],
12    label=r"$\alpha = 0.05$",
13    color="black",
14    linestyle="dashed",
15)
16plt.legend()
17plt.savefig("data/plots/evolution/p-value-group1.png")
../_images/27fca94931b13d55b4fc7eab6c576d869d5a72eb849ece13228039103950df24.png
 1periode_lissage = 15
 2
 3p_value_df[groupe2].rolling(window=periode_lissage).mean().plot(figsize=(10, 10))
 4
 5plt.title(
 6    "Évolution des p-values (test KS loi exponentielle) pour $Z_n / n$ par loi"
 7    f"\n(lissé avec une fenêtre de taille {periode_lissage})",
 8)
 9plt.plot(
10    list(range(periode_lissage, nb_repetitions)),
11    [alpha for _ in range(periode_lissage, nb_repetitions)],
12    label=r"$\alpha = 0.05$",
13    color="black",
14    linestyle="dashed",
15)
16plt.legend()
17
18plt.savefig("data/plots/evolution/p-value-group2.png")
../_images/11c9da51efbf6c1a2bae976397c7ead9e4072e8d1ee5aff1f44c185334b356f8.png
 1periode_lissage = 15
 2
 3p_value_df[groupe3].rolling(window=periode_lissage).mean().plot(figsize=(10, 10))
 4
 5plt.title(
 6    "Évolution des p-values (test KS loi exponentielle) pour $Z_n / n$ par loi"
 7    f"\n(lissé avec une fenêtre de taille {periode_lissage})",
 8)
 9plt.plot(
10    list(range(periode_lissage, nb_repetitions)),
11    [alpha for _ in range(periode_lissage, nb_repetitions)],
12    label=r"$\alpha = 0.05$",
13    color="black",
14    linestyle="dashed",
15)
16plt.legend()
17
18plt.savefig("data/plots/evolution/p-value-group3.png")
../_images/d154b241707108b638a0fb42833521fafac520ce4efc8fc26451dfb79c5c2994.png

Statistique KS#

 1for group_name, group in (
 2    ("group1", groupe1),
 3    ("group2", groupe2),
 4    ("group3", groupe3),
 5):
 6    ks_df[group].plot(figsize=(10, 10))
 7
 8    plt.title(
 9        "Évolution de la valeur du test KS (loi exponentielle) pour $Z_n / n$ par loi",
10    )
11
12    plt.savefig(f"data/plots/evolution/ks-{group_name}.png")
../_images/6cfad71dcbe2ddc42a8e3682bd0f1333c49b2d9e179dbd877f51ecd659c3f6cb.png ../_images/66c4bd906e666e43de262e1fb541eef37af99955bc8d64328789bee008482c3f.png ../_images/4b566d6b830848fedbf4fe4f89c1c8ac04e0e65308f57e423dc8d01e3c8c4aeb.png
1ks_df.plot(figsize=(15, 10))
2
3plt.title(
4    "Évolution de la valeur du test KS (loi exponentielle) pour $Z_n / n$ par loi",
5)
6
7plt.savefig("data/plots/evolution/ks-all.png")
../_images/39306e352da5b4c174f78b9f941797d456a9f8c4e14daa2438f90c122c451068.png

Distance au lambda théorique#

 1for group_name, group in (
 2    ("group1", groupe1),
 3    ("group2", groupe2),
 4    ("group3", groupe3),
 5):
 6    index = [list(lambda_df.columns).index(group[i]) for i in range(len(group))]
 7
 8    abs(lambda_df[group] - lambda_array[index]).plot(figsize=(10, 10))
 9
10    plt.title(
11        r"Évolution de la distance au lambda théorique $|\lambda_{théorique} - \lambda_{\hat{EMV}}|$ pour $Z_n / n$ par loi",
12    )
13
14    plt.savefig(f"data/plots/evolution/lambda-distance-{group_name}.png")
../_images/d8b52b189167c69cbc11d6ae65454bbb2aa8b7df487272f7dfc54d17580ce9c6.png ../_images/f8fba886d392a905b9a5f1161310d1ded18dedc7bf9ca371f1e5825edfc771bb.png ../_images/5be292c72c98763c3bdc44026a5a2203a4d59256069dbe0e4e1fbfc4a96f7e97.png
1abs(lambda_df - lambda_array).plot(figsize=(15, 10))
2
3plt.title(
4    r"Évolution de la distance au lambda théorique $|\lambda_{théorique} - \lambda_{\hat{EMV}}|$ pour $Z_n / n$ par loi",
5)
6
7plt.savefig("data/plots/evolution/lambda-distance-all.png")
../_images/1922fe2bf8b4b6634b75f8f87e825d42e20258e1f160ef15b98d24f7e3377af7.png

Évolution de la distribution#

 1sim = SimulateurGaltonWatson(
 2    distributions["Hyper-Géométrique (N=10, n=2, p=0.5)"],
 3    nb_processus=10_000,
 4)
 5taille_pas = 5
 6nb_repetitions = 10
 7taille_echantillon = 100
 8
 9for i in range(nb_repetitions):
10    sim.simule(nb_epoques=taille_pas)
11    sim.retire_processus_eteints()
12
13    zn_sur_n = sim.get_zn_sur_n()
14    zn_sur_n_sample = zn_sur_n[taille_echantillon:]
15
16    plt.title(
17        r"Distribution des $\dfrac{Z_n}{n}$ à l'époque $n = "
18        + str((i + 1) * taille_pas)
19        + "$",
20    )
21    sns.histplot(zn_sur_n_sample, stat="density")
22    plt.savefig(f"data/results/distribution/hyper-geo-{(i + 1) * taille_pas}.png")
23    plt.show()
24
25    p_value, statistique_ks = test_loi_exponentielle(zn_sur_n)
26    print(f"{p_value = }")
../_images/e0db692f4f112fe8afb8bfe74e3c74bd8ca27622370255ad4d706bd2d395bf9c.png
p_value = 0.0
../_images/d4e8b7911fcff794ac04576547563d09d301514f5a1f0908b229abaf993f5253.png
p_value = 9.622011434558595e-152
../_images/1e648ee7780e4e43fe2592f07422ff71e4f4e71612704db5fb667f77d2d96e96.png
p_value = 2.827435930932723e-70
../_images/87de093003958201c87109c1bac671fb702beffa6b5de78b6853186e1be5af0c.png
p_value = 2.8210546622397323e-39
../_images/7eb9d4fb5fad55aeb1f434cfa7b27699983a532d550ed4085598b2f4da51c586.png
p_value = 3.2216117932465623e-24
../_images/495e2a955a7f80b19e125f1e896dee7ffb988ad7b1abdee8d64ab95ce1ffdb13.png
p_value = 7.887052073352042e-20
../_images/c960ba78ebd9b129ace1a7e199aaeef11921970bdc4d44f31f3198b4aaaffe1e.png
p_value = 3.461167774975927e-12
../_images/87a11985c9e049ebccd9781efc32ab561036e9beb5ab741a9e4b5f7a901af296.png
p_value = 1.7994986428624622e-12
../_images/2c9f6e24b7bbf5ed4c634378e04317b71f67f4a3d69a6cb6a012aebff3118592.png
p_value = 7.08994488987903e-08
../_images/603aa0f205aa0413bfc0160beeaa825f974f092c4f7da73c29d1c056f3cf9c40.png
p_value = 3.728747180083857e-08

Graphique transparent (pour le style)#

 1periode_lissage = 15
 2p_value_df.rolling(window=periode_lissage).mean().plot(figsize=(15, 10), legend=None)
 3
 4plt.plot(
 5    list(range(periode_lissage, nb_repetitions)),
 6    [alpha for _ in range(periode_lissage, nb_repetitions)],
 7    label=r"$\alpha = 0.05$",
 8    color="black",
 9    linestyle="dashed",
10)
11plt.grid(False)
12
13# Remove both x and y axes
14plt.axis("off")
15# Hide major ticks and labels on the x-axis
16plt.tick_params(axis="x", which="major", length=0, labelbottom=False)
17
18# Hide major ticks and labels on the y-axis
19plt.tick_params(axis="y", which="major", length=0, labelleft=False)
20# plt.savefig("assets/img/p-values-evolution-all-laws.svg")
21
22plt.savefig("assets/img/p-values-evolution-all-laws-transparent.png", transparent=True)
../_images/9fe53747005dbe64c4d2bc086e3deb29b15ac488ba6d130d783a691ff95b5dbb.png