From f04b6c6863807cfb910289a499dd7bc9577de0c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gi=C3=B9=20Marcer?= Date: Thu, 11 Jun 2020 18:30:30 +0200 Subject: [PATCH] sections: fix and add a lot of things --- slides/sections/0.md | 8 +++ slides/sections/4.md | 3 +- slides/sections/5.md | 35 +++++------- slides/sections/6.md | 126 +++++++++++++++++++++++++++---------------- slides/sections/7.md | 31 +++++++---- slides/sections/8.md | 77 ++++++++++++++++---------- 6 files changed, 174 insertions(+), 106 deletions(-) diff --git a/slides/sections/0.md b/slides/sections/0.md index 3d26702..2a4e652 100644 --- a/slides/sections/0.md +++ b/slides/sections/0.md @@ -28,6 +28,14 @@ references: container-title: Journal of Econometrics issued: year: 2015 + - type: book + id: silver86 + author: + family: Silverman + given: Bernard W. + title: Density Estimation for Statistics and Data Analysis + issued: + year: 1986 header-includes: | ```{=latex} diff --git a/slides/sections/4.md b/slides/sections/4.md index 874d5eb..b2c6f31 100644 --- a/slides/sections/4.md +++ b/slides/sections/4.md @@ -177,7 +177,6 @@ $$ exp(-\x*\x) + exp(-(\x - 1.4)*(\x - 1.4)) + exp(-(\x - 0.8)*(\x - 0.8)) + 0.1}); - \end{tikzpicture} \end{center} \setbeamercovered{transparent} @@ -187,7 +186,7 @@ $$ ## Sample FWHM -Silverman's rule of thumb: +Silverman's rule of thumb [@silver86]: $$ \varepsilon = 0.88 \, S_N diff --git a/slides/sections/5.md b/slides/sections/5.md index d14c55e..c1e232a 100644 --- a/slides/sections/5.md +++ b/slides/sections/5.md @@ -3,14 +3,10 @@ ## KS -Quantify distance between expected and observed CDF - -. . . +Quantify distance between expected and observed CDF. KS statistic: :::: {.columns} ::: {.column width=50% .c} - KS statistic: - $$ D_N = \text{sup}_x |F_N(x) - F(x)| $$ @@ -22,23 +18,22 @@ Quantify distance between expected and observed CDF - sort points in ascending order - number of points preceding the point normalized by $N$ + . . . 
+ ::: ::: {.column width=50%} \setbeamercovered{} \begin{center} \begin{tikzpicture} - % axes - \draw [thick, ->] (-2.5,0) -- (0,0) -- (0,4.5); - \draw [thick, ->] (0,0) -- (2.5,0); % empiric \draw [cyclamen, fill=cyclamen!20!white] (-2.5,0) rectangle (-1.5,0.5); \draw [cyclamen, fill=cyclamen!20!white] (-1.5,0) rectangle (-0.9,1); \draw [cyclamen, fill=cyclamen!20!white] (-0.9,0) rectangle (-0.6,1.5); - \draw [cyclamen, fill=cyclamen!20!white] (-0.6,0) rectangle ( 0.2,2); - \draw [cyclamen, fill=cyclamen!20!white] ( 0.2,0) rectangle ( 0.5,2.5); - \draw [cyclamen, fill=cyclamen!20!white] ( 0.5,0) rectangle ( 0.8,3); - \draw [cyclamen, fill=cyclamen!20!white] ( 0.8,0) rectangle ( 1.6,3.5); + \draw [cyclamen, fill=cyclamen!20!white] (-0.6,0) rectangle ( 0.5,2); + \draw [cyclamen, fill=cyclamen!20!white] ( 0.5,0) rectangle ( 0.7,2.5); + \draw [cyclamen, fill=cyclamen!20!white] ( 0.7,0) rectangle ( 1.2,3); + \draw [cyclamen, fill=cyclamen!20!white] ( 1.2,0) rectangle ( 1.6,3.5); \draw [cyclamen, fill=cyclamen!20!white] ( 1.6,0) rectangle ( 2.3,4); \draw [cyclamen, fill=cyclamen!20!white] ( 2.3,0) rectangle ( 2.5,4.5); % points @@ -46,9 +41,9 @@ Quantify distance between expected and observed CDF \draw [blue!50!black, fill=blue] (-1.6,-0.1) rectangle (-1.4,0.1); %-1.5 \draw [blue!50!black, fill=blue] (-1,-0.1) rectangle (-0.8,0.1); %-0.9 \draw [blue!50!black, fill=blue] (-0.7,-0.1) rectangle (-0.5,0.1); %-0.6 - \draw [blue!50!black, fill=blue] (0.1,-0.1) rectangle (0.3,0.1); % 0.2 \draw [blue!50!black, fill=blue] (0.4,-0.1) rectangle (0.6,0.1); % 0.5 - \draw [blue!50!black, fill=blue] (0.7,-0.1) rectangle (0.9,0.1); % 0.8 + \draw [blue!50!black, fill=blue] (0.6,-0.1) rectangle (0.8,0.1); % 0.7 + \draw [blue!50!black, fill=blue] (1.1,-0.1) rectangle (1.3,0.1); % 1.2 \draw [blue!50!black, fill=blue] (1.5,-0.1) rectangle (1.7,0.1); % 1.6 \draw [blue!50!black, fill=blue] (2.2,-0.1) rectangle (2.4,0.1); % 2.3 % expected @@ -56,7 +51,7 @@ Quantify distance between expected 
and observed CDF \draw[domain=-2.5:2.5, yscale=5, smooth, variable=\x, blue, very thick] plot ({\x}, {((atan(\x)*pi/180) + pi/2)/pi}); \pause - \draw [very thick, cyclamen] (0.8,3.6) -- (0.8,4.05); + \draw [very thick, cyclamen, <->] (0.5,2.5) -- (0.5,3.25); \end{tikzpicture} \end{center} \setbeamercovered{transparent} @@ -70,19 +65,17 @@ $H_0$: points sampled according to $F(x)$ . . . -If $H_0$ is true: +If $H_0$ is true: $\sqrt{N}D_N \xrightarrow{N \rightarrow + \infty} K$ -- $\sqrt{N}D_N \xrightarrow{N \rightarrow + \infty} K$ - -Kolmogorov distribution with CDF: +$K$ Kolmogorov variable with CDF: $$ - P(K \leqslant K_0) = 1 - p = \frac{\sqrt{2 \pi}}{K_0} + P(K \leqslant K_0) = \frac{\sqrt{2 \pi}}{K_0} \sum_{j = 1}^{+ \infty} e^{-(2j - 1)^2 \pi^2 / 8 K_0^2} $$ . . . -a $p$-value can be computed +A $p$-value can be computed - At 95% confidence level, $H_0$ cannot be disproved if $p > 0.05$ diff --git a/slides/sections/6.md b/slides/sections/6.md index c045b40..49a9b28 100644 --- a/slides/sections/6.md +++ b/slides/sections/6.md @@ -1,53 +1,75 @@ # Trapani test -## A pathological distribution - -Because of its fat tail: -\begin{align*} - \mu_1 &= \text{E}\left[|x|\right] \longrightarrow + \infty \\ - \mu_2 &= \text{E}\left[|x|^2\right] \longrightarrow + \infty -\end{align*} - -. . . - -No closed form for parameters $\thus$ numerical estimations - -. . . - -For a Moyal PDF: -\begin{align*} - E_M[x] &= \mu + \sigma [ \gamma + \ln(2) ] \\ - V_M[x] &= \frac{\pi^2 \sigma^2}{2} -\end{align*} - - -## Infinite moments - -- Check whether a moment is finite or infinite -\begin{align*} - \text{infinite} &\thus Landau \\ - \text{finite} &\thus Moyal -\end{align*} - -. . . 
- - -# Trapani test - - ## Trapani test ::: incremental - Random variable $\left\{ x_i \right\}$ sampled from a distribution $f$ - - Sample moments according to $f$ moments + - Sample moments estimate as $f$ moments - $H_0$: $\mu_k \longrightarrow + \infty$ - - Statistic with 1 dof chi-squared distribution + - Statistic with 1 dof $\chi^2$ distribution + - $p$-value $\hence$ reject or accept $H_0$ ::: +## Infinite moments + +- Generate a sample $L$ from a Landau PDF +- Generate a sample $M$ from a Moyal PDF + +. . . + +\vspace{20pt} + +:::: {.columns} +::: {.column width=50% .c} + For the Landau PDF: + \begin{align*} + \mu_1 &= \text{E}\left[|x|\right] = + \infty \\ + \mu_2 &= \text{E}\left[|x|^2\right] = + \infty + \end{align*} +::: + +::: {.column width=50%} + . . . + + For the Moyal PDF: + \begin{align*} + \mu_1 &= \text{E}\left[|x|\right] < + \infty \\ + \mu_2 &= \text{E}\left[|x|^2\right] < + \infty + \end{align*} +::: +:::: + + +## Infinite moments + +- Previous tests: points sampled from Landau PDF? + +. . . + +- Trapani test: check whether a moment is finite or infinite +\begin{align*} + \text{infinite} &\thus \text{Landau} \\ + \text{finite} &\thus \text{not Landau} +\end{align*} + +. . . + +- Compatibility test with $\mu_k = + \infty$ + +. . . + +- If points were sampled from a Cauchy distribution... + + +## Trapani test + +![](images/cauchy-pdf.pdf) + + ## Trapani test - Start with $\left\{ x_i \right\}^N$ and compute $\mu_k$ as: @@ -61,14 +83,19 @@ For a Moyal PDF: $\left\{ a_j \right\}^r$ as: $$ a_j = \sqrt{e^{\mu_k}} \cdot \xi_j - \thus G'\left( 0, \sqrt{e^{\mu_k}} \right) + \thus G\left( 0, \sqrt{e^{\mu_k}} \right) $$ . . . 
-The greater $\mu^k$, the 'larger' $G'$ - -- if $\mu_k \longrightarrow + \infty \thus a_j$ distributed uniformly +The greater $\mu_k$, the 'larger' $G\left( 0, \sqrt{e^{\mu_k}} \right)$ +$$ +\begin{cases} + \mu_k \longrightarrow + \infty \\ + r \longrightarrow + \infty +\end{cases} +\thus a_j \text{ distributed uniformly} +$$ ## Trapani test @@ -115,9 +142,10 @@ The greater $\mu^k$, the 'larger' $G'$ . . . -If $a_j$ uniformly distributed and $N \rightarrow + \infty$: +If $a_j$ uniformly distributed: -- $\zeta_j (u)$ Bernoulli PDF with $P(\zeta_j (u) = 1) = \frac{1}{2}$ +- $\zeta_j (u)$ Bernoulli PDF with $P\left( \zeta_j (u) = 1 \right) = \frac{1}{2}$ + $\hence E[\zeta_j]_j = \frac{1}{2} \quad \wedge \quad V[\zeta_j]_j = \frac{1}{4}$ ## Trapani test @@ -130,7 +158,7 @@ $$ . . . -If $a_j$ uniformly distributed and $N \rightarrow + \infty$, for the CLT: +If $a_j$ uniformly distributed, for the CLT: $$ \sum_j \zeta_j (u) \hence G \left( \frac{r}{2}, \frac{r}{4} \right) @@ -151,15 +179,15 @@ $$ According to L. Trapani [@trapani15]: - $r = o(N) \hence r = N^{0.75}$ -- $\underbar{u} = 1 \quad \wedge \quad \bar{u} = 1$ -- $\psi(u) = \chi_{[\underbar{u}, \bar{u}]}$ +- $\underbar{u} = -1 \quad \wedge \quad \bar{u} = 1$ +- $\psi(u) = \frac{1}{\bar{u} - \underbar{u}} \, \chi_{[\underbar{u}, \bar{u}]}$ . . . $\mu_k$ must be scale invariant for $k > 1$: $$ - \tilde{\mu_k} = \frac{\mu_k}{ \left( \mu_{\phi} \right)^{k/\phi} } + \mu_k^* = \frac{\mu_k}{ \left( \mu_{\phi} \right)^{k/\phi} } \with \phi \in (0, k) $$ @@ -167,7 +195,9 @@ $$ ## Trapani test If $\mu_k \ne + \infty \hence \left\{ a_j \right\}$ are not uniformly distributed + \vspace{20pt} + Rewriting: $$ \vartheta (u) = \frac{2}{\sqrt{r}} @@ -178,4 +208,6 @@ $$ \vspace{20pt} +. . . + Residues become very large $\hence$ $p$-values decreases. 
diff --git a/slides/sections/7.md b/slides/sections/7.md index dcb0107..4063d65 100644 --- a/slides/sections/7.md +++ b/slides/sections/7.md @@ -60,22 +60,35 @@ This leads to more different medians: \end{align*} -## Compatibility test +## Landau sample -Comparing results: +Sample N random points following $L(x)$ $$ - p = 1 - \text{erf} \left( \frac{t}{\sqrt{2}} \right)\ \with - t = \frac{|x\ex - x\ob|}{\sqrt{\sigma\ex^2 + \sigma\ob^2}} + L(x) = \frac{1}{\pi} \int \limits_{0}^{+ \infty} + dt \, e^{-t \ln(t) -xt} \sin (\pi t) $$ -- $x\ex$ and $x\ob$ are the expected and observed values -- $\sigma\ex$ and $\sigma\ob$ are their absolute errors - . . . -At 95% confidence level, the values are compatible if: +gsl_ran_landau(gsl_rng) + + +## Moyal sample + +Sample N random points following $M_{\mu \sigma}(x)$ $$ - p > 0.05 + M_{\mu \sigma}(x) = \frac{1}{\sqrt{2 \pi} \sigma} \exp + \left[ - \frac{1}{2} \left( + \frac{x - \mu}{\sigma} + + e^{-\frac{x - \mu}{\sigma}} \right) \right] $$ + +. . . + +inverse transform sampling + +- sampling $y$ uniformly in [0, 1] $\hence x = Q_M(y)$ + + diff --git a/slides/sections/8.md b/slides/sections/8.md index fbe10db..ac73cba 100644 --- a/slides/sections/8.md +++ b/slides/sections/8.md @@ -1,18 +1,62 @@ -# Landau sample +# Results -## Sample +## Compatibility test -Sample N = 50'000 random points following $L(x)$ +Comparing sample properties: $$ - L(x) = \frac{1}{\pi} \int \limits_{0}^{+ \infty} - dt \, e^{-t \ln(t) -xt} \sin (\pi t) + p = 1 - \text{erf} \left( \frac{t}{\sqrt{2}} \right)\ \with + t = \frac{|x\ex - x\ob|}{\sqrt{\sigma\ex^2 + \sigma\ob^2}} $$ +- $x\ex$ and $x\ob$ are the expected and observed values +- $\sigma\ex$ and $\sigma\ob$ are their absolute errors + . . . 
-gsl_ran_Landau(gsl_rng) +At 95% confidence level, the values are compatible if: +$$ + p > 0.05 +$$ + + +## Compatibility test + +\setbeamercovered{} +\begin{center} +\begin{tikzpicture} + %notes + \draw [very thick, gray] (0,0) -- (0,3); + \draw [very thick, gray] (-1.45,1.5) -- (1.45,1.5); + \draw [very thick, gray] (-1.35,1.3) -- (-1.55,1.7); + \draw [very thick, gray] ( 1.35,1.3) -- ( 1.55,1.7); + \node [below] at (0,-0.7) {$x\ex$}; + \node [above right] at (1.5,1.5) {$2 \, \sqrt{\sigma\ex^2 + \sigma\ob^2}$}; + % axes + \draw [very thick, <->] (-5,4) -- (-5,0) -- (5,0); + % Gaussian + \draw [domain=-5:5, smooth, variable=\x, cyclamen, very thick] + plot ({\x}, {3*exp(-(\x*\x/3))}); + \pause + % area + \fill [domain=2:5, smooth, variable=\x, cyclamen!20!white, very thick] + (2,0) -- plot ({\x}, {3*exp(-(\x*\x/3))}) -- (5,0) -- cycle; + \fill [domain=-5:-2, smooth, variable=\x, cyclamen!20!white, very thick] + (-5,0) -- plot ({\x}, {3*exp(-(\x*\x/3))}) -- (-2,0) -- cycle; + % axes + \draw [very thick, <->] (-5,4) -- (-5,0) -- (5,0); + % Gaussian + \draw [domain=-5:5, smooth, variable=\x, cyclamen, very thick] + plot ({\x}, {3*exp(-(\x*\x/3))}); + %notes + \draw [thick, cyclamen] (-2,0) -- (-2,0.8); + \draw [thick, cyclamen] ( 2,0) -- ( 2,0.8); + \node at (2,-0.7) {$x\ob$}; +\end{tikzpicture} +\end{center} +\setbeamercovered{transparent} + ## Compatibility results: @@ -71,27 +115,6 @@ FWHM: :::: -# Moyal sample - - -## Sample - -Sample N = 50'000 random points following $M_{\mu \sigma}(x)$ - -$$ - M_{\mu \sigma}(x) = \frac{1}{\sqrt{2 \pi} \sigma} \exp - \left[ - \frac{1}{2} \left( - \frac{x - \mu}{\sigma} - + e^{-\frac{x - \mu}{\sigma}} \right) \right] -$$ - -. . . - -reverse sampling - -- sampling $y$ uniformly in [0, 1] $\hence x = Q_M(y)$ - - ## Compatibility results: Median: