From d1325a61c7fd203e31b5187de908977dcedd3a1d Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Tue, 9 Jun 2020 11:46:51 +0000 Subject: [PATCH] ex-1: add infinite moments test --- ex-1/main.c | 44 ++++++++++++------ ex-1/tests.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++- ex-1/tests.h | 12 +++++ 3 files changed, 167 insertions(+), 16 deletions(-) diff --git a/ex-1/main.c b/ex-1/main.c index 7d3c00e..157a60c 100644 --- a/ex-1/main.c +++ b/ex-1/main.c @@ -18,10 +18,10 @@ int main(int argc, char** argv) { size_t samples = 50000; char* distr = "lan"; - double m_params [2] = {-0.22278298, 1.1191486}; + double m_params[2] = {-0.22278298, 1.1191486}; /* Process CLI arguments */ - for (size_t i = 1; i < argc; i++) { + for (size_t i = 1; i < (size_t)argc; i++) { if (!strcmp(argv[i], "-n")) samples = atol(argv[++i]); else if (!strcmp(argv[i], "-m")) distr = argv[++i]; else { @@ -48,23 +48,20 @@ int main(int argc, char** argv) { */ fprintf(stderr, "# Sampling\n"); fprintf(stderr, "generating %ld points... ", samples); - double x; - /* Sample points from the Landau distribution using the GSL Landau generator or - * from the Moyal distribution using inverse sampling. + + /* Sample points from the Landau distribution + * using the GSL Landau generator or from the + * Moyal distribution using inverse sampling. */ for(size_t i=0; i < samples; i++){ - if (!strcmp(distr, "lan")){ - x = gsl_ran_landau(r); - sample[i] = x; - } - if (!strcmp(distr, "moy")){ - x = gsl_rng_uniform(r); - sample[i] = moyal_qdf(x, m_params); - } + if (!strcmp(distr, "lan")) + sample[i] = gsl_ran_landau(r); + if (!strcmp(distr, "moy")) + sample[i] = moyal_qdf(gsl_rng_uniform(r), m_params); } fprintf(stderr, "done\n"); - // sort the sample: needed for HSM and ks tests + // sort the sample: needed for HSM and KS tests qsort(sample, samples, sizeof(double), &cmp_double); @@ -81,7 +78,6 @@ int main(int argc, char** argv) { if (d > D) D = d; } - double beta = kolmogorov_cdf(D, samples); // print the results @@ -174,6 +170,24 @@ int main(int argc, char** argv) { fprintf(stderr, "p=%.3f\n", p); + /* Infinite moments test + * + * Apply the Trapani test for infinite moment + * to the mean and variance. + * + * Use r=n^0.75 points in both cases and rescale + * the variance by the ψ=2 moment. The mean is not + * rescaled. + */ + fprintf(stderr, "\n\n# Infinite moments test\n"); + double p_mean = trapani(r, 0.75, 0, 1, sample, samples); + double p_var = trapani(r, 0.75, 1, 2, sample, samples); + + // print the results + fprintf(stderr, "\n## Results\n"); + fprintf(stderr, "mean: p=%.3f\n", p_mean); + fprintf(stderr, "variance: p=%.3f\n", p_var); + // clean up and exit gsl_rng_free(r); free(sample); diff --git a/ex-1/tests.c b/ex-1/tests.c index a8c8a94..0e4b24e 100644 --- a/ex-1/tests.c +++ b/ex-1/tests.c @@ -5,11 +5,14 @@ #include #include +#include +#include #include #include #include #include "landau.h" +#include "bootstrap.h" /* Kolmogorov distribution CDF @@ -46,7 +49,6 @@ double kolmogorov_cdf(double D, int n) { } - /* This is a high-order function (ie a function that operates * on functions) in disguise. It takes a function f and produces * a function that computes -f. In lambda calculus it would be @@ -219,3 +221,126 @@ double numeric_fwhm(double min, double max, gsl_root_fsolver_free(s); return x_upp - x_low; } + + +/* The absolute moment of given `order` + * of the normal distribution N(0, 1). + * + * μ_k = E[|X|^k] = 2^(k/2) Γ((k+1)/2) / √π + */ +double normal_moment(double order) { + return pow(2, order/2) * gsl_sf_gamma((order+1)/2) / sqrt(M_PI); +} + + +/* Trapani infinite moment test + * + * Tests whether the `order`-th moment of the + * distribution of `sample` is infinite. + * The test generates an artificial sample of + * r points, where r = n^θ and returns a p-value. + */ +double trapani(gsl_rng *rng, double theta, double psi, + int order, double *sample, size_t n) { + fprintf(stderr, "\n## Trapani test (k = %d)\n", order); + + /* Compute the moment μ_k and rescale it + * to make it scale-free. This is done by + * also computing the moment μ_ψ and taking + * + * m_k = μ_k / μ_ψ^(k/ψ) + * + * where: + * 1. ψ ∈ (0, k) + * 2. k is the `order` of the moment + */ + + double moment = 0; + double moment_psi = 0; + + for (size_t i = 0; i < n; i++) { + moment += pow(fabs(sample[i]), order); + moment_psi += pow(fabs(sample[i]), psi); + } + moment /= n; + moment_psi /= n; + fprintf(stderr, "%d-th sample moment: %.2g\n", order, moment); + + /* Rescale the moment + * Note: k/ψ = 2 becase ψ=k/2 + */ + if (psi > 0) { + fprintf(stderr, "%g-th sample moment: %.2g\n", psi, moment_psi); + moment /= pow(moment_psi, order/psi); + + /* Rescale further by the standard gaussian + * moments. + * + * m_k' = m_k (μ_ψ)^(k/ψ) / μ_k + */ + moment *= pow(normal_moment(psi), order/psi) / normal_moment(order); + + fprintf(stderr, "rescaled moment: %.2g\n", moment); + } + + /* Generate r points from a standard + * normal distribution and multiply + * them by √(e^m_k). + */ + size_t r = round(pow(n, theta)); + + fprintf(stderr, "n: %ld\n", n); + fprintf(stderr, "r: %ld\n", r); + double factor = sqrt(exp(moment)); + double *noise = calloc(r, sizeof(double)); + for (size_t i = 0; i < r; i++) { + noise[i] = factor * gsl_ran_gaussian(rng, 1); // σ = 1 + } + + /* Compute the intel of the squared + * sum of the residues: + * + * Θ = ∫[-L, L] sum(u) φ(u)du + * + * where sum(u) = 2/√r (Σ_j=0 ^r θ(u - noise[j]) - 1/2) + * + * L = 1 and φ(u)=1/2L is a good enough choice. + */ + + // sort the noise sample + qsort(noise, r, sizeof(double), &cmp_double); + + double integral = 0; + double L = 1; + for (size_t i = 0; i < r; i++) { + if (noise[i] < -L) { + if (i+1 < r && noise[i+1] < -L) + factor = 0; + else if (i+1 < r && noise[i+1] < L) + factor = noise[i+1] + L; + else + factor = 2*L; + } + else if (noise[i] < L) { + if (i+1 < r && noise[i+1] < L) + factor = noise[i+1] - noise[i]; + else + factor = L - noise[i]; + } + else + factor = 0; + integral += (i + 1)*((i + 1)/(double)r - 1) * factor; + } + integral = r + 2/L * integral; + fprintf(stderr, "Θ=%g\n", integral); + + // free memory + free(noise); + + /* Assuming Θ is distributed as a χ² with + * 1 DoF, compute the the p-value: + * + * p = P(χ² > Θ ; ν=1) + */ + return gsl_cdf_chisq_Q(integral, 1); +} diff --git a/ex-1/tests.h b/ex-1/tests.h index 6320040..20ed513 100644 --- a/ex-1/tests.h +++ b/ex-1/tests.h @@ -3,6 +3,7 @@ * from a Landau distribution. */ +#include #include #pragma once @@ -38,3 +39,14 @@ double numeric_mode(double min, double max, double numeric_fwhm(double min, double max, gsl_function *pdf, int err); + + +/* Trapani infinite moment test + * + * Tests whether the `order`-th moment of the + * distribution of `sample` is infinite. + * The test generates an artificial sample of + * r points, where r = n^θ and returns a p-value. + */ +double trapani(gsl_rng *rng, double theta, double psi, + int order, double *sample, size_t n);