2020-03-24 02:17:39 +01:00
|
|
|
|
/* This file contains functions to perform
|
|
|
|
|
* statistical tests on the points sampled
|
|
|
|
|
* from a Landau distribution.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <gsl/gsl_randist.h>
|
2020-06-09 13:46:51 +02:00
|
|
|
|
#include <gsl/gsl_sf_gamma.h>
|
|
|
|
|
#include <gsl/gsl_cdf.h>
|
2020-03-24 02:17:39 +01:00
|
|
|
|
#include <gsl/gsl_min.h>
|
2020-04-06 20:43:00 +02:00
|
|
|
|
#include <gsl/gsl_roots.h>
|
2020-03-24 02:17:39 +01:00
|
|
|
|
#include <gsl/gsl_sum.h>
|
|
|
|
|
|
|
|
|
|
#include "landau.h"
|
2020-06-09 13:46:51 +02:00
|
|
|
|
#include "bootstrap.h"
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Kolmogorov distribution CDF
|
|
|
|
|
* for sample size n and statistic D
|
|
|
|
|
*/
|
|
|
|
|
double kolmogorov_cdf(double D, int n) {
|
|
|
|
|
double x = sqrt(n) * D;
|
|
|
|
|
|
|
|
|
|
// trick to reduce estimate error
|
|
|
|
|
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
|
|
|
|
|
|
|
|
|
|
// calculate the first n_terms of the series
|
|
|
|
|
// Σ_k=1 exp(-(2k - 1)²π²/8x²)
|
|
|
|
|
size_t n_terms = 30;
|
|
|
|
|
double *terms = calloc(n_terms, sizeof(double));
|
|
|
|
|
for (size_t k=0; k<n_terms; k++) {
|
|
|
|
|
terms[k] = exp(-pow((2*(double)(k + 1) - 1)*M_PI/x, 2) / 8);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// do a transform to accelerate the convergence
|
|
|
|
|
double sum, abserr;
|
|
|
|
|
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
|
|
|
|
|
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
|
|
|
|
|
|
|
|
|
|
fprintf(stderr, "\n## Kolmogorov CDF\n");
|
2020-04-08 12:43:14 +02:00
|
|
|
|
fprintf(stderr, "accel sum: %g\n", sum);
|
|
|
|
|
fprintf(stderr, "plain sum: %g\n", s->sum_plain);
|
|
|
|
|
fprintf(stderr, "err: %g\n", abserr);
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
|
|
|
|
gsl_sum_levin_utrunc_free(s);
|
|
|
|
|
free(terms);
|
|
|
|
|
|
|
|
|
|
return sqrt(2*M_PI)/x * sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-04-06 16:57:00 +02:00
|
|
|
|
/* This is a high-order function (ie a function that operates
|
|
|
|
|
* on functions) in disguise. It takes a function f and produces
|
|
|
|
|
* a function that computes -f. In lambda calculus it would be
|
|
|
|
|
* the map λf.λx -f(x).
|
|
|
|
|
*
|
|
|
|
|
* Since there is no notion of lambda functions in C (the
|
|
|
|
|
* standard one, at least) we use a trick involving the
|
|
|
|
|
* gsl_function struct: `negate_func(x, fp)` takes a point `x`
|
|
|
|
|
* and a gsl_function `fp` as the usual void pointer `params`.
|
|
|
|
|
* It then calls the function `fp.function` with `x` and their
|
|
|
|
|
* `fp.params` and return the negated result.
|
|
|
|
|
*
|
|
|
|
|
* So, given a `gsl_function f` its negated function is
|
|
|
|
|
* contructed as follows:
|
|
|
|
|
*
|
|
|
|
|
* gsl_function nf;
|
|
|
|
|
* nf.function = &negate_func;
|
|
|
|
|
* nf.params = &f;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
*/
|
2020-04-06 16:57:00 +02:00
|
|
|
|
double negate_func(double x, void * fp) {
|
|
|
|
|
gsl_function f = *((gsl_function*) fp);
|
|
|
|
|
return -f.function(x, f.params);
|
2020-03-24 02:17:39 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Numerically computes the mode of a Landau
|
|
|
|
|
* distribution by maximising the derivative.
|
2020-04-08 12:33:11 +02:00
|
|
|
|
* The `min,max` parameters are the initial search
|
2020-03-24 02:17:39 +01:00
|
|
|
|
* interval for the optimisation.
|
2020-04-08 12:33:11 +02:00
|
|
|
|
*
|
|
|
|
|
* If `err` is true print the estimate error.
|
2020-03-24 02:17:39 +01:00
|
|
|
|
*/
|
2020-04-06 16:57:00 +02:00
|
|
|
|
double numeric_mode(double min, double max,
|
2020-04-08 12:33:11 +02:00
|
|
|
|
gsl_function *pdf,
|
|
|
|
|
int err) {
|
2020-04-06 16:57:00 +02:00
|
|
|
|
|
|
|
|
|
/* Negate the PDF to maximise it by
|
|
|
|
|
* using a GSL minimisation method.
|
|
|
|
|
* (There usually are no maximisation methods)
|
|
|
|
|
*/
|
|
|
|
|
gsl_function npdf;
|
|
|
|
|
npdf.function = &negate_func;
|
|
|
|
|
npdf.params = pdf;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
|
|
|
|
// initialize minimization
|
2020-04-08 12:33:11 +02:00
|
|
|
|
double x = 0;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
int max_iter = 100;
|
|
|
|
|
double prec = 1e-7;
|
2020-06-08 13:44:31 +02:00
|
|
|
|
int status = GSL_CONTINUE;
|
2020-04-08 12:33:11 +02:00
|
|
|
|
const gsl_min_fminimizer_type *T = gsl_min_fminimizer_brent;
|
|
|
|
|
gsl_min_fminimizer *s = gsl_min_fminimizer_alloc(T);
|
|
|
|
|
gsl_min_fminimizer_set(s, &npdf, x, min, max);
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
2020-04-06 16:57:00 +02:00
|
|
|
|
// minimisation
|
2020-04-27 23:22:24 +02:00
|
|
|
|
for (int iter = 0; status == GSL_CONTINUE && iter < max_iter; iter++)
|
|
|
|
|
{
|
|
|
|
|
status = gsl_min_fminimizer_iterate(s);
|
|
|
|
|
x = gsl_min_fminimizer_x_minimum(s);
|
|
|
|
|
min = gsl_min_fminimizer_x_lower(s);
|
|
|
|
|
max = gsl_min_fminimizer_x_upper(s);
|
|
|
|
|
status = gsl_min_test_interval(min, max, 0, prec);
|
|
|
|
|
}
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
2020-04-06 16:57:00 +02:00
|
|
|
|
/* The error is simply given by the width of
|
|
|
|
|
* the final interval containing the solution
|
|
|
|
|
*/
|
2020-04-08 12:33:11 +02:00
|
|
|
|
if (err)
|
|
|
|
|
fprintf(stderr, "mode error: %.3g\n", max - min);
|
2020-04-04 20:12:58 +02:00
|
|
|
|
|
2020-04-06 16:57:00 +02:00
|
|
|
|
// free memory
|
2020-03-24 02:17:39 +01:00
|
|
|
|
gsl_min_fminimizer_free(s);
|
2020-04-08 12:33:11 +02:00
|
|
|
|
return x;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-04-06 20:43:00 +02:00
|
|
|
|
/* A structure containing the half-maximum
|
|
|
|
|
* of a PDF and a gsl_function of the PDF
|
|
|
|
|
* itself, used by `numeric_fwhm` to compute
|
|
|
|
|
* the FWHM.
|
2020-03-24 02:17:39 +01:00
|
|
|
|
*/
|
2020-04-06 20:43:00 +02:00
|
|
|
|
struct fwhm_params {
|
|
|
|
|
double halfmax;
|
|
|
|
|
gsl_function *pdf;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* This is the implicit equation that is solved
|
|
|
|
|
* in `numeric_fwhm` by a numerical root-finding
|
|
|
|
|
* method. This function takes a point `x`, the
|
|
|
|
|
* parameters defined in `fwhm_params` and returns
|
|
|
|
|
* the value of:
|
|
|
|
|
*
|
|
|
|
|
* f(x) - f(max)/2
|
|
|
|
|
*
|
|
|
|
|
* where f is the PDF of interest.
|
|
|
|
|
*/
|
|
|
|
|
double fwhm_equation(double x, void* params) {
|
|
|
|
|
struct fwhm_params p = *((struct fwhm_params*) params);
|
|
|
|
|
return p.pdf->function(x, p.pdf->params) - p.halfmax;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-04-06 20:43:00 +02:00
|
|
|
|
/* Numerically computes the FWHM of a PDF
|
|
|
|
|
* distribution using the definition.
|
2020-03-24 02:17:39 +01:00
|
|
|
|
* The `min,max` parameters are the initial search
|
2020-04-06 20:43:00 +02:00
|
|
|
|
* interval for the root search of equation
|
|
|
|
|
* `fwhm_equation`. Two searches are performed in
|
|
|
|
|
* [min, mode] and [mode, max] that will give the
|
|
|
|
|
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
2020-04-08 12:33:11 +02:00
|
|
|
|
*
|
|
|
|
|
* If `err` is true print the estimate error.
|
2020-03-24 02:17:39 +01:00
|
|
|
|
*/
|
2020-04-06 20:43:00 +02:00
|
|
|
|
double numeric_fwhm(double min, double max,
|
2020-04-08 12:33:11 +02:00
|
|
|
|
gsl_function *pdf,
|
|
|
|
|
int err) {
|
2020-04-06 20:43:00 +02:00
|
|
|
|
/* Create the gls_function structure
|
|
|
|
|
* for the equation to be solved.
|
|
|
|
|
*/
|
2020-04-08 12:33:11 +02:00
|
|
|
|
double mode = numeric_mode(min, max, pdf, 0);
|
2020-04-06 20:43:00 +02:00
|
|
|
|
struct fwhm_params p = {
|
|
|
|
|
pdf->function(mode, pdf->params)/2,
|
|
|
|
|
pdf
|
|
|
|
|
};
|
|
|
|
|
gsl_function equation;
|
|
|
|
|
equation.function = &fwhm_equation;
|
|
|
|
|
equation.params = &p;
|
|
|
|
|
|
2020-04-08 12:33:11 +02:00
|
|
|
|
const gsl_root_fsolver_type *T = gsl_root_fsolver_brent;
|
|
|
|
|
gsl_root_fsolver *s = gsl_root_fsolver_alloc(T);
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
|
|
|
|
// initialize minimization for x₋
|
2020-04-06 20:43:00 +02:00
|
|
|
|
double x, fmin, fmax;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
int max_iter = 100;
|
|
|
|
|
double prec = 1e-7;
|
|
|
|
|
int status;
|
|
|
|
|
|
|
|
|
|
// minimization
|
2020-04-06 20:43:00 +02:00
|
|
|
|
gsl_root_fsolver_set(s, &equation, min, mode);
|
|
|
|
|
status = GSL_CONTINUE;
|
|
|
|
|
for (int iter = 0; status == GSL_CONTINUE && iter < max_iter; iter++)
|
|
|
|
|
{ status = gsl_root_fsolver_iterate(s);
|
|
|
|
|
x = gsl_root_fsolver_root(s);
|
|
|
|
|
fmin = gsl_root_fsolver_x_lower(s);
|
|
|
|
|
fmax = gsl_root_fsolver_x_upper(s);
|
|
|
|
|
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
|
|
|
|
}
|
|
|
|
|
double x_low = x;
|
2020-04-08 12:33:11 +02:00
|
|
|
|
double err_low = fmax - fmin;
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
|
|
|
|
// initialize minimization for x₊
|
2020-04-06 20:43:00 +02:00
|
|
|
|
gsl_root_fsolver_set(s, &equation, mode, max);
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
|
|
|
|
// minimization
|
2020-04-06 20:43:00 +02:00
|
|
|
|
status = GSL_CONTINUE;
|
|
|
|
|
for (int iter = 0; status == GSL_CONTINUE && iter < max_iter; iter++)
|
|
|
|
|
{ status = gsl_root_fsolver_iterate(s);
|
|
|
|
|
x = gsl_root_fsolver_root(s);
|
|
|
|
|
fmin = gsl_root_fsolver_x_lower(s);
|
|
|
|
|
fmax = gsl_root_fsolver_x_upper(s);
|
|
|
|
|
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
|
|
|
|
}
|
|
|
|
|
double x_upp = x;
|
2020-04-08 12:33:11 +02:00
|
|
|
|
double err_upp = fmax - fmin;
|
|
|
|
|
|
|
|
|
|
if (err)
|
|
|
|
|
fprintf(stderr, "fhwm error: %g\n",
|
|
|
|
|
sqrt(err_low*err_low + err_upp*err_upp));
|
2020-03-24 02:17:39 +01:00
|
|
|
|
|
2020-04-06 20:43:00 +02:00
|
|
|
|
// free memory
|
|
|
|
|
gsl_root_fsolver_free(s);
|
2020-03-24 02:17:39 +01:00
|
|
|
|
return x_upp - x_low;
|
|
|
|
|
}
|
2020-06-09 13:46:51 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* The absolute moment of given `order`
|
|
|
|
|
* of the normal distribution N(0, 1).
|
|
|
|
|
*
|
|
|
|
|
* μ_k = E[|X|^k] = 2^(k/2) Γ((k+1)/2) / √π
|
|
|
|
|
*/
|
|
|
|
|
double normal_moment(double order) {
|
|
|
|
|
return pow(2, order/2) * gsl_sf_gamma((order+1)/2) / sqrt(M_PI);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Trapani infinite moment test
|
|
|
|
|
*
|
|
|
|
|
* Tests whether the `order`-th moment of the
|
|
|
|
|
* distribution of `sample` is infinite.
|
|
|
|
|
* The test generates an artificial sample of
|
|
|
|
|
* r points, where r = n^θ and returns a p-value.
|
|
|
|
|
*/
|
|
|
|
|
double trapani(gsl_rng *rng, double theta, double psi,
|
|
|
|
|
int order, double *sample, size_t n) {
|
|
|
|
|
fprintf(stderr, "\n## Trapani test (k = %d)\n", order);
|
|
|
|
|
|
|
|
|
|
/* Compute the moment μ_k and rescale it
|
|
|
|
|
* to make it scale-free. This is done by
|
|
|
|
|
* also computing the moment μ_ψ and taking
|
|
|
|
|
*
|
|
|
|
|
* m_k = μ_k / μ_ψ^(k/ψ)
|
|
|
|
|
*
|
|
|
|
|
* where:
|
|
|
|
|
* 1. ψ ∈ (0, k)
|
|
|
|
|
* 2. k is the `order` of the moment
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
double moment = 0;
|
|
|
|
|
double moment_psi = 0;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
|
moment += pow(fabs(sample[i]), order);
|
|
|
|
|
moment_psi += pow(fabs(sample[i]), psi);
|
|
|
|
|
}
|
|
|
|
|
moment /= n;
|
|
|
|
|
moment_psi /= n;
|
|
|
|
|
fprintf(stderr, "%d-th sample moment: %.2g\n", order, moment);
|
|
|
|
|
|
|
|
|
|
/* Rescale the moment
|
|
|
|
|
* Note: k/ψ = 2 becase ψ=k/2
|
|
|
|
|
*/
|
|
|
|
|
if (psi > 0) {
|
|
|
|
|
fprintf(stderr, "%g-th sample moment: %.2g\n", psi, moment_psi);
|
|
|
|
|
moment /= pow(moment_psi, order/psi);
|
|
|
|
|
|
|
|
|
|
/* Rescale further by the standard gaussian
|
|
|
|
|
* moments.
|
|
|
|
|
*
|
|
|
|
|
* m_k' = m_k (μ_ψ)^(k/ψ) / μ_k
|
|
|
|
|
*/
|
|
|
|
|
moment *= pow(normal_moment(psi), order/psi) / normal_moment(order);
|
|
|
|
|
|
|
|
|
|
fprintf(stderr, "rescaled moment: %.2g\n", moment);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate r points from a standard
|
|
|
|
|
* normal distribution and multiply
|
|
|
|
|
* them by √(e^m_k).
|
|
|
|
|
*/
|
|
|
|
|
size_t r = round(pow(n, theta));
|
|
|
|
|
|
|
|
|
|
fprintf(stderr, "n: %ld\n", n);
|
|
|
|
|
fprintf(stderr, "r: %ld\n", r);
|
|
|
|
|
double factor = sqrt(exp(moment));
|
|
|
|
|
double *noise = calloc(r, sizeof(double));
|
|
|
|
|
for (size_t i = 0; i < r; i++) {
|
|
|
|
|
noise[i] = factor * gsl_ran_gaussian(rng, 1); // σ = 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compute the intel of the squared
|
|
|
|
|
* sum of the residues:
|
|
|
|
|
*
|
|
|
|
|
* Θ = ∫[-L, L] sum(u) φ(u)du
|
|
|
|
|
*
|
|
|
|
|
* where sum(u) = 2/√r (Σ_j=0 ^r θ(u - noise[j]) - 1/2)
|
|
|
|
|
*
|
|
|
|
|
* L = 1 and φ(u)=1/2L is a good enough choice.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// sort the noise sample
|
|
|
|
|
qsort(noise, r, sizeof(double), &cmp_double);
|
|
|
|
|
|
|
|
|
|
double integral = 0;
|
|
|
|
|
double L = 1;
|
|
|
|
|
for (size_t i = 0; i < r; i++) {
|
|
|
|
|
if (noise[i] < -L) {
|
|
|
|
|
if (i+1 < r && noise[i+1] < -L)
|
|
|
|
|
factor = 0;
|
|
|
|
|
else if (i+1 < r && noise[i+1] < L)
|
|
|
|
|
factor = noise[i+1] + L;
|
|
|
|
|
else
|
|
|
|
|
factor = 2*L;
|
|
|
|
|
}
|
|
|
|
|
else if (noise[i] < L) {
|
|
|
|
|
if (i+1 < r && noise[i+1] < L)
|
|
|
|
|
factor = noise[i+1] - noise[i];
|
|
|
|
|
else
|
|
|
|
|
factor = L - noise[i];
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
factor = 0;
|
|
|
|
|
integral += (i + 1)*((i + 1)/(double)r - 1) * factor;
|
|
|
|
|
}
|
|
|
|
|
integral = r + 2/L * integral;
|
|
|
|
|
fprintf(stderr, "Θ=%g\n", integral);
|
|
|
|
|
|
|
|
|
|
// free memory
|
|
|
|
|
free(noise);
|
|
|
|
|
|
|
|
|
|
/* Assuming Θ is distributed as a χ² with
|
|
|
|
|
* 1 DoF, compute the the p-value:
|
|
|
|
|
*
|
|
|
|
|
* p = P(χ² > Θ ; ν=1)
|
|
|
|
|
*/
|
|
|
|
|
return gsl_cdf_chisq_Q(integral, 1);
|
|
|
|
|
}
|