analistica/ex-1/tests.c

347 lines
9.3 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* This file contains functions to perform
* statistical tests on the points sampled
* from a Landau distribution.
*/
#include <stdio.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_roots.h>
#include <gsl/gsl_sum.h>
#include "landau.h"
#include "bootstrap.h"
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
// calculate the first n_terms of the series
// Σ_k=1 exp(-(2k - 1)²π²/8x²)
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=0; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)(k + 1) - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n## Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %g\n", sum);
fprintf(stderr, "plain sum: %g\n", s->sum_plain);
fprintf(stderr, "err: %g\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a high-order function (ie a function that operates
* on functions) in disguise. It takes a function f and produces
* a function that computes -f. In lambda calculus it would be
* the map λf.λx -f(x).
*
* Since there is no notion of lambda functions in C (the
* standard one, at least) we use a trick involving the
* gsl_function struct: `negate_func(x, fp)` takes a point `x`
* and a gsl_function `fp` as the usual void pointer `params`.
* It then calls the function `fp.function` with `x` and their
* `fp.params` and return the negated result.
*
* So, given a `gsl_function f` its negated function is
* contructed as follows:
*
* gsl_function nf;
* nf.function = &negate_func;
* nf.params = &f;
*/
double negate_func(double x, void * fp) {
gsl_function f = *((gsl_function*) fp);
return -f.function(x, f.params);
}
/* Numerically computes the mode of a Landau
* distribution by maximising the derivative.
* The `min,max` parameters are the initial search
* interval for the optimisation.
*
* If `err` is true print the estimate error.
*/
double numeric_mode(double min, double max,
gsl_function *pdf,
int err) {
/* Negate the PDF to maximise it by
* using a GSL minimisation method.
* (There usually are no maximisation methods)
*/
gsl_function npdf;
npdf.function = &negate_func;
npdf.params = pdf;
// initialize minimization
double x = 0;
int max_iter = 100;
double prec = 1e-7;
int status = GSL_CONTINUE;
const gsl_min_fminimizer_type *T = gsl_min_fminimizer_brent;
gsl_min_fminimizer *s = gsl_min_fminimizer_alloc(T);
gsl_min_fminimizer_set(s, &npdf, x, min, max);
// minimisation
for (int iter = 0; status == GSL_CONTINUE && iter < max_iter; iter++)
{
status = gsl_min_fminimizer_iterate(s);
x = gsl_min_fminimizer_x_minimum(s);
min = gsl_min_fminimizer_x_lower(s);
max = gsl_min_fminimizer_x_upper(s);
status = gsl_min_test_interval(min, max, 0, prec);
}
/* The error is simply given by the width of
* the final interval containing the solution
*/
if (err)
fprintf(stderr, "mode error: %.3g\n", max - min);
// free memory
gsl_min_fminimizer_free(s);
return x;
}
/* A structure containing the half-maximum
* of a PDF and a gsl_function of the PDF
* itself, used by `numeric_fwhm` to compute
* the FWHM.
*/
struct fwhm_params {
double halfmax;
gsl_function *pdf;
};
/* This is the implicit equation that is solved
* in `numeric_fwhm` by a numerical root-finding
* method. This function takes a point `x`, the
* parameters defined in `fwhm_params` and returns
* the value of:
*
* f(x) - f(max)/2
*
* where f is the PDF of interest.
*/
double fwhm_equation(double x, void* params) {
struct fwhm_params p = *((struct fwhm_params*) params);
return p.pdf->function(x, p.pdf->params) - p.halfmax;
}
/* Numerically computes the FWHM of a PDF
* distribution using the definition.
* The `min,max` parameters are the initial search
* interval for the root search of equation
* `fwhm_equation`. Two searches are performed in
* [min, mode] and [mode, max] that will give the
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
*
* If `err` is true print the estimate error.
*/
double numeric_fwhm(double min, double max,
gsl_function *pdf,
int err) {
/* Create the gls_function structure
* for the equation to be solved.
*/
double mode = numeric_mode(min, max, pdf, 0);
struct fwhm_params p = {
pdf->function(mode, pdf->params)/2,
pdf
};
gsl_function equation;
equation.function = &fwhm_equation;
equation.params = &p;
const gsl_root_fsolver_type *T = gsl_root_fsolver_brent;
gsl_root_fsolver *s = gsl_root_fsolver_alloc(T);
// initialize minimization for x₋
double x, fmin, fmax;
int max_iter = 100;
double prec = 1e-7;
int status;
// minimization
gsl_root_fsolver_set(s, &equation, min, mode);
status = GSL_CONTINUE;
for (int iter = 0; status == GSL_CONTINUE && iter < max_iter; iter++)
{ status = gsl_root_fsolver_iterate(s);
x = gsl_root_fsolver_root(s);
fmin = gsl_root_fsolver_x_lower(s);
fmax = gsl_root_fsolver_x_upper(s);
status = gsl_min_test_interval(fmin, fmax, 0, prec);
}
double x_low = x;
double err_low = fmax - fmin;
// initialize minimization for x₊
gsl_root_fsolver_set(s, &equation, mode, max);
// minimization
status = GSL_CONTINUE;
for (int iter = 0; status == GSL_CONTINUE && iter < max_iter; iter++)
{ status = gsl_root_fsolver_iterate(s);
x = gsl_root_fsolver_root(s);
fmin = gsl_root_fsolver_x_lower(s);
fmax = gsl_root_fsolver_x_upper(s);
status = gsl_min_test_interval(fmin, fmax, 0, prec);
}
double x_upp = x;
double err_upp = fmax - fmin;
if (err)
fprintf(stderr, "fhwm error: %g\n",
sqrt(err_low*err_low + err_upp*err_upp));
// free memory
gsl_root_fsolver_free(s);
return x_upp - x_low;
}
/* The absolute moment of given `order`
* of the normal distribution N(0, 1).
*
* μ_k = E[|X|^k] = 2^(k/2) Γ((k+1)/2) / √π
*/
double normal_moment(double order) {
return pow(2, order/2) * gsl_sf_gamma((order+1)/2) / sqrt(M_PI);
}
/* Trapani infinite moment test
*
* Tests whether the `order`-th moment of the
* distribution of `sample` is infinite.
* The test generates an artificial sample of
* r points, where r = n^θ and returns a p-value.
*/
double trapani(gsl_rng *rng, double theta, double psi,
int order, double *sample, size_t n) {
fprintf(stderr, "\n## Trapani test (k = %d)\n", order);
/* Compute the moment μ_k and rescale it
* to make it scale-free. This is done by
* also computing the moment μ_ψ and taking
*
* m_k = μ_k / μ_ψ^(k/ψ)
*
* where:
* 1. ψ ∈ (0, k)
* 2. k is the `order` of the moment
*/
double moment = 0;
double moment_psi = 0;
for (size_t i = 0; i < n; i++) {
moment += pow(fabs(sample[i]), order);
moment_psi += pow(fabs(sample[i]), psi);
}
moment /= n;
moment_psi /= n;
fprintf(stderr, "%d-th sample moment: %.2g\n", order, moment);
/* Rescale the moment
* Note: k/ψ = 2 becase ψ=k/2
*/
if (psi > 0) {
fprintf(stderr, "%g-th sample moment: %.2g\n", psi, moment_psi);
moment /= pow(moment_psi, order/psi);
/* Rescale further by the standard gaussian
* moments.
*
* m_k' = m_k (μ_ψ)^(k/ψ) / μ_k
*/
moment *= pow(normal_moment(psi), order/psi) / normal_moment(order);
fprintf(stderr, "rescaled moment: %.2g\n", moment);
}
/* Generate r points from a standard
* normal distribution and multiply
* them by √(e^m_k).
*/
size_t r = round(pow(n, theta));
fprintf(stderr, "n: %ld\n", n);
fprintf(stderr, "r: %ld\n", r);
double factor = sqrt(exp(moment));
double *noise = calloc(r, sizeof(double));
for (size_t i = 0; i < r; i++) {
noise[i] = factor * gsl_ran_gaussian(rng, 1); // σ = 1
}
/* Compute the intel of the squared
* sum of the residues:
*
* Θ = ∫[-L, L] sum(u) φ(u)du
*
* where sum(u) = 2/√r (Σ_j=0 ^r θ(u - noise[j]) - 1/2)
*
* L = 1 and φ(u)=1/2L is a good enough choice.
*/
// sort the noise sample
qsort(noise, r, sizeof(double), &cmp_double);
double integral = 0;
double L = 1;
for (size_t i = 0; i < r; i++) {
if (noise[i] < -L) {
if (i+1 < r && noise[i+1] < -L)
factor = 0;
else if (i+1 < r && noise[i+1] < L)
factor = noise[i+1] + L;
else
factor = 2*L;
}
else if (noise[i] < L) {
if (i+1 < r && noise[i+1] < L)
factor = noise[i+1] - noise[i];
else
factor = L - noise[i];
}
else
factor = 0;
integral += (i + 1)*((i + 1)/(double)r - 1) * factor;
}
integral = r + 2/L * integral;
fprintf(stderr, "Θ=%g\n", integral);
// free memory
free(noise);
/* Assuming Θ is distributed as a χ² with
* 1 DoF, compute the the p-value:
*
* p = P(χ² > Θ ; ν=1)
*/
return gsl_cdf_chisq_Q(integral, 1);
}