analistica/ex-1/main.c

366 lines
9.2 KiB
C
Raw Normal View History

2020-03-06 02:24:32 +01:00
#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_sum.h>
#include <gsl/gsl_roots.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_integration.h>
/* Function that compare doubles for sorting:
* x > y 1
* x == y 0
* x < y -1
*/
int cmp_double (const void *xp, const void *yp) {
double x = *(double*)xp,
y = *(double*)yp;
return x > y ? 1 : -1;
}
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params) {
return gsl_ran_landau_pdf(x);
}
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params) {
// create the integrand
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
// set up the integration
double res, err;
size_t iter = 500;
gsl_integration_workspace* w =
gsl_integration_workspace_alloc(iter);
// clip values too small
if (x < -200) x = -200;
// We integrate the pdf in [x, +∞) instead
// of (-∞, x] to avoid a singularity and
// then return 1 - P.
gsl_integration_qagiu(
&pdf, // integrand
x, // lower bound
1e-10, 1e-6, // abs and rel error
iter, // max iteration
w, // "workspace"
&res, &err); // result, abs error
// free the memory
gsl_integration_workspace_free(w);
return 1 - res;
}
/* Another wrapper: this time it is needed by
* `landau_qdf` to solve the equation:
* `landau_cdf(x) = p0` for x
*/
double landau_cdf_root(double x, void* params) {
double p0 = *(double*)params;
return p0 - landau_cdf(x, NULL);
}
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0) {
// create function
gsl_function cdf;
cdf.function = &landau_cdf_root;
cdf.params = &p0;
// use the Brent method
gsl_root_fsolver* s =
gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
// search interval
double low = -1000, // lower value
upp = 100000; // upper value
gsl_root_fsolver_set(s, &cdf, low, upp);
// iterative search
size_t iter = 1000; // max iteration
int stat = GSL_CONTINUE;
for (size_t i=0; stat==GSL_CONTINUE && i<iter; i++) {
stat = gsl_root_fsolver_iterate(s);
low = gsl_root_fsolver_x_lower(s);
upp = gsl_root_fsolver_x_upper(s);
stat = gsl_root_test_interval(low, upp, 0, 0.01);
}
return gsl_root_fsolver_root(s);
}
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
// calculate the first n_terms of the series
// Σ_k=1 exp(-(2k - 1)²π²/8x²)
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=1; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)k - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n# Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %f\n", sum);
fprintf(stderr, "plain sum: %f\n", s->sum_plain);
fprintf(stderr, "err: %f\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a wrapper needed by `numeric_mode` because
* the minimization expects a function to be minimized and not
* maximized.
*/
double neg_landau_pdf(double x, void* params) {
return (-1) * gsl_ran_landau_pdf(x);
}
/* Compute numerically the mode of the Landau distribution.
*/
double numeric_mode (double min, double max) {
// create funtion
gsl_function pdf;
pdf.function = &neg_landau_pdf;
pdf.params = NULL;
// initialize minimization
double guess = 0;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
// Free memory
gsl_min_fminimizer_free (s);
return guess;
}
/* This is the function to be minimized in `numeric_FWHM`.
*/
double abs_landau_pdf(double x, void* params_) {
double* params = ((double *) params_);
return fabs(gsl_ran_landau_pdf(x) - params[0]);
}
/* Compute numerically the FWHM of the Landau distribution.
*/
double numeric_fwhm (double min_lim, double mode, double max_lim) {
// create funtion
gsl_function pdf;
pdf.function = &abs_landau_pdf;
double params [1];
params[0]= gsl_ran_landau_pdf(mode)/2;
pdf.params = params;
// initialize minimization for x₋
double guess = mode - 1;
double min, max;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_low = guess;
// initialize minimization for x₊
guess = mode + 1;
gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_upp = guess;
// Free memory
gsl_min_fminimizer_free (s);
return x_upp - x_low;
}
/* Here we generate random numbers in a uniform
* range and by using the quantile we map them
* to a Landau distribution. Then we generate an
* histogram to check the correctness.
*/
int main(int argc, char** argv) {
// initialize an RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// prepare histogram
size_t samples = 100000;
double* sample = calloc(samples, sizeof(double));
size_t bins = 40;
double min = -20;
double max = 20;
gsl_histogram* hist = gsl_histogram_alloc(bins);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* sample points from the Landau distribution
* and fill the histogram
*/
double x;
for(size_t i=0; i<samples; i++) {
x = gsl_ran_landau(r);
sample[i] = x;
gsl_histogram_increment(hist, x);
}
// sort the sample
qsort(sample, samples, sizeof(double), &cmp_double);
// calculate Kolmogorov-Smirnov D
double D = 0;
double d;
for(size_t i=0; i<samples; i++) {
d = fabs(landau_cdf(sample[i], NULL) - ((double)i+1)/samples);
if (d > D)
D = d;
}
// Kolmogorov-Smirnov test
double beta = kolmogorov_cdf(D, samples);
fprintf(stderr, "\n# K-S test results\n");
fprintf(stderr, "D: %f\n", D);
fprintf(stderr, "α: %g\n", 1 - beta);
// Mode comparison
//
// find the bin with the maximum number of events
double mode_o, maxbin = 0;
double f_mode_o = 0;
double m1, m2 = 0;
for(size_t i=0; i<bins; i++) {
m1 = hist->bin[i];
if (m1 > m2){
m2 = m1;
maxbin = (double)i;
f_mode_o = hist->bin[i];
}
}
fprintf(stderr, "\nstep:%.2f\n ", (max - min)/bins);
f_mode_o = f_mode_o/samples;
mode_o = min + (maxbin + 0.5)*(max - min)/bins;
// print the results
double mode_e = numeric_mode(min, max);
fprintf(stderr, "\n# Numeric mode\n");
fprintf(stderr, "expected mode: %.7f\n", mode_e);
fprintf(stderr, "observed mode: %.3f\n", mode_o);
// FWHM comparison
//
// find the bins x₋ and x₊
double half = f_mode_o*samples/2;
m2 = samples;
double x_low = 0;
double x_upp = 0;
double diff;
for(size_t i=0; i<maxbin; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_low = (double)i;
}
}
m2 = samples;
for(size_t i=maxbin; i<bins; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_upp = (double)i;
}
}
x_low = min + (x_low + 0.5)*(max - min)/bins;
x_upp = min + (x_upp + 0.5)*(max - min)/bins;
double fwhm_o = x_upp - x_low;
// print the results
fprintf(stderr, "\n# Numeric FWHM\n");
double fwhm_e = numeric_fwhm(min, mode_e, max);
fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e);
fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o);
// print the counts
// gsl_histogram_fprintf(stdout, hist, "%g", "%g");
// clean up and exit
gsl_histogram_free(hist);
gsl_rng_free(r);
free(sample);
return EXIT_SUCCESS;
}