analistica/ex-1/main.c
2020-03-06 02:24:32 +01:00

366 lines
9.2 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_sum.h>
#include <gsl/gsl_roots.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_integration.h>
/* Function that compare doubles for sorting:
* x > y ⇒ 1
* x == y ⇒ 0
* x < y ⇒ -1
*/
int cmp_double (const void *xp, const void *yp) {
double x = *(double*)xp,
y = *(double*)yp;
return x > y ? 1 : -1;
}
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params) {
return gsl_ran_landau_pdf(x);
}
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params) {
// create the integrand
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
// set up the integration
double res, err;
size_t iter = 500;
gsl_integration_workspace* w =
gsl_integration_workspace_alloc(iter);
// clip values too small
if (x < -200) x = -200;
// We integrate the pdf in [x, +∞) instead
// of (-∞, x] to avoid a singularity and
// then return 1 - P.
gsl_integration_qagiu(
&pdf, // integrand
x, // lower bound
1e-10, 1e-6, // abs and rel error
iter, // max iteration
w, // "workspace"
&res, &err); // result, abs error
// free the memory
gsl_integration_workspace_free(w);
return 1 - res;
}
/* Another wrapper: this time it is needed by
* `landau_qdf` to solve the equation:
* `landau_cdf(x) = p0` for x
*/
double landau_cdf_root(double x, void* params) {
double p0 = *(double*)params;
return p0 - landau_cdf(x, NULL);
}
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0) {
// create function
gsl_function cdf;
cdf.function = &landau_cdf_root;
cdf.params = &p0;
// use the Brent method
gsl_root_fsolver* s =
gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
// search interval
double low = -1000, // lower value
upp = 100000; // upper value
gsl_root_fsolver_set(s, &cdf, low, upp);
// iterative search
size_t iter = 1000; // max iteration
int stat = GSL_CONTINUE;
for (size_t i=0; stat==GSL_CONTINUE && i<iter; i++) {
stat = gsl_root_fsolver_iterate(s);
low = gsl_root_fsolver_x_lower(s);
upp = gsl_root_fsolver_x_upper(s);
stat = gsl_root_test_interval(low, upp, 0, 0.01);
}
return gsl_root_fsolver_root(s);
}
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
// calculate the first n_terms of the series
// Σ_k=1 exp(-(2k - 1)²π²/8x²)
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=1; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)k - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n# Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %f\n", sum);
fprintf(stderr, "plain sum: %f\n", s->sum_plain);
fprintf(stderr, "err: %f\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a wrapper needed by `numeric_mode` because
* the minimization expects a function to be minimized and not
* maximized.
*/
double neg_landau_pdf(double x, void* params) {
return (-1) * gsl_ran_landau_pdf(x);
}
/* Compute numerically the mode of the Landau distribution.
*/
double numeric_mode (double min, double max) {
// create funtion
gsl_function pdf;
pdf.function = &neg_landau_pdf;
pdf.params = NULL;
// initialize minimization
double guess = 0;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
// Free memory
gsl_min_fminimizer_free (s);
return guess;
}
/* This is the function to be minimized in `numeric_FWHM`.
*/
double abs_landau_pdf(double x, void* params_) {
double* params = ((double *) params_);
return fabs(gsl_ran_landau_pdf(x) - params[0]);
}
/* Compute numerically the FWHM of the Landau distribution.
*/
double numeric_fwhm (double min_lim, double mode, double max_lim) {
// create funtion
gsl_function pdf;
pdf.function = &abs_landau_pdf;
double params [1];
params[0]= gsl_ran_landau_pdf(mode)/2;
pdf.params = params;
// initialize minimization for x₋
double guess = mode - 1;
double min, max;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_low = guess;
// initialize minimization for x₊
guess = mode + 1;
gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_upp = guess;
// Free memory
gsl_min_fminimizer_free (s);
return x_upp - x_low;
}
/* Here we generate random numbers in a uniform
* range and by using the quantile we map them
* to a Landau distribution. Then we generate an
* histogram to check the correctness.
*/
int main(int argc, char** argv) {
// initialize an RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// prepare histogram
size_t samples = 100000;
double* sample = calloc(samples, sizeof(double));
size_t bins = 40;
double min = -20;
double max = 20;
gsl_histogram* hist = gsl_histogram_alloc(bins);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* sample points from the Landau distribution
* and fill the histogram
*/
double x;
for(size_t i=0; i<samples; i++) {
x = gsl_ran_landau(r);
sample[i] = x;
gsl_histogram_increment(hist, x);
}
// sort the sample
qsort(sample, samples, sizeof(double), &cmp_double);
// calculate Kolmogorov-Smirnov D
double D = 0;
double d;
for(size_t i=0; i<samples; i++) {
d = fabs(landau_cdf(sample[i], NULL) - ((double)i+1)/samples);
if (d > D)
D = d;
}
// Kolmogorov-Smirnov test
double beta = kolmogorov_cdf(D, samples);
fprintf(stderr, "\n# K-S test results\n");
fprintf(stderr, "D: %f\n", D);
fprintf(stderr, "α: %g\n", 1 - beta);
// Mode comparison
//
// find the bin with the maximum number of events
double mode_o, maxbin = 0;
double f_mode_o = 0;
double m1, m2 = 0;
for(size_t i=0; i<bins; i++) {
m1 = hist->bin[i];
if (m1 > m2){
m2 = m1;
maxbin = (double)i;
f_mode_o = hist->bin[i];
}
}
fprintf(stderr, "\nstep:%.2f\n ", (max - min)/bins);
f_mode_o = f_mode_o/samples;
mode_o = min + (maxbin + 0.5)*(max - min)/bins;
// print the results
double mode_e = numeric_mode(min, max);
fprintf(stderr, "\n# Numeric mode\n");
fprintf(stderr, "expected mode: %.7f\n", mode_e);
fprintf(stderr, "observed mode: %.3f\n", mode_o);
// FWHM comparison
//
// find the bins x₋ and x₊
double half = f_mode_o*samples/2;
m2 = samples;
double x_low = 0;
double x_upp = 0;
double diff;
for(size_t i=0; i<maxbin; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_low = (double)i;
}
}
m2 = samples;
for(size_t i=maxbin; i<bins; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_upp = (double)i;
}
}
x_low = min + (x_low + 0.5)*(max - min)/bins;
x_upp = min + (x_upp + 0.5)*(max - min)/bins;
double fwhm_o = x_upp - x_low;
// print the results
fprintf(stderr, "\n# Numeric FWHM\n");
double fwhm_e = numeric_fwhm(min, mode_e, max);
fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e);
fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o);
// print the counts
// gsl_histogram_fprintf(stdout, hist, "%g", "%g");
// clean up and exit
gsl_histogram_free(hist);
gsl_rng_free(r);
free(sample);
return EXIT_SUCCESS;
}