ex-1: split into several files

This commit is contained in:
Michele Guerini Rocco 2020-03-24 01:17:39 +00:00
parent 3cd011ed5b
commit 0ee7f9adb3
6 changed files with 346 additions and 248 deletions

95
ex-1/landau.c Normal file
View File

@ -0,0 +1,95 @@
/* This file contains functions to
* compute the Landau distribution
* PDF, CDF and QDF functions.
*/
#include <gsl/gsl_roots.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_integration.h>
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params) {
return gsl_ran_landau_pdf(x);
}
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params) {
// create the integrand
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
// set up the integration
double res, err;
size_t iter = 500;
gsl_integration_workspace* w =
gsl_integration_workspace_alloc(iter);
// clip values too small
if (x < -200) x = -200;
// We integrate the pdf in [x, +∞) instead
// of (-∞, x] to avoid a singularity and
// then return 1 - P.
gsl_integration_qagiu(
&pdf, // integrand
x, // lower bound
1e-10, 1e-6, // abs and rel error
iter, // max iteration
w, // "workspace"
&res, &err); // result, abs error
// free the memory
gsl_integration_workspace_free(w);
return 1 - res;
}
/* Another wrapper: this time it is needed by
* `landau_qdf` to solve the equation:
* `landau_cdf(x) = p0` for x
*/
double landau_cdf_root(double x, void* params) {
double p0 = *(double*)params;
return p0 - landau_cdf(x, NULL);
}
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0) {
// create function
gsl_function cdf;
cdf.function = &landau_cdf_root;
cdf.params = &p0;
// use the Brent method
gsl_root_fsolver* s =
gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
// search interval
double low = -1000, // lower value
upp = 100000; // upper value
gsl_root_fsolver_set(s, &cdf, low, upp);
// iterative search
size_t iter = 1000; // max iteration
int stat = GSL_CONTINUE;
for (size_t i=0; stat==GSL_CONTINUE && i<iter; i++) {
stat = gsl_root_fsolver_iterate(s);
low = gsl_root_fsolver_x_lower(s);
upp = gsl_root_fsolver_x_upper(s);
stat = gsl_root_test_interval(low, upp, 0, 0.01);
}
return gsl_root_fsolver_root(s);
}

24
ex-1/landau.h Normal file
View File

@ -0,0 +1,24 @@
/* This file contains functions to
* compute the Landau distribution
* PDF, CDF and QDF functions.
*/
#pragma once
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params);
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params);
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0);

View File

@ -1,11 +1,11 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <gsl/gsl_min.h> #include <math.h>
#include <gsl/gsl_sum.h>
#include <gsl/gsl_roots.h>
#include <gsl/gsl_randist.h> #include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h> #include <gsl/gsl_histogram.h>
#include <gsl/gsl_integration.h>
#include "landau.h"
#include "tests.h"
/* Function that compare doubles for sorting: /* Function that compare doubles for sorting:
@ -20,233 +20,6 @@ int cmp_double (const void *xp, const void *yp) {
} }
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params) {
return gsl_ran_landau_pdf(x);
}
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params) {
// create the integrand
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
// set up the integration
double res, err;
size_t iter = 500;
gsl_integration_workspace* w =
gsl_integration_workspace_alloc(iter);
// clip values too small
if (x < -200) x = -200;
// We integrate the pdf in [x, +∞) instead
// of (-∞, x] to avoid a singularity and
// then return 1 - P.
gsl_integration_qagiu(
&pdf, // integrand
x, // lower bound
1e-10, 1e-6, // abs and rel error
iter, // max iteration
w, // "workspace"
&res, &err); // result, abs error
// free the memory
gsl_integration_workspace_free(w);
return 1 - res;
}
/* Another wrapper: this time it is needed by
* `landau_qdf` to solve the equation:
* `landau_cdf(x) = p0` for x
*/
double landau_cdf_root(double x, void* params) {
double p0 = *(double*)params;
return p0 - landau_cdf(x, NULL);
}
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0) {
// create function
gsl_function cdf;
cdf.function = &landau_cdf_root;
cdf.params = &p0;
// use the Brent method
gsl_root_fsolver* s =
gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
// search interval
double low = -1000, // lower value
upp = 100000; // upper value
gsl_root_fsolver_set(s, &cdf, low, upp);
// iterative search
size_t iter = 1000; // max iteration
int stat = GSL_CONTINUE;
for (size_t i=0; stat==GSL_CONTINUE && i<iter; i++) {
stat = gsl_root_fsolver_iterate(s);
low = gsl_root_fsolver_x_lower(s);
upp = gsl_root_fsolver_x_upper(s);
stat = gsl_root_test_interval(low, upp, 0, 0.01);
}
return gsl_root_fsolver_root(s);
}
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
// calculate the first n_terms of the series
// Σ_k=1 exp(-(2k - 1)²π²/8x²)
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=1; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)k - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n# Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %f\n", sum);
fprintf(stderr, "plain sum: %f\n", s->sum_plain);
fprintf(stderr, "err: %f\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a wrapper needed by `numeric_mode` because
* the minimization expects a function to be minimized and not
* maximized.
*/
double neg_landau_pdf(double x, void* params) {
return (-1) * gsl_ran_landau_pdf(x);
}
/* Compute numerically the mode of the Landau distribution.
*/
double numeric_mode (double min, double max) {
// create funtion
gsl_function pdf;
pdf.function = &neg_landau_pdf;
pdf.params = NULL;
// initialize minimization
double guess = 0;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
// Free memory
gsl_min_fminimizer_free (s);
return guess;
}
/* This is the function to be minimized in `numeric_FWHM`.
*/
double abs_landau_pdf(double x, void* params_) {
double* params = ((double *) params_);
return fabs(gsl_ran_landau_pdf(x) - params[0]);
}
/* Compute numerically the FWHM of the Landau distribution.
*/
double numeric_fwhm (double min_lim, double mode, double max_lim) {
// create funtion
gsl_function pdf;
pdf.function = &abs_landau_pdf;
double params [1];
params[0]= gsl_ran_landau_pdf(mode)/2;
pdf.params = params;
// initialize minimization for x₋
double guess = mode - 1;
double min, max;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_low = guess;
// initialize minimization for x₊
guess = mode + 1;
gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_upp = guess;
// Free memory
gsl_min_fminimizer_free (s);
return x_upp - x_low;
}
/* Here we generate random numbers in a uniform /* Here we generate random numbers in a uniform
* range and by using the quantile we map them * range and by using the quantile we map them
* to a Landau distribution. Then we generate an * to a Landau distribution. Then we generate an
@ -266,20 +39,31 @@ int main(int argc, char** argv) {
gsl_histogram* hist = gsl_histogram_alloc(bins); gsl_histogram* hist = gsl_histogram_alloc(bins);
gsl_histogram_set_ranges_uniform(hist, min, max); gsl_histogram_set_ranges_uniform(hist, min, max);
/* sample points from the Landau distribution
* and fill the histogram /* Sample generation
*
* Sample points from the Landau
* distribution and fill the histogram.
*/ */
fprintf(stderr, "# Sampling\n");
fprintf(stderr, "generating %ld points... ", samples);
double x; double x;
for(size_t i=0; i<samples; i++) { for(size_t i=0; i<samples; i++) {
x = gsl_ran_landau(r); x = gsl_ran_landau(r);
sample[i] = x; sample[i] = x;
gsl_histogram_increment(hist, x); gsl_histogram_increment(hist, x);
} }
fprintf(stderr, "done\n");
// sort the sample // sort the sample
qsort(sample, samples, sizeof(double), &cmp_double); qsort(sample, samples, sizeof(double), &cmp_double);
// calculate Kolmogorov-Smirnov D
/* Kolmogorov-Smirnov test
*
* Compute the D statistic and its
* associated probability.
*/
double D = 0; double D = 0;
double d; double d;
for(size_t i=0; i<samples; i++) { for(size_t i=0; i<samples; i++) {
@ -287,16 +71,19 @@ int main(int argc, char** argv) {
if (d > D) if (d > D)
D = d; D = d;
} }
fprintf(stderr, "\n\n# Kolmogorov-Smirnov test\n");
// Kolmogorov-Smirnov test
double beta = kolmogorov_cdf(D, samples); double beta = kolmogorov_cdf(D, samples);
fprintf(stderr, "\n# K-S test results\n");
// print the results
fprintf(stderr, "\n## Results\n");
fprintf(stderr, "D: %f\n", D); fprintf(stderr, "D: %f\n", D);
fprintf(stderr, "α: %g\n", 1 - beta); fprintf(stderr, "α: %g\n", 1 - beta);
// Mode comparison
// /* Mode comparison
// find the bin with the maximum number of events *
* Find the bin with the maximum number of events
*/
double mode_o, maxbin = 0; double mode_o, maxbin = 0;
double f_mode_o = 0; double f_mode_o = 0;
double m1, m2 = 0; double m1, m2 = 0;
@ -308,19 +95,22 @@ int main(int argc, char** argv) {
f_mode_o = hist->bin[i]; f_mode_o = hist->bin[i];
} }
} }
fprintf(stderr, "\n\n# Mode comparison\n");
fprintf(stderr, "\nstep: %.2f\n ", (max - min)/bins); fprintf(stderr, "\nstep: %.2f\n ", (max - min)/bins);
f_mode_o = f_mode_o/samples; f_mode_o = f_mode_o/samples;
mode_o = min + (maxbin + 0.5)*(max - min)/bins; mode_o = min + (maxbin + 0.5)*(max - min)/bins;
// print the results // print the results
double mode_e = numeric_mode(min, max); double mode_e = numeric_mode(min, max);
fprintf(stderr, "\n# Numeric mode\n"); fprintf(stderr, "\n## Results\n");
fprintf(stderr, "expected mode: %.7f\n", mode_e); fprintf(stderr, "expected mode: %.7f\n", mode_e);
fprintf(stderr, "observed mode: %.3f\n", mode_o); fprintf(stderr, "observed mode: %.3f\n", mode_o);
// FWHM comparison
// /* FWHM comparison
// find the bins x₋ and x₊ *
* Find the bins x and x.
*/
double half = f_mode_o*samples/2; double half = f_mode_o*samples/2;
m2 = samples; m2 = samples;
double x_low = 0; double x_low = 0;
@ -348,8 +138,9 @@ int main(int argc, char** argv) {
double fwhm_o = x_upp - x_low; double fwhm_o = x_upp - x_low;
// print the results // print the results
fprintf(stderr, "\n# Numeric FWHM\n"); fprintf(stderr, "\n\n# FWHM comparison\n");
double fwhm_e = numeric_fwhm(min, mode_e, max); double fwhm_e = numeric_fwhm(min, max, mode_e);
fprintf(stderr, "\n# Results\n");
fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e); fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e);
fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o); fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o);

158
ex-1/tests.c Normal file
View File

@ -0,0 +1,158 @@
/* This file contains functions to perform
* statistical tests on the points sampled
* from a Landau distribution.
*/
#include <stdio.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_sum.h>
#include "landau.h"
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
// calculate the first n_terms of the series
// Σ_k=1 exp(-(2k - 1)²π²/8x²)
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=0; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)(k + 1) - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n## Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %f\n", sum);
fprintf(stderr, "plain sum: %f\n", s->sum_plain);
fprintf(stderr, "err: %f\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a wrapper needed by `numeric_mode` because
* the minimization expects a function to be minimized and not
* maximized.
*/
double neg_landau_pdf(double x, void* params) {
return (-1) * gsl_ran_landau_pdf(x);
}
/* Numerically computes the mode of a Landau
* distribution by maximising the derivative.
* The min,max parameters are the initial search
* interval for the optimisation.
*/
double numeric_mode(double min, double max) {
// create funtion
gsl_function pdf;
pdf.function = &neg_landau_pdf;
pdf.params = NULL;
// initialize minimization
double guess = 0;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc(T);
gsl_min_fminimizer_set(s, &pdf, guess, min, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate(s);
guess = gsl_min_fminimizer_x_minimum(s);
min = gsl_min_fminimizer_x_lower(s);
max = gsl_min_fminimizer_x_upper(s);
status = gsl_min_test_interval(min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
// Free memory
gsl_min_fminimizer_free(s);
return guess;
}
/* This is the function to be minimized in `numeric_FWHM`.
*/
double abs_landau_pdf(double x, void* params_) {
double* params = ((double *) params_);
return fabs(gsl_ran_landau_pdf(x) - params[0]);
}
/* Numerically computes the FWHM of Landau
* distribution by maximising the derivative.
* The `min,max` parameters are the initial search
* interval for the optimisation. `mode` can be
* computer with `numeric_mode(min, max)`.
*/
double numeric_fwhm(double min, double max, double mode) {
// create funtion
gsl_function pdf;
pdf.function = &abs_landau_pdf;
double params [1];
params[0]= gsl_ran_landau_pdf(mode)/2;
pdf.params = params;
// initialize minimization for x₋
double guess = mode - 1;
double fmin, fmax;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc(T);
gsl_min_fminimizer_set(s, &pdf, guess, min, mode);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate(s);
guess = gsl_min_fminimizer_x_minimum(s);
fmin = gsl_min_fminimizer_x_lower(s);
fmax = gsl_min_fminimizer_x_upper(s);
status = gsl_min_test_interval(fmin, fmax, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_low = guess;
// initialize minimization for x₊
guess = mode + 1;
gsl_min_fminimizer_set(s, &pdf, guess, mode, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate(s);
guess = gsl_min_fminimizer_x_minimum(s);
fmin = gsl_min_fminimizer_x_lower(s);
fmax = gsl_min_fminimizer_x_upper(s);
status = gsl_min_test_interval(fmin, fmax, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_upp = guess;
// Free memory
gsl_min_fminimizer_free(s);
return x_upp - x_low;
}

28
ex-1/tests.h Normal file
View File

@ -0,0 +1,28 @@
/* This file contains functions to perform
* statistical tests on the points sampled
* from a Landau distribution.
*/
#pragma once
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n);
/* Numerically computes the mode of a Landau
* distribution by maximising the derivative.
* The min,max parameters are the initial search
* interval for the optimisation.
*/
double numeric_mode(double min, double max);
/* Numerically computes the FWHM of Landau
* distribution by maximising the derivative.
* The `min,max` parameters are the initial search
* interval for the optimisation. `mode` can be
* computer with `numeric_mode(min, max)`.
*/
double numeric_fwhm(double min, double max, double mode);

View File

@ -6,7 +6,9 @@ CCOMPILE = \
mkdir -p $(@D); \ mkdir -p $(@D); \
$(CC) $(CFLAGS) $^ -o $@ $(CC) $(CFLAGS) $^ -o $@
ex-1/bin/%: ex-1/%.c ex-1/bin/main: ex-1/main.c ex-1/landau.c ex-1/tests.c
$(CCOMPILE)
ex-1/bin/pdf: ex-1/pdf.c
$(CCOMPILE) $(CCOMPILE)
ex-2/bin/%: ex-2/%.c ex-2/bin/%: ex-2/%.c