analistica/ex-1/main.c

169 lines
4.4 KiB
C
Raw Normal View History

2020-03-06 02:24:32 +01:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2020-03-24 02:17:39 +01:00
#include <math.h>
2020-03-06 02:24:32 +01:00
#include <gsl/gsl_randist.h>
2020-03-24 02:41:45 +01:00
#include <gsl/gsl_statistics_double.h>
2020-03-24 02:17:39 +01:00
#include "landau.h"
#include "tests.h"
#include "bootstrap.h"
2020-03-06 02:24:32 +01:00
2020-04-08 15:43:16 +02:00
/* Here we generate random numbers following
* the Landau distribution and run a series of
* test to check if they really belong to such a
* distribution.
2020-03-06 02:24:32 +01:00
*/
int main(int argc, char** argv) {
size_t samples = 50000;
/* Process CLI arguments */
for (size_t i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-n")) samples = atol(argv[++i]);
else {
fprintf(stderr, "Usage: %s -[hn]\n", argv[0]);
fprintf(stderr, "\t-h\tShow this message.\n");
fprintf(stderr, "\t-n N\tThe size of sample to generate. (default: 50000)\n");
return EXIT_FAILURE;
}
}
2020-03-06 02:24:32 +01:00
// initialize an RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// prepare histogram
double* sample = calloc(samples, sizeof(double));
double min = -10;
double max = 10;
2020-03-06 02:24:32 +01:00
2020-03-24 02:17:39 +01:00
/* Sample generation
*
2020-04-08 15:43:16 +02:00
* Sample points from the Landau distribution
* using the GSL Landau generator.
2020-03-06 02:24:32 +01:00
*/
2020-03-24 02:17:39 +01:00
fprintf(stderr, "# Sampling\n");
fprintf(stderr, "generating %ld points... ", samples);
2020-03-06 02:24:32 +01:00
double x;
for(size_t i=0; i<samples; i++) {
x = gsl_ran_landau(r);
sample[i] = x;
}
2020-03-24 02:17:39 +01:00
fprintf(stderr, "done\n");
2020-03-06 02:24:32 +01:00
// sort the sample: needed for HSM and ks tests
2020-03-06 02:24:32 +01:00
qsort(sample, samples, sizeof(double), &cmp_double);
2020-03-24 02:17:39 +01:00
/* Kolmogorov-Smirnov test
*
* Compute the D statistic and its
* associated probability.
*/
2020-04-08 15:43:16 +02:00
fprintf(stderr, "\n\n# Kolmogorov-Smirnov test\n");
2020-03-06 02:24:32 +01:00
double D = 0;
double d;
for(size_t i=0; i<samples; i++) {
d = fabs(landau_cdf(sample[i], NULL) - ((double)i+1)/samples);
if (d > D)
D = d;
}
double beta = kolmogorov_cdf(D, samples);
2020-03-24 02:17:39 +01:00
// print the results
fprintf(stderr, "\n## Results\n");
fprintf(stderr, "D=%g\n", D);
fprintf(stderr, "p=%.3f\n", 1 - beta);
2020-03-06 02:24:32 +01:00
2020-03-24 02:17:39 +01:00
/* Mode comparison
*
* Compute the half-sample mode by bootstrapping
* and compare the result with the value found by
* numerical maximisation of the PDF.
2020-03-24 02:17:39 +01:00
*/
fprintf(stderr, "\n\n# Mode comparison\n");
2020-03-06 02:24:32 +01:00
/* A structure used by the optimisation
* routines in numeric_mode and others
* functions below.
*/
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
const size_t boots = 100;
double mode_e = numeric_mode(min, max, &pdf, 1);
uncert mode_o = bootstrap_mode(r, sample, samples, boots);
// print the results
2020-03-24 02:17:39 +01:00
fprintf(stderr, "\n## Results\n");
fprintf(stderr, "expected mode: %.8f\n", mode_e);
2020-04-06 10:32:26 +02:00
fprintf(stderr, "observed mode: %.4f±%.4f\n", mode_o.n, mode_o.s);
// t-test
2020-04-06 10:32:26 +02:00
double t = fabs(mode_e - mode_o.n)/mode_o.s;
double p = 1 - erf(t/sqrt(2));
fprintf(stderr, "\n## t-test\n");
fprintf(stderr, "t=%.3f\n", t);
fprintf(stderr, "p=%.3f\n", p);
2020-03-06 02:24:32 +01:00
2020-03-24 02:17:39 +01:00
/* FWHM comparison
*
* Estimate the FWHM of the sample by constructing
* an empirical PDF via a KDE method and applying
* the definition on it (numerical solution of
* `f(x) = max/2` x-x). This is again bootstrapped
* to estimate the standard errors and compared against
* the numerical value of FWHM from the true PDF.
2020-03-24 02:17:39 +01:00
*/
fprintf(stderr, "\n\n# FWHM comparison\n");
double fwhm_e = numeric_fwhm(min, max, &pdf, 1);
uncert fwhm_o = bootstrap_fwhm(r, min, max, sample, samples, boots);
// print the results
fprintf(stderr, "\n## Results\n");
fprintf(stderr, "expected fwhm: %.7f\n", fwhm_e);
fprintf(stderr, "observed fwhm: %.4f±%.4f\n", fwhm_o.n, fwhm_o.s);
// t-test
t = fabs(fwhm_e - fwhm_o.n)/fwhm_o.s;
p = 1 - erf(t/sqrt(2));
fprintf(stderr, "\n## t-test\n");
fprintf(stderr, "t=%.3f\n", t);
fprintf(stderr, "p=%.3f\n", p);
2020-03-06 02:24:32 +01:00
2020-03-24 02:41:45 +01:00
/* Median comparison
*
* Compute the median of the sample by bootstrapping
* it and comparing it with the QDF(1/2).
2020-03-24 02:41:45 +01:00
*/
fprintf(stderr, "\n\n# Median comparison\n");
double med_e = landau_qdf(0.5);
uncert med_o = bootstrap_median(r, sample, samples, boots);
2020-03-24 02:41:45 +01:00
// print the results
fprintf(stderr, "\n## Results\n");
2020-03-24 02:41:45 +01:00
fprintf(stderr, "expected median: %.7f\n", med_e);
fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
// t-test
2020-04-06 10:32:26 +02:00
t = fabs(med_e - med_o.n)/med_o.s;
p = 1 - erf(t/sqrt(2));
fprintf(stderr, "\n## t-test\n");
fprintf(stderr, "t=%.3f\n", t);
fprintf(stderr, "p=%.3f\n", p);
2020-03-06 02:24:32 +01:00
// clean up and exit
gsl_rng_free(r);
free(sample);
return EXIT_SUCCESS;
}