ex-1: implement a KDE-based estimation of the FWHM
This commit is contained in:
parent
6ad77013b6
commit
29dda3e151
113
ex-1/bootstrap.c
113
ex-1/bootstrap.c
@ -1,10 +1,12 @@
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <gsl/gsl_roots.h>
|
||||
#include <gsl/gsl_rstat.h>
|
||||
#include <gsl/gsl_vector.h>
|
||||
#include <gsl/gsl_statistics_double.h>
|
||||
|
||||
#include "bootstrap.h"
|
||||
#include "tests.h"
|
||||
|
||||
|
||||
/* Function that compares doubles for sorting:
|
||||
@ -101,6 +103,42 @@ double hsm(double *x, size_t n) {
|
||||
}
|
||||
|
||||
|
||||
/* Parameters needed to compute the
|
||||
* KDE function of a sample.
|
||||
*/
|
||||
struct kde_params {
|
||||
double n; // sample size
|
||||
double var; // sample variance
|
||||
double *sample; // sample
|
||||
};
|
||||
|
||||
|
||||
/* A KDE (kernel density estimation) is an
|
||||
* approximation of a sample PDF obtained by
|
||||
* convolving the sample points with a smooth
|
||||
* kernel, in this case a standard gaussian.
|
||||
*
|
||||
* `gauss_kde(x, p)` gives the value of the PDF
|
||||
* at `x`, where `p` is a `kde_params` struct.
|
||||
*/
|
||||
double gauss_kde(double x, void * params) {
|
||||
struct kde_params p = *((struct kde_params*) params);
|
||||
|
||||
/* Apply the Silverman's rule of thumb to the
|
||||
* bandwidth estimation. The badwidth is given
|
||||
* by the sample variance times a factor which
|
||||
* depends on the number of points and dimension.
|
||||
*/
|
||||
double bw = p.var * pow((double)p.n*3.0/4, -2.0/5);
|
||||
|
||||
double sum = 0;
|
||||
for (size_t i = 0; i < p.n; i++)
|
||||
sum += exp(-pow(x - p.sample[i], 2) / (2*bw));
|
||||
|
||||
return sum / sqrt(2*M_PI*bw) / p.n;
|
||||
}
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic median
|
||||
* and its standard deviation by bootstrapping (ie
|
||||
* repeated resampling) the original `sample`, `boots`
|
||||
@ -188,3 +226,78 @@ uncert bootstrap_mode(
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic fwhm
|
||||
* and its standard deviation by bootstrapping (ie
|
||||
* repeated resampling) the original `sample`, `boots`
|
||||
* times.
|
||||
*
|
||||
* `min,max` are the bounds of interval that is
|
||||
* expected to contain the mode of the sample.
|
||||
*
|
||||
* The functions returns an `uncert` pair of mean and
|
||||
* stddev of the fwhms computed on each sample.
|
||||
*/
|
||||
uncert bootstrap_fwhm(
|
||||
const gsl_rng *r,
|
||||
double min, double max,
|
||||
double *sample, size_t n,
|
||||
int boots) {
|
||||
/* We use a running statistics to compute
|
||||
* a trimmed mean/variance of the sample.
|
||||
*/
|
||||
gsl_rstat_workspace* w = gsl_rstat_alloc();
|
||||
|
||||
// fwhm values and resample
|
||||
double *values = calloc(boots, sizeof(double));
|
||||
double *boot = calloc(n, sizeof(double));
|
||||
|
||||
// set the size here because it's fixed
|
||||
struct kde_params p;
|
||||
p.n = n;
|
||||
|
||||
// struct for numeric_fwhm
|
||||
gsl_function pdf;
|
||||
pdf.function = &gauss_kde;
|
||||
|
||||
for (size_t i = 0; i < boots; i++) {
|
||||
gsl_rstat_reset(w);
|
||||
|
||||
/* The sampling is simply done by generating
|
||||
* an array index uniformely in [0, n-1].
|
||||
*/
|
||||
for (size_t j = 0; j < n; j++) {
|
||||
double x = sample[gsl_rng_uniform_int(r, n)];
|
||||
boot[j] = x;
|
||||
/* Remove too extreme values from the
|
||||
* the trimmed statistics workspace.
|
||||
*/
|
||||
if (fabs(x) < 10) gsl_rstat_add(x, w);
|
||||
}
|
||||
|
||||
/* Set the trimmed variance of the sample as
|
||||
* the (uncorrected) variance of the gaussian
|
||||
* kernel estimator.
|
||||
*/
|
||||
p.sample = boot;
|
||||
p.var = gsl_rstat_variance(w);
|
||||
pdf.params = &p;
|
||||
|
||||
values[i] = numeric_fwhm(min, max, &pdf, 0);
|
||||
}
|
||||
|
||||
/* Compute mean and stddev of the fwhms
|
||||
* of each newly bootstrapped sample.
|
||||
*/
|
||||
uncert fwhm;
|
||||
fwhm.n = gsl_stats_mean(values, 1, boots);
|
||||
fwhm.s = gsl_stats_sd(values, 1, boots);
|
||||
|
||||
// free memory
|
||||
free(values);
|
||||
free(boot);
|
||||
gsl_rstat_free(w);
|
||||
|
||||
return fwhm;
|
||||
}
|
||||
|
@ -45,3 +45,21 @@ uncert bootstrap_mode(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots);
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic fwhm
|
||||
* and its standard deviation by bootstrapping (ie
|
||||
* repeated resampling) the original `sample`, `boots`
|
||||
* times.
|
||||
*
|
||||
* `min,max` are the bounds of interval that is
|
||||
* expected to contain the mode of the sample.
|
||||
*
|
||||
* The functions returns an `uncert` pair of mean and
|
||||
* stddev of the fwhms computed on each sample.
|
||||
*/
|
||||
uncert bootstrap_fwhm(
|
||||
const gsl_rng *r,
|
||||
double min, double max,
|
||||
double *sample, size_t n,
|
||||
int boots);
|
||||
|
19
ex-1/main.c
19
ex-1/main.c
@ -24,8 +24,8 @@ int main(int argc, char** argv) {
|
||||
size_t samples = 100000;
|
||||
double* sample = calloc(samples, sizeof(double));
|
||||
size_t bins = 40;
|
||||
double min = -20;
|
||||
double max = 20;
|
||||
double min = -10;
|
||||
double max = 10;
|
||||
gsl_histogram* hist = gsl_histogram_alloc(bins);
|
||||
gsl_histogram_set_ranges_uniform(hist, min, max);
|
||||
|
||||
@ -86,7 +86,7 @@ int main(int argc, char** argv) {
|
||||
pdf.function = &landau_pdf;
|
||||
pdf.params = NULL;
|
||||
|
||||
double mode_e = numeric_mode(min, max, &pdf);
|
||||
double mode_e = numeric_mode(min, max, &pdf, 1);
|
||||
uncert mode_o = bootstrap_mode(r, sample, samples, 100);
|
||||
|
||||
// print the results
|
||||
@ -94,6 +94,7 @@ int main(int argc, char** argv) {
|
||||
fprintf(stderr, "expected mode: %.7f\n", mode_e);
|
||||
fprintf(stderr, "observed mode: %.4f±%.4f\n", mode_o.n, mode_o.s);
|
||||
|
||||
// t-test
|
||||
double t = fabs(mode_e - mode_o.n)/mode_o.s;
|
||||
double p = 1 - erf(t/sqrt(2));
|
||||
fprintf(stderr, "\n## t-test\n");
|
||||
@ -112,16 +113,17 @@ int main(int argc, char** argv) {
|
||||
*/
|
||||
fprintf(stderr, "\n\n# FWHM comparison\n");
|
||||
|
||||
double fwhm_e = numeric_fwhm(min, max, &pdf);
|
||||
//uncert fwhm_o = bootstrap_fwhm(r, sample, samples, 100);
|
||||
double fwhm_e = numeric_fwhm(min, max, &pdf, 1);
|
||||
uncert fwhm_o = bootstrap_fwhm(r, min, max, sample, samples, 100);
|
||||
|
||||
// print the results
|
||||
fprintf(stderr, "\n## Results\n");
|
||||
fprintf(stderr, "expected fwhm: %.7f\n", fwhm_e);
|
||||
//fprintf(stderr, "observed fwhm: %.4f±%.4f\n", fwhm_o.n, fwhm_o.s);
|
||||
fprintf(stderr, "observed fwhm: %.4f±%.4f\n", fwhm_o.n, fwhm_o.s);
|
||||
|
||||
//t = fabs(fwhm_e - fwhm_o.n)/fwhm_o.s;
|
||||
//p = 1 - erf(t/sqrt(2));
|
||||
// t-test
|
||||
t = fabs(fwhm_e - fwhm_o.n)/fwhm_o.s;
|
||||
p = 1 - erf(t/sqrt(2));
|
||||
fprintf(stderr, "\n## t-test\n");
|
||||
fprintf(stderr, "t=%.3f\n", t);
|
||||
fprintf(stderr, "p=%.3f\n", p);
|
||||
@ -141,6 +143,7 @@ int main(int argc, char** argv) {
|
||||
fprintf(stderr, "expected median: %.7f\n", med_e);
|
||||
fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
|
||||
|
||||
// t-test
|
||||
t = fabs(med_e - med_o.n)/med_o.s;
|
||||
p = 1 - erf(t/sqrt(2));
|
||||
fprintf(stderr, "\n## t-test\n");
|
||||
|
43
ex-1/tests.c
43
ex-1/tests.c
@ -74,11 +74,14 @@ double negate_func(double x, void * fp) {
|
||||
|
||||
/* Numerically computes the mode of a Landau
|
||||
* distribution by maximising the derivative.
|
||||
* The min,max parameters are the initial search
|
||||
* The `min,max` parameters are the initial search
|
||||
* interval for the optimisation.
|
||||
*
|
||||
* If `err` is true print the estimate error.
|
||||
*/
|
||||
double numeric_mode(double min, double max,
|
||||
gsl_function *pdf) {
|
||||
gsl_function *pdf,
|
||||
int err) {
|
||||
|
||||
/* Negate the PDF to maximise it by
|
||||
* using a GSL minimisation method.
|
||||
@ -89,22 +92,22 @@ double numeric_mode(double min, double max,
|
||||
npdf.params = pdf;
|
||||
|
||||
// initialize minimization
|
||||
double guess = 0;
|
||||
double x = 0;
|
||||
int iter = 0;
|
||||
int max_iter = 100;
|
||||
double prec = 1e-7;
|
||||
int status;
|
||||
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_brent;
|
||||
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc(T);
|
||||
gsl_min_fminimizer_set(s, &npdf, guess, min, max);
|
||||
const gsl_min_fminimizer_type *T = gsl_min_fminimizer_brent;
|
||||
gsl_min_fminimizer *s = gsl_min_fminimizer_alloc(T);
|
||||
gsl_min_fminimizer_set(s, &npdf, x, min, max);
|
||||
|
||||
// minimisation
|
||||
do {
|
||||
iter++;
|
||||
status = gsl_min_fminimizer_iterate(s);
|
||||
guess = gsl_min_fminimizer_x_minimum(s);
|
||||
min = gsl_min_fminimizer_x_lower(s);
|
||||
max = gsl_min_fminimizer_x_upper(s);
|
||||
x = gsl_min_fminimizer_x_minimum(s);
|
||||
min = gsl_min_fminimizer_x_lower(s);
|
||||
max = gsl_min_fminimizer_x_upper(s);
|
||||
status = gsl_min_test_interval(min, max, prec, prec);
|
||||
|
||||
} while (status == GSL_CONTINUE && iter < max_iter);
|
||||
@ -112,11 +115,12 @@ double numeric_mode(double min, double max,
|
||||
/* The error is simply given by the width of
|
||||
* the final interval containing the solution
|
||||
*/
|
||||
fprintf(stderr, "mode error: %.3g\n", max - min);
|
||||
if (err)
|
||||
fprintf(stderr, "mode error: %.3g\n", max - min);
|
||||
|
||||
// free memory
|
||||
gsl_min_fminimizer_free(s);
|
||||
return guess;
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
@ -154,13 +158,16 @@ double fwhm_equation(double x, void* params) {
|
||||
* `fwhm_equation`. Two searches are performed in
|
||||
* [min, mode] and [mode, max] that will give the
|
||||
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
||||
*
|
||||
* If `err` is true print the estimate error.
|
||||
*/
|
||||
double numeric_fwhm(double min, double max,
|
||||
gsl_function *pdf) {
|
||||
gsl_function *pdf,
|
||||
int err) {
|
||||
/* Create the gls_function structure
|
||||
* for the equation to be solved.
|
||||
*/
|
||||
double mode = numeric_mode(min, max, pdf);
|
||||
double mode = numeric_mode(min, max, pdf, 0);
|
||||
struct fwhm_params p = {
|
||||
pdf->function(mode, pdf->params)/2,
|
||||
pdf
|
||||
@ -169,8 +176,8 @@ double numeric_fwhm(double min, double max,
|
||||
equation.function = &fwhm_equation;
|
||||
equation.params = &p;
|
||||
|
||||
const gsl_root_fsolver_type * T = gsl_root_fsolver_brent;
|
||||
gsl_root_fsolver * s = gsl_root_fsolver_alloc(T);
|
||||
const gsl_root_fsolver_type *T = gsl_root_fsolver_brent;
|
||||
gsl_root_fsolver *s = gsl_root_fsolver_alloc(T);
|
||||
|
||||
// initialize minimization for x₋
|
||||
double x, fmin, fmax;
|
||||
@ -189,6 +196,7 @@ double numeric_fwhm(double min, double max,
|
||||
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
||||
}
|
||||
double x_low = x;
|
||||
double err_low = fmax - fmin;
|
||||
|
||||
// initialize minimization for x₊
|
||||
gsl_root_fsolver_set(s, &equation, mode, max);
|
||||
@ -203,6 +211,11 @@ double numeric_fwhm(double min, double max,
|
||||
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
||||
}
|
||||
double x_upp = x;
|
||||
double err_upp = fmax - fmin;
|
||||
|
||||
if (err)
|
||||
fprintf(stderr, "fhwm error: %g\n",
|
||||
sqrt(err_low*err_low + err_upp*err_upp));
|
||||
|
||||
// free memory
|
||||
gsl_root_fsolver_free(s);
|
||||
|
12
ex-1/tests.h
12
ex-1/tests.h
@ -15,11 +15,14 @@ double kolmogorov_cdf(double D, int n);
|
||||
|
||||
/* Numerically computes the mode of a Landau
|
||||
* distribution by maximising the derivative.
|
||||
* The min,max parameters are the initial search
|
||||
* The `min,max` parameters are the initial search
|
||||
* interval for the optimisation.
|
||||
*
|
||||
* If `err` is true print the estimate error.
|
||||
*/
|
||||
double numeric_mode(double min, double max,
|
||||
gsl_function *pdf);
|
||||
gsl_function *pdf,
|
||||
int err);
|
||||
|
||||
|
||||
/* Numerically computes the FWHM of a PDF
|
||||
@ -29,6 +32,9 @@ double numeric_mode(double min, double max,
|
||||
* `fwhm_equation`. Two searches are performed in
|
||||
* [min, mode] and [mode, max] that will give the
|
||||
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
||||
*
|
||||
* If `err` is true print the estimate error.
|
||||
*/
|
||||
double numeric_fwhm(double min, double max,
|
||||
gsl_function *pdf);
|
||||
gsl_function *pdf,
|
||||
int err);
|
||||
|
Loading…
Reference in New Issue
Block a user