ex-1: implement a KDE-based estimation of the FWHM
This commit is contained in:
parent
6ad77013b6
commit
29dda3e151
113
ex-1/bootstrap.c
113
ex-1/bootstrap.c
@ -1,10 +1,12 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <gsl/gsl_roots.h>
|
||||||
#include <gsl/gsl_rstat.h>
|
#include <gsl/gsl_rstat.h>
|
||||||
#include <gsl/gsl_vector.h>
|
#include <gsl/gsl_vector.h>
|
||||||
#include <gsl/gsl_statistics_double.h>
|
#include <gsl/gsl_statistics_double.h>
|
||||||
|
|
||||||
#include "bootstrap.h"
|
#include "bootstrap.h"
|
||||||
|
#include "tests.h"
|
||||||
|
|
||||||
|
|
||||||
/* Function that compares doubles for sorting:
|
/* Function that compares doubles for sorting:
|
||||||
@ -101,6 +103,42 @@ double hsm(double *x, size_t n) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Parameters needed to compute the
|
||||||
|
* KDE function of a sample.
|
||||||
|
*/
|
||||||
|
struct kde_params {
|
||||||
|
double n; // sample size
|
||||||
|
double var; // sample variance
|
||||||
|
double *sample; // sample
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* A KDE (kernel density estimation) is an
|
||||||
|
* approximation of a sample PDF obtained by
|
||||||
|
* convolving the sample points with a smooth
|
||||||
|
* kernel, in this case a standard gaussian.
|
||||||
|
*
|
||||||
|
* `gauss_kde(x, p)` gives the value of the PDF
|
||||||
|
* at `x`, where `p` is a `kde_params` struct.
|
||||||
|
*/
|
||||||
|
double gauss_kde(double x, void * params) {
|
||||||
|
struct kde_params p = *((struct kde_params*) params);
|
||||||
|
|
||||||
|
/* Apply the Silverman's rule of thumb to the
|
||||||
|
* bandwidth estimation. The badwidth is given
|
||||||
|
* by the sample variance times a factor which
|
||||||
|
* depends on the number of points and dimension.
|
||||||
|
*/
|
||||||
|
double bw = p.var * pow((double)p.n*3.0/4, -2.0/5);
|
||||||
|
|
||||||
|
double sum = 0;
|
||||||
|
for (size_t i = 0; i < p.n; i++)
|
||||||
|
sum += exp(-pow(x - p.sample[i], 2) / (2*bw));
|
||||||
|
|
||||||
|
return sum / sqrt(2*M_PI*bw) / p.n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Computes an approximation to the asymptotic median
|
/* Computes an approximation to the asymptotic median
|
||||||
* and its standard deviation by bootstrapping (ie
|
* and its standard deviation by bootstrapping (ie
|
||||||
* repeated resampling) the original `sample`, `boots`
|
* repeated resampling) the original `sample`, `boots`
|
||||||
@ -188,3 +226,78 @@ uncert bootstrap_mode(
|
|||||||
|
|
||||||
return mode;
|
return mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Computes an approximation to the asymptotic fwhm
|
||||||
|
* and its standard deviation by bootstrapping (ie
|
||||||
|
* repeated resampling) the original `sample`, `boots`
|
||||||
|
* times.
|
||||||
|
*
|
||||||
|
* `min,max` are the bounds of interval that is
|
||||||
|
* expected to contain the mode of the sample.
|
||||||
|
*
|
||||||
|
* The functions returns an `uncert` pair of mean and
|
||||||
|
* stddev of the fwhms computed on each sample.
|
||||||
|
*/
|
||||||
|
uncert bootstrap_fwhm(
|
||||||
|
const gsl_rng *r,
|
||||||
|
double min, double max,
|
||||||
|
double *sample, size_t n,
|
||||||
|
int boots) {
|
||||||
|
/* We use a running statistics to compute
|
||||||
|
* a trimmed mean/variance of the sample.
|
||||||
|
*/
|
||||||
|
gsl_rstat_workspace* w = gsl_rstat_alloc();
|
||||||
|
|
||||||
|
// fwhm values and resample
|
||||||
|
double *values = calloc(boots, sizeof(double));
|
||||||
|
double *boot = calloc(n, sizeof(double));
|
||||||
|
|
||||||
|
// set the size here because it's fixed
|
||||||
|
struct kde_params p;
|
||||||
|
p.n = n;
|
||||||
|
|
||||||
|
// struct for numeric_fwhm
|
||||||
|
gsl_function pdf;
|
||||||
|
pdf.function = &gauss_kde;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < boots; i++) {
|
||||||
|
gsl_rstat_reset(w);
|
||||||
|
|
||||||
|
/* The sampling is simply done by generating
|
||||||
|
* an array index uniformely in [0, n-1].
|
||||||
|
*/
|
||||||
|
for (size_t j = 0; j < n; j++) {
|
||||||
|
double x = sample[gsl_rng_uniform_int(r, n)];
|
||||||
|
boot[j] = x;
|
||||||
|
/* Remove too extreme values from the
|
||||||
|
* the trimmed statistics workspace.
|
||||||
|
*/
|
||||||
|
if (fabs(x) < 10) gsl_rstat_add(x, w);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the trimmed variance of the sample as
|
||||||
|
* the (uncorrected) variance of the gaussian
|
||||||
|
* kernel estimator.
|
||||||
|
*/
|
||||||
|
p.sample = boot;
|
||||||
|
p.var = gsl_rstat_variance(w);
|
||||||
|
pdf.params = &p;
|
||||||
|
|
||||||
|
values[i] = numeric_fwhm(min, max, &pdf, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute mean and stddev of the fwhms
|
||||||
|
* of each newly bootstrapped sample.
|
||||||
|
*/
|
||||||
|
uncert fwhm;
|
||||||
|
fwhm.n = gsl_stats_mean(values, 1, boots);
|
||||||
|
fwhm.s = gsl_stats_sd(values, 1, boots);
|
||||||
|
|
||||||
|
// free memory
|
||||||
|
free(values);
|
||||||
|
free(boot);
|
||||||
|
gsl_rstat_free(w);
|
||||||
|
|
||||||
|
return fwhm;
|
||||||
|
}
|
||||||
|
@ -45,3 +45,21 @@ uncert bootstrap_mode(
|
|||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots);
|
int boots);
|
||||||
|
|
||||||
|
|
||||||
|
/* Computes an approximation to the asymptotic fwhm
|
||||||
|
* and its standard deviation by bootstrapping (ie
|
||||||
|
* repeated resampling) the original `sample`, `boots`
|
||||||
|
* times.
|
||||||
|
*
|
||||||
|
* `min,max` are the bounds of interval that is
|
||||||
|
* expected to contain the mode of the sample.
|
||||||
|
*
|
||||||
|
* The functions returns an `uncert` pair of mean and
|
||||||
|
* stddev of the fwhms computed on each sample.
|
||||||
|
*/
|
||||||
|
uncert bootstrap_fwhm(
|
||||||
|
const gsl_rng *r,
|
||||||
|
double min, double max,
|
||||||
|
double *sample, size_t n,
|
||||||
|
int boots);
|
||||||
|
19
ex-1/main.c
19
ex-1/main.c
@ -24,8 +24,8 @@ int main(int argc, char** argv) {
|
|||||||
size_t samples = 100000;
|
size_t samples = 100000;
|
||||||
double* sample = calloc(samples, sizeof(double));
|
double* sample = calloc(samples, sizeof(double));
|
||||||
size_t bins = 40;
|
size_t bins = 40;
|
||||||
double min = -20;
|
double min = -10;
|
||||||
double max = 20;
|
double max = 10;
|
||||||
gsl_histogram* hist = gsl_histogram_alloc(bins);
|
gsl_histogram* hist = gsl_histogram_alloc(bins);
|
||||||
gsl_histogram_set_ranges_uniform(hist, min, max);
|
gsl_histogram_set_ranges_uniform(hist, min, max);
|
||||||
|
|
||||||
@ -86,7 +86,7 @@ int main(int argc, char** argv) {
|
|||||||
pdf.function = &landau_pdf;
|
pdf.function = &landau_pdf;
|
||||||
pdf.params = NULL;
|
pdf.params = NULL;
|
||||||
|
|
||||||
double mode_e = numeric_mode(min, max, &pdf);
|
double mode_e = numeric_mode(min, max, &pdf, 1);
|
||||||
uncert mode_o = bootstrap_mode(r, sample, samples, 100);
|
uncert mode_o = bootstrap_mode(r, sample, samples, 100);
|
||||||
|
|
||||||
// print the results
|
// print the results
|
||||||
@ -94,6 +94,7 @@ int main(int argc, char** argv) {
|
|||||||
fprintf(stderr, "expected mode: %.7f\n", mode_e);
|
fprintf(stderr, "expected mode: %.7f\n", mode_e);
|
||||||
fprintf(stderr, "observed mode: %.4f±%.4f\n", mode_o.n, mode_o.s);
|
fprintf(stderr, "observed mode: %.4f±%.4f\n", mode_o.n, mode_o.s);
|
||||||
|
|
||||||
|
// t-test
|
||||||
double t = fabs(mode_e - mode_o.n)/mode_o.s;
|
double t = fabs(mode_e - mode_o.n)/mode_o.s;
|
||||||
double p = 1 - erf(t/sqrt(2));
|
double p = 1 - erf(t/sqrt(2));
|
||||||
fprintf(stderr, "\n## t-test\n");
|
fprintf(stderr, "\n## t-test\n");
|
||||||
@ -112,16 +113,17 @@ int main(int argc, char** argv) {
|
|||||||
*/
|
*/
|
||||||
fprintf(stderr, "\n\n# FWHM comparison\n");
|
fprintf(stderr, "\n\n# FWHM comparison\n");
|
||||||
|
|
||||||
double fwhm_e = numeric_fwhm(min, max, &pdf);
|
double fwhm_e = numeric_fwhm(min, max, &pdf, 1);
|
||||||
//uncert fwhm_o = bootstrap_fwhm(r, sample, samples, 100);
|
uncert fwhm_o = bootstrap_fwhm(r, min, max, sample, samples, 100);
|
||||||
|
|
||||||
// print the results
|
// print the results
|
||||||
fprintf(stderr, "\n## Results\n");
|
fprintf(stderr, "\n## Results\n");
|
||||||
fprintf(stderr, "expected fwhm: %.7f\n", fwhm_e);
|
fprintf(stderr, "expected fwhm: %.7f\n", fwhm_e);
|
||||||
//fprintf(stderr, "observed fwhm: %.4f±%.4f\n", fwhm_o.n, fwhm_o.s);
|
fprintf(stderr, "observed fwhm: %.4f±%.4f\n", fwhm_o.n, fwhm_o.s);
|
||||||
|
|
||||||
//t = fabs(fwhm_e - fwhm_o.n)/fwhm_o.s;
|
// t-test
|
||||||
//p = 1 - erf(t/sqrt(2));
|
t = fabs(fwhm_e - fwhm_o.n)/fwhm_o.s;
|
||||||
|
p = 1 - erf(t/sqrt(2));
|
||||||
fprintf(stderr, "\n## t-test\n");
|
fprintf(stderr, "\n## t-test\n");
|
||||||
fprintf(stderr, "t=%.3f\n", t);
|
fprintf(stderr, "t=%.3f\n", t);
|
||||||
fprintf(stderr, "p=%.3f\n", p);
|
fprintf(stderr, "p=%.3f\n", p);
|
||||||
@ -141,6 +143,7 @@ int main(int argc, char** argv) {
|
|||||||
fprintf(stderr, "expected median: %.7f\n", med_e);
|
fprintf(stderr, "expected median: %.7f\n", med_e);
|
||||||
fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
|
fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
|
||||||
|
|
||||||
|
// t-test
|
||||||
t = fabs(med_e - med_o.n)/med_o.s;
|
t = fabs(med_e - med_o.n)/med_o.s;
|
||||||
p = 1 - erf(t/sqrt(2));
|
p = 1 - erf(t/sqrt(2));
|
||||||
fprintf(stderr, "\n## t-test\n");
|
fprintf(stderr, "\n## t-test\n");
|
||||||
|
43
ex-1/tests.c
43
ex-1/tests.c
@ -74,11 +74,14 @@ double negate_func(double x, void * fp) {
|
|||||||
|
|
||||||
/* Numerically computes the mode of a Landau
|
/* Numerically computes the mode of a Landau
|
||||||
* distribution by maximising the derivative.
|
* distribution by maximising the derivative.
|
||||||
* The min,max parameters are the initial search
|
* The `min,max` parameters are the initial search
|
||||||
* interval for the optimisation.
|
* interval for the optimisation.
|
||||||
|
*
|
||||||
|
* If `err` is true print the estimate error.
|
||||||
*/
|
*/
|
||||||
double numeric_mode(double min, double max,
|
double numeric_mode(double min, double max,
|
||||||
gsl_function *pdf) {
|
gsl_function *pdf,
|
||||||
|
int err) {
|
||||||
|
|
||||||
/* Negate the PDF to maximise it by
|
/* Negate the PDF to maximise it by
|
||||||
* using a GSL minimisation method.
|
* using a GSL minimisation method.
|
||||||
@ -89,22 +92,22 @@ double numeric_mode(double min, double max,
|
|||||||
npdf.params = pdf;
|
npdf.params = pdf;
|
||||||
|
|
||||||
// initialize minimization
|
// initialize minimization
|
||||||
double guess = 0;
|
double x = 0;
|
||||||
int iter = 0;
|
int iter = 0;
|
||||||
int max_iter = 100;
|
int max_iter = 100;
|
||||||
double prec = 1e-7;
|
double prec = 1e-7;
|
||||||
int status;
|
int status;
|
||||||
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_brent;
|
const gsl_min_fminimizer_type *T = gsl_min_fminimizer_brent;
|
||||||
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc(T);
|
gsl_min_fminimizer *s = gsl_min_fminimizer_alloc(T);
|
||||||
gsl_min_fminimizer_set(s, &npdf, guess, min, max);
|
gsl_min_fminimizer_set(s, &npdf, x, min, max);
|
||||||
|
|
||||||
// minimisation
|
// minimisation
|
||||||
do {
|
do {
|
||||||
iter++;
|
iter++;
|
||||||
status = gsl_min_fminimizer_iterate(s);
|
status = gsl_min_fminimizer_iterate(s);
|
||||||
guess = gsl_min_fminimizer_x_minimum(s);
|
x = gsl_min_fminimizer_x_minimum(s);
|
||||||
min = gsl_min_fminimizer_x_lower(s);
|
min = gsl_min_fminimizer_x_lower(s);
|
||||||
max = gsl_min_fminimizer_x_upper(s);
|
max = gsl_min_fminimizer_x_upper(s);
|
||||||
status = gsl_min_test_interval(min, max, prec, prec);
|
status = gsl_min_test_interval(min, max, prec, prec);
|
||||||
|
|
||||||
} while (status == GSL_CONTINUE && iter < max_iter);
|
} while (status == GSL_CONTINUE && iter < max_iter);
|
||||||
@ -112,11 +115,12 @@ double numeric_mode(double min, double max,
|
|||||||
/* The error is simply given by the width of
|
/* The error is simply given by the width of
|
||||||
* the final interval containing the solution
|
* the final interval containing the solution
|
||||||
*/
|
*/
|
||||||
fprintf(stderr, "mode error: %.3g\n", max - min);
|
if (err)
|
||||||
|
fprintf(stderr, "mode error: %.3g\n", max - min);
|
||||||
|
|
||||||
// free memory
|
// free memory
|
||||||
gsl_min_fminimizer_free(s);
|
gsl_min_fminimizer_free(s);
|
||||||
return guess;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -154,13 +158,16 @@ double fwhm_equation(double x, void* params) {
|
|||||||
* `fwhm_equation`. Two searches are performed in
|
* `fwhm_equation`. Two searches are performed in
|
||||||
* [min, mode] and [mode, max] that will give the
|
* [min, mode] and [mode, max] that will give the
|
||||||
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
||||||
|
*
|
||||||
|
* If `err` is true print the estimate error.
|
||||||
*/
|
*/
|
||||||
double numeric_fwhm(double min, double max,
|
double numeric_fwhm(double min, double max,
|
||||||
gsl_function *pdf) {
|
gsl_function *pdf,
|
||||||
|
int err) {
|
||||||
/* Create the gls_function structure
|
/* Create the gls_function structure
|
||||||
* for the equation to be solved.
|
* for the equation to be solved.
|
||||||
*/
|
*/
|
||||||
double mode = numeric_mode(min, max, pdf);
|
double mode = numeric_mode(min, max, pdf, 0);
|
||||||
struct fwhm_params p = {
|
struct fwhm_params p = {
|
||||||
pdf->function(mode, pdf->params)/2,
|
pdf->function(mode, pdf->params)/2,
|
||||||
pdf
|
pdf
|
||||||
@ -169,8 +176,8 @@ double numeric_fwhm(double min, double max,
|
|||||||
equation.function = &fwhm_equation;
|
equation.function = &fwhm_equation;
|
||||||
equation.params = &p;
|
equation.params = &p;
|
||||||
|
|
||||||
const gsl_root_fsolver_type * T = gsl_root_fsolver_brent;
|
const gsl_root_fsolver_type *T = gsl_root_fsolver_brent;
|
||||||
gsl_root_fsolver * s = gsl_root_fsolver_alloc(T);
|
gsl_root_fsolver *s = gsl_root_fsolver_alloc(T);
|
||||||
|
|
||||||
// initialize minimization for x₋
|
// initialize minimization for x₋
|
||||||
double x, fmin, fmax;
|
double x, fmin, fmax;
|
||||||
@ -189,6 +196,7 @@ double numeric_fwhm(double min, double max,
|
|||||||
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
||||||
}
|
}
|
||||||
double x_low = x;
|
double x_low = x;
|
||||||
|
double err_low = fmax - fmin;
|
||||||
|
|
||||||
// initialize minimization for x₊
|
// initialize minimization for x₊
|
||||||
gsl_root_fsolver_set(s, &equation, mode, max);
|
gsl_root_fsolver_set(s, &equation, mode, max);
|
||||||
@ -203,6 +211,11 @@ double numeric_fwhm(double min, double max,
|
|||||||
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
status = gsl_min_test_interval(fmin, fmax, 0, prec);
|
||||||
}
|
}
|
||||||
double x_upp = x;
|
double x_upp = x;
|
||||||
|
double err_upp = fmax - fmin;
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
fprintf(stderr, "fhwm error: %g\n",
|
||||||
|
sqrt(err_low*err_low + err_upp*err_upp));
|
||||||
|
|
||||||
// free memory
|
// free memory
|
||||||
gsl_root_fsolver_free(s);
|
gsl_root_fsolver_free(s);
|
||||||
|
12
ex-1/tests.h
12
ex-1/tests.h
@ -15,11 +15,14 @@ double kolmogorov_cdf(double D, int n);
|
|||||||
|
|
||||||
/* Numerically computes the mode of a Landau
|
/* Numerically computes the mode of a Landau
|
||||||
* distribution by maximising the derivative.
|
* distribution by maximising the derivative.
|
||||||
* The min,max parameters are the initial search
|
* The `min,max` parameters are the initial search
|
||||||
* interval for the optimisation.
|
* interval for the optimisation.
|
||||||
|
*
|
||||||
|
* If `err` is true print the estimate error.
|
||||||
*/
|
*/
|
||||||
double numeric_mode(double min, double max,
|
double numeric_mode(double min, double max,
|
||||||
gsl_function *pdf);
|
gsl_function *pdf,
|
||||||
|
int err);
|
||||||
|
|
||||||
|
|
||||||
/* Numerically computes the FWHM of a PDF
|
/* Numerically computes the FWHM of a PDF
|
||||||
@ -29,6 +32,9 @@ double numeric_mode(double min, double max,
|
|||||||
* `fwhm_equation`. Two searches are performed in
|
* `fwhm_equation`. Two searches are performed in
|
||||||
* [min, mode] and [mode, max] that will give the
|
* [min, mode] and [mode, max] that will give the
|
||||||
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
* two solutions x₋, x₊. The FWHM is then x₊-x₋.
|
||||||
|
*
|
||||||
|
* If `err` is true print the estimate error.
|
||||||
*/
|
*/
|
||||||
double numeric_fwhm(double min, double max,
|
double numeric_fwhm(double min, double max,
|
||||||
gsl_function *pdf);
|
gsl_function *pdf,
|
||||||
|
int err);
|
||||||
|
Loading…
Reference in New Issue
Block a user