ex-1: implement bootstrapping for the median
This commit is contained in:
parent
720ecd4742
commit
56fcdd5fbb
47
ex-1/bootstrap.c
Normal file
47
ex-1/bootstrap.c
Normal file
@ -0,0 +1,47 @@
|
||||
#include <stdlib.h>
|
||||
#include <gsl/gsl_rstat.h>
|
||||
#include <gsl/gsl_statistics_double.h>
|
||||
|
||||
#include "bootstrap.h"
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic median
|
||||
* and its standard deviation by bootstrapping (ie
|
||||
* repeated resampling) the original `sample`, `boots`
|
||||
* times.
|
||||
*
|
||||
* The functions returns an `uncert` pair of mean and
|
||||
* stdev of the medians computed on each sample.
|
||||
*/
|
||||
uncert bootstrap_median(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots) {
|
||||
|
||||
/* We use a running statistics to not
|
||||
* store the full resampled array.
|
||||
*/
|
||||
gsl_rstat_workspace* w = gsl_rstat_alloc();
|
||||
|
||||
double *values = calloc(boots, sizeof(double));
|
||||
|
||||
for (size_t i = 0; i < boots; i++) {
|
||||
/* The sampling is simply done by generating
|
||||
* an array index uniformely in [0, n-1].
|
||||
*/
|
||||
for (size_t j = 0; j < n; j++) {
|
||||
size_t choice = gsl_rng_uniform_int(r, n);
|
||||
gsl_rstat_add(sample[choice], w);
|
||||
}
|
||||
values[i] = gsl_rstat_median(w);
|
||||
}
|
||||
|
||||
/* Compute mean and stdev of the medians
|
||||
* of each newly bootstrapped sample.
|
||||
*/
|
||||
uncert median;
|
||||
median.n = gsl_stats_mean(values, 1, boots);
|
||||
median.s = gsl_stats_sd(values, 1, boots);
|
||||
|
||||
return median;
|
||||
}
|
24
ex-1/bootstrap.h
Normal file
24
ex-1/bootstrap.h
Normal file
@ -0,0 +1,24 @@
|
||||
#include <gsl/gsl_rng.h>
|
||||
|
||||
|
||||
/* A pair structure that represent
|
||||
* a value with an uncertainty
|
||||
*/
|
||||
typedef struct {
|
||||
double n; // nominal value
|
||||
double s; // uncertainty
|
||||
} uncert;
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic median
|
||||
* and its standard deviation by bootstrapping (ie
|
||||
* repeated resampling) the original `sample`, `boots`
|
||||
* times.
|
||||
*
|
||||
* The functions returns an `uncert` pair of mean and
|
||||
* stdev of the medians computed on each sample.
|
||||
*/
|
||||
uncert bootstrap_median(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots);
|
18
ex-1/main.c
18
ex-1/main.c
@ -7,6 +7,7 @@
|
||||
|
||||
#include "landau.h"
|
||||
#include "tests.h"
|
||||
#include "bootstrap.h"
|
||||
|
||||
|
||||
/* Function that compare doubles for sorting:
|
||||
@ -149,20 +150,23 @@ int main(int argc, char** argv) {
|
||||
|
||||
/* Median comparison
|
||||
*
|
||||
* Compute the median of the sample
|
||||
* and compare it with QDF(1/2).
|
||||
* Compute the median of the sample by bootstrapping
|
||||
* it and comparing it with the QDF(1/2).
|
||||
*/
|
||||
fprintf(stderr, "\n\n# Median comparison\n");
|
||||
double med_e = landau_qdf(0.5);
|
||||
double med_o = gsl_stats_median_from_sorted_data(
|
||||
sample, // sorted data
|
||||
1, // array stride
|
||||
samples); // number of elements
|
||||
uncert med_o = bootstrap_median(r, sample, samples, 100);
|
||||
|
||||
// print the results
|
||||
fprintf(stderr, "\n## Results\n");
|
||||
fprintf(stderr, "expected median: %.7f\n", med_e);
|
||||
fprintf(stderr, "observed median: %.7f\n", med_o);
|
||||
fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
|
||||
|
||||
double t = (med_e - med_o.n)/med_o.s;
|
||||
double p = 1 - erf(t/sqrt(2));
|
||||
fprintf(stderr, "\n## t-test\n");
|
||||
fprintf(stderr, "t=%.3f\n", t);
|
||||
fprintf(stderr, "p=%.3f\n", p);
|
||||
|
||||
|
||||
// clean up and exit
|
||||
|
Loading…
Reference in New Issue
Block a user