ex-1: implement bootstrapping for the median

This commit is contained in:
Michele Guerini Rocco 2020-04-03 01:16:47 +00:00
parent 720ecd4742
commit 56fcdd5fbb
4 changed files with 84 additions and 9 deletions

47
ex-1/bootstrap.c Normal file
View File

@ -0,0 +1,47 @@
#include <stdlib.h>
#include <gsl/gsl_rstat.h>
#include <gsl/gsl_statistics_double.h>
#include "bootstrap.h"
/* Computes an approximation to the asymptotic median
* and its standard deviation by bootstrapping (ie
* repeated resampling) the original `sample`, `boots`
* times.
*
* The functions returns an `uncert` pair of mean and
* stdev of the medians computed on each sample.
*/
uncert bootstrap_median(
const gsl_rng *r,
double *sample, size_t n,
int boots) {
/* We use a running statistics to not
* store the full resampled array.
*/
gsl_rstat_workspace* w = gsl_rstat_alloc();
double *values = calloc(boots, sizeof(double));
for (size_t i = 0; i < boots; i++) {
/* The sampling is simply done by generating
* an array index uniformely in [0, n-1].
*/
for (size_t j = 0; j < n; j++) {
size_t choice = gsl_rng_uniform_int(r, n);
gsl_rstat_add(sample[choice], w);
}
values[i] = gsl_rstat_median(w);
}
/* Compute mean and stdev of the medians
* of each newly bootstrapped sample.
*/
uncert median;
median.n = gsl_stats_mean(values, 1, boots);
median.s = gsl_stats_sd(values, 1, boots);
return median;
}

24
ex-1/bootstrap.h Normal file
View File

@ -0,0 +1,24 @@
#include <gsl/gsl_rng.h>
/* A pair structure that represent
* a value with an uncertainty
*/
typedef struct {
double n; // nominal value
double s; // uncertainty
} uncert;
/* Computes an approximation to the asymptotic median
* and its standard deviation by bootstrapping (ie
* repeated resampling) the original `sample`, `boots`
* times.
*
* The functions returns an `uncert` pair of mean and
* stdev of the medians computed on each sample.
*/
uncert bootstrap_median(
const gsl_rng *r,
double *sample, size_t n,
int boots);

View File

@ -7,6 +7,7 @@
#include "landau.h" #include "landau.h"
#include "tests.h" #include "tests.h"
#include "bootstrap.h"
/* Function that compare doubles for sorting: /* Function that compare doubles for sorting:
@ -149,21 +150,24 @@ int main(int argc, char** argv) {
/* Median comparison /* Median comparison
* *
* Compute the median of the sample * Compute the median of the sample by bootstrapping
* and compare it with QDF(1/2). * it and comparing it with the QDF(1/2).
*/ */
fprintf(stderr, "\n\n# Median comparison\n"); fprintf(stderr, "\n\n# Median comparison\n");
double med_e = landau_qdf(0.5); double med_e = landau_qdf(0.5);
double med_o = gsl_stats_median_from_sorted_data( uncert med_o = bootstrap_median(r, sample, samples, 100);
sample, // sorted data
1, // array stride
samples); // number of elements
// print the results // print the results
fprintf(stderr, "\n## Results\n"); fprintf(stderr, "\n## Results\n");
fprintf(stderr, "expected median: %.7f\n", med_e); fprintf(stderr, "expected median: %.7f\n", med_e);
fprintf(stderr, "observed median: %.7f\n", med_o); fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
double t = (med_e - med_o.n)/med_o.s;
double p = 1 - erf(t/sqrt(2));
fprintf(stderr, "\n## t-test\n");
fprintf(stderr, "t=%.3f\n", t);
fprintf(stderr, "p=%.3f\n", p);
// clean up and exit // clean up and exit
gsl_histogram_free(hist); gsl_histogram_free(hist);

View File

@ -6,7 +6,7 @@ CCOMPILE = \
mkdir -p $(@D); \ mkdir -p $(@D); \
$(CC) $(CFLAGS) $^ -o $@ $(CC) $(CFLAGS) $^ -o $@
ex-1/bin/main: ex-1/main.c ex-1/landau.c ex-1/tests.c ex-1/bin/main: ex-1/main.c ex-1/landau.c ex-1/tests.c ex-1/bootstrap.c
$(CCOMPILE) $(CCOMPILE)
ex-1/bin/pdf: ex-1/pdf.c ex-1/bin/pdf: ex-1/pdf.c
$(CCOMPILE) $(CCOMPILE)