ex-1: implement bootstrapping for the median
This commit is contained in:
parent
720ecd4742
commit
56fcdd5fbb
47
ex-1/bootstrap.c
Normal file
47
ex-1/bootstrap.c
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#include "bootstrap.h"

#include <math.h>
#include <stdlib.h>

#include <gsl/gsl_rstat.h>
#include <gsl/gsl_statistics_double.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Computes an approximation to the asymptotic median
|
||||||
|
* and its standard deviation by bootstrapping (ie
|
||||||
|
* repeated resampling) the original `sample`, `boots`
|
||||||
|
* times.
|
||||||
|
*
|
||||||
|
* The functions returns an `uncert` pair of mean and
|
||||||
|
* stdev of the medians computed on each sample.
|
||||||
|
*/
|
||||||
|
uncert bootstrap_median(
|
||||||
|
const gsl_rng *r,
|
||||||
|
double *sample, size_t n,
|
||||||
|
int boots) {
|
||||||
|
|
||||||
|
/* We use a running statistics to not
|
||||||
|
* store the full resampled array.
|
||||||
|
*/
|
||||||
|
gsl_rstat_workspace* w = gsl_rstat_alloc();
|
||||||
|
|
||||||
|
double *values = calloc(boots, sizeof(double));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < boots; i++) {
|
||||||
|
/* The sampling is simply done by generating
|
||||||
|
* an array index uniformely in [0, n-1].
|
||||||
|
*/
|
||||||
|
for (size_t j = 0; j < n; j++) {
|
||||||
|
size_t choice = gsl_rng_uniform_int(r, n);
|
||||||
|
gsl_rstat_add(sample[choice], w);
|
||||||
|
}
|
||||||
|
values[i] = gsl_rstat_median(w);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute mean and stdev of the medians
|
||||||
|
* of each newly bootstrapped sample.
|
||||||
|
*/
|
||||||
|
uncert median;
|
||||||
|
median.n = gsl_stats_mean(values, 1, boots);
|
||||||
|
median.s = gsl_stats_sd(values, 1, boots);
|
||||||
|
|
||||||
|
return median;
|
||||||
|
}
|
24
ex-1/bootstrap.h
Normal file
24
ex-1/bootstrap.h
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#ifndef BOOTSTRAP_H
#define BOOTSTRAP_H

/* The include guard is required: `uncert` is a typedef of an
 * anonymous struct, so including this header twice in the same
 * translation unit would otherwise be a redefinition error.
 */

#include <stddef.h>

#include <gsl/gsl_rng.h>


/* A pair structure that represents
 * a value with an uncertainty.
 */
typedef struct {
  double n; // nominal value
  double s; // uncertainty
} uncert;


/* Computes an approximation to the asymptotic median
 * and its standard deviation by bootstrapping (ie
 * repeated resampling) the original `sample`, `boots`
 * times.
 *
 * `r` is the random number generator used for the
 * resampling, `n` is the number of elements of `sample`.
 *
 * The function returns an `uncert` pair of mean and
 * stdev of the medians computed on each sample.
 */
uncert bootstrap_median(
  const gsl_rng *r,
  double *sample, size_t n,
  int boots);

#endif /* BOOTSTRAP_H */
|
20
ex-1/main.c
20
ex-1/main.c
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "landau.h"
|
#include "landau.h"
|
||||||
#include "tests.h"
|
#include "tests.h"
|
||||||
|
#include "bootstrap.h"
|
||||||
|
|
||||||
|
|
||||||
/* Function that compare doubles for sorting:
|
/* Function that compare doubles for sorting:
|
||||||
@ -149,21 +150,24 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
/* Median comparison
|
/* Median comparison
|
||||||
*
|
*
|
||||||
* Compute the median of the sample
|
* Compute the median of the sample by bootstrapping
|
||||||
* and compare it with QDF(1/2).
|
* it and compare it with the QDF(1/2).
|
||||||
*/
|
*/
|
||||||
fprintf(stderr, "\n\n# Median comparison\n");
|
fprintf(stderr, "\n\n# Median comparison\n");
|
||||||
double med_e = landau_qdf(0.5);
|
double med_e = landau_qdf(0.5);
|
||||||
double med_o = gsl_stats_median_from_sorted_data(
|
uncert med_o = bootstrap_median(r, sample, samples, 100);
|
||||||
sample, // sorted data
|
|
||||||
1, // array stride
|
|
||||||
samples); // number of elements
|
|
||||||
|
|
||||||
// print the results
|
// print the results
|
||||||
fprintf(stderr, "\n## Results\n");
|
fprintf(stderr, "\n## Results\n");
|
||||||
fprintf(stderr, "expected median: %.7f\n", med_e);
|
fprintf(stderr, "expected median: %.7f\n", med_e);
|
||||||
fprintf(stderr, "observed median: %.7f\n", med_o);
|
fprintf(stderr, "observed median: %.4f±%.4f\n", med_o.n, med_o.s);
|
||||||
|
|
||||||
|
double t = (med_e - med_o.n)/med_o.s;
|
||||||
|
double p = 1 - erf(t/sqrt(2));
|
||||||
|
fprintf(stderr, "\n## t-test\n");
|
||||||
|
fprintf(stderr, "t=%.3f\n", t);
|
||||||
|
fprintf(stderr, "p=%.3f\n", p);
|
||||||
|
|
||||||
|
|
||||||
// clean up and exit
|
// clean up and exit
|
||||||
gsl_histogram_free(hist);
|
gsl_histogram_free(hist);
|
||||||
|
2
makefile
2
makefile
@ -6,7 +6,7 @@ CCOMPILE = \
|
|||||||
mkdir -p $(@D); \
|
mkdir -p $(@D); \
|
||||||
$(CC) $(CFLAGS) $^ -o $@
|
$(CC) $(CFLAGS) $^ -o $@
|
||||||
|
|
||||||
ex-1/bin/main: ex-1/main.c ex-1/landau.c ex-1/tests.c
|
ex-1/bin/main: ex-1/main.c ex-1/landau.c ex-1/tests.c ex-1/bootstrap.c
|
||||||
$(CCOMPILE)
|
$(CCOMPILE)
|
||||||
ex-1/bin/pdf: ex-1/pdf.c
|
ex-1/bin/pdf: ex-1/pdf.c
|
||||||
$(CCOMPILE)
|
$(CCOMPILE)
|
||||||
|
Loading…
Reference in New Issue
Block a user