ex-1: add Hill estimator bootstrap

This commit is contained in:
Michele Guerini Rocco 2020-06-08 00:43:06 +02:00
parent ada2cdf03c
commit ec0af36d77
2 changed files with 91 additions and 7 deletions

View File

@ -41,7 +41,8 @@ size_t mean_index(gsl_vector *v, double x) {
/* Computes the half-sample mode (also called the Robertson-Cryer
* mode estimator) of the sample `x` containing `n` observations.
* mode estimator) of the sample `x` containing `n` observations.
* Important: the sample is assumed to be sorted.
*
* It is based on repeatedly finding the half modal interval (smallest
* interval containing half of the observations) of the sample.
@ -103,6 +104,26 @@ double hsm(double *x, size_t n) {
}
/* Computes the Hill estimator γ_k of the right
* tail-index of a distribution from a sample `x`
* of `n` observations by using the last k outliers.
*
* k is computed from t as k = n^`t`, so it is
* expected that t (0,1).
*
* Important: the sample is assumed to be sorted.
*/
double hill_estimator(double *x, double t, size_t n) {
double k = pow(n, t);
double sum = 0;
for (size_t i = n - k; i < n; i++) {
sum += log(x[i]) - log(x[n - (size_t)round(k)]);
}
return k / sum;
}
/* Parameters needed to compute the
* KDE function of a sample.
*/
@ -150,7 +171,7 @@ double gauss_kde(double x, void * params) {
uncert bootstrap_median(
const gsl_rng *r,
double *sample, size_t n,
int boots) {
size_t boots) {
/* We use a running statistics to not
* store the full resampled array.
@ -196,7 +217,7 @@ uncert bootstrap_median(
uncert bootstrap_mode(
const gsl_rng *r,
double *sample, size_t n,
int boots) {
size_t boots) {
double *values = calloc(boots, sizeof(double));
double *boot = calloc(n, sizeof(double));
@ -243,7 +264,7 @@ uncert bootstrap_fwhm(
const gsl_rng *r,
double min, double max,
double *sample, size_t n,
int boots) {
size_t boots) {
/* We use a running statistics to compute
* a trimmed mean/variance of the sample.
*/
@ -301,3 +322,49 @@ uncert bootstrap_fwhm(
return fwhm;
}
/* Computes an approximation to the asymptotic right
* tail-index and its standard deviation by bootstrapping
* (ie repeated resampling) the original `sample`, `boots`
* times. The tail-index is estimated by the Hill estimator,
* hence the name of the function. See `hill_estimator()`
* for the parameter `t` meaning.
*
* The functions returns an `uncert` pair of mean and
* stddev of the modes computed on each sample.
*/
uncert bootstrap_hill(
const gsl_rng *r,
double t,
double *sample, size_t n,
size_t boots) {
double *values = calloc(boots, sizeof(double));
double *boot = calloc(n, sizeof(double));
for (size_t i = 0; i < boots; i++) {
/* The sampling is simply done by generating
* an array index uniformely in [0, n-1].
*/
for (size_t j = 0; j < n; j++) {
size_t choice = gsl_rng_uniform_int(r, n);
boot[j] = sample[choice];
}
qsort(boot, n, sizeof(double), cmp_double);
values[i] = hill_estimator(boot, t, n);
}
/* Compute mean and stddev of the modes
* of each newly bootstrapped sample.
*/
uncert index;
index.n = gsl_stats_mean(values, 1, boots);
index.s = gsl_stats_sd(values, 1, boots);
// free memory
free(values);
free(boot);
return index;
}

View File

@ -30,7 +30,7 @@ int cmp_double (const void *xp, const void *yp);
uncert bootstrap_median(
const gsl_rng *r,
double *sample, size_t n,
int boots);
size_t boots);
/* Computes an approximation to the asymptotic mode
@ -44,7 +44,7 @@ uncert bootstrap_median(
uncert bootstrap_mode(
const gsl_rng *r,
double *sample, size_t n,
int boots);
size_t boots);
/* Computes an approximation to the asymptotic fwhm
@ -62,4 +62,21 @@ uncert bootstrap_fwhm(
const gsl_rng *r,
double min, double max,
double *sample, size_t n,
int boots);
size_t boots);
/* Computes an approximation to the asymptotic right
* tail-index and its standard deviation by bootstrapping
* (ie repeated resampling) the original `sample`, `boots`
* times. The tail-index is estimated by the Hill estimator,
* hence the name of the function. See `hill_estimator()`
* for the parameter `t` meaning.
*
* The functions returns an `uncert` pair of mean and
* stddev of the modes computed on each sample.
*/
uncert bootstrap_hill(
const gsl_rng *r,
double t,
double *sample, size_t n,
size_t boots);