ex-1: add Hill estimator bootstrap
This commit is contained in:
parent
ada2cdf03c
commit
ec0af36d77
@ -41,7 +41,8 @@ size_t mean_index(gsl_vector *v, double x) {
|
||||
|
||||
|
||||
/* Computes the half-sample mode (also called the Robertson-Cryer
|
||||
* mode estimator) of the sample `x` containing `n` observations.
|
||||
* mode estimator) of the sample `x` containing `n` observations.
|
||||
* Important: the sample is assumed to be sorted.
|
||||
*
|
||||
* It is based on repeatedly finding the half modal interval (smallest
|
||||
* interval containing half of the observations) of the sample.
|
||||
@ -103,6 +104,26 @@ double hsm(double *x, size_t n) {
|
||||
}
|
||||
|
||||
|
||||
/* Computes the Hill estimator γ_k of the right
|
||||
* tail-index of a distribution from a sample `x`
|
||||
* of `n` observations by using the last k outliers.
|
||||
*
|
||||
* k is computed from t as k = n^`t`, so it is
|
||||
* expected that t ∈ (0,1).
|
||||
*
|
||||
* Important: the sample is assumed to be sorted.
|
||||
*/
|
||||
double hill_estimator(double *x, double t, size_t n) {
|
||||
double k = pow(n, t);
|
||||
double sum = 0;
|
||||
for (size_t i = n - k; i < n; i++) {
|
||||
sum += log(x[i]) - log(x[n - (size_t)round(k)]);
|
||||
}
|
||||
|
||||
return k / sum;
|
||||
}
|
||||
|
||||
|
||||
/* Parameters needed to compute the
|
||||
* KDE function of a sample.
|
||||
*/
|
||||
@ -150,7 +171,7 @@ double gauss_kde(double x, void * params) {
|
||||
uncert bootstrap_median(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots) {
|
||||
size_t boots) {
|
||||
|
||||
/* We use a running statistics to not
|
||||
* store the full resampled array.
|
||||
@ -196,7 +217,7 @@ uncert bootstrap_median(
|
||||
uncert bootstrap_mode(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots) {
|
||||
size_t boots) {
|
||||
|
||||
double *values = calloc(boots, sizeof(double));
|
||||
double *boot = calloc(n, sizeof(double));
|
||||
@ -243,7 +264,7 @@ uncert bootstrap_fwhm(
|
||||
const gsl_rng *r,
|
||||
double min, double max,
|
||||
double *sample, size_t n,
|
||||
int boots) {
|
||||
size_t boots) {
|
||||
/* We use a running statistics to compute
|
||||
* a trimmed mean/variance of the sample.
|
||||
*/
|
||||
@ -301,3 +322,49 @@ uncert bootstrap_fwhm(
|
||||
|
||||
return fwhm;
|
||||
}
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic right
|
||||
* tail-index and its standard deviation by bootstrapping
|
||||
* (ie repeated resampling) the original `sample`, `boots`
|
||||
* times. The tail-index is estimated by the Hill estimator,
|
||||
* hence the name of the function. See `hill_estimator()`
|
||||
* for the parameter `t` meaning.
|
||||
*
|
||||
* The functions returns an `uncert` pair of mean and
|
||||
* stddev of the modes computed on each sample.
|
||||
*/
|
||||
uncert bootstrap_hill(
|
||||
const gsl_rng *r,
|
||||
double t,
|
||||
double *sample, size_t n,
|
||||
size_t boots) {
|
||||
|
||||
double *values = calloc(boots, sizeof(double));
|
||||
double *boot = calloc(n, sizeof(double));
|
||||
|
||||
for (size_t i = 0; i < boots; i++) {
|
||||
/* The sampling is simply done by generating
|
||||
* an array index uniformely in [0, n-1].
|
||||
*/
|
||||
for (size_t j = 0; j < n; j++) {
|
||||
size_t choice = gsl_rng_uniform_int(r, n);
|
||||
boot[j] = sample[choice];
|
||||
}
|
||||
qsort(boot, n, sizeof(double), cmp_double);
|
||||
values[i] = hill_estimator(boot, t, n);
|
||||
}
|
||||
|
||||
/* Compute mean and stddev of the modes
|
||||
* of each newly bootstrapped sample.
|
||||
*/
|
||||
uncert index;
|
||||
index.n = gsl_stats_mean(values, 1, boots);
|
||||
index.s = gsl_stats_sd(values, 1, boots);
|
||||
|
||||
// free memory
|
||||
free(values);
|
||||
free(boot);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ int cmp_double (const void *xp, const void *yp);
|
||||
uncert bootstrap_median(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots);
|
||||
size_t boots);
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic mode
|
||||
@ -44,7 +44,7 @@ uncert bootstrap_median(
|
||||
uncert bootstrap_mode(
|
||||
const gsl_rng *r,
|
||||
double *sample, size_t n,
|
||||
int boots);
|
||||
size_t boots);
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic fwhm
|
||||
@ -62,4 +62,21 @@ uncert bootstrap_fwhm(
|
||||
const gsl_rng *r,
|
||||
double min, double max,
|
||||
double *sample, size_t n,
|
||||
int boots);
|
||||
size_t boots);
|
||||
|
||||
|
||||
/* Computes an approximation to the asymptotic right
|
||||
* tail-index and its standard deviation by bootstrapping
|
||||
* (ie repeated resampling) the original `sample`, `boots`
|
||||
* times. The tail-index is estimated by the Hill estimator,
|
||||
* hence the name of the function. See `hill_estimator()`
|
||||
* for the parameter `t` meaning.
|
||||
*
|
||||
* The functions returns an `uncert` pair of mean and
|
||||
* stddev of the modes computed on each sample.
|
||||
*/
|
||||
uncert bootstrap_hill(
|
||||
const gsl_rng *r,
|
||||
double t,
|
||||
double *sample, size_t n,
|
||||
size_t boots);
|
||||
|
Loading…
Reference in New Issue
Block a user