ex-1: add Hill estimator bootstrap
This commit is contained in:
parent
ada2cdf03c
commit
ec0af36d77
@ -42,6 +42,7 @@ size_t mean_index(gsl_vector *v, double x) {
|
|||||||
|
|
||||||
/* Computes the half-sample mode (also called the Robertson-Cryer
|
/* Computes the half-sample mode (also called the Robertson-Cryer
|
||||||
* mode estimator) of the sample `x` containing `n` observations.
|
* mode estimator) of the sample `x` containing `n` observations.
|
||||||
|
* Important: the sample is assumed to be sorted.
|
||||||
*
|
*
|
||||||
* It is based on repeatedly finding the half modal interval (smallest
|
* It is based on repeatedly finding the half modal interval (smallest
|
||||||
* interval containing half of the observations) of the sample.
|
* interval containing half of the observations) of the sample.
|
||||||
@ -103,6 +104,26 @@ double hsm(double *x, size_t n) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Computes the Hill estimator γ_k of the right
|
||||||
|
* tail-index of a distribution from a sample `x`
|
||||||
|
* of `n` observations by using the last k outliers.
|
||||||
|
*
|
||||||
|
* k is computed from t as k = n^`t`, so it is
|
||||||
|
* expected that t ∈ (0,1).
|
||||||
|
*
|
||||||
|
* Important: the sample is assumed to be sorted.
|
||||||
|
*/
|
||||||
|
double hill_estimator(double *x, double t, size_t n) {
|
||||||
|
double k = pow(n, t);
|
||||||
|
double sum = 0;
|
||||||
|
for (size_t i = n - k; i < n; i++) {
|
||||||
|
sum += log(x[i]) - log(x[n - (size_t)round(k)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return k / sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Parameters needed to compute the
|
/* Parameters needed to compute the
|
||||||
* KDE function of a sample.
|
* KDE function of a sample.
|
||||||
*/
|
*/
|
||||||
@ -150,7 +171,7 @@ double gauss_kde(double x, void * params) {
|
|||||||
uncert bootstrap_median(
|
uncert bootstrap_median(
|
||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots) {
|
size_t boots) {
|
||||||
|
|
||||||
/* We use a running statistics to not
|
/* We use a running statistics to not
|
||||||
* store the full resampled array.
|
* store the full resampled array.
|
||||||
@ -196,7 +217,7 @@ uncert bootstrap_median(
|
|||||||
uncert bootstrap_mode(
|
uncert bootstrap_mode(
|
||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots) {
|
size_t boots) {
|
||||||
|
|
||||||
double *values = calloc(boots, sizeof(double));
|
double *values = calloc(boots, sizeof(double));
|
||||||
double *boot = calloc(n, sizeof(double));
|
double *boot = calloc(n, sizeof(double));
|
||||||
@ -243,7 +264,7 @@ uncert bootstrap_fwhm(
|
|||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double min, double max,
|
double min, double max,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots) {
|
size_t boots) {
|
||||||
/* We use a running statistics to compute
|
/* We use a running statistics to compute
|
||||||
* a trimmed mean/variance of the sample.
|
* a trimmed mean/variance of the sample.
|
||||||
*/
|
*/
|
||||||
@ -301,3 +322,49 @@ uncert bootstrap_fwhm(
|
|||||||
|
|
||||||
return fwhm;
|
return fwhm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Computes an approximation to the asymptotic right
|
||||||
|
* tail-index and its standard deviation by bootstrapping
|
||||||
|
* (ie repeated resampling) the original `sample`, `boots`
|
||||||
|
* times. The tail-index is estimated by the Hill estimator,
|
||||||
|
* hence the name of the function. See `hill_estimator()`
|
||||||
|
* for the parameter `t` meaning.
|
||||||
|
*
|
||||||
|
* The functions returns an `uncert` pair of mean and
|
||||||
|
* stddev of the modes computed on each sample.
|
||||||
|
*/
|
||||||
|
uncert bootstrap_hill(
|
||||||
|
const gsl_rng *r,
|
||||||
|
double t,
|
||||||
|
double *sample, size_t n,
|
||||||
|
size_t boots) {
|
||||||
|
|
||||||
|
double *values = calloc(boots, sizeof(double));
|
||||||
|
double *boot = calloc(n, sizeof(double));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < boots; i++) {
|
||||||
|
/* The sampling is simply done by generating
|
||||||
|
* an array index uniformely in [0, n-1].
|
||||||
|
*/
|
||||||
|
for (size_t j = 0; j < n; j++) {
|
||||||
|
size_t choice = gsl_rng_uniform_int(r, n);
|
||||||
|
boot[j] = sample[choice];
|
||||||
|
}
|
||||||
|
qsort(boot, n, sizeof(double), cmp_double);
|
||||||
|
values[i] = hill_estimator(boot, t, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute mean and stddev of the modes
|
||||||
|
* of each newly bootstrapped sample.
|
||||||
|
*/
|
||||||
|
uncert index;
|
||||||
|
index.n = gsl_stats_mean(values, 1, boots);
|
||||||
|
index.s = gsl_stats_sd(values, 1, boots);
|
||||||
|
|
||||||
|
// free memory
|
||||||
|
free(values);
|
||||||
|
free(boot);
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
@ -30,7 +30,7 @@ int cmp_double (const void *xp, const void *yp);
|
|||||||
uncert bootstrap_median(
|
uncert bootstrap_median(
|
||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots);
|
size_t boots);
|
||||||
|
|
||||||
|
|
||||||
/* Computes an approximation to the asymptotic mode
|
/* Computes an approximation to the asymptotic mode
|
||||||
@ -44,7 +44,7 @@ uncert bootstrap_median(
|
|||||||
uncert bootstrap_mode(
|
uncert bootstrap_mode(
|
||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots);
|
size_t boots);
|
||||||
|
|
||||||
|
|
||||||
/* Computes an approximation to the asymptotic fwhm
|
/* Computes an approximation to the asymptotic fwhm
|
||||||
@ -62,4 +62,21 @@ uncert bootstrap_fwhm(
|
|||||||
const gsl_rng *r,
|
const gsl_rng *r,
|
||||||
double min, double max,
|
double min, double max,
|
||||||
double *sample, size_t n,
|
double *sample, size_t n,
|
||||||
int boots);
|
size_t boots);
|
||||||
|
|
||||||
|
|
||||||
|
/* Computes an approximation to the asymptotic right
|
||||||
|
* tail-index and its standard deviation by bootstrapping
|
||||||
|
* (ie repeated resampling) the original `sample`, `boots`
|
||||||
|
* times. The tail-index is estimated by the Hill estimator,
|
||||||
|
* hence the name of the function. See `hill_estimator()`
|
||||||
|
* for the parameter `t` meaning.
|
||||||
|
*
|
||||||
|
* The functions returns an `uncert` pair of mean and
|
||||||
|
* stddev of the modes computed on each sample.
|
||||||
|
*/
|
||||||
|
uncert bootstrap_hill(
|
||||||
|
const gsl_rng *r,
|
||||||
|
double t,
|
||||||
|
double *sample, size_t n,
|
||||||
|
size_t boots);
|
||||||
|
Loading…
Reference in New Issue
Block a user