ex-1: add Hill estimator bootstrap

2020-06-08 00:43:06 +02:00 · 2020-06-08 00:43:06 +02:00 · ec0af36d77
commit ec0af36d77
parent ada2cdf03c
2 changed files with 91 additions and 7 deletions
--- a/ex-1/bootstrap.c
+++ b/ex-1/bootstrap.c
@ -41,7 +41,8 @@ size_t mean_index(gsl_vector *v, double x) {


 /* Computes the half-sample mode (also called the Robertson-Cryer
- * mode estimator) of the sample `x` containing `n` observations. 
+ * mode estimator) of the sample `x` containing `n` observations.
+ * Important: the sample is assumed to be sorted.
 *
 * It is based on repeatedly finding the half modal interval (smallest
 * interval containing half of the observations) of the sample.
@ -103,6 +104,26 @@ double hsm(double *x, size_t n) {
 }


+/* Computes the Hill estimator γ_k of the right
+ * tail-index of a distribution from a sample `x`
+ * of `n` observations by using the last k outliers.
+ *
+ * k is computed from t as k = n^`t`, so it is
+ * expected that t ∈ (0,1).
+ *
+ * Important: the sample is assumed to be sorted.
+ */
+double hill_estimator(double *x, double t, size_t n) {
+  double k = pow(n, t);
+  double sum = 0;
+  for (size_t i = n - k; i < n; i++) {
+    sum += log(x[i]) - log(x[n - (size_t)round(k)]);
+  }
+
+  return k / sum;
+}
+
+
 /* Parameters needed to compute the
 * KDE function of a sample.
 */
@ -150,7 +171,7 @@ double gauss_kde(double x, void * params) {
 uncert bootstrap_median(
    const gsl_rng *r,
    double *sample, size_t n,
-    int boots) {
+    size_t boots) {

  /* We use a running statistics to not 
   * store the full resampled array.
@ -196,7 +217,7 @@ uncert bootstrap_median(
 uncert bootstrap_mode(
    const gsl_rng *r,
    double *sample, size_t n,
-    int boots) {
+    size_t boots) {

  double *values = calloc(boots, sizeof(double));
  double *boot = calloc(n, sizeof(double));
@ -243,7 +264,7 @@ uncert bootstrap_fwhm(
    const gsl_rng *r,
    double min, double max,
    double *sample, size_t n,
-    int boots) {
+    size_t boots) {
  /* We use a running statistics to compute 
   * a trimmed mean/variance of the sample.
   */
@ -301,3 +322,49 @@ uncert bootstrap_fwhm(

  return fwhm;
 }
+
+
+/* Computes an approximation to the asymptotic right
+ * tail-index and its standard deviation by bootstrapping
+ * (ie repeated resampling) the original `sample`, `boots`
+ * times. The tail-index is estimated by the Hill estimator,
+ * hence the name of the function. See `hill_estimator()`
+ * for the parameter `t` meaning.
+ *
+ * The functions returns an `uncert` pair of mean and
+ * stddev of the modes computed on each sample.
+ */
+uncert bootstrap_hill(
+    const gsl_rng *r,
+    double t,
+    double *sample, size_t n,
+    size_t boots) {
+
+  double *values = calloc(boots, sizeof(double));
+  double *boot = calloc(n, sizeof(double));
+
+  for (size_t i = 0; i < boots; i++) {
+    /* The sampling is simply done by generating
+     * an array index uniformely in [0, n-1].
+     */
+    for (size_t j = 0; j < n; j++) {
+      size_t choice = gsl_rng_uniform_int(r, n);
+      boot[j] = sample[choice];
+    }
+    qsort(boot, n, sizeof(double), cmp_double);
+    values[i] = hill_estimator(boot, t, n);
+  }
+
+  /* Compute mean and stddev of the modes
+   * of each newly bootstrapped sample.
+   */
+  uncert index;
+  index.n = gsl_stats_mean(values, 1, boots);
+  index.s = gsl_stats_sd(values, 1, boots);
+
+  // free memory
+  free(values);
+  free(boot);
+
+  return index;
+}
--- a/ex-1/bootstrap.h
+++ b/ex-1/bootstrap.h
@ -30,7 +30,7 @@ int cmp_double (const void *xp, const void *yp);
 uncert bootstrap_median(
    const gsl_rng *r,
    double *sample, size_t n,
-    int boots);
+    size_t boots);


 /* Computes an approximation to the asymptotic mode
@ -44,7 +44,7 @@ uncert bootstrap_median(
 uncert bootstrap_mode(
    const gsl_rng *r,
    double *sample, size_t n,
-    int boots);
+    size_t boots);


 /* Computes an approximation to the asymptotic fwhm
@ -62,4 +62,21 @@ uncert bootstrap_fwhm(
    const gsl_rng *r,
    double min, double max,
    double *sample, size_t n,
-    int boots);
+    size_t boots);
+
+
+/* Computes an approximation to the asymptotic right
+ * tail-index and its standard deviation by bootstrapping
+ * (ie repeated resampling) the original `sample`, `boots`
+ * times. The tail-index is estimated by the Hill estimator,
+ * hence the name of the function. See `hill_estimator()`
+ * for the parameter `t` meaning.
+ *
+ * The functions returns an `uncert` pair of mean and
+ * stddev of the modes computed on each sample.
+ */
+uncert bootstrap_hill(
+    const gsl_rng *r,
+    double t,
+    double *sample, size_t n,
+    size_t boots);