initial commit

Michele Guerini Rocco 2020-03-06 02:24:32 +01:00
commit cf27610d53
69 changed files with 6867 additions and 0 deletions

4
.gitignore vendored Normal file

@ -0,0 +1,4 @@
bin/
*.pdf
.stignore
todo

365
ex-1/main.c Normal file

@ -0,0 +1,365 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_sum.h>
#include <gsl/gsl_roots.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_integration.h>
/* Function that compares doubles for sorting:
 *   x > y  →  1
 *   x == y →  0
 *   x < y  → -1
 */
int cmp_double (const void *xp, const void *yp) {
  double x = *(double*)xp,
         y = *(double*)yp;
  return (x > y) - (x < y);
}
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params) {
return gsl_ran_landau_pdf(x);
}
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params) {
// create the integrand
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
// set up the integration
double res, err;
size_t iter = 500;
gsl_integration_workspace* w =
gsl_integration_workspace_alloc(iter);
// clip values too small
if (x < -200) x = -200;
// We integrate the pdf in [x, +∞) instead
// of (-∞, x] to avoid a singularity and
// then return 1 - P.
gsl_integration_qagiu(
&pdf, // integrand
x, // lower bound
1e-10, 1e-6, // abs and rel error
iter, // max iteration
w, // "workspace"
&res, &err); // result, abs error
// free the memory
gsl_integration_workspace_free(w);
return 1 - res;
}
/* Another wrapper: this time it is needed by
* `landau_qdf` to solve the equation:
* `landau_cdf(x) = p0` for x
*/
double landau_cdf_root(double x, void* params) {
double p0 = *(double*)params;
return p0 - landau_cdf(x, NULL);
}
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0) {
// create function
gsl_function cdf;
cdf.function = &landau_cdf_root;
cdf.params = &p0;
// use the Brent method
gsl_root_fsolver* s =
gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
// search interval
double low = -1000, // lower value
upp = 100000; // upper value
gsl_root_fsolver_set(s, &cdf, low, upp);
// iterative search
size_t iter = 1000; // max iteration
int stat = GSL_CONTINUE;
for (size_t i=0; stat==GSL_CONTINUE && i<iter; i++) {
stat = gsl_root_fsolver_iterate(s);
low = gsl_root_fsolver_x_lower(s);
upp = gsl_root_fsolver_x_upper(s);
stat = gsl_root_test_interval(low, upp, 0, 0.01);
}
return gsl_root_fsolver_root(s);
}
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
  // calculate the first n_terms of the series
  // Σ_{k=1} exp(-(2k - 1)²π²/(8x²))
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=1; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)k - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n# Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %f\n", sum);
fprintf(stderr, "plain sum: %f\n", s->sum_plain);
fprintf(stderr, "err: %f\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a wrapper needed by `numeric_mode` because
* the minimization expects a function to be minimized and not
* maximized.
*/
double neg_landau_pdf(double x, void* params) {
return (-1) * gsl_ran_landau_pdf(x);
}
/* Compute numerically the mode of the Landau distribution.
*/
double numeric_mode (double min, double max) {
  // create function
gsl_function pdf;
pdf.function = &neg_landau_pdf;
pdf.params = NULL;
// initialize minimization
double guess = 0;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
// Free memory
gsl_min_fminimizer_free (s);
return guess;
}
/* This is the function to be minimized in `numeric_FWHM`.
*/
double abs_landau_pdf(double x, void* params_) {
double* params = ((double *) params_);
return fabs(gsl_ran_landau_pdf(x) - params[0]);
}
/* Compute numerically the FWHM of the Landau distribution.
*/
double numeric_fwhm (double min_lim, double mode, double max_lim) {
  // create function
gsl_function pdf;
pdf.function = &abs_landau_pdf;
double params [1];
params[0]= gsl_ran_landau_pdf(mode)/2;
pdf.params = params;
// initialize minimization for x₋
double guess = mode - 1;
double min, max;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_low = guess;
  // initialize minimization for x₊
  guess = mode + 1;
  iter = 0;
  gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_upp = guess;
// Free memory
gsl_min_fminimizer_free (s);
return x_upp - x_low;
}
/* Here we sample random numbers from the Landau
 * distribution, fill a histogram with them and then
 * check the correctness of the sampling with a
 * Kolmogorov-Smirnov test and by comparing the observed
 * mode and FWHM with the expected ones.
 */
int main(int argc, char** argv) {
// initialize an RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// prepare histogram
size_t samples = 100000;
double* sample = calloc(samples, sizeof(double));
size_t bins = 40;
double min = -20;
double max = 20;
gsl_histogram* hist = gsl_histogram_alloc(bins);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* sample points from the Landau distribution
* and fill the histogram
*/
double x;
for(size_t i=0; i<samples; i++) {
x = gsl_ran_landau(r);
sample[i] = x;
gsl_histogram_increment(hist, x);
}
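  /* Note (illustrative sketch): the loop above samples the Landau
   * distribution directly with gsl_ran_landau(). An equivalent, much
   * slower, inverse-transform version using the numerical quantile
   * landau_qdf() defined above would be:
   *
   *   for (size_t i = 0; i < samples; i++)
   *     sample[i] = landau_qdf(gsl_rng_uniform_pos(r));
   *
   * gsl_rng_uniform_pos() avoids u = 0, where the quantile diverges.
   */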
// sort the sample
qsort(sample, samples, sizeof(double), &cmp_double);
// calculate Kolmogorov-Smirnov D
double D = 0;
double d;
for(size_t i=0; i<samples; i++) {
d = fabs(landau_cdf(sample[i], NULL) - ((double)i+1)/samples);
if (d > D)
D = d;
}
// Kolmogorov-Smirnov test
double beta = kolmogorov_cdf(D, samples);
fprintf(stderr, "\n# K-S test results\n");
fprintf(stderr, "D: %f\n", D);
fprintf(stderr, "α: %g\n", 1 - beta);
// Mode comparison
//
// find the bin with the maximum number of events
double mode_o, maxbin = 0;
double f_mode_o = 0;
double m1, m2 = 0;
for(size_t i=0; i<bins; i++) {
m1 = hist->bin[i];
if (m1 > m2){
m2 = m1;
maxbin = (double)i;
f_mode_o = hist->bin[i];
}
}
fprintf(stderr, "\nstep:%.2f\n ", (max - min)/bins);
f_mode_o = f_mode_o/samples;
mode_o = min + (maxbin + 0.5)*(max - min)/bins;
// print the results
double mode_e = numeric_mode(min, max);
fprintf(stderr, "\n# Numeric mode\n");
fprintf(stderr, "expected mode: %.7f\n", mode_e);
fprintf(stderr, "observed mode: %.3f\n", mode_o);
// FWHM comparison
//
// find the bins x₋ and x₊
double half = f_mode_o*samples/2;
m2 = samples;
double x_low = 0;
double x_upp = 0;
double diff;
for(size_t i=0; i<maxbin; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_low = (double)i;
}
}
m2 = samples;
for(size_t i=maxbin; i<bins; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_upp = (double)i;
}
}
x_low = min + (x_low + 0.5)*(max - min)/bins;
x_upp = min + (x_upp + 0.5)*(max - min)/bins;
double fwhm_o = x_upp - x_low;
// print the results
fprintf(stderr, "\n# Numeric FWHM\n");
double fwhm_e = numeric_fwhm(min, mode_e, max);
fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e);
fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o);
// print the counts
// gsl_histogram_fprintf(stdout, hist, "%g", "%g");
// clean up and exit
gsl_histogram_free(hist);
gsl_rng_free(r);
free(sample);
return EXIT_SUCCESS;
}

9
ex-1/pdf-plot.py Executable file

@ -0,0 +1,9 @@
#!/usr/bin/env python
from pylab import *
import sys
x, y = loadtxt(sys.stdin, unpack=True)
title('Landau distribution', loc='right')
plot(x, y, color='#92182b')
show()

18
ex-1/pdf.c Normal file

@ -0,0 +1,18 @@
#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_randist.h>
int main(int argc, char** argv) {
size_t n = 1000;
double
left = -10,
right = +10,
dx = (right - left)/n;
double x = left;
for (size_t i = 0; i < n; i++) {
x += dx;
printf("%f %f\n", x, gsl_ran_landau_pdf(x));
}
return EXIT_SUCCESS;
}

10
ex-1/plot.py Executable file

@ -0,0 +1,10 @@
#!/usr/bin/env python
from pylab import *
import sys
a, b, f = loadtxt(sys.stdin, unpack=True)
title('Landau distribution sampling', loc='right')
hist(a, np.insert(b, 0, a[0]), weights=f/sum(f))
annotate(f'N={sum(f)}\nbins={len(f)}', (50, 0.2))
show()

511
ex-2/fancier.c Normal file

@ -0,0 +1,511 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <math.h>
#include <gmp.h>
/* The Euler-Mascheroni constant is here computed to
* arbitrary precision using GMP rational numbers
* with the formula:
*
* γ = A(N)/B(N) - C(N)/B(N)² - log(N)
*
* with:
*
 *   A(N) = Σ_(k = 1)^(k_max) (N^k/k!)² H(k)
 *   B(N) = Σ_(k = 0)^(k_max) (N^k/k!)²
 *   C(N) = 1/(4N) Σ_(k = 0)^(2N) ((2k)!)³ / ((k!)⁴ (16N)^(2k))
 *   H(k) = Σ_(j = 1)^(k) 1/j, the k-th harmonic number
*
* where N is computed from D as written below and k_max is
* currently 5*N, chosen in a completely arbitrary way.
*
* source: http://www.numberworld.org/y-cruncher/internals/formulas.html
*
*/
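/* Worked example (for orientation only): asking for D = 20 decimal
 * places gives N = floor(1/4 ⋅ ln(10) ⋅ 20) = 11, and the A, B series
 * below are summed up to k_max = 5N = 55 terms.
 */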
/* `mpq_dec_str(z, n)`
* returns the decimal representation of the
* rational z as a string, up to n digits.
*/
char* mpq_dec_str(mpq_t z, size_t mantissa_size) {
mpz_t num, den, quot, rem;
mpz_inits(num, den, quot, rem, NULL);
// calculate integral part
mpq_get_num(num, z);
mpq_get_den(den, z);
mpz_tdiv_qr(quot, rem, num, den);
// store the integral part
size_t integral_size = mpz_sizeinbase(quot, 10);
char* repr = calloc(integral_size + 1 + mantissa_size + 1, sizeof(char));
mpz_get_str(repr, 10, quot);
// add decimal point
repr[integral_size] = '.';
// calculate the mantissa
mpz_t cur;
mpz_init(cur);
size_t digit;
for (size_t i = 0; i < mantissa_size; i++) {
// calculate i-th digit as:
// digit = (remainder * 10) / denominator
    // new_remainder = (remainder * 10) % denominator
mpz_set(cur, rem);
mpz_mul_ui(cur, rem, 10);
mpz_tdiv_qr(quot, rem, cur, den);
// store digit in mantissa
digit = mpz_get_ui(quot);
sprintf(repr + integral_size + 1 + i, "%ld", digit);
}
// free memory
mpz_clears(num, den, quot, rem, cur, NULL);
return repr;
}
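/* For example, calling mpq_dec_str() on z = 1/3 with
 * mantissa_size = 5 returns the string "0.33333".
 */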
void mpq_fprintf(FILE *fp, size_t digits, char* fmt, ...) {
va_list args;
va_start(args, fmt);
char* str;
  char fmtc[3] = "%x";
for (char* p = fmt; *p; p++) {
if (*p != '%') {
fprintf(fp, "%c", *p);
continue;
}
switch (*++p) {
case 's':
str = va_arg(args, char*);
fputs(str, fp);
break;
case 'l':
if (*++p == 'd')
fprintf(fp, "%ld", va_arg(args, size_t));
break;
case 'q':
str = mpq_dec_str(*va_arg(args, mpq_t*), digits);
fputs(str, fp);
free(str);
break;
default:
fmtc[1] = *p;
gmp_fprintf(fp, fmtc, *va_arg(args, mpq_t*));
}
}
va_end(args);
}
/* `mpq_log2(z, D)`
* calculate log(2) to D decimal places
* using the series
*
* log(2) = Σ_{k=1} 1/(k 2^k)
*
* The error of the n-th partial sum is
* estimated as
*
* Δn = 1/(n(n+2) 2^(n-1)).
*
* So to fix the first D decimal places
* we evaluate up to n terms such that
*
* Δn < 1/10^D
*/
void mpq_log2(mpq_t rop, size_t digits) {
mpq_t sum, term;
mpq_inits(sum, term, NULL);
fprintf(stderr, "[mpq_log2] decimal digits: %ld\n", digits);
/* calculate n, the number of required terms */
size_t n;
mpz_t temp, prec, width; mpz_inits(temp, prec, width, NULL);
  // prec = 10^(digits + 2): two extra guard digits
  mpz_ui_pow_ui(prec, 10, digits + 2);
for (n = 1;; n++) {
// width = n*(n + 2)*2^(n - 1)
mpz_set_ui(width, n);
mpz_set_ui(temp, n);
mpz_add_ui(temp, temp, 2);
mpz_mul(width, width, temp);
mpz_sub_ui(temp, temp, 3);
mpz_mul_2exp(width, width, mpz_get_ui(temp));
if (mpz_cmp(width, prec) > 0) break;
}
fprintf(stderr, "[mpq_log2] series terms: %ld\n", n);
/* calculate series */
for (size_t i=1; i<n+1; i++) {
mpq_set_ui(term, 1, 1);
mpz_ui_pow_ui(mpq_denref(term), 2, i);
mpz_mul_ui(mpq_denref(term), mpq_denref(term), i);
mpq_add(sum, sum, term);
}
mpq_fprintf(stderr, digits > 50 ? 50 : digits,
"[mpq_log2] log(2)=%q...\n", &sum);
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, NULL);
mpz_clears(temp, prec, width, NULL);
}
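/* For instance, requesting 10 decimal digits sets prec = 10^12
 * (guard digits included) and the loop above stops at roughly
 * n = 31 series terms.
 */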
/* `mpq_log_ui(z, x, D)`
* calculates the logarithm of x up
* to D decimal places using the formula
*
* log(x) = log(a(x) * 2^(n-1))
* = log(a) + (n-1) log(2)
*
* where 1<a<2 and n is the number of binary
* digits of x.
*
* log(a) is the computed from
*
* log((1+y)/(1-y)) = 2Σ_{k=0} y^(2k+1)/(2k+1)
*
* where y = (a-1)/(a+1).
*
* The error of the n-th partial sum is
* estimated as:
*
* Δ(n, y) = 4y^(2n + 3) /
* (4n^2y^4 - 8n^2y^2 + 4n^2 + 4ny^4 - 12ny^2 + 8n + y^4 - 4y^2 + 3)
*
* So to fix the first D decimal places
* we evaluate up to n terms such that
*
* Δ(n, y) < 1/10^D
*/
void mpq_log_ui(mpq_t rop, size_t x, size_t digits) {
/* calculate `a` such that x = a⋅2^(m-1) */
mpq_t a; mpq_init(a);
mpz_t xz, power; mpz_inits(xz, power, NULL);
mpz_set_ui(xz, x);
// m = digits in base 2 of x
size_t m = mpz_sizeinbase(xz, 2);
mpz_ui_pow_ui(power, 2, m - 1);
mpq_set_num(a, xz);
mpq_set_den(a, power);
fprintf(stderr, "[mpq_log_ui] binary digits: %ld\n", m);
fprintf(stderr, "[mpq_log_ui] a=%g\n", mpq_get_d(a));
/* calculate y = (a-1)/(a+1) */
mpq_t y, temp; mpq_inits(y, temp, NULL);
mpq_set(temp, a);
// decrement: a/b -= b/b
mpz_submul_ui(mpq_numref(temp),
mpq_denref(temp), 1);
mpq_set(y, temp);
mpq_set(temp, a);
// increment: a/b += b/b
mpz_addmul_ui(mpq_numref(temp),
mpq_denref(temp), 1);
mpq_div(y, y, temp);
fprintf(stderr, "[mpq_log_ui] y=%g\n", mpq_get_d(y));
/* calculate number of required terms:
* find smallest n such that Δ(n, y) < 1/10^digits.
*/
mpq_t y4, y2, y2n, t1, t2;
mpq_inits(y4, y2, y2n, t1, t2, NULL);
// y4 = y^4
mpz_pow_ui(mpq_numref(y4), mpq_numref(y), 4);
mpz_pow_ui(mpq_denref(y4), mpq_denref(y), 4);
// y2 = y^2
mpz_pow_ui(mpq_numref(y2), mpq_numref(y), 2);
mpz_pow_ui(mpq_denref(y2), mpq_denref(y), 2);
size_t n;
size_t p1, p2, p3;
for(n = 0;; n++) {
// polynomials in n
p1 = 4*pow(n, 2) + 4*n + 1;
p2 = 8*pow(n, 2) + 12*n - 4;
p3 = 4*pow(n, 2) + 3;
// t1 = y4 * p1
mpz_mul_ui(mpq_numref(t1), mpq_numref(y4), p1);
mpq_canonicalize(t1);
// t2 = y2 * p2
mpz_mul_ui(mpq_numref(t2), mpq_numref(y2), p2);
mpq_canonicalize(t2);
// t1 -= t2
mpq_sub(t1, t1, t2);
// t1 += p3
mpz_addmul_ui(mpq_numref(t1), mpq_denref(t1), p3);
// y2n = 4*y^(2n + 3)
mpq_set(y2n, y);
mpz_pow_ui(mpq_numref(y2n), mpq_numref(y2n), 2*n+3);
mpz_pow_ui(mpq_denref(y2n), mpq_denref(y2n), 2*n+3);
mpz_mul_ui(mpq_numref(y2n), mpq_numref(y2n), 4);
mpq_canonicalize(y2n);
// t2 = 10^digits * y2n
mpq_set_ui(t2, 1, 1);
mpz_ui_pow_ui(mpq_numref(t2), 10, digits + 1);
mpq_mul(t2, t2, y2n);
if (mpq_cmp(t1, t2) > 0) break;
}
fprintf(stderr, "[mpq_log_ui] series terms: %ld\n", n);
fprintf(stderr, "[mpq_log_ui] decimal digits: %ld\n", digits);
/* calculate the series of
* log(A) = log((1+y)/(1-y)) = 2 Σ_{k=0} y^(2k+1)/(2k+1)
*/
mpq_t sum, term; mpq_inits(sum, term, NULL);
for (size_t i = 0; i <= n; i++) {
// numerator: y^(2i+1)
mpq_set(temp, y);
mpz_pow_ui(mpq_numref(temp), mpq_numref(temp), 2*i + 1);
mpz_pow_ui(mpq_denref(temp), mpq_denref(temp), 2*i + 1);
mpq_canonicalize(temp);
mpq_set(term, temp);
// denominator: 2i+1
mpq_set_ui(temp, 2*i + 1, 1);
mpq_div(term, term, temp);
// sum the term
mpq_add(sum, sum, term);
}
// multiply by 2
mpz_mul_ui(mpq_numref(sum), mpq_numref(sum), 2);
mpq_canonicalize(sum);
// calculate log(2)*(m-1)
mpq_t log2; mpq_init(log2);
mpq_log2(log2, digits);
mpz_mul_ui(mpq_numref(log2), mpq_numref(log2), m-1);
mpq_canonicalize(log2);
// series += log(2)*(m-1)
mpq_add(sum, sum, log2);
mpq_set(rop, sum);
mpq_fprintf(stderr, digits > 50 ? 50 : digits,
"[mpq_log_ui] log(%ld)=%q...\n", x, sum);
// free memory
mpq_clears(a, y, temp, sum, term, log2,
y4, y2, y2n, t1, t2, NULL);
mpz_clears(xz, power, NULL);
}
/* A series */
void a_series(mpq_t rop, size_t N) {
mpq_t sum, term, factq, harm, recip;
mpz_t fact, power, stop;
mpq_inits(sum, term, factq, harm, recip, NULL);
mpz_inits(fact, power, stop, NULL);
mpz_set_ui(stop, N);
for (size_t k = 0; k < 5*N; k++) {
mpz_fac_ui(fact, k);
mpz_pow_ui(power, stop, k);
mpq_set_z(term, power);
mpq_set_z(factq, fact);
mpq_div(term, term, factq);
mpq_mul(term, term, term);
if (k > 0)
mpq_set_ui(recip, 1, k);
mpq_add(harm, harm, recip);
mpq_mul(term, term, harm);
mpq_add(sum, sum, term);
}
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, factq, harm, recip, NULL);
mpz_clears(fact, power, stop, NULL);
}
/* B series */
void b_series(mpq_t rop, size_t N){
mpq_t sum, term, factq;
mpz_t fact, power, stop;
mpq_inits(sum, term, factq, NULL);
mpz_inits(fact, power, stop, NULL);
mpz_set_ui(stop, N);
for (size_t k = 0; k < 5*N; k++) {
mpz_fac_ui(fact, k);
mpz_pow_ui(power, stop, k);
mpq_set_z(term, power);
mpq_set_z(factq, fact);
mpq_div(term, term, factq);
mpq_mul(term, term, term);
mpq_add(sum, sum, term);
}
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, factq, NULL);
mpz_clears(fact, power, stop, NULL);
}
/* C series */
void c_series(mpq_t rop, size_t N) {
mpq_t sum, term, stop2, fact2q;
mpz_t fact1, fact2, power, stop1;
mpq_inits(sum, term, stop2, fact2q, NULL);
mpz_inits(fact1, fact2, power, stop1, NULL);
mpq_set_ui(stop2, 4*N, 1);
for (size_t k = 0; k <= 2*N; k++) {
mpz_fac_ui(fact1, 2*k);
mpz_pow_ui(fact1, fact1, 3);
mpz_fac_ui(fact2, k);
mpz_pow_ui(fact2, fact2, 4);
mpz_set_ui(stop1, 16*N);
mpz_pow_ui(stop1, stop1, 2*k);
mpz_mul(fact2, fact2, stop1);
mpq_set_z(term, fact1);
mpq_set_z(fact2q, fact2);
mpq_div(term, term, fact2q);
mpq_add(sum, sum, term);
}
mpq_div(sum, sum, stop2);
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, stop2, fact2q, NULL);
mpz_clears(fact1, fact2, power, stop1, NULL);
}
int main(int argc, char** argv) {
/* if no argument is given, show the usage */
if (argc != 2) {
fprintf(stderr, "usage: %s D\n", argv[0]);
fprintf(stderr, "Computes γ up to D decimal places.\n");
return EXIT_FAILURE;
}
mpq_t exact, indicator;
mpq_inits(exact, indicator, NULL);
// value of γ up to 500 digits
mpq_set_str(
exact,
"47342467929426395252720948664321583815681737620205"
"78046578043524355342995304421105234825234769529727"
"60710541774258570118818437511973945213031180449905"
"77067110892216494927227098092412641670668896104172"
"59380467495735805599918127376547804186055507379270"
"09659478275476110597673948659342592173551684412254"
"47940426607883070678658320829056222506449877887289"
"02099638646695565284675455912794915138438516540912"
"22061900908485016328626399040675802962645141413722"
"567840383761005003563012969560659130635723002086605/"
"82018681765164048732047980836753451023877954010252"
"60062364748361667340168652059998708337602423525120"
"45225158774173869894826877890589130978987229877889"
"33367849273189687823618289122425446493605087108634"
"04387981302669131224273324182166778131513056804533"
"58955006355665628938266331979307689540884269372365"
"76288367811322713649805442241450184023209087215891"
"55369788474437679223152173114447113970483314961392"
"48250188991402851129033493732164230227458717486395"
"514436574417275149404197774547389507462779807727616",
10);
// number with decimal expansion equal to
// 0123456789 periodic: useful to count digits
mpq_set_str(
indicator,
"137174210/1111111111",
10);
// number of decimal places
size_t D = atol(argv[1]);
// number of terms
size_t N = floor(1.0/4 * log(10) * (double)D);
/* calculate series */
mpq_t A, B, C;
mpq_inits(A, B, C, NULL);
a_series(A, N);
b_series(B, N);
c_series(C, N);
/* calculate γ... */
mpq_t gamma, corr, logn;
mpq_inits(gamma, corr, logn, NULL);
mpq_log_ui(logn, N, D);
mpq_div(gamma, A, B);
mpq_div(corr, C, B);
mpq_div(corr, corr, B);
mpq_sub(gamma, gamma, corr);
mpq_sub(gamma, gamma, logn);
/* calculate difference */
mpq_t diff;
mpq_init(diff);
mpq_sub(diff, gamma, exact);
/* print results */
size_t d = D+1 > 50 ? 50 : D+1;
fprintf(stderr, "[main] N: %ld\n", N);
mpq_fprintf(stderr, d, "[main] approx:\t%q...\n", gamma);
mpq_fprintf(stderr, d, "[main] exact:\t%q...\n", exact);
mpq_fprintf(stderr, d, "[main] diff:\t%q...\n", diff);
mpq_fprintf(stderr, d, "[main] digits:\t%q\n", indicator);
mpq_fprintf(stdout, D+1, "%q\n", gamma);
// free memory
mpq_clears(A, B, C, gamma, corr, logn,
exact, indicator, diff, NULL);
return EXIT_SUCCESS;
}

89
ex-2/fancy.c Normal file

@ -0,0 +1,89 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
// The Euler-Mascheroni constant is computed through the formula:
//
//   γ = A(N)/B(N) - C(N)/B(N)² - ln(N)
//
// with:
//
//   A(N) = Σ_(k = 1)^(k_max) (N^k/k!)² H(k)
//   B(N) = Σ_(k = 0)^(k_max) (N^k/k!)²
//   C(N) = 1/(4N) Σ_k ((2k)!)³ / ((k!)⁴ (16N)^(2k))
//   H(k) = Σ_(j = 1)^(k) 1/j
//
// where N is computed from D as written below and k_max is the index
// at which the partial sums stop changing in double precision.
//
// source: http://www.numberworld.org/y-cruncher/internals/formulas.html
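// Worked example: D = 15 gives N = floor(2 + 1/4 ⋅ ln(10) ⋅ 15) = 10,
// the configuration noted below as giving the best result.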
// Partial harmonic sum h
double harmonic_sum(double n) {
double sum = 0;
for (double k = 1; k < n+1; k++) {
sum += 1/k;
}
return sum;
}
// A series
double a_series(int N) {
double sum = 0;
double prev = -1;
for (double k = 1; sum != prev; k++) {
prev = sum;
sum += pow(((pow(N, k))/(tgamma(k+1))), 2) * harmonic_sum(k);
}
return sum;
}
// B series
double b_series(int N){
double sum = 0;
double prev = -1;
for (double k = 0; sum != prev; k++) {
prev = sum;
sum += pow(((pow(N, k))/(tgamma(k+1))), 2);
}
return sum;
}
double c_series(int N) {
double sum = 0;
for (double k = 0; k < N; k++) {
sum += pow(tgamma(2*k + 1), 3)/(pow(tgamma(k + 1), 4) * pow(16.0*N, (int)2*k));
}
return sum/(4.0*N);
}
// Takes as input the number D of desired correct decimals.
// Best result obtained with D = 15, N = 10.
int main(int argc, char** argv) {
double exact =
0.57721566490153286060651209008240243104215933593992;
  // If no argument is given, an error message is displayed
  // and the program quits.
if (argc != 2) {
fprintf(stderr, "usage: %s D\n", argv[0]);
fprintf(stderr, "Computes γ up to D decimal places.\n");
return EXIT_FAILURE;
}
int N = floor(2.0 + 1.0/4 * log(10) * (double)atoi(argv[1]));
double A = a_series(N);
double B = b_series(N);
double C = c_series(N);
double gamma = A/B - C/(B*B) - log(N);
printf("N: %d\n", N);
printf("approx:\t%.30f\n", gamma);
printf("true:\t%.30f\n", exact);
printf("diff:\t%.30f\n", fabs(gamma - exact));
printf("\t 123456789 123456789 123456789\n");
return EXIT_SUCCESS;
}

43
ex-2/limit.c Normal file

@ -0,0 +1,43 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/* Compute the Euler-Mascheroni constant through the formula:
 *
 *   γ = lim_{M → +∞} Σ_{k = 1}^{M} binom(M, k) (-1)^k/k ⋅ ln(Γ(k+1))
 *
 * Best result is obtained for M = 41.
* Values from 1 to 100 were checked.
*
*/
double exact = 0.577215664901532860;
int main() {
double best = 0, num = 0;
double sum = 0, prev;
for (double M = 1; M < 60; M++) {
prev = sum;
sum = 0;
for (double k = 1; k <= M; k++) {
sum += (tgamma(M + 1))/(tgamma(k + 1)*tgamma(M - k + 1)) * pow(-1, k)/k
* log(tgamma (k + 1));
}
printf("M: \t%d\t", (int)M);
printf("diff:\t%.15f\n", fabs(sum - exact));
if ((fabs(sum - exact) < fabs(prev - exact)) &&
(fabs(sum - exact) < fabs(best - exact))){
best = sum;
num = M;
}
}
printf("M: %d\n", (int)num);
printf("approx:\t%.15f\n", best);
printf("true:\t%.15f\n", exact);
printf("diff:\t%.15f\n", fabs(best - exact));
printf("\t 123456789_12345\n");
return EXIT_SUCCESS;
}

57
ex-2/naive.c Normal file

@ -0,0 +1,57 @@
#include <stdio.h>
#include <math.h>
/* Naive approach using the definition
 * of γ as the limit of the difference between the
 * partial sums of the harmonic series (a Riemann sum
 * of 1/x) and log(n).
*
* The result is:
*
* approx: 0.57721 56648 77325
* true: 0.57721 56649 01533
* diff: 0.00000 00000 24207
*
* The convergence is logarithmic and so
* slow that the double precision runs out
* at the 10th decimal digit.
*
*/
double exact = 0.577215664901532860;
double partial_sum(size_t n) {
double sum = 0;
for (double k = 1; k <= n; k++) {
sum += 1/k;
}
return sum - log(n);
}
int main () {
size_t prev_n, n = 2;
double prev_diff, diff=1;
double approx;
/* Compute the approximation
* and the difference from the true value.
* Stop when the difference starts increasing
* due to roundoff errors.
*
*/
do {
prev_n = n;
n *= 10;
prev_diff = diff;
approx = partial_sum(n);
diff = fabs(exact - approx);
printf("n:\t%.0e\t", (double)n);
printf("diff:\t%e\n", diff);
} while (diff < prev_diff);
printf("n:\t%.0e\n", (double)prev_n);
printf("approx:\t%.15f\n", approx);
printf("true:\t%.15f\n", exact);
printf("diff:\t%.15f\n", prev_diff);
printf("\t 123456789_12345\n");
}

63
ex-2/recip.c Normal file

@ -0,0 +1,63 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/* Compute the Euler-Mascheroni constant through the formula
* of the reciprocal Γ function:
*
 *   1/Γ(z) = z e^{γz} ∏_{k = 1}^{+∞} (1 + z/k) e^{-z/k}
 *
 * Thus:
 *
 *   γ = -1/z ⋅ ln(z Γ(z) ∏_{k = 1}^{+∞} (1 + z/k) e^{-z/k})
 *
 * The product stops when two consecutive partial products
 * are equal, due to limited precision.
*
* Range of z given in input as min, max and step.
*
*/
double exact = 0.577215664901532860;
int main(int argc, char** argv) {
double min = (double)atof(argv[1]);
double max = (double)atof(argv[2]);
double step = (double)atof(argv[3]);
double prev_gamma, gamma = 0;
double Z;
double best = 0;
for (double z = min; z <= max; z += step) {
prev_gamma = gamma;
double pro = 1;
double prev = -1;
for (double k = 1; pro != prev; k++) {
prev = pro;
pro *= (1 + z/k) * exp(- z/k);
if (z == 9 && pro == prev) printf("k:\t%.0f\n", k);
}
    // assign to the outer gamma so that prev_gamma tracks it
    gamma = - 1/z * log(z*tgamma(z)*pro);
printf("z:\t%.2f\t", z);
printf("diff:\t%.20f\n", fabs(gamma - exact));
if ((fabs(gamma - exact) < fabs(prev_gamma - exact)) &&
(fabs(gamma - exact) < fabs(best - exact))){
best = gamma;
Z = z;
}
}
printf("z:\t%.2f\n", Z);
printf("approx:\t%.20f\n", best);
printf("true:\t%.20f\n", exact);
printf("diff:\t%.20f\n", best - exact);
printf("\t 123456789 123456789 123456789\n");
return EXIT_SUCCESS;
}

292
ex-3/chisquared.c Normal file

@ -0,0 +1,292 @@
#include "common.h"
#include "chisquared.h"
#include <gsl/gsl_multifit_nlinear.h>
#include <gsl/gsl_blas.h>
/* Minimisation function
*
* The χ² is defined as
*
* χ² = |f|²
* = Σi fi²
*
 * where fi = (Oi - Ei)/√Ei are the residuals.
*
* This function takes the parameters (α,β,γ) as
* a gsl_vector `par`, the sample `data` and a
* gsl_vector `f`, that will store the results
* `fi`.
*/
int chisquared_f(const gsl_vector *par,
void *data_, gsl_vector *f) {
struct hist data = *((struct hist*) data_);
struct event event;
double max, min;
double delta_th, delta_ph;
double expected, observed;
/* Loop over the (i,j) bins of the 2D histogram.
* The index k of the k-th component of f is actually:
*
* k = i * φbins + j
*
* Note: this is called the C-order.
*/
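  /* For instance, with the 30×60 binning used by default in main.c
   * the residual vector f has 1800 components, and bin (i=2, j=5)
   * maps to k = 2⋅60 + 5 = 125.
   */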
for (size_t i = 0; i < data.h->nx; i++)
for (size_t j = 0; j < data.h->ny; j++) {
/* Get the bin ranges and width for θ,φ.
* The event is supposed to happen in the
* midpoint of the range.
*/
gsl_histogram2d_get_xrange(data.h, i, &max, &min);
event.th = (max + min)/2;
delta_th = (max - min);
gsl_histogram2d_get_yrange(data.h, j, &max, &min);
event.ph = (max + min)/2;
delta_ph = (max - min);
/* O = observed number of events in the k-th bin
* E = expected number of events in k-th bin
*
* E is given by:
*
* /---> total number of events
* /
* E = N F(α,β,γ; θ,φ) Δθ Δφ sin(θ)
* \__________________________/
* |
* `-> probability
*/
observed = gsl_histogram2d_get(data.h, i, j);
expected = data.n * distr(par, &event)
* delta_th * delta_ph * sin(event.th);
      /* Guard against a vanishing or negative expected
       * count: that would lead to a division by zero below,
       * so clamp it to a small positive value.
       */
      if (expected <= 0) {
        //fprintf(stderr, "[warning] bin %ld:%ld p<0 (%.2g, %.2g, %.2g)\n",
        //        i, j, gsl_vector_get(par, 0),
        //        gsl_vector_get(par, 1),
        //        gsl_vector_get(par, 2));
        expected = 1e-6;
      }
gsl_vector_set(
f, i * data.h->ny + j,
(observed - expected)/sqrt(expected));
}
return GSL_SUCCESS;
}
/* Jacobian function
*
 * The gradient of the χ² function is:
 *
 *   ∇χ² = ∇|f|² = 2 J^T f
 *
 * where J is the jacobian of the (vector)
 * function f. Its entries are given by
 *
 *   Jij = ∂fi/∂xj
 *       = -(Oi + Ei)/(2√Ei) ⋅ 1/Fi ⋅ ∂Fi/∂xj
*
*/
int chisquared_df(const gsl_vector *par,
void *data_, gsl_matrix *jac) {
struct hist data = *((struct hist*) data_);
struct event event;
double max, min;
double delta_th, delta_ph, prob;
double expected, observed;
gsl_vector *grad = gsl_vector_alloc(3);
for (size_t i = 0; i < data.h->nx; i++)
for (size_t j = 0; j < data.h->ny; j++) {
/* Get the bin ranges for θ,φ.
* The event is supposed to happen in the
* midpoint of the range.
*/
gsl_histogram2d_get_xrange(data.h, i, &max, &min);
event.th = (max + min)/2;
delta_th = (max - min);
gsl_histogram2d_get_yrange(data.h, j, &max, &min);
event.ph = (max + min)/2;
delta_ph = (max - min);
prob = distr(par, &event);
observed = gsl_histogram2d_get(data.h, i, j);
expected = data.n * prob * delta_th * delta_ph * sin(event.th);
if (expected < 0)
expected = 1e-6;
if (observed == 0)
observed = 1;
/* Compute the gradient of F(α,β,γ; θi,φi),
* then rescale it and set it to the i-th row
* of the jacobian.
*/
grad_distr(par, &event, grad);
gsl_vector_scale(
grad,
-0.5*(observed + expected)/sqrt(expected) * 1/prob);
gsl_matrix_set_row(jac, i * data.h->ny + j, grad);
}
// free memory
gsl_vector_free(grad);
return GSL_SUCCESS;
}
/* This is a callback function called during
* the minimisation to show the current progress.
* It prints:
* 1. the condition number cond(J) of the jacobian
* 2. the reduced χ² value
* 3. the current parameters
*/
void callback(const size_t iter, void *params,
const gsl_multifit_nlinear_workspace *w) {
gsl_vector *f = gsl_multifit_nlinear_residual(w);
gsl_vector *x = gsl_multifit_nlinear_position(w);
//if (iter % 4 != 0)
// return;
/* Compute the condition number of the
* jacobian and the reduced χ² (χ²/d).
*/
double rcond, chi2;
int d = w->fdf->n - w->fdf->p;
gsl_multifit_nlinear_rcond(&rcond, w);
gsl_blas_ddot(f, f, &chi2);
fprintf(stderr, "%2ld\t", iter);
vector_fprint(stderr, x);
fprintf(
stderr, "\tcond(J)=%.4g, χ²/d=%.4g\n\n",
1.0/rcond,
chi2/d);
}
/* Minimum χ² estimation of the parameters
* α,β,γ.
*/
min_result minChiSquared(struct hist data) {
/* Initialise the function to be minimised */
gsl_multifit_nlinear_fdf chisquared;
chisquared.f = chisquared_f; // function
chisquared.df = chisquared_df; // gradient
  chisquared.fvv = NULL;                  // second directional derivative (for geodesic accel.), unused
  chisquared.n = data.h->nx * data.h->ny; // number of data points
  chisquared.p = 3;                       // number of parameters
chisquared.params = &data; // histogram data
/* Initialise the minimisation workspace */
gsl_multifit_nlinear_parameters options =
gsl_multifit_nlinear_default_parameters();
gsl_multifit_nlinear_workspace *w =
gsl_multifit_nlinear_alloc(
gsl_multifit_nlinear_trust, // minimisation method
&options, // minimisation options
data.h->nx * data.h->ny, // number of data points
3); // number of (unknown) params
/* Set the starting point of the
* minimisation.
*/
gsl_vector *par = gsl_vector_alloc(3);
gsl_vector_set(par, 0, 0.50); // α
gsl_vector_set(par, 1, 0.01); // β
gsl_vector_set(par, 2, -0.10); // γ
gsl_multifit_nlinear_init(par, &chisquared, w);
/* Configure the solver and run the minimisation
* using the high-level driver.
*/
fputs("\n# least χ²\n\n", stderr);
int status, info;
status = gsl_multifit_nlinear_driver(
100, // max number of iterations
1e-8, // tolerance for step test: |δ| ≤ xtol(|x| + xtol)
1e-8, // tolerance for gradient test: max |∇i⋅ xi| ≤ gtol⋅xi
1e-8, // tolerance for norm test
callback, // function called on each iteration
NULL, // callback parameters
&info, // stores convergence information
w); // minimisation workspace
fprintf(stderr, "status: %s\n", gsl_strerror(status));
if (status != GSL_SUCCESS)
fprintf(stderr, "info: %s\n", gsl_strerror(info));
fprintf(stderr, "iterations: %ld\n", gsl_multifit_nlinear_niter(w));
/* Store results in the min_result type.
* Note: We allocate a new vector/matrix for the
* parameters because `w->x` will be freed
* along with the workspace `w`.
*/
min_result res;
res.par = gsl_vector_alloc(3);
res.err = gsl_vector_alloc(3);
res.cov = gsl_matrix_alloc(3, 3);
gsl_vector_memcpy(res.par, gsl_multifit_nlinear_position(w));
/* Compute the covariance of the fit parameters.
 * The covariance Σ is estimated by
 *
 *   Σ = H⁻¹
 *
 * where H is the hessian of the χ², which is
 * itself approximated by the jacobian as
 *
 *   H = J^T J
*
*/
gsl_matrix *jac = gsl_multifit_nlinear_jac(w);
gsl_multifit_nlinear_covar(jac, 0.0, res.cov);
/* Compute the standard errors
* from the covariance matrix.
*/
gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov);
gsl_vector_memcpy(res.err, &diagonal.vector);
vector_map(&sqrt, res.err);
/* Compute the reduced χ² */
double chi2;
gsl_vector *f = gsl_multifit_nlinear_residual(w);
gsl_blas_ddot(f, f, &chi2);
chi2 /= chisquared.n - chisquared.p;
/* Print the results */
fputs("\n## results\n", stderr);
fprintf(stderr, "\n* χ²/d: %.3f\n", chi2);
fputs("\n* parameters:\n ", stderr);
vector_fprint(stderr, res.par);
fputs("\n* covariance:\n", stderr);
matrix_fprint(stderr, res.cov);
// free memory
gsl_multifit_nlinear_free(w);
gsl_vector_free(par);
return res;
}

17
ex-3/chisquared.h Normal file

@ -0,0 +1,17 @@
#include "common.h"
#include <gsl/gsl_histogram2d.h>
/* Structure containing the histogram
* for the χ² minimisation.
* n: number of events
* h: 2D histogram
*/
struct hist {
size_t n;
gsl_histogram2d *h;
};
/* Minimum χ² estimation of the parameters
* α,β,γ.
*/
min_result minChiSquared(struct hist data);

126
ex-3/common.c Normal file

@ -0,0 +1,126 @@
#include "common.h"
/* `vector_fprint(fp, x)` prints to file `fp`
* the components of a gsl_vector.
*/
void vector_fprint(FILE *fp, const gsl_vector *x) {
fprintf(fp, "[");
for (size_t i = 0; i < x->size; i++) {
fprintf(fp, i < x->size -1 ? "%10.4g, " : "%10.4g",
gsl_vector_get(x, i));
}
fprintf(fp, "]\n");
}
/* `matrix_fprint(fp, x)` prints to file `fp`
 * the components of a gsl_matrix.
*/
void matrix_fprint(FILE *fp, const gsl_matrix *x) {
gsl_vector row;
for (size_t i = 0; i < x->size1; i++) {
row = gsl_matrix_const_row(x, i).vector;
fputs(" ", fp);
vector_fprint(fp, &row);
}
}
/* `vector_map(f, v)` applies the function
* `f` to each element of the vector `v`, inplace.
 * The function returns a GSL exit code reporting
 * either success or failure.
*/
int vector_map(double (*f)(double x), gsl_vector *v) {
double y;
for (size_t i = 0; i < v->size; i++ ) {
y = (*f)(gsl_vector_get(v, i));
gsl_vector_set(v, i, y);
}
return GSL_SUCCESS;
}
/* Angular distribution function F(θ,φ; α,β,γ)
* Takes three parameters α,β,γ and the angles θ,φ.
*/
double distr(const gsl_vector *par,
const struct event *event){
double a = gsl_vector_get(par, 0),
b = gsl_vector_get(par, 1),
c = gsl_vector_get(par, 2);
double th = event->th,
ph = event->ph;
double A = 3 /(4 * M_PI);
double B = 0.5 * (1 - a);
double C = 0.5 * (3 * a - 1) * pow(cos(th), 2);
double D = b * pow(sin(th), 2) * cos(2 * ph);
double E = sqrt(2) * c * sin(2 * th) * cos(ph);
return A * (B + C - D - E);
}
/* `grad_distr(par, event, grad)`
 * computes the gradient of F(θ,φ; α,β,γ) with respect
 * to the parameters α,β,γ:
 *
 *        ⎡  3/2 cos²(θ) - 1/2  ⎤
 *   ∇F = ⎢  -sin²(θ) cos(2φ)   ⎥ ⋅ 3/(4π)
 *        ⎣  -√2 sin(2θ) cos(φ) ⎦
 *
 * where `par` is [α,β,γ] and `event` is the angle
 * structure. The result is stored in the
 * gsl_vector `grad`.
*/
void grad_distr(const gsl_vector *par,
const struct event *event,
gsl_vector *grad) {
double th = event->th,
ph = event->ph;
gsl_vector_set(grad, 0, 1.5*pow(cos(th), 2) - 0.5);
gsl_vector_set(grad, 1, -pow(sin(th), 2) * cos(2*ph));
gsl_vector_set(grad, 2, -sqrt(2)*sin(2*th) * cos(ph));
gsl_vector_scale(grad, 3.0/(4.0 * M_PI));
}
/* Test the compatibility between the original
* parameters `def` and the estimates `est.par`
* with standard errors `est.err`.
*/
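/* Worked example (cf. ex-3/results.md): a = 0.65, b = 0.6537 and
 * σ = 0.003 give t ≈ 1.23 and a significance α ≈ 0.22.
 */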
void compatibility(gsl_vector *def,
min_result est) {
char* names[] = { "α", "β", "γ" };
double a, b;
double t, sigma, alpha;
fprintf(stderr, "| par | σ | α-value |\n");
fprintf(stderr, "|-----|----------|----------|\n");
for (size_t i = 0; i < def->size; i++) {
// discrepancy
a = gsl_vector_get(def, i);
b = gsl_vector_get(est.par, i);
sigma = gsl_vector_get(est.err, i);
// normal t-statistic
t = fabs(a - b)/sigma;
// α-value / significance
alpha = 1 - erf(t/sqrt(2));
fprintf(stderr, "| %s | %-8.2g | %-8.2g |\n",
names[i], sigma, alpha);
}
}
/* Free a min_result structure.
*/
void min_result_free(min_result x) {
  gsl_vector_free(x.par);
  gsl_vector_free(x.err);
  gsl_matrix_free(x.cov);
}

88
ex-3/common.h Normal file

@ -0,0 +1,88 @@
#pragma once
#include <math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
/* Structure that holds the θ,φ angles
* of a single event of the sample.
*/
struct event {
double th;
double ph;
};
/* Structure that contains the sample events
* and its size.
*/
struct sample {
const size_t size;
struct event *events;
};
/* This structure contains the result of a
* minimisation method: estimated parameters,
* standard error and their covariance.
*/
typedef struct {
gsl_vector *par;
gsl_vector *err;
gsl_matrix *cov;
} min_result;
/* `vector_fprint(fp, x)` prints to file `fp`
* the components of a gsl_vector.
*/
void vector_fprint(FILE *fp, const gsl_vector *x);
/* `matrix_fprint(fp, x)` prints to file `fp`
 * the components of a gsl_matrix.
*/
void matrix_fprint(FILE *fp, const gsl_matrix *x);
/* `vector_map(f, v)` applies the function
* `f` to each element of the vector `v`, inplace.
 * The function returns a GSL exit code reporting
 * either success or failure.
*/
int vector_map(double (*f)(double x), gsl_vector *v);
/* Angular distribution function F(θ,φ; α,β,γ)
* Takes three parameters α,β,γ and the angles θ,φ.
*/
double distr(const gsl_vector *par,
const struct event *event);
/* `grad_distr(par, event, grad)`
 * computes the gradient of F(θ,φ; α,β,γ) with respect
 * to the parameters α,β,γ:
 *
 *        ⎡  3/2 cos²(θ) - 1/2  ⎤
 *   ∇F = ⎢  -sin²(θ) cos(2φ)   ⎥ ⋅ 3/(4π)
 *        ⎣  -√2 sin(2θ) cos(φ) ⎦
 *
 * where `par` is [α,β,γ] and `event` is the angle
 * structure. The result is stored in the
 * gsl_vector `grad`.
*/
void grad_distr(const gsl_vector *par,
const struct event *event,
gsl_vector *grad);
/* Test the compatibility between the original
* parameters `def` and the estimates `est.par`
* with standard errors `est.err`.
*/
void compatibility(gsl_vector *def, min_result est);
/* Free a min_result structure.
*/
void min_result_free(min_result x);

239
ex-3/likelihood.c Normal file

@ -0,0 +1,239 @@
#include "likelihood.h"
#include <gsl/gsl_errno.h>
#include <gsl/gsl_multimin.h>
#include <gsl/gsl_linalg.h>
/* `f_logL(par, &sample)`
* gives the negative log-likelihood for the sample `sample`.
*/
double f_logL(const gsl_vector *par, void *sample_) {
struct sample sample = *((struct sample*) sample_);
double sum = 0;
for (size_t i = 0; i < sample.size; i++)
sum += log(fabs(distr(par, &sample.events[i])));
/* Rescale the function to avoid
* round-off errors during minimisation
*/
sum *= 1e-5;
return -sum;
}
/* `fdf_logL(par, &sample, fun, grad)`
* simultaneously computes the gradient and the value of
* the negative log-likelihood for the sample `sample`
* and stores the results in `fun` and `grad` respectively.
*/
void fdf_logL(const gsl_vector *par, void *sample_,
double *fun, gsl_vector *grad) {
struct sample sample = *((struct sample*) sample_);
double prob;
struct event *event;
gsl_vector *term = gsl_vector_alloc(3);
// Note: fun/grad are *not* guaranteed to be 0
gsl_vector_set_zero(grad);
*fun = 0;
for (size_t i = 0; i < sample.size; i++) {
event = &sample.events[i];
prob = distr(par, event);
    /* The gradient of log(F(α,β,γ)) is:
     *   ∇log(F) = 1/F(α,β,γ) ⋅ ∇F(α,β,γ)
     */
grad_distr(par, event, term);
gsl_vector_scale(term, -1.0/prob);
gsl_vector_add(grad, term);
// compute function
*fun += log(fabs(prob));
}
/* Rescale the function to avoid
* round-off errors during minimisation
*/
*fun = -*fun*1e-5;
gsl_vector_scale(grad, 1e-5);
// free memory
gsl_vector_free(term);
}
/* `df_logL(par, &sample, grad)`
* gives the gradient of the negative log-likelihood
* for the sample `sample`. The result is stored in
* the `grad` gsl_vector.
*/
void df_logL(const gsl_vector *par, void *sample, gsl_vector *grad) {
double fun;
fdf_logL(par, sample, &fun, grad);
}
/* `hf_logL(par, &sample)`
 * returns the hessian matrix of the negative log-likelihood
 * for the sample `sample`, as a newly allocated gsl_matrix
 * (to be freed by the caller).
 */
gsl_matrix* hf_logL(const gsl_vector *par, void *sample_) {
struct sample sample = *((struct sample*) sample_);
gsl_vector *grad = gsl_vector_calloc(3);
gsl_matrix *term = gsl_matrix_calloc(3, 3);
gsl_matrix *hess = gsl_matrix_calloc(3, 3);
double prob;
struct event *event;
for (size_t n = 0; n < sample.size; n++) {
/* Compute gradient and value of F */
event = &sample.events[n];
prob = distr(par, event);
grad_distr(par, event, grad);
    /* Compute the hessian matrix of -log(F):
     *   H_ij = 1/F² ⋅ ∂F/∂x_i ⋅ ∂F/∂x_j
     * (exact here, since F is linear in the parameters
     * and its second derivatives vanish)
     */
for (size_t i = 0; i < 3; i++)
for (size_t j = 0; j < 3; j++) {
gsl_matrix_set(
term, i, j,
gsl_vector_get(grad, i) * gsl_vector_get(grad, j));
}
gsl_matrix_scale(term, 1.0/pow(prob, 2));
gsl_matrix_add(hess, term);
}
// free memory
gsl_vector_free(grad);
gsl_matrix_free(term);
return hess;
}
/* Maximum likelihood estimation of the parameters
* α,β,γ.
*/
min_result maxLikelihood(struct sample *sample) {
/* Starting point: α,β,γ = "something close
* to the solution", because the minimisation
* seems pretty unstable.
*/
gsl_vector *par = gsl_vector_alloc(3);
gsl_vector_set(par, 0, 0.79);
gsl_vector_set(par, 1, 0.02);
gsl_vector_set(par, 2, -0.17);
/* Initialise the minimisation */
gsl_multimin_function_fdf likelihood;
likelihood.n = 3;
likelihood.f = f_logL;
likelihood.df = df_logL;
likelihood.fdf = fdf_logL;
likelihood.params = sample;
/* The minimisation technique used
* is the conjugate gradient method. */
const gsl_multimin_fdfminimizer_type *type =
gsl_multimin_fdfminimizer_conjugate_fr;
gsl_multimin_fdfminimizer *m =
gsl_multimin_fdfminimizer_alloc(type, 3);
gsl_multimin_fdfminimizer_set(
m, // minimisation technique
&likelihood, // function to minimise
par, // initial parameters/starting point
1e-5, // first step length
0.1); // accuracy
size_t iter;
int status = GSL_CONTINUE;
  /* Iterate the minimisation until the norm of the
   * gradient is smaller than 10⁻⁵ or the iteration fails.
   */
fputs("\n# maximum likelihood\n\n", stderr);
for (iter = 0; status == GSL_CONTINUE && iter < 100; iter++) {
status = gsl_multimin_fdfminimizer_iterate(m);
if (status)
break;
status = gsl_multimin_test_gradient(m->gradient, 1e-5);
/* Print the iteration, current parameters
* and gradient of the log-likelihood.
*/
if (iter % 5 == 0 || status == GSL_SUCCESS) {
fprintf(stderr, "%2ld\t", iter);
vector_fprint(stderr, m->x);
fputs("\t", stderr);
vector_fprint(stderr, m->gradient);
putc('\n', stderr);
}
}
fprintf(stderr, "status: %s\n", gsl_strerror(status));
fprintf(stderr, "iterations: %ld\n", iter);
/* Store results in the min_result type.
* Note: We allocate a new vector for the
* parameters because `m->x` will be freed
* along with m.
*/
  min_result res;
  res.par = gsl_vector_alloc(3);
  res.err = gsl_vector_alloc(3);
  // res.cov is allocated by hf_logL() below
  gsl_vector_memcpy(res.par, m->x);
/* Compute the standard errors of the estimates.
 * The Cramér-Rao bound states that the covariance
 * matrix of the estimates is bounded below by
 *
 *   Σ ≥ I⁻¹ = (-H)⁻¹
 *
 * where:
 *   - I is the Fisher information,
 *   - H is the hessian of log(L) at
 *     the maximum likelihood.
*/
res.cov = hf_logL(m->x, sample);
  /* Invert H by Cholesky decomposition:
   *
   *   H = LL^T  ⇒  H⁻¹ = (L^T)⁻¹ L⁻¹
   *
   * Note: H is positive definite (because -logL is
   * minimised) and symmetric, so this is valid.
   *
   */
gsl_linalg_cholesky_decomp(res.cov);
gsl_linalg_cholesky_invert(res.cov);
/* Compute the standard errors
* from the covariance matrix.
*/
gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov);
gsl_vector_memcpy(res.err, &diagonal.vector);
vector_map(&sqrt, res.err);
fputs("\n## results\n", stderr);
fputs("\n* parameters:\n ", stderr);
vector_fprint(stderr, res.par);
  fputs("\n* covariance (Cramér-Rao lower bound):\n", stderr);
matrix_fprint(stderr, res.cov);
// free memory
gsl_multimin_fdfminimizer_free(m);
return res;
}

6
ex-3/likelihood.h Normal file
View File

@ -0,0 +1,6 @@
#include "common.h"
/* Maximum likelihood estimation of the parameters
* α,β,γ.
*/
min_result maxLikelihood(struct sample *sample);

158
ex-3/main.c Normal file

@ -0,0 +1,158 @@
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "likelihood.h"
#include "chisquared.h"
#include <gsl/gsl_rng.h>
#include <gsl/gsl_histogram2d.h>
/* Options for the program */
struct options {
int show_hist; // whether to print the 2D histogram
int only_hist; // whether to skip the minimisations
size_t num_events; // number of generated events
size_t bins_theta; // number of bins for θ
size_t bins_phi; // number of bins for φ
};
int main(int argc, char **argv) {
/* Set default options */
struct options opts;
opts.num_events = 50000;
opts.show_hist = 0;
opts.only_hist = 0;
opts.bins_theta = 30;
opts.bins_phi = 60;
/* Process CLI arguments */
for (size_t i = 1; i < argc; i++) {
    if      (!strcmp(argv[i], "-i")) opts.show_hist = 1;
    else if (!strcmp(argv[i], "-I")) opts.only_hist = 1;
else if (!strcmp(argv[i], "-n")) opts.num_events = atol(argv[++i]);
else if (!strcmp(argv[i], "-t")) opts.bins_theta = atol(argv[++i]);
else if (!strcmp(argv[i], "-p")) opts.bins_phi = atol(argv[++i]);
else {
fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]);
fprintf(stderr, "\t-h\tShow this message.\n");
fprintf(stderr, "\t-i\tPrint the histogram to stdout.\n");
fprintf(stderr, "\t-I\tPrint *only* the histogram.\n");
fprintf(stderr, "\t-n N\tThe number of events to generate.\n");
fprintf(stderr, "\t-t N\tThe number of θ bins.\n");
fprintf(stderr, "\t-p N\tThe number of φ bins.\n");
return EXIT_FAILURE;
}
}
/* Initialize an RNG. */
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
/* Initialise the sample data structure with
* the number of events and allocate an array
* for the events.
*/
struct sample s = { opts.num_events, NULL };
s.events = calloc(s.size, sizeof(struct event));
  /* Prepare a 2D histogram of the sample
   * with bins_theta × bins_phi bins (30×60 by default).
   */
gsl_histogram2d* hist =
gsl_histogram2d_alloc(opts.bins_theta, opts.bins_phi);
// set the ranges θ∈[0, π] and φ∈[0, 2π]
gsl_histogram2d_set_ranges_uniform(hist, 0,M_PI, 0,2*M_PI);
  /* F(θ,φ) parameters we will generate
   * a sample with, and later try to recover.
   * Note: for this choice max F(θ,φ) ≈ 0.179.
   */
*/
gsl_vector *def_par = gsl_vector_alloc(3);
gsl_vector_set(def_par, 0, 0.65); // α
gsl_vector_set(def_par, 1, 0.06); // β
gsl_vector_set(def_par, 2, -0.18); // γ
/* Generate the points (θ, φ) on the unit sphere
* using the inverse transform:
* θ = acos(1 - 2X)
* φ = 2πY
* where X,Y are random uniform variables in [0,1).
 * This ensures that the points are uniformly distributed on the
 * unit sphere; we then accept/reject the events as follows:
 *   1. generate y ∈ [0, max F(θ,φ)] uniformly
 *   2. reject the sample if y > F(θ,φ), else save it.
*/
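  /* Illustrative note: since F is normalised over the sphere, the
   * mean acceptance probability is (1/4π)/0.2 ≈ 0.40, i.e. about
   * 2.5 proposals per accepted event on average.
   */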
fputs("# event sampling\n\n", stderr);
fprintf(stderr, "generating %ld events...", opts.num_events);
struct event *e;
for (size_t i = 0; i < s.size; i++){
e = &s.events[i];
do {
e->th = acos(1 - 2*gsl_rng_uniform(r));
e->ph = 2 * M_PI * gsl_rng_uniform(r);
} while(0.2 * gsl_rng_uniform(r) > distr(def_par, e));
// update the histogram
gsl_histogram2d_increment(hist, e->th, e->ph);
}
fputs("done\n", stderr);
// print histogram to stdout
if (opts.show_hist || opts.only_hist) {
fputs("\n# histogram\n", stderr);
gsl_histogram2d_fprintf(stdout, hist, "%g", "%g");
}
if (!opts.only_hist) {
/* Estimate the parameters α,β,γ by
* the maximum likelihood method.
*/
min_result like = maxLikelihood(&s);
/* Estimate the parameters α,β,γ by
* a minimum χ² method.
*/
struct hist data;
data.h = hist;
data.n = s.size;
min_result chi2 = minChiSquared(data);
/* Compare the results with the original ones
* and check whether they are compatible.
*/
fputs("\n# vector decay compatibility\n", stderr);
fputs("\n## likelihood results\n\n", stderr);
compatibility(def_par, like);
fputs("\n## χ² results\n\n", stderr);
compatibility(def_par, chi2);
    /* Test the isotropic decay hypothesis
     * with the χ² results
     */
    fputs("\n# isotropic hypothesis\n\n", stderr);
/* If the decay is isotropic then F = 1/4π.
* Solving for α,β,γ yields α=1/3,β=γ=0.
*/
gsl_vector *iso_par = gsl_vector_alloc(3);
gsl_vector_set(iso_par, 0, 0.33); // α
gsl_vector_set(iso_par, 1, 0.00); // β
gsl_vector_set(iso_par, 2, 0.00); // γ
compatibility(iso_par, chi2);
// free memory
min_result_free(like);
min_result_free(chi2);
};
// free memory
gsl_rng_free(r);
free(s.events);
gsl_histogram2d_free(hist);
gsl_vector_free(def_par);
return EXIT_SUCCESS;
}

31
ex-3/plot.py Executable file

@ -0,0 +1,31 @@
#!/usr/bin/env python
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import sys
xedges, yedges, counts = loadtxt(sys.stdin, unpack=True, usecols=[0,2,4])
bins = (30, 60)
counts = counts.reshape(bins)
plt.rcParams['font.size'] = 15
suptitle('Angular decay distribution')
# subplot2grid((1, 3), (0, 0), colspan=2, aspect='equal')
xlabel(r'$\phi$ (radians)')
ylabel(r'$\theta$ (radians)')
pcolor(yedges[:bins[1]], xedges[::bins[1]], counts)
norm = colorbar(fraction=0.023, pad=0.04).norm
show()
# Ax = subplot2grid((1, 3), (0, 2), projection='3d')
# Θ, φ = mgrid[0:pi:bins[0]*1j, 0:2*pi:bins[1]*1j]
# X = 5 * sin(θ) * cos(φ)
# Y = 5 * sin(θ) * sin(φ)
# Z = 5 * cos(θ)
# Ax.plot_surface(
# x, y, z, rstride=1, cstride=1,
# facecolors=cm.viridis(norm(counts)))
# Axis('off')
# show()

85
ex-3/results.md Normal file

@ -0,0 +1,85 @@
# event sampling
generating 50000 events...done
# maximum likelihood
0 [ 0.79, 0.02, -0.17]
[ 0.18, -0.06812, 0.001955]
5 [ 0.7894, 0.02022, -0.17]
[ 0.1789, -0.06748, 0.002162]
10 [ 0.7709, 0.02725, -0.1702]
[ 0.1465, -0.04898, 0.007551]
15 [ 0.6525, 0.06317, -0.1725]
[ -0.002576, -0.002313, 0.0161]
20 [ 0.654, 0.06135, -0.1763]
[-2.098e-06, 0.0001837, 0.0001225]
24 [ 0.6541, 0.06125, -0.1763]
[ -3.24e-06, -7.905e-07, -1.432e-06]
status: success
iterations: 25
## results
* parameters:
[ 0, -0.5106, 1.089]
* covariance (Cramér-Rao lower bound):
[ 6.604e-06, -3.474e-11, -1.798e-11]
[-3.474e-11, 1.227e-05, -1.267e-10]
[-1.798e-11, -1.267e-10, 9.564e-05]
# least χ²
0 [ 0.5, 0.01, -0.1]
cond(J)=inf, χ²/d=3.32
4 [ 0.6537, 0.05976, -0.1736]
cond(J)=2.725, χ²/d=1.017
status: success
iterations: 7
## results
* χ²/d: 1.017
* parameters:
[ 0.6537, 0.05972, -0.1736]
* covariance:
[ 0.00304, -1.66e-06, 2.383e-07]
[ -1.66e-06, 0.00212, 1.505e-06]
[ 2.383e-07, 1.505e-06, 0.001637]
# vector decay compatibility
## likelihood results
| par | σ | α-value |
|-----|----------|----------|
| α | 6.6e-06 | 0 |
| β | 1.2e-05 | 0 |
| γ | 9.6e-05 | 0 |
## χ² results
| par | σ | α-value |
|-----|----------|----------|
| α | 0.003 | 0.22 |
| β | 0.0021 | 0.89 |
| γ | 0.0016 | 0.0001 |
# isotropic hypothesis
| par | σ | α-value |
|-----|----------|----------|
| α | 0.003 | 0 |
| β | 0.0021 | 0 |
| γ | 0.0016 | 0 |

117
ex-4/dip.c Normal file

@ -0,0 +1,117 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_rng.h>
int main(int argc, char **argv)
{
// Set default options.
//
size_t N = 50000; // number of events.
size_t n = 50; // number of bins.
double p_max = 10; // maximum value of momentum module.
// Process CLI arguments.
//
for (size_t i = 1; i < argc; i++)
{
if (!strcmp(argv[i], "-N")) N = atol(argv[++i]);
else if (!strcmp(argv[i], "-n")) n = atol(argv[++i]);
else if (!strcmp(argv[i], "-p")) p_max = atof(argv[++i]);
else
{
      fprintf(stderr, "Usage: %s -[hNnp]\n", argv[0]);
fprintf(stderr, "\t-h\tShow this message.\n");
fprintf(stderr, "\t-N integer\tThe number of events to generate.\n");
fprintf(stderr, "\t-n integer\tThe number of bins of the histogram.\n");
fprintf(stderr, "\t-p float\tThe maximum value of momentum.\n");
return EXIT_FAILURE;
}
}
// Initialize an RNG.
//
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// Generate the angle θ uniformly distributed on a sphere using the
// inverse transform:
//
// θ = acos(1 - 2X)
//
// where X is a random uniform variable in [0,1), and the module p of
// the vector:
//
// p² = p_v² + p_h²
//
// uniformly distributed between 0 and p_max. The two components are
// then computed as:
//
// p_v = p⋅cos(θ)
// p_h = p⋅sin(θ)
//
// The histogram will be updated this way.
// The j-th bin where p_h goes in is given by:
//
// step = p_max / n
// j = floor(p_h / step)
//
  // Thus the histogram is stored as an array of structures, each
  // holding the number of entries in a bin and the sum of the |p_v|
  // of those entries; it is filled while generating the events.
//
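  // For example, with the defaults p_max = 10 and n = 50 the bin
  // width is step = 0.2, so an event with p_h = 3.07 falls into
  // bin j = floor(3.07/0.2) = 15.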
struct bin
{
size_t amo; // Amount of events in the bin.
double sum; // Sum of |p_v|s of all the events in the bin.
};
struct bin *histo = calloc(n, sizeof(struct bin));
// Some useful variables.
//
double step = p_max / n;
struct bin *b;
double theta;
double p;
double p_v;
double p_h;
size_t j;
for (size_t i = 0; i < N; i++)
{
// Generate the event.
//
theta = acos(1 - 2*gsl_rng_uniform(r));
p = p_max * gsl_rng_uniform(r);
// Compute the components.
//
p_v = p * cos(theta);
p_h = p * sin(theta);
// Update the histogram.
//
j = floor(p_h / step);
b = &histo[j];
b->amo++;
b->sum += fabs(p_v);
}
  // Compute the mean value of each bin and print it to stdout
// together with other useful things to make the histogram.
//
printf("bins: \t%ld\n", n);
printf("step: \t%.5f\n", step);
for (size_t i = 0; i < n; i++)
{
    histo[i].sum = histo[i].amo > 0 ? histo[i].sum / histo[i].amo : 0;
printf("\n%.5f", histo[i].sum);
};
// free memory
gsl_rng_free(r);
free(histo);
return EXIT_SUCCESS;
}

23
ex-4/plot.py Executable file

@ -0,0 +1,23 @@
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
import sys
bins = input()
step = input()
bins = int(bins.split("\t")[1])
step = float(step.split("\t")[1])
counts = np.loadtxt(sys.stdin)
edges = np.linspace(0, bins*step, bins+1)
plt.rcParams['font.size'] = 17
plt.hist(edges[:-1], edges, weights=counts,
color='#eacf00', edgecolor='#3d3d3c')
plt.title('Vertical component distribution', loc='right')
plt.xlabel(r'$p_h$')
plt.ylabel(r'$\left<|p_v|\right>$')
plt.show()

37
ex-5/casino.c Normal file

@ -0,0 +1,37 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <gsl/gsl_rng.h>
int main(int argc, char** argv){
size_t size = atoi(argv[1]); // size of the generated sample
// Initialize an RNG.
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
  // Generate random points (x, y) with x in [0,1), y in [0,3):
  // accept the point if y ≤ exp(x), reject it otherwise.
size_t in = 0;
size_t total = 0;
do{
double x = gsl_rng_uniform(r);
double y = 3 * gsl_rng_uniform(r);
if (y <= exp(x)){
in++;
}
total++;
} while (total < size);
  // The integral is estimated through the proportion:
  //   integral : 3 = in : total
double integral = 3.0*(double)in/(double)total;
printf("∫exp(x)dx in [0,1]:\t%.07f\n", integral);
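  // A rough statistical error estimate for this hit-or-miss scheme:
  // the acceptance probability is p = (e-1)/3 ≈ 0.57, so the error
  // is about 3·sqrt(p(1-p)/total), e.g. ≈ 1.5e-3 for 10⁶ points.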
// Free memory
gsl_rng_free(r);
return EXIT_SUCCESS;
}

31
ex-5/manual.c Normal file

@ -0,0 +1,31 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* This program evaluates the integral of exp(x) in [0,1] through the
 * trapezoidal rule.
 * The exponent d of the integration step is given as input: step = 10^(-d).
*
*/
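/* The error of the composite trapezoidal rule is bounded by
 * (b-a)⋅h²/12 ⋅ max|f''|, which for f = exp on [0,1] is at most
 * e⋅h²/12 ≈ 0.23⋅h², i.e. about 2.3e-7 for d = 3 (h = 10⁻³).
 */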
int main(int argc, char** argv){
double left = 0; // left limit
double point = left; // points at the current position
double right = 1; // right limit
double step = pow(10, -atoi(argv[1])); // step of integration
double sum = 0;
// Integral evaluation: stops when point goes over the right edge of the
// range.
do {
sum += step * 0.5 * (exp(point) + exp(point + step));
point += step;
} while (point < right);
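// Note: the composite trapezoidal rule has a global error of order step²,
// so d digits in the step give roughly 2d correct decimal digits here.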
// Print out the result
printf("∫exp(x)dx in [0,1]:\t%.7f\n", sum);
return EXIT_SUCCESS;
}

89
ex-5/trifecta.c Normal file

@ -0,0 +1,89 @@
#include <gsl/gsl_monte_plain.h>
#include <gsl/gsl_monte_miser.h>
#include <gsl/gsl_monte_vegas.h>
#include <gsl/gsl_monte.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Exact value of the integral.
double exact = 1.71828182845905;
// Integrand, to be passed through a gsl_monte_function.
double function (double * x, size_t dim, void * params){
// Avoid warning when no parameters are passed to the function.
(void)(params);
return exp(x[0]);
}
// Function which prints out the results when called.
void results (char *title, double result, double error)
{
printf ("---------------------------\n%s\n", title);
printf ("result: % .10f\n", result);
printf ("sigma: % .10f\n", error);
printf ("exact: % .10f\n", exact);
}
int main(){
// Some useful variables.
double integral, error;
size_t calls = 50000000;
size_t dims = 1;
double lower[1] = {0}; // Integration lower limit
double upper[1] = {1}; // Integration upper limit
// Initialize an RNG.
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// Define GSL function.
gsl_monte_function expo = {&function, dims, NULL};
{
gsl_monte_vegas_state *s = gsl_monte_vegas_alloc (dims);
gsl_monte_vegas_integrate (&expo, lower, upper, dims, calls,
r, s, &integral, &error);
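// The first call warms up the Vegas grid; then keep iterating with
// fewer calls until the χ² per d.o.f. of the combined results is
// close to 1, i.e. the iterations are mutually consistent
// (the convergence check suggested by the GSL manual).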
do { gsl_monte_vegas_integrate (&expo, lower, upper, dims,
calls/5, r, s, &integral, &error);
} while (fabs (gsl_monte_vegas_chisq (s) - 1.0) > 0.5);
// Free memory.
gsl_monte_vegas_free(s);
// Display results
results("Vegas", integral, error);
}
{
gsl_monte_miser_state *s = gsl_monte_miser_alloc (dims);
gsl_monte_miser_integrate (&expo, lower, upper, dims, calls,
r, s, &integral, &error);
// Free memory.
gsl_monte_miser_free (s);
// Display results
results ("Miser", integral, error);
}
{
gsl_monte_plain_state *s = gsl_monte_plain_alloc (dims);
gsl_monte_plain_integrate (&expo, lower, upper, dims, calls,
r, s, &integral, &error);
// Display results
results ("Montecarlo", integral, error);
// Free memory.
gsl_monte_plain_free (s);
}
// Free memory.
gsl_rng_free(r);
return EXIT_SUCCESS;
}

196
ex-6/fft.c Normal file

@ -0,0 +1,196 @@
#include "fft.h"
#include <string.h>
#include <gsl/gsl_fft_halfcomplex.h>
/* These macros are used to extract the
* real and imaginary parts of a complex
* vector */
#define REAL(a,stride,i) ((a)[2*(stride)*(i)])
#define IMAG(a,stride,i) ((a)[2*(stride)*(i)+1])
/* Computes the (forward) real DFT of a vector
* and returns the results as a complex vector
*/
gsl_vector_complex* vector_fft(gsl_vector *data) {
/* Create a copy of the input data to
* preserve it: GSL functions work in-place.
*/
gsl_vector *fdata = gsl_vector_alloc(data->size);
gsl_vector_memcpy(fdata, data);
/* Compute the forward Fourier transform */
gsl_fft_real_wavetable *rtable =
gsl_fft_real_wavetable_alloc(fdata->size);
gsl_fft_real_workspace *wspace =
gsl_fft_real_workspace_alloc(fdata->size);
gsl_fft_real_transform(
fdata->data, // array of data to transform
fdata->stride, // stride of the array, i.e. # steps between elements
fdata->size, // number of elements
rtable, // wavetable (real)
wspace); // workspace
/* The data in `fdata` is now in the half-complex
* packed format, which reduces memory usage
* by using the fact that
*
* z_i = z̅_(n-i)
*
* We unpack it to the normal packing and use
* the array to define a gsl_vector_complex.
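*
* For example, with n = 6 the packed layout is
* [r₀, r₁, i₁, r₂, i₂, r₃]: the remaining terms are
* redundant because of the symmetry above.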
*/
gsl_vector_complex *cdata = gsl_vector_complex_calloc(fdata->size);
gsl_fft_halfcomplex_unpack(
fdata->data,
cdata->data,
cdata->stride,
fdata->size);
// clear memory
gsl_fft_real_wavetable_free(rtable);
gsl_fft_real_workspace_free(wspace);
gsl_vector_free(fdata);
return cdata;
}
/* Inverse function of gsl_fft_halfcomplex_unpack.
* `gsl_fft_halfcomplex_pack(c, hc, stride, n)`
* creates a halfcomplex packed version of a
* complex packaged array `hc` with stride `stride`
* and `n` elements.
*/
int gsl_fft_halfcomplex_pack(
const gsl_complex_packed_array c,
double hc[],
const size_t stride, const size_t n) {
hc[0] = REAL(c, stride, 0);
size_t i;
for (i = 1; i < n - i; i++) {
hc[(2 * i - 1) * stride] = REAL(c, stride, i);
hc[2 * i * stride] = IMAG(c, stride, i);
}
if (i == n - i)
hc[(n - 1) * stride] = REAL(c, stride, i);
return 0;
}
/* `fft_deconvolve(data, kernel)` tries to deconvolve
* `kernel` from `data` by factoring out the `kernel`
* in the Fourier space.
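*
* Since `data` is (original ⊛ kernel), the convolution theorem gives
* DFT(data) = DFT(original)⋅DFT(kernel), so the original can be
* recovered as IDFT(DFT(data)/DFT(kernel)).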
*/
gsl_histogram* fft_deconvolve(
gsl_histogram *data,
gsl_histogram *kernel) {
/* Size of the original data
* before being convoluted.
*
* Notation of sizes:
* - original: m
* - kernel: n
* - "full" convolution: m + n - 1
*/
size_t orig_size = data->n - kernel->n + 1;
/* Create vector views */
gsl_vector vdata =
gsl_vector_view_array(data->bin, data->n).vector;
gsl_vector vkernel =
gsl_vector_view_array(kernel->bin, kernel->n).vector;
/* 0-pad the kernel to make it the
* same size as the convoluted data,
* which is m+n-1:
*
* 1. create a new vector
* 2. create a subvector view to the center
* 3. copy the kernel to the view
*/
gsl_vector *vpadded = gsl_vector_calloc(data->n);
gsl_vector center = gsl_vector_subvector(
vpadded, // vector: padded kernel (n+m-1)
orig_size/2, // offset: half of original data size (m/2)
kernel->n).vector; // length: kernel size (n)
gsl_vector_memcpy(&center, &vkernel);
/* Compute the DFT of the data and
* divide it by the DFT of the kernel.
*/
gsl_vector_complex *fdata = vector_fft(&vdata);
gsl_vector_complex *fkernel = vector_fft(vpadded);
gsl_vector_complex_div(fdata, fkernel);
/* Pack the result in the half-complex
* format before computing the inverse DFT
*/
double *res = calloc(fdata->size, sizeof(double));
gsl_fft_halfcomplex_pack(fdata->data, res,
fdata->stride, fdata->size);
/* Compute the inverse DFT of the result */
gsl_fft_halfcomplex_wavetable *htable =
gsl_fft_halfcomplex_wavetable_alloc(data->n);
gsl_fft_real_workspace *wspace =
gsl_fft_real_workspace_alloc(data->n);
gsl_fft_halfcomplex_inverse(
res, // array of data to transform
fdata->stride, // stride of the array
fdata->size, // number of elements
htable, // wavetable (complex)
wspace); // workspace
/* Restore the natural order of frequency
* in the result of the DFT (negative → positive).
* To do that roll over the array by a half:
* 1. create a temp array of half size
* 2. copy first half over to temp
* 3. copy second half to the first
* 4. copy back the first to the second half
*/
size_t half_size = data->n/2 * sizeof(double);
double *temp = malloc(half_size);
memcpy(temp, res, half_size);
memcpy(res, res + data->n/2, half_size);
memcpy(res + data->n/2, temp, half_size);
/* Create a histogram with the same edges
* as `data`, but with the original size,
* to return the cleaned result
*/
gsl_histogram *hist = gsl_histogram_calloc(orig_size);
/* Set the same bin edges as `data`*/
double max = gsl_histogram_max(data);
double min = gsl_histogram_min(data);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* Remove the extra size of the convolution
* (where the overlap is partial) when copying
* over the result to the histogram.
* This amounts to (n-1)/2 on each end of the
* vector.
*/
memcpy(
hist->bin,
res + (kernel->n - 1)/2,
orig_size * sizeof(double));
// free memory
gsl_vector_free(vpadded);
gsl_vector_complex_free(fdata);
gsl_vector_complex_free(fkernel);
gsl_fft_halfcomplex_wavetable_free(htable);
gsl_fft_real_workspace_free(wspace);
free(res);
free(temp);
return hist;
}

15
ex-6/fft.h Normal file

@ -0,0 +1,15 @@
#include <gsl/gsl_fft_real.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_vector.h>
/* Computes the (forward) real DFT of a vector
* and returns the results as a complex vector
*/
gsl_vector_complex* vector_fft(gsl_vector *data);
/* `fft_deconvolve(data, noise)` tries to deconvolve
* `noise` from `data` by factoring out the `noise`
* in the Fourier space.
*/
gsl_histogram* fft_deconvolve(gsl_histogram *data, gsl_histogram *noise);

232
ex-6/main.c Normal file

@ -0,0 +1,232 @@
#include <math.h>
#include <string.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sf.h>
#include "fft.h"
#include "rl.h"
/* Program options */
struct options {
int convolved;
int deconvolved;
int original;
size_t num_events;
size_t bins;
double sigma;
size_t rounds;
const char* mode;
double noise;
};
/* Intensity function parameters */
struct param {
double a; // aperture radius
double k; // wavenumber
double e; // wave amplitude
double l; // screen distance
};
/* Intensity of the EM field at a large distance
* from a circular aperture diffraction of a plane
* wave.
*
* The intensity is given as a function of θ,
* the diffraction angle, all the other physical
* parameter are controlled by the structure `param`.
*
* See Fraunhofer diffraction in "Hecht - Optics" for
* a derivation of the formula.
*/
double intensity(double theta, struct param p) {
double x = p.k * p.a * sin(theta);
double R = p.l / cos(theta);
/* The intensity function has a removable
* discontinuity at the origin, which must
* be handled separately.
*/
if (fabs(x) < 1e-15) {
return 0.5 * pow(p.e/R * M_PI * pow(p.a, 2), 2);
}
double y = 2*M_PI*pow(p.a, 2) * gsl_sf_bessel_J1(x) / x;
return 0.5 * pow(p.e * y/R, 2);
}
/* `gaussian_for(hist, sigma)` generates a histogram
* with the same bin width of `hist` but smaller,
* with a gaussian PDF with μ at the central bin
* and σ equal to `sigma`.
*/
gsl_histogram* gaussian_for(gsl_histogram *hist, double sigma) {
/* Set the size to 6% of the histogram.
* Always choose n even to correctly center the
* gaussian in the middle.
*/
size_t n = ((double) hist->n * 6) / 100;
n = n % 2 ? n+1 : n;
/* Calculate the (single) bin width assuming
* the ranges are uniformly spaced
*/
double min, max;
gsl_histogram_get_range(hist, 0, &min, &max);
double dx = max - min;
gsl_histogram *res = gsl_histogram_alloc(n);
gsl_histogram_set_ranges_uniform(res, 0, dx * n);
/* The histogram will be such that
* the zero falls in the central bin.
*/
long int offset = res->n/2;
for (long int i = 0; i < (long int)res->n; i++)
res->bin[i] = gsl_ran_gaussian_pdf(
((double)(i - offset) + 0.5) * dx, sigma * dx);
return res;
}
/* Convolves two histograms while keeping the
* original bin edges. This is a simple wrapper
* function around `gsl_vector_convolve`.
*/
gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b) {
gsl_histogram *res = gsl_histogram_calloc(a->n + b->n - 1);
/* Set the same edges as `a`*/
double max = gsl_histogram_max(a);
double min = gsl_histogram_min(a);
gsl_histogram_set_ranges_uniform(res, min, max);
/* Create vector views for everybody */
gsl_vector_view va = gsl_vector_view_array(a->bin, a->n);
gsl_vector_view vb = gsl_vector_view_array(b->bin, b->n);
gsl_vector_view vres = gsl_vector_view_array(res->bin, res->n);
gsl_vector_convolve(&va.vector, &vb.vector, &vres.vector);
return res;
}
int main(int argc, char **argv) {
struct options opts;
/* Set default options */
opts.convolved = 0;
opts.deconvolved = 0;
opts.original = 1;
opts.num_events = 50000;
opts.bins = 150;
opts.sigma = 0.8;
opts.rounds = 3;
opts.mode = "fft";
opts.noise = 0;
/* Process CLI arguments */
for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-c")) opts.convolved = 1;
else if (!strcmp(argv[i], "-d")) opts.deconvolved = 1;
else if (!strcmp(argv[i], "-o")) opts.original = 1;
else if (!strcmp(argv[i], "-e")) opts.num_events = atol(argv[++i]);
else if (!strcmp(argv[i], "-b")) opts.bins = atol(argv[++i]);
else if (!strcmp(argv[i], "-s")) opts.sigma = atof(argv[++i]);
else if (!strcmp(argv[i], "-r")) opts.rounds = atol(argv[++i]);
else if (!strcmp(argv[i], "-m")) opts.mode = argv[++i];
else if (!strcmp(argv[i], "-n")) opts.noise = atof(argv[++i]);
else {
fprintf(stderr, "Usage: %s -[cdoebsh]\n", argv[0]);
fprintf(stderr, "\t-h\t\tShow this message.\n");
fprintf(stderr, "\t-c\t\tPrint the convolved histogram to stdout.\n");
fprintf(stderr, "\t-d\t\tAttempt and print the deconvolved histogram.\n");
fprintf(stderr, "\t-o\t\tPrint the original histogram to stdout.\n");
fprintf(stderr, "\t-e N\t\tThe number of events.\n");
fprintf(stderr, "\t-b N\t\tThe number of θ bins.\n");
fprintf(stderr, "\t-s SIGMA\tThe sigma of gaussian kernel.\n");
fprintf(stderr, "\t-r N\t\tThe number of RL deconvolution rounds.\n");
fprintf(stderr, "\t-n MU\t\tThe mean (μ) of Poisson noise to add to the convolution.\n");
return EXIT_FAILURE;
}
}
/* Initialize an RNG. */
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
gsl_histogram* hist = gsl_histogram_alloc(opts.bins);
gsl_histogram_set_ranges_uniform(hist, 0, M_PI/2);
double theta;
struct param p = { 0.01, 0.0001, 1e4, 1 };
/* Sample events following the intensity
* of a circular aperture diffraction I(θ)
* while producing a histogram.
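*
* θ is proposed as acos(1 - u), with u uniform in [0,1): this draws
* the angle uniformly in solid angle (pdf ∝ sin θ on [0, π/2]), and
* the rejection step then accepts it with probability I(θ)/I(0).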
*/
fputs("# event sampling\n", stderr);
fprintf(stderr, "max itensity: %.2f\n", intensity(0, p));
fprintf(stderr, "1. generating %ld events...", opts.num_events);
for (size_t i = 0; i < opts.num_events; i++){
do {
theta = acos(1 - gsl_rng_uniform(r));
} while(intensity(0, p) * gsl_rng_uniform(r) > intensity(theta, p));
gsl_histogram_increment(hist, theta);
}
fputs("done\n", stderr);
/* Generate the gaussian kernel, convolve the
* sample histogram with it and try to deconvolve it.
*/
fputs("\n# convolution\n", stderr);
fputs("1. generating gaussian kernel...", stderr);
gsl_histogram *kernel = gaussian_for(hist, opts.sigma);
fputs("done\n", stderr);
fputs("2. convolving...", stderr);
gsl_histogram *conv = histogram_convolve(hist, kernel);
fputs("done\n", stderr);
/* Add Poisson noise with μ=opts.noise to
* the convolution.
*/
if (opts.noise > 0) {
fputs("2.1 adding poisson noise...", stderr);
for (size_t i = 0; i < conv->n; i++)
conv->bin[i] += gsl_ran_poisson(r, opts.noise);
fputs("done\n", stderr);
}
fprintf(stderr, "3. %s deconvolution...", opts.mode);
gsl_histogram *clean;
if (!strcmp(opts.mode, "fft"))
clean = fft_deconvolve(conv, kernel);
else if (!strcmp(opts.mode, "rl"))
clean = rl_deconvolve(conv, kernel, opts.rounds);
else {
fputs("\n\nerror: invalid mode. select either 'fft' or 'rl'\n", stderr);
return EXIT_FAILURE;
}
fputs("done\n", stderr);
/* Print the selected histogram*/
fputs("\n# histogram \n", stderr);
gsl_histogram *showing;
if (opts.convolved) showing = conv;
else if (opts.deconvolved) showing = clean;
else if (opts.original) showing = hist;
gsl_histogram_fprintf(stdout, showing, "%g", "%g");
// free memory
gsl_rng_free(r);
gsl_histogram_free(hist);
gsl_histogram_free(kernel);
gsl_histogram_free(conv);
gsl_histogram_free(clean);
return EXIT_SUCCESS;
}

11
ex-6/plot.py Executable file

@ -0,0 +1,11 @@
#!/usr/bin/env python
from pylab import *
import sys
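# Each input row, as written by gsl_histogram_fprintf, holds:
# lower bin edge, upper bin edge, bin count.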
a, b, f = loadtxt(sys.stdin, unpack=True)
suptitle('Fraunhofer diffraction')
title(sys.argv[1] if len(sys.argv) > 1 else "", loc='right')
hist(a, np.insert(b, 0, a[0]), weights=f/sum(f),
color='#dbbf0d', edgecolor='#595856', linewidth=0.5)
show()

152
ex-6/rl.c Normal file

@ -0,0 +1,152 @@
#include "rl.h"
#include <gsl/gsl_blas.h>
#include <math.h>
/* `gsl_vector_convolve(a, b, res)` computes the linear
* convolution of two vectors. The resulting vector
* has size `a->n + b->n - 1` and is stored in `res`.
*/
int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res) {
size_t m = a->size;
size_t n = b->size;
/* Vector views for the overlap dot product */
gsl_vector_view va, vb;
/* Reverse `b` before convolving */
gsl_vector_reverse(b);
double dot;
size_t start_a, start_b, many;
for (size_t i = 0; i < m + n - 1; i++) {
/* Calculate the starting indices
* of the two vectors a,b overlap.
*/
if (i < n) start_a = 0;
else start_a = i - (n - 1);
if (i < n) start_b = (n - 1) - i;
else start_b = 0;
/* Calculate how many elements overlap */
if (i < n) many = i + 1;
else if (i >= m) many = n - (i - m) - 1;
else many = n;
/* Take the dot product of the overlap
* and store it in the resulting histogram
*/
va = gsl_vector_subvector(a, start_a, many);
vb = gsl_vector_subvector(b, start_b, many);
gsl_blas_ddot(&va.vector, &vb.vector, &dot);
gsl_vector_set(res, i, dot);
}
/* Put b back together */
gsl_vector_reverse(b);
return GSL_SUCCESS;
}
/* Performs the Richardson-Lucy deconvolution.
* In pseudo-python:
*
* def rl_deconvolve(data, kernel, rounds):
*     est = np.full(len(data), 0.5)
*     for _ in range(rounds):
*         est_conv = convolve(est, kernel)
*         est *= convolve(data / est_conv, kernel[::-1])
*     return est
*
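* In formulas, each round updates the estimate as
*
*     est ← est ⋅ [ (data / (est ⊛ kernel)) ⊛ kernel* ]
*
* where ⊛ denotes convolution and kernel* is the flipped kernel.
*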
*/
gsl_histogram* rl_deconvolve(
gsl_histogram *data,
gsl_histogram *kernel,
size_t rounds) {
/* Size of the original data
* before being convoluted.
*
* Notation of sizes:
* - original: m
* - kernel: n
* - "full" convolution: m + n - 1
*/
size_t orig_size = data->n - kernel->n + 1;
/* Create a histogram with the same edges
* as `data`, but with the original size,
* to return the cleaned result
*/
gsl_histogram *hist = gsl_histogram_calloc(orig_size);
/* Set the same bin edges as `data`*/
double max = gsl_histogram_max(data);
double min = gsl_histogram_min(data);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* Vector views of the result, kernel
* and data. These are used to perform
* vectorised operations on histograms.
*/
gsl_vector est =
gsl_vector_view_array(hist->bin, hist->n).vector;
gsl_vector vkernel =
gsl_vector_view_array(kernel->bin, kernel->n).vector;
gsl_vector vdata =
gsl_vector_view_array(data->bin, data->n).vector;
gsl_vector center;
/* Create a flipped copy of the kernel */
gsl_vector *vkernel_flip = gsl_vector_alloc(kernel->n);
gsl_vector_memcpy(vkernel_flip, &vkernel);
gsl_vector_reverse(vkernel_flip);
/* More vectors to store partial
* results
*/
gsl_vector* est_conv = gsl_vector_alloc(data->n);
gsl_vector* rel_blur = gsl_vector_alloc(data->n);
/* The zero-order estimate is simply
* all elements at 0.5 */
gsl_vector_set_all(&est, 0.5);
for (size_t iter = 0; iter < rounds; iter++) {
/* The current estimated convolution is the
* current estimate of the data with
* the kernel */
gsl_vector_convolve(&est, &vkernel, est_conv);
/* Divide the data by the estimated
* convolution to calculate the "relative blur".
*/
gsl_vector_memcpy(rel_blur, &vdata);
gsl_vector_div(rel_blur, est_conv);
/* Set NaNs to zero */
for (size_t i = 0; i < rel_blur->size; i++)
if (isnan(gsl_vector_get(rel_blur, i))) {
fprintf(stderr, "gotcha: %ld!!\n", i);
double y = (i > 0)? gsl_vector_get(rel_blur, i - 1) : 1;
gsl_vector_set(rel_blur, i, y);
}
/* Convolve the blur by the kernel
* and multiply the current estimate
* of the data by it.
*/
center = gsl_vector_subvector(rel_blur, (kernel->n-1)/2, orig_size).vector;
gsl_vector_convolve(&center, vkernel_flip, est_conv);
center = gsl_vector_subvector(est_conv, (kernel->n-1)/2, orig_size).vector;
gsl_vector_mul(&est, &center);
}
// free memory
gsl_vector_free(est_conv);
gsl_vector_free(rel_blur);
gsl_vector_free(vkernel_flip);
return hist;
}

20
ex-6/rl.h Normal file

@ -0,0 +1,20 @@
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_vector.h>
/* `rl_deconvolve(data, noise, rounds)`
* tries to deconvolve `noise` from
* `data` in `rounds` rounds.
* */
gsl_histogram* rl_deconvolve(
gsl_histogram *data,
gsl_histogram *noise,
size_t rounds);
/* `gsl_vector_convolve(a, b, res)` computes the linear
 * convolution of two vectors. The convolved vector `res`
 * has length `a->size + b->size - 1`.
*/
int gsl_vector_convolve(
gsl_vector *a,
gsl_vector *b,
gsl_vector *res);

433
ex-7/main.c Normal file

@ -0,0 +1,433 @@
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
/* Parameters for bivariate
* gaussian PDF
*/
struct par {
double x0; // x mean
double y0; // y mean
double sigma_x; // x standard dev
double sigma_y; // y standard dev
double rho; // correlation: cov(x,y)/σx⋅σy
};
/* A sample of N 2D points is an
* N×2 matrix, with vectors as rows.
*/
typedef struct {
struct par p;
gsl_matrix *data;
} sample_t;
/* Create a sample of `n` points */
sample_t* sample_t_alloc(size_t n, struct par p) {
sample_t *x = malloc(sizeof(sample_t));
x->p = p;
x->data = gsl_matrix_alloc(n, 2);
return x;
}
/* Delete a sample */
void sample_t_free(sample_t *x) {
gsl_matrix_free(x->data);
free(x);
}
/* `generate_normal(r, n, p)` will create
* a sample of `n` points, distributed
* according to a bivariate gaussian distribution
* of parameters `p`.
*/
sample_t* generate_normal(
gsl_rng *r, size_t n, struct par *p) {
sample_t *s = sample_t_alloc(n, *p);
for (size_t i = 0; i < n; i++) {
/* Generate a vector (x,y) with
* a standard (μ = 0) bivariate
* gaussian PDF.
*/
double *x = gsl_matrix_ptr(s->data, i, 0);
double *y = gsl_matrix_ptr(s->data, i, 1);
gsl_ran_bivariate_gaussian(
r,
p->sigma_x, p->sigma_y, p->rho,
x, y);
/* Shift the vector to (x₀,y₀) */
*x += p->x0;
*y += p->y0;
}
return s;
}
/* Builds the covariance matrix Σ
* from the standard parameters (σ, ρ)
* of a bivariate gaussian.
*/
gsl_matrix* normal_cov(struct par *p) {
double var_x = pow(p->sigma_x, 2);
double var_y = pow(p->sigma_y, 2);
double cov_xy = p->rho * p->sigma_x * p->sigma_y;
gsl_matrix *cov = gsl_matrix_alloc(2, 2);
gsl_matrix_set(cov, 0, 0, var_x);
gsl_matrix_set(cov, 1, 1, var_y);
gsl_matrix_set(cov, 0, 1, cov_xy);
gsl_matrix_set(cov, 1, 0, cov_xy);
return cov;
}
/* Builds the mean vector of
* a bivariate gaussian.
*/
gsl_vector* normal_mean(struct par *p) {
gsl_vector *mu = gsl_vector_alloc(2);
gsl_vector_set(mu, 0, p->x0);
gsl_vector_set(mu, 1, p->y0);
return mu;
}
/* `fisher_proj(c1, c2)` computes the optimal
* projection map, which maximises the separation
* between the two classes.
* The projection vector w is given by
*
*     w = Sw⁻¹ (μ₂ - μ₁)
*
* where Sw = Σ₁ + Σ₂ is the so-called within-class
* covariance matrix.
*/
gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) {
/* Construct the covariances of each class... */
gsl_matrix *cov1 = normal_cov(&c1->p);
gsl_matrix *cov2 = normal_cov(&c2->p);
/* and the mean values */
gsl_vector *mu1 = normal_mean(&c1->p);
gsl_vector *mu2 = normal_mean(&c2->p);
/* Compute the inverse of the within-class
* covariance, Sw⁻¹.
* Note: by definition Σ is symmetrical and
* positive-definite, so Cholesky is appropriate.
*/
gsl_matrix_add(cov1, cov2);
gsl_linalg_cholesky_decomp(cov1);
gsl_linalg_cholesky_invert(cov1);
/* Compute the difference of the means. */
gsl_vector *diff = gsl_vector_alloc(2);
gsl_vector_memcpy(diff, mu2);
gsl_vector_sub(diff, mu1);
/* Finally multiply diff by Sw⁻¹.
* This uses the rather low-level CBLAS
* function gsl_blas_dgemv, which computes
*
*     y := α op(A)x + βy
*
* here with op(A) = A (CblasNoTrans), α = 1 and β = 0.
*/
gsl_vector *w = gsl_vector_alloc(2);
gsl_blas_dgemv(
CblasNoTrans, // do nothing on A
1, // α = 1
cov1, // matrix A
diff, // vector x
0, // β = 0
w); // vector y
// free memory
gsl_matrix_free(cov1);
gsl_matrix_free(cov2);
gsl_vector_free(mu1);
gsl_vector_free(mu2);
gsl_vector_free(diff);
return w;
}
/* Computes the determinant from the
* Cholesky decomposition of matrix.
* In this case it's simply the product
* of the diagonal elements, squared.
*/
double gsl_linalg_cholesky_det(gsl_matrix *LL) {
gsl_vector diag = gsl_matrix_diagonal(LL).vector;
double det = 1;
for (size_t i = 0; i < LL->size1; i++)
det *= gsl_vector_get(&diag, i);
return det * det;
}
/* `fisher_cut(ratio, w, c1, c2)` computes
* the threshold (cut), on the line given by
* `w`, to discriminate between the classes `c1`, `c2`;
* with `ratio` being the ratio of their prior
* probabilities.
*
* The cut is fixed by the condition of
* conditional probability being the
* same for each class:
*
*   P(c₁|x)     p(x|c₁)p(c₁)
*   ------- = --------------- = 1;
*   P(c₂|x)     p(x|c₂)p(c₂)
*
* together with x = tw.
*
* If p(x|c) is a bivariate normal PDF the
* solution is found to be:
*
*   t = (b/a) + √((b/a)² - c/a);
*
* where
*
*   1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
*   2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
*   3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log(|Σ₁|/|Σ₂|) - 2 log(α)
*   4. α = p(c₁)/p(c₂)
*
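* (These follow from equating the logarithms of the two weighted
* densities p(x|cᵢ)p(cᵢ) at x = tw and collecting the powers of t
* into the quadratic a⋅t² - 2b⋅t + c = 0.)
*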
*/
double fisher_cut(
double ratio,
gsl_vector *w,
sample_t *c1, sample_t *c2) {
/* Construct the covariances of each class... */
gsl_matrix *cov1 = normal_cov(&c1->p);
gsl_matrix *cov2 = normal_cov(&c2->p);
/* and the mean values */
gsl_vector *mu1 = normal_mean(&c1->p);
gsl_vector *mu2 = normal_mean(&c2->p);
/* Temporary vector/matrix for
* intermediate results.
*/
gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2);
gsl_vector *vtemp = gsl_vector_alloc(w->size);
/* Invert Σ₁ and Σ₂ in-place:
* we only need the inverse matrices
* in the steps to follow.
*/
gsl_linalg_cholesky_decomp(cov1);
gsl_linalg_cholesky_decomp(cov2);
// store determinant for later
double det1 = gsl_linalg_cholesky_det(cov1);
double det2 = gsl_linalg_cholesky_det(cov2);
gsl_linalg_cholesky_invert(cov1);
gsl_linalg_cholesky_invert(cov2);
/* Compute the first term:
*
*     a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
*
*/
// mtemp = cov1 - cov2
gsl_matrix_memcpy(mtemp, cov1);
gsl_matrix_sub(mtemp, cov2);
// vtemp = mtemp ⋅ w
gsl_vector_memcpy(vtemp, w);
gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp);
// a = (w, vtemp)
double a; gsl_blas_ddot(w, vtemp, &a);
/* Compute the second term:
*
*     b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
*
*/
// vtemp = cov2 ⋅ mu2
// vtemp = cov1 ⋅ mu1 - vtemp
gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp);
gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp);
// b = (w, vtemp)
double b; gsl_blas_ddot(w, vtemp, &b);
/* Compute the last term:
*
*     c = log(|Σ₁|/|Σ₂|) - 2 log(α) + (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂)
*
*/
double c, temp;
c = log(det1 / det2) - 2*log(ratio);
gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, 0, vtemp);
gsl_blas_ddot(mu1, vtemp, &temp);
c += temp;
gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp);
gsl_blas_ddot(mu2, vtemp, &temp);
c -= temp;
/* To get the threshold value we have to
* multiply t by (w,w) = |w|² if w is not normalised
*/
double norm; gsl_blas_ddot(w, w, &norm);
// free memory
gsl_vector_free(mu1);
gsl_vector_free(mu2);
gsl_vector_free(vtemp);
gsl_matrix_free(cov1);
gsl_matrix_free(cov2);
gsl_matrix_free(mtemp);
return ((b/a) + sqrt(pow(b/a, 2) - c/a)) * norm;
}
/* `fisher_cut2(ratio, w, c1, c2)` computes
* the threshold (cut), on the line given by
* `w`, to discriminate between the classes `c1`, `c2`;
* with `ratio` being the ratio of their prior
* probabilities.
*
* The cut is fixed by the condition of
* conditional probability being the
* same for each class:
*
*   P(c₁|x)     p(x|c₁)p(c₁)
*   ------- = --------------- = 1;
*   P(c₂|x)     p(x|c₂)p(c₂)
*
* where p(x|c) is the probability for point x
* along the fisher projection line. If the classes
* are bivariate gaussian then p(x|c) is simply
* given by a normal distribution:
*
*   Φ(μ = (w,μᵢ), σ² = (w, Σᵢw))
*
* The solution is then
*
*   t = (b/a) + √((b/a)² - c/a);
*
* where
*
*   1. a = S₁² - S₂²
*   2. b = M₂S₁² - M₁S₂²
*   3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α)
*   4. α = p(c₁)/p(c₂)
*
*/
double fisher_cut2(
double ratio,
gsl_vector *w,
sample_t *c1, sample_t *c2) {
gsl_vector *vtemp = gsl_vector_alloc(w->size);
/* Construct the covariances of each class... */
gsl_matrix *cov1 = normal_cov(&c1->p);
gsl_matrix *cov2 = normal_cov(&c2->p);
/* and the mean values */
gsl_vector *mu1 = normal_mean(&c1->p);
gsl_vector *mu2 = normal_mean(&c2->p);
/* Project the distribution onto the
* w line to get a 1D gaussian
*/
// mean
double m1; gsl_blas_ddot(w, mu1, &m1);
double m2; gsl_blas_ddot(w, mu2, &m2);
// variances
gsl_blas_dgemv(CblasNoTrans, 1, cov1, w, 0, vtemp);
double var1; gsl_blas_ddot(w, vtemp, &var1);
gsl_blas_dgemv(CblasNoTrans, 1, cov2, w, 0, vtemp);
double var2; gsl_blas_ddot(w, vtemp, &var2);
double a = var1 - var2;
double b = m2*var1 + m1*var2;
double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio);
// free memory
gsl_vector_free(mu1);
gsl_vector_free(mu2);
gsl_vector_free(vtemp);
gsl_matrix_free(cov1);
gsl_matrix_free(cov2);
return (b/a) + sqrt(pow(b/a, 2) - c/a);
}
int main(int argc, char **args) {
// initialize RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
/* Generate two classes of normally
* distributed 2D points with different
* parameters: signal and noise.
*/
struct par par_sig = { 0, 0, 0.3, 0.3, 0.5 };
struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 };
// sample sizes
size_t nsig = 800;
size_t nnoise = 1000;
sample_t *signal = generate_normal(r, nsig, &par_sig);
sample_t *noise = generate_normal(r, nnoise, &par_noise);
/* Fisher linear discriminant
*
* First calculate the direction w onto
* which project the data points. Then the
* cut which determines the class for each
* projected point.
*/
gsl_vector *w = fisher_proj(signal, noise);
double t_cut = fisher_cut2(nsig / (double)nnoise,
w, signal, noise);
fputs("# Linear Fisher discriminant\n\n", stderr);
fprintf(stderr, "* w: [%.3f, %.3f]\n",
gsl_vector_get(w, 0),
gsl_vector_get(w, 1));
fprintf(stderr, "* t_cut: %.3f\n", t_cut);
gsl_vector_fprintf(stdout, w, "%g");
printf("%f\n", t_cut);
/* Print data to stdout for plotting.
* Note: we print the sizes to be able
* to set apart the two matrices.
*/
printf("%ld %ld %d\n", nsig, nnoise, 2);
gsl_matrix_fprintf(stdout, signal->data, "%g");
gsl_matrix_fprintf(stdout, noise->data, "%g");
// free memory
gsl_rng_free(r);
sample_t_free(signal);
sample_t_free(noise);
return EXIT_SUCCESS;
}

26
ex-7/plot.py Executable file

@ -0,0 +1,26 @@
#!/usr/bin/env python
from pylab import *
import sys
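# stdin layout (as written by ex-7/main): two lines with the components
# of w, one line with the cut value, one line with "nsig nnoise dim",
# then the flattened coordinates of all the points, one number per line.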
w = loadtxt(sys.stdin, max_rows=2)
cut = float(input())
n, m, d = map(int, input().split())
data = loadtxt(sys.stdin).reshape(n + m, d)
signal, noise = data[:n].T, data[n:].T
figure()
scatter(*signal, c='xkcd:grey blue', label='signal')
scatter(*noise, c='xkcd:baby blue', label='noise')
scatter(*(cut * w/np.dot(w,w)), c='red')
arrow(*(-abs(w)), *4*abs(w))
legend()
figure()
sig_proj = np.dot(w, signal)
noise_proj = np.dot(w, noise)
hist(sig_proj, color='xkcd:grey blue')
hist(noise_proj, color='xkcd:baby blue')
axvline(cut, c='r')
show()

33
makefile Normal file

@ -0,0 +1,33 @@
GMP_FLAGS = -L$(GMP_PATH) -lgmp
GSL_FLAGS = $(shell pkg-config --libs gsl)
CFLAGS := -Wall --debug $(GMP_FLAGS) $(GSL_FLAGS)
CCOMPILE = \
mkdir -p $(@D); \
$(CC) $(CFLAGS) $^ -o $@
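# Pattern rules: e.g. `make ex-1/bin/main` compiles ex-1/main.c into ex-1/bin/.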
ex-1/bin/%: ex-1/%.c
$(CCOMPILE)
ex-2/bin/%: ex-2/%.c
$(CCOMPILE)
ex-3/bin/main: ex-3/common.c ex-3/likelihood.c ex-3/chisquared.c
$(CCOMPILE)
ex-4/bin/main: ex-4/main.c
$(CCOMPILE)
ex-5/bin/%: ex-5/%.c
$(CCOMPILE)
ex-6/bin/main: ex-6/rl.c ex-6/fft.c
$(CCOMPILE)
ex-7/main: ex-7/main.c
$(CCOMPILE)
misc/pdfs: misc/pdfs.c
clean:
rm -rf ex-*/bin

(14 binary image files added, 13–119 KiB each; contents not shown)
17
misc/lessons/makefile Normal file

@ -0,0 +1,17 @@
sections = $(shell find sections/ -name '*.md' | sort -V)
pandoc = \
@ pandoc $(1) \
--toc \
--standalone \
--pdf-engine=xelatex \
-F pandoc-crossref \
-o $(2).pdf
all: lessons.pdf
clean:
rm lessons.pdf
lessons.pdf: $(sections) images
$(call pandoc, $(sections), lessons)


@ -0,0 +1,23 @@
---
title: Statistical analysis
subtitle: Summary of the slides
numbersections: true
documentclass: article
fontsize: 12pt
geometry:
- width=150mm
- top=20mm
- bottom=30mm
header-includes: |
```{=latex}
% start new page with each section
\usepackage{etoolbox}
\usepackage{tikz}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{siunitx}
\pretocmd{\section}{\clearpage}{}{}
```
---


@ -0,0 +1,140 @@
# Numerical computing
## Compiling a program
To run a program, a computer does the following: it reads information from
the input and puts it into memory; from there it reads the instructions
sequentially and, when there is some computation to do, it hands it to the
ALU (arithmetic logic unit) and stores the result back in memory; finally
everything is passed to the output.
To be executable a program must be written in machine code, and this is what
programming languages are for: through the compiler they are translated into
machine code:
- source code,
- compiler,
- object code,
- linker (adds the libraries),
- executable code.
## Implementation errors
When going from the abstract model to its implementation, different types of
error arise:
- analytical errors: caused by the need for a discrete arithmetic, i.e. the
fact that a continuous object cannot be represented exactly and is therefore
sampled at points;
- inherent or algorithmic (round-off) errors: caused by the finite number of
significant digits. They depend on the algorithm used, because some methods
are better than others at avoiding this kind of problem.
## Representing a number
The goal is to keep round-off errors under control.
There are several ways to represent a number, among which the Von Neumann
one: $n$ significant digits are devoted to the mantissa and some more to the
characteristic, i.e. the order of magnitude (generally in base 2).
With $n$ significant digits in base 2 the largest representable number is
$2^n -1$, because with two digits (0 and 1) there are $2^n$ representable
numbers and, when sorted in increasing order, each differs from the previous
one by one unit, the smallest being 0 (all zeros), so the largest number that
can be represented is $2^n -1$:
- 0 is the 1st,
- 1 is the 2nd,
- ...$2^n -1$ is the $2^n$-th.
This number corresponds to a base-ten number with $m$ significant digits,
so that:
$$
m = \log(2^n -1)
$$
so a number with 24 bits for the mantissa carries about 7 significant digits
in base 10.
In single precision (float):
- one bit for the sign,
- 23 bits for the mantissa (hence 7 significant digits),
- 8 bits for the characteristic.
In double precision (double) the mantissa bits are doubled and the
significant digits become 17, but the time for multiplications triples and
more memory is used, so it is not always worth it.
There is also the fixed-point representation, with one bit for the sign and
the remaining ones for the number itself, the decimal point sitting at some
fixed position.
In the fixed-point case the relative error can vary a lot depending on the
number being represented, whereas for floats it is the same for every
number: the relative density is not constant.
When a set of representable numbers is not closed under an operation, the
result necessarily carries a round-off error and has to be approximated.
## Algorithmic errors
For numbers too large or too small to be represented one speaks of overflow
or underflow respectively. For this reason, sums and especially differences
of large numbers carry large rounding errors, which should be avoided when
possible. For instance, to evaluate a polynomial it is convenient to use the
Ruffini-Horner method, because it reduces the number of operations to
perform (see the sketch after this list):
- naive method:
$$
p(x) = a_0 + a_1x + a_2x^2 + ... +a_nx^n
$$
which amounts to $n$ additions and $n(n+1)/2$ multiplications, hence $\sim n^2$.
- R-H method:
$$
p(x) = (\dots((a_n x + a_{n-1})x + a_{n-2})x + \dots)x + a_0
$$
which amounts to $n$ additions and $n$ multiplications, hence $\sim n$.
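To make the difference concrete, here is a minimal C sketch (not part of the
exercises; the coefficients and the evaluation point are arbitrary) comparing
the naive evaluation with the Ruffini-Horner scheme:
```c
#include <stdio.h>
/* Naive evaluation: n additions, ~n²/2 multiplications. */
double poly_naive(const double *a, int n, double x) {
    double sum = 0;
    for (int i = 0; i <= n; i++) {
        double xi = 1;                  // compute x^i from scratch
        for (int k = 0; k < i; k++) xi *= x;
        sum += a[i] * xi;
    }
    return sum;
}
/* Ruffini-Horner evaluation: n additions, n multiplications. */
double poly_horner(const double *a, int n, double x) {
    double sum = a[n];
    for (int i = n - 1; i >= 0; i--)
        sum = sum * x + a[i];
    return sum;
}
int main(void) {
    /* p(x) = 1 + 2x + 3x² + 4x³ evaluated at x = 0.5 */
    double a[] = {1, 2, 3, 4};
    printf("naive:  %g\n", poly_naive(a, 3, 0.5));
    printf("horner: %g\n", poly_horner(a, 3, 0.5));
    return 0;
}
```
Both print 3.25; only the number of operations differs.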
The problem also shows up when subtracting two numbers that are very close
to each other, because the result is a very small number which may end up in
underflow (this is known as "catastrophic cancellation").
\textcolor{orange}{note:} The computational cost of solving a linear system
of $n$ equations is asymptotically equal to the cost of multiplying two
$n \times n$ matrices. There are algorithms that require no more than
$k \cdot n^{\alpha}$ operations, the smallest known value of $\alpha$ being
about 2.375.
Some algorithms are said to be "unstable": this happens when round-off
errors accumulate until they lead to completely wrong results.
There are problems, said to be "ill-conditioned", which give errors so large
with any algorithm that the result is meaningless. In these cases, small
variations of the initial data lead to large variations in the results. The
following ratio is called the "condition number of the problem":
$$
\frac{\Delta r}{\Delta \alpha} =
\frac{\text{\% error on the result}}{\text{\% error on the initial datum}}
$$
which can therefore be written as:
$$
\Delta \alpha = \frac{x + h - x}{x} = \frac{h}{x}
\hspace{100pt}
\Delta r = \frac{f(x + h)-f(x)}{f(x)}
$$
$$
\frac{\Delta r}{\Delta \alpha} =
\frac{f(x + h)-f(x)}{h} \cdot \frac{x}{f(x)} =
f'(x) \cdot \frac{x}{f(x)}
$$
If this number is $\ll 1$, then the problem is weakly sensitive to the
initial data.


@ -0,0 +1,257 @@
# Probability
## The concept of probability
Consider a set S of events, called the "sample space", and two random events
A and B. They obey the Kolmogorov axioms:
$\forall A \subseteq S, 0 \leqslant P(A) \leqslant 1$
$P(S) = 1$
$A \cap B = \varnothing \Longrightarrow P(A \cup B) = P(A) + P(B)$
The conditional probability of A given B is defined as:
$$
P(A|B) = \frac{P(A \cap B)}{P(B)}
$$
If two events are independent, then:
$$
P(A \cap B) = P(A) \cdot P(B) \Longrightarrow
P(A|B) = \frac{P(A) \cdot P(B)}{P(B)} = P(A)
$$
There are different approaches to defining probability:
- frequentist approach:
$$
P(A) = \lim_{n \rightarrow +\infty} \frac{\#A}{n}
$$
with the problem that an infinite number of trials is impossible.
- Bayesian approach:
use Bayes' theorem, for instance:
$$
P(\text{theory|data}) \propto P(\text{data|theory}) \cdot P(\text{theory})
$$
it is therefore a "subjective" approach, at least as far as the last term of
the equation above is concerned, because the probability that the theory is
valid can be assigned on the basis of reasoning and observations about the
phenomenon under study.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example of conditional probability
Some decays are observed and $N_a$ and $N_b$ of them are measured
independently; $N_{ab}$ were measured in common. We can estimate the total
number of events $N$ and the total efficiency $\epsilon$.
$$
P(a) = \frac{N_a}{N} \hspace{100pt} P(b) = \frac{N_b}{N}
$$
$$
P(ab) = \frac{N_{ab}}{N} = P(a) \cdot P(b) = \frac{N_a \cdot N_b}{N^2}
\Longrightarrow N = \frac{N_a \cdot N_b}{N_{ab}}
$$
$$
\epsilon = P(a \cup b) = P(a) + P(b) -P(a \cap b) =
\frac{N_a + N_b - N_{ab}}{N}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
## Bayes' theorem
$$
P(A|B) = \frac{P(A \cap B)}{P(B)}
\hspace{100pt}
P(B|A) = \frac{P(B \cap A)}{P(A)}
$$
$$
P(A|B) = \frac{P(B|A) \cdot P(A)}{P(B)}
$$
From Bayes' theorem one can derive the law of total probability: consider a
set $S$ divided into disjoint subsets $A_i$ whose union gives back the
original set:
$$
\cup_i A_i = S
$$
and consider a set $B$ also contained in $S$:
$$
B = B \cap S = B \cap (\cup_i A_i) = \cup_i (B \cap A_i)
$$
$$
\Longrightarrow P(B) = \sum_i P(B \cap A_i)
$$
$$
\Longrightarrow P(B) = \sum_i P(B|A_i)P(A_i)
$$
from which, for a specific set $A_j$, through Bayes' theorem:
$$
P(A_j|B) = \frac{P(B|A_j) \cdot P(A_j)}{\sum_i P(B|A_i)P(A_i)}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example of total probability: signal and background
A detector measures signal and background with efficiencies $P(R|S)$ and
$P(R|F)$ respectively. If the prior probabilities of signal and background
$P(S)$ and $P(F)$ are known, then one can obtain the probability that, given
a measurement, the signal was actually measured:
$$
P(S|R) = \frac{P(R|S) \cdot P(S)}{P(R|S) \cdot P(S) + P(R|F) \cdot P(F)}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example of total probability: AIDS
$$
P(\text{AIDS}) = 0.001 \hspace{50pt} P(\text{no AIDS}) = 0.999
$$
$$
P(\text{+|AIDS}) = 0.98 \hspace{50pt} P(\text{-|AIDS}) = 0.02
$$
$$
P(\text{+|no AIDS}) = 0.03 \hspace{50pt} P(\text{-|no AIDS}) = 0.97
$$
So, if the test is positive, the probability of actually having contracted AIDS is:
$$
P(\text{AIDS|+}) = \frac{P(\text{+|AIDS}) \cdot P(\text{AIDS})}
{P(\text{+|AIDS}) \cdot P(\text{AIDS}) + P(\text{+|no AIDS}) \cdot
P(\text{no AIDS})} = 0.032
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Bayesian approach with a theory
Let us start with an example: the mass of an electron was measured in an
experiment and the values {$m_i$} were found, whose mean value is
$m_e = \SI{520 \pm 10}{KeV}$. One then assumes that the true value lies
between \SI{510}{KeV} and \SI{530}{KeV} with a probability of 68%, given by
the confidence level. Hence, the probability that the true mass is exactly
\SI{520}{KeV} is given by:
$$
P(m_e|{m_i}) = \frac{}{}
$$
\textcolor{red}{**check this formula against the notes**}
When performing a measurement, $N$ values are measured, the mean value
$\bar{x}$ and the standard deviation $\sigma$ are computed, and the result is quoted as:
$$
\mu = \bar{x} \pm \frac{\sigma}{\sqrt{N}}
$$
which is usually interpreted by saying that:
$$
P \left( \bar{x} - \frac{\sigma}{\sqrt{N}} \leqslant \mu \leqslant
\bar{x} + \frac{\sigma}{\sqrt{N}} \right) = 68\%
$$
but actually all we know is that:
$$
P \left( \mu - \frac{\sigma_{true}}{\sqrt{N}} \leqslant \bar{x} \leqslant
\mu + \frac{\sigma_{true}}{\sqrt{N}} \right) = 68\%
$$
**\textcolor{red}{WHAT?!?}**


@ -0,0 +1,480 @@
# Statistics
## Probability distributions
A probability density function $f$ is defined so that the probability for a
variable $x$ to lie between $x$ and $x + dx$ is given by:
$$
P(x \subset [x, x + dx]) = f(x)dx
$$
hence:
$$
\int\limits_{- \infty}^{+ \infty} dx f(x) = 1
$$
The cumulative function is defined as:
$$
F(x) = \int\limits_{- \infty}^x dx' f(x')
$$
and the quantile of order $\alpha$ is the value of $x$ for which $F(x) = \alpha$.
In the multidimensional case, with two or more variables, one speaks of a
joint pdf:
$$
f(x, y) \hspace{30pt} \Longrightarrow \hspace{30pt}
\int\limits_{- \infty}^{+ \infty} \int\limits_{- \infty}^{+ \infty}
dx dy f(x, y) = 1
$$
and two marginal distributions are defined:
$$
f_x (x) = \int\limits_{- \infty}^{+ \infty} dy f(x, y)
\hspace{50pt}
f_y (y) = \int\limits_{- \infty}^{+ \infty} dx f(y, x)
$$
so two variables $x$ and $y$ are independent if $f(x, y) = f_x(x) \cdot
f_y(y)$. Now, if $A$ is the event with probability $f_x(x)dx$ while $B$ has
probability $f_y(y)dy$, then the conditional pdfs can be defined as
follows:
$$
P(B|A) = \frac{P (A \cap B)}{P(A)} = \frac{f(x, y)dxdy}{f_x(x)dx}
\hspace{20pt} \Longrightarrow \hspace{20pt} h(y|x) = \frac{f(x, y)}{f_x(x)}
$$
so that Bayes' theorem becomes:
$$
g(x|y) = \frac{h(y|x)f_x(x)}{f_y(y)}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Borel-Kolmogorov paradox
Consider points uniformly distributed on the surface of the Earth: one would
expect the points to be uniformly distributed also along any parallel or
meridian... but take a meridian: 25% of it lies north of the 45th parallel
and so, by the previous logic, so should 25% of the points lying on it.
However, it is not true that 25% of the Earth's surface lies above the 45th
parallel!
The paradox is resolved by noting that one cannot condition on a set of
measure zero such as the meridian (which is one-dimensional). This is seen
clearly by adopting the terminology just introduced:
if the distribution is uniform, the probability of finding a point in a
given surface is the ratio between the solid angle subtended by that surface
and the total solid angle:
$$
f(\theta, \phi) d\theta d\phi= \frac{d\phi d\theta \cos(\theta)}{4 \pi}
$$
from which the two marginal probabilities can be determined:
$$
f_{\phi}(\phi) = \int\limits_{-\pi/2}^{\pi/2} f(\theta, \phi) d\theta =
\int\limits_{-\pi/2}^{\pi/2} \frac{\cos(\theta)}{4 \pi} d\theta = \frac{1}{2 \pi}
$$
$$
f_{\theta}(\theta) = \int\limits_{0}^{2 \pi} f(\theta, \phi) d\phi =
\frac{\cos(\theta)}{2}
$$
both of which are constant with respect to their own variable. From this one
can conclude that, while the density along a parallel (at fixed $\theta$) is
indeed constant, the same cannot be said along a meridian.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
A function of a random variable is itself a random variable.
Consider the pdf $f(x)$ and a function $a(x)$ whose pdf $g(a)$ we want to
find. If the inverse of $a(x)$ is single-valued, calling $dS$ the region of
the $x$ for which $a \subset [a, a +da]$:
$$
g(a)da = \int\limits_{dS} dxf(x)
= \left| \int\limits_{x(a)}^{x(a +da)} f(x')dx' \right|
= \int\limits_{x(a) + \left| \frac{dx}{da} \right| da}^{x(a +da)} f(x')dx'
$$
That is:
$$
g(a) = f(x(a)) \left| \frac{dx}{da} \right|
$$
and if $x(a)$ is not single-valued, then all the intervals $dS$ of $dx$
corresponding to $da$ have to be considered.
For functions of $N$ variables, since:
$$
g(a')da' = \int \dots \int\limits_{dS} f(x_1 \dots x_N) dx_1 \dots dx_N
$$
with $dS$ the region of $x$ space enclosed between the isosurfaces:
$$
a(\vec{x}) = a' \hspace{10pt} \wedge \hspace{10pt} a(\vec{x}) = a' + da'
$$
In the case $z = x \cdot y$ one finds the Mellin convolution:
$$
g(z)dz = \int\limits_{dS} dxdy f(x, y)
= \int\limits_{-\infty}^{+\infty} dx
\int\limits_{\frac{z}{x}}^{\frac{z + dz}{x}} dy f(x, y)
$$
**\textcolor{red}{I did not understand this part...}**
## Error propagation
Consider a variable $x$ with pdf $f(x)$. The expectation value or mean
(often denoted by $\mu$) is defined as:
E[x] = \int dx f(x) x
$$
For a variable $y(x)$ with pdf $g(y)$, instead:
$$
E[y] = \int dy \cdot y \cdot g(y) = \int dx f(x) y(x)
$$
The variance (often denoted by $\sigma^2$, while $\sigma$ is the standard
deviation) is defined as:
$$
V[x] = E[(x - E[x])^2] = E[x^2] - \mu^2
$$
More generally one defines the 'algebraic moments' $E[x^n] =\mu'_n$, with
$\mu'_1 = \mu$, and the 'central moments' $E[(x -\mu)^n] = \mu_n$, with
$\mu_2 = \sigma^2$.
Two measures of correlation are also defined. The covariance:
$$
\text{cov} [x, y] = E[xy] - E[x]E[y] = E[xy] - \mu_x \mu_y
$$
which is equivalent to:
\begin{align*}
\text{cov} [x, y]
&= E[(x -\mu_x)(y -\mu_y)] \\
&= E[xy -x\mu_y -y\mu_x + \mu_x\mu_y] \\
&= E[xy] -\mu_y E[x] -\mu_x E[y] + \mu_x \mu_y \\
&= E[xy] -\mu_y \mu_x - \mu_x \mu_y + \mu_x \mu_y \\
&= E[xy] - \mu_x \mu_y
\end{align*}
Note that if $x$ and $y$ are independent, then $f(x, y) = f_x(x)f_y(y)$,
so:
$$
E[xy] = \int dx \int dy xy f(x, y) = \mu_x \mu_y
\hspace{20pt} \Longrightarrow \hspace{20pt} \text{cov} [x, y] = 0
$$
and the correlation coefficient:
$$
\rho_{xy} = \frac{\text{cov} [xy]}{\sigma_x \sigma_y}
$$
![Example of correlation between two
quantities.](images/correlazione.png){width=70%}
Even if $f(\vec{x})$ is not completely known, it is still possible to
estimate the mean value and the variance of a quantity $y(\vec{x})$ knowing
only the estimates of the mean and variance of the pdf. Let us expand in a
Taylor series:
$$
y(\vec{x}) = y(\vec{\mu}) + \sum_{i= 1}^N \left[
\frac{\partial y}{\partial x_i} \right]_{\vec{x}
= \vec{\mu}} (x_i - \mu_i)
$$
$$
\Longrightarrow \hspace{20pt} E[y] = y(\vec{\mu})
\Longleftarrow \hspace{20pt} E[x_i] = \mu_i
$$
For the variance, instead, $E[y^2]$ and $E[y]$ are needed. Again through a
Taylor expansion around the mean value:
\begin{align*}
E[y^2] &= y^2(\vec{\mu}) + 2y(\vec{\mu}) \sum_{i = 1}^{N}
\left[ \frac{\partial y} {\partial x_i} \right]_{\vec{x} = \vec{\mu}}
E[x_i - \mu_i] \\
&+ E \left[ \left( \sum_{i_1}^N
\left[ \frac{\partial y}{\partial x_i} \right]_{\vec{x} = \vec{\mu}}
(x_i - \mu_i) \right) \left( \sum_{j = 1}^N \left[ \frac{\partial y}
{\partial x_i} \right]_{\vec{x} = \vec{\mu}} (x_j - \mu_j) \right) \right]
\end{align*}
Since the second term always vanishes because $E[x_i] = \mu_i$, one is left
with:
$$
V[y] = E[y^2] - E[y]^2 = \sigma_y^2 = \sum_{i,j = 1}^N \left[
\frac{\partial y}{\partial x_i}
\frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij}
$$
where $V_{ij}$ is the covariance matrix, whose entries are:
$$
V_{ij} = E[(x_i - \mu_i)(x_j - \mu_j)] = \rho_{ij} \sigma_i \sigma_j
$$
so that, if the variables are uncorrelated, one obtains:
$$
V_{ij} = \sigma_i^2 \delta_{ij}
\hspace{20pt} \Longrightarrow \hspace{20pt}
\sigma_y^2 = \sum_{i = 1}^N \left[ \frac{\partial y}{\partial x_i}
\right]_{\vec{x} = \vec{\mu}}^2 \sigma_i^2
$$
That is, it tells how much $y$ changes as the 'initial data' $\vec{x}$ vary.
Hence, from what was seen above:
$$
\text{cov} [x_i, x_j] = E[(x_i - \mu_i)(x_j - \mu_j)] = V_{ij}
$$
More generally, given variables $\vec{y}$ depending on $\vec{x}$, it holds that:
$$
U = AVA^T \hspace{30pt} \text{con} \hspace{30pt} A_{ij} = \left[
\frac{\partial y_i}{\partial x_j} \right]_{\vec{x} = \vec{\mu}}
\hspace{30pt} \text{e con} \hspace{30pt} U_{kl} = \text{cov}[y_k, y_l]
$$
where $U$ is called the covariance matrix of the $y$.
Warning: what has been said so far, describing how the errors on $\vec{x}$
affect $y$, only holds if $y$ is linear in the $x$. So in cases like
$y(x) = 1/x$ this argument cannot be applied.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example
Consider:
$$
y = x_1 - x_2
$$
$$
\text{con} \hspace{30pt} \mu_1 = \mu_2 = 10 \hspace{30pt} \wedge
\hspace{30pt} \sigma_1 = \sigma_2 = 1
$$
then we have $y = y(x_1, x_2)$, so:
$$
E[y] = y(\mu_1, \mu_2) = 10 - 10 = 0
$$
$$
V[y] = \sum_{i, j = 1}^2 \left[ \frac{\partial y}{\partial x_i}
\frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij} =
1 \cdot V_{11} + 1 \cdot V_{22} -1 \cdot 2 \cdot V_{12}
$$
If the correlations are zero, then $V_{12} = 0 \Longrightarrow V[y] = 2
\Longrightarrow \sigma_y = 1.4$; if instead $x_1$ and $x_2$ are correlated,
in the case of unit correlation coefficient one gets $V[y] = 0$. So the
correlation can change things considerably.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Systematic errors
Consider two quantities $x_1$ and $x_2$ with a common systematic error
$S$:
\begin{align*}
&x_1 = x_{1_0} + x_{1_s} \\
&x_2 = x_{2_0} + x_{2_s}
\end{align*}
the terms with subscript $0$ are independent of each other, while the other
two are correlated. Since errors add in quadrature, the covariance matrix is
therefore:
$$
\text{cov}[x_1, x_2] = S^2 \hspace{30pt} \Longrightarrow \hspace{30pt}
V = \begin{pmatrix}
\sigma_1^2 + S^2 & S^2\\
S^2 & \sigma_2^2 + S^2
\end{pmatrix}
$$
because:
\begin{align*}
\text{cov}[x_1, x_2] &= E[x_1 x_2] - E[x_1]E[x_2] = \\
&= E[(x_{1_0} + x_{1_s})(x_{2_0} + x_{2_s})] - E[x_{1_0} + x_{1_s}]
E[x_{2_0} + x_{2_s}] = \\
&= E[x_{1_0}x_{2_0}] + E[x_{1_0}x_{2_s}] + E[x_{1_s}x_{2_0}]
+ E[x_{1_s}x_{2_s}] + \\
&\hspace{13pt} - E[x_{1_0}]E[x_{2_0}] - E[x_{1_0}]E[x_{2_s}]
- E[x_{1_s}]E[x_{2_0}] - E[x_{1_s}]E[x_{2_s}] = \\
&= \mu_1 \mu_2 + \mu_1E[x_{2_s}] + E[x_{1_s}]\mu_2 +E[x_{1_s}x_{2_s}] + \\
&\hspace{13pt} - \mu_1 \mu_2 - \mu_1 E[x_{2_s}] - E[x_{1_s}] \mu_2
- E[x_{1_s}] E[x_{2_s}] = \\
&= E[x_{1_s} x_{2_s}] - E[x_{1_s}] E[x_{2_s}] = \text{cov}[x_{1_s}, x_{2_s}]
\end{align*}
### Orthogonal transformation
It can be useful to perform a change of variables that makes the covariance
matrix of the $y$ diagonal.
Consider the usual variables $y_i$, linearly related to the $x_j$:
$$
  y_i = \sum_j A_{ij} x_j
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  U_{ij} = \sum_{k,l} A_{ik} V_{kl} A^T_{lj}
$$
The task is therefore to diagonalize the matrix $U$: the solution is simple,
the rows of $A$ are the eigenvectors of $V$. This concept is useful when
choosing which coordinates to work with.
If we are working with the coordinates $\vec{x} = (x, y)$, the covariance
matrix is:
$$
  V = \begin{pmatrix}
    \sigma_1^2 & \rho \sigma_1 \sigma_2 \\
    \rho \sigma_1 \sigma_2 & \sigma_2^2
  \end{pmatrix}
$$
Let us diagonalize it. First of all, find the eigenvalues from
$\det(V - \lambda I) = 0$:
$$
  \begin{vmatrix}
    \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\
    \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda
  \end{vmatrix}
  = (\sigma_1^2 - \lambda) (\sigma_2^2 - \lambda) - \rho^2 \sigma_1^2
  \sigma_2^2 = 0
$$
\begin{align*}
  &\Longrightarrow \hspace{30pt}
  \lambda^2 -(\sigma_1^2 + \sigma_2^2) \lambda + \sigma_1^2 \sigma_2^2
  (1 - \rho^2) = 0 \\
  &\Longrightarrow \hspace{30pt}
  \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm
    \sqrt{\sigma_1^4 + \sigma_2^4 + 2 \sigma_1^2 \sigma_2^2
    - 4 \sigma_1^2 \sigma_2^2 + 4\rho^2 \sigma_1^2 \sigma_2^2}}{2} \\
  &\Longrightarrow \hspace{30pt}
  \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm
    \sqrt{(\sigma_1^2 - \sigma_2^2)^2 + 4 \rho^2 \sigma_1^2 \sigma_2^2}}{2}
\end{align*}
and now compute the eigenvectors:
$$
  (V - \lambda I)\vec{r} = 0
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  \begin{pmatrix}
    \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\
    \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda
  \end{pmatrix}
  \begin{pmatrix}
    r_1 \\
    r_2
  \end{pmatrix}
  =
  \begin{pmatrix}
    0 \\
    0
  \end{pmatrix}
$$
$$
  \Longrightarrow \hspace{30pt}
  \begin{cases}
    (\sigma_1^2 - \lambda)r_1 + \rho \sigma_1 \sigma_2 r_2 = 0 \\
    \rho \sigma_1 \sigma_2 r_1 + (\sigma_2^2 - \lambda)r_2 = 0
  \end{cases}
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  r_1 = \frac{\rho \sigma_1 \sigma_2}{\lambda - \sigma_1^2} \, r_2
$$
and so on for each of the two eigenvalues $\lambda_{1,2}$.
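As a concrete illustration, the diagonalization can also be carried out
numerically with GSL; the following is a minimal sketch in which the values of
$\sigma_1$, $\sigma_2$ and $\rho$ are arbitrary placeholders:
```c
#include <stdio.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_eigen.h>

int main(void) {
  // covariance matrix V with arbitrary example values
  double s1 = 1.0, s2 = 2.0, rho = 0.5;
  double data[] = { s1*s1,     rho*s1*s2,
                    rho*s1*s2, s2*s2 };
  gsl_matrix_view V = gsl_matrix_view_array(data, 2, 2);

  gsl_vector* eval = gsl_vector_alloc(2);     // eigenvalues
  gsl_matrix* evec = gsl_matrix_alloc(2, 2);  // eigenvectors (as columns)
  gsl_eigen_symmv_workspace* w = gsl_eigen_symmv_alloc(2);

  // note: gsl_eigen_symmv destroys part of its input matrix;
  // the columns of evec are the eigenvectors, so A is their transpose
  gsl_eigen_symmv(&V.matrix, eval, evec, w);

  for (size_t i = 0; i < 2; i++)
    printf("lambda_%zu = %g\n", i + 1, gsl_vector_get(eval, i));

  gsl_eigen_symmv_free(w);
  gsl_vector_free(eval);
  gsl_matrix_free(evec);
  return 0;
}
```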

View File

@ -0,0 +1,466 @@
# Probability distributions
## Binomial distribution
![Binomial distribution: p = 0.3, n = 10,
N = 1000.](images/binomiale.png)
Consider $N$ trials of an experiment whose only possible outcomes are success
or failure, and let the probability of each success be $p$. Call $n$ the
number of successes. The probability of obtaining $n$ successes out of $N$
total trials is then given by:
$$
  P(N, n, p) = \binom{N}{n} p^n (1 - p)^{N -n}
$$
because:
- the probability that a single success occurs is $p$;
- the probability that $n$ successes occur is given by the product of all
  their probabilities: $p^n$;
- the same argument holds for the failures: each one has probability $(1 - p)$
  and $N - n$ of them occur;
- the binomial term accounts for all the possible permutations: the idea is
  simple if one imagines placing successes and failures on a grid with $N$
  available slots, a success being a white marble and a failure a black one.
  In how many slots can the first success be placed? $N$. The second? $N - 1$.
  The third? And so on, until all the successes have been placed: they occupy
  $n$ slots, the last of which was chosen among $N - (n - 1)$ slots, hence:
$$
  N \cdot (N -1) \dots (N -n + 1) =
  \frac{N \cdot (N -1) \dots (N -n + 1) \cdots 2 \cdot 1}
  {(N -n) \cdot (N - n -1) \cdots 2 \cdot 1} = \frac{N!}{(N -n)!}
$$
  however, all the orderings of the successes among themselves are
  equivalent, so this must be further divided by $n!$ for the same reason.
  Hence:
$$
  \binom{N}{n} = \frac{N!}{n! (N -n)!}
$$
For the normalization it holds that:
$$
  \sum_{n = 0}^N P(N, n, p) = 1
$$
An expected value and a variance can be defined:
\begin{align*}
  &E[n] = \sum_{n = 0}^N n P(N, n, p) = Np \\
  &V[n] = E[n^2] - E[n]^2 = Np(1 -p)
\end{align*}
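For instance, with $N = 10$ trials and $p = 0.3$, the probability of exactly
$n = 3$ successes is:
$$
  P(10, 3, 0.3) = \binom{10}{3} (0.3)^3 (0.7)^7
  \approx 120 \cdot 0.027 \cdot 0.082 \approx 0.27
$$
with $E[n] = 3$ and $V[n] = 2.1$.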
## Multinomial distribution
This is the generalization of the previous pdf to the case of $m$ possible
outcomes, each occurring with its own probability $p_i$. For instance, it is
the case of a histogram when one asks for the probability of finding it with
exactly a given set of entries.
The probability is consequently given by:
$$
  P(N, \vec{n}, \vec{p}) = \frac{N!}{n_1! n_2! \dots n_m!}
  p_1^{n_1} p_2^{n_2} \dots p_m^{n_m}
$$
and the expected value and variance turn out to be:
\begin{align*}
  &E[n_i] = Np_i \\
  &V[n_i] = Np_i (1 -p_i)
\end{align*}
### Law of large numbers
The law of large numbers states that the sample mean of a variable $x$, as the
number of trials $N$ tends to infinity, gets arbitrarily close to the true
mean. This law can be used to estimate the probabilities $P_i$ of a
multinomial distribution through the frequencies with which the different
outcomes occur.
Consider the frequency $f_j$ with which the $j$-th outcome occurs in a set of
$N$ trials:
$$
  f_j = \frac{1}{N} \sum_{i = 1}^N x_i = \frac{x_j}{N}
$$
where $x_i$ is a variable equal to 1 if the $j$-th outcome occurred in the
$i$-th trial and 0 if another one did, and $x_j$ is therefore the number of
times the $j$-th outcome occurred.
Unlike $P_j$, $f_j$ is a random variable, since it depends on $x_j$, which is
a sum of random variables. Its expected value is:
$$
  E(f_j) = \frac{E(x_j)}{N} = P_j
$$
and its variance:
$$
  V \left[ f_j \right] = V \left[ \frac{x_j}{N} \right] =
  E \left[ \frac{x_j^2}{N^2} \right] - \left( E \left[ \frac{x_j}{N}
  \right] \right)^2 = \frac{1}{N^2} V \left[ x_j \right]
$$
Now, $x_j$ is exactly the $n_j$ of the multinomial distribution, therefore:
$$
  V[x_j] = NP_j (1 - P_j)
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  V[f_j] = \frac{1}{N} P_j (1 - P_j) \leqslant \frac{1}{N}
$$
## Poisson distribution
![Poisson distribution: $\nu$ = 1,
N = 1000.](images/poisson.png)
Taking the binomial distribution in the limit in which the number of trials
tends to infinity and the probability of success tends to zero (with the
constraint $N \cdot p = \text{const} = \nu$), one obtains the Poisson
distribution:
$$
  P(n, \nu) = \frac{\nu^n}{n!} e^{-\nu}
$$
with:
\begin{align*}
  &E[n] = \nu \\
  &V[n] = \nu
\end{align*}
where $\nu = Np$ is the mean number of successes.
When $N$ is so large that it is not even defined (as when observing the decays
of an atom: the trials are the observations, which are effectively
continuous), it is no longer possible to define a probability of success for
each trial (it would be zero, which is exactly why the Poisson distribution is
defined through these two limits), and $\nu$ must be defined in another way.
Indeed the Poisson distribution, being a limit in $N$ and $p$, no longer
depends explicitly on these two quantities.
When observing the decay of an atom, one usually proceeds as follows: the
observation time is divided into intervals (which amounts to splitting the
infinitely many trials into subsets of infinitely many trials) and the number
of successes occurring in each interval is counted. The experiment is thus
effectively split into smaller experiments, from which a frequentist mean
number of successes can be deduced. For example:
--------------------------------------------------------
# successes       0     1     2    3    4   5    6    7
------------- ------ ----- ----- ---- ---- --- ---- ----
# intervals     1042   860   307   78   15   3    0    0
Poisson         1064   823   318   82   16   2  0.3  0.3
--------------------------------------------------------
Table: Decay of an atom. The observation time was divided into intervals and
the number of successes observed in each interval was counted.
\newpage
The mean number of events is:
$$
  \frac{1042 \cdot 0 + 860 \cdot 1 + 307 \cdot 2 + 78 \cdot 3 + 15 \cdot 4
  + 3 \cdot 5 + 0 \cdot 6 + 0 \cdot 7}{1042 + 860 + 307 + 78 + 15 + 3
  + 0 + 0} = 0.77
$$
from which the Poisson values reported in the table above can be computed.
## Uniform distribution
![Uniform distribution: $a = 0$, $b = 100$.](images/uniform.png)
A pdf in which all values have the same probability of occurring is called
uniform:
$$
  P (x, a, b) = \begin{cases}
    \frac{1}{b - a} \hspace{30pt} a \leqslant x \leqslant b \\
    0 \hspace{42pt} \text{elsewhere}
  \end{cases}
$$
with:
\begin{align*}
  E[x] = \frac{1}{2} (a + b) \\
  V[x] = \frac{1}{12} (b - a)^2
\end{align*}
If a variable is distributed according to a pdf $f(x)$, its cumulative is
uniformly distributed. Intuitively this is easy to see: picture the graph of
the pdf; every time a number is drawn, it falls at a random point of the area
under the pdf, leaving a random area to its left (which is the value of the
cumulative).
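More formally, calling $F$ the cumulative of $f$ and assuming it is
invertible, for $u = F(x)$ and $a \in [0, 1]$:
$$
  P(u \leqslant a) = P(F(x) \leqslant a) = P(x \leqslant F^{-1}(a))
  = F(F^{-1}(a)) = a
$$
which is exactly the cumulative of a uniform distribution on $[0, 1]$.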
## Gaussian distribution and the CLT
![Gaussian distribution: $\mu = 30$,
$\sigma = 5$.](images/gaussian.png)
The Gaussian (or normal) distribution is defined as:
$$
  P (x, \mu, \sigma) = \frac{1}{\sqrt{2 \pi} \sigma}
  e^{-\frac{(x - \mu)^2}{2 \sigma^2}}
$$
with:
\begin{align*}
  E[x] = \mu \\
  V[x] = \sigma^2
\end{align*}
The error function, which is essentially the cumulative of this pdf, is very
useful in the laboratory and its values are tabulated.
The central limit theorem states that, given $n$ independent random variables
with variances $\sigma_i^2$, in the limit $n \rightarrow + \infty$ their sum
follows a Gaussian distribution whose mean is the sum of the means and whose
variance is the sum of the variances.
This can be exploited to generate random numbers that follow a normal
distribution.
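As a minimal sketch of this idea (not the method used for the plots in these
notes), one can sum twelve uniform variates, whose total has mean 6 and unit
variance:
```c
#include <stdio.h>
#include <gsl/gsl_rng.h>

int main(void) {
  gsl_rng* r = gsl_rng_alloc(gsl_rng_taus);
  for (int i = 0; i < 10; i++) {
    // sum of 12 U(0,1) variates: mean 6, variance 12*(1/12) = 1
    double z = 0;
    for (int j = 0; j < 12; j++)
      z += gsl_rng_uniform(r);
    printf("%f\n", z - 6);   // approximately standard normal
  }
  gsl_rng_free(r);
  return 0;
}
```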
For large values of $\nu$ (namely already a few units), the Poisson
distribution tends to the Gaussian with $\mu = \nu$ and $\sigma = \sqrt{\nu}$.
Similarly, for $N \rightarrow + \infty$ the binomial tends to the Gaussian
with $\mu = Np$ and $\sigma = \sqrt{Np (1 - p)}$.
## Multivariate Gaussian distribution
In the multidimensional case, the pdf for the vector $\vec{x} = (x_1, \dots,
x_n)$ is given by:
$$
  f(\vec{x}, \vec{\mu}, V) = \frac{1}{(2 \pi)^{n/2} \mid V \mid^{1/2}}
  \exp \left[ - \frac{1}{2} (\vec{x} - \vec{\mu})^T V^{-1} (\vec{x}
  - \vec{\mu}) \right]
$$
with $E[x_i] = \mu_i$ and $\text{cov}[x_i, x_j] = V_{ij}$.
## Weighted mean
Measurements with different errors must be combined through the weighted
mean:
$$
  E[x] = \frac{\sum_{i = 1}^N \frac{x_i}{\sigma_i^2}}{\sum_{i = 1}^N
  \frac{1}{\sigma_i^2}}
$$
$$
  V[x] = \frac{1}{\sum_{i = 1}^N \frac{1}{\sigma_i^2}}
$$
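For instance, combining the two measurements $10 \pm 1$ and $12 \pm 2$:
$$
  E[x] = \frac{10/1 + 12/4}{1/1 + 1/4} = \frac{13}{1.25} = 10.4
  \hspace{30pt} \text{and} \hspace{30pt}
  \sigma = \frac{1}{\sqrt{1.25}} \approx 0.9
$$
so the more precise measurement dominates the combination.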
But it makes no sense to average values that are not compatible!
## Breit-Wigner distribution
![Breit-Wigner distribution: $x_0 = 20$,
$\Gamma = 10$.](images/wigner.png)
Some distributions have ill-defined moments and for this reason are called
"pathological". An example is the Breit-Wigner distribution:
$$
  f (x, \Gamma, x_0) = \frac{1}{\pi} \cdot
  \frac{\Gamma/2}{\Gamma^2/4 + (x - x_0)^2}
$$
A special case is the one with $x_0 = 0$ and $\Gamma = 2$, in which it is
called the Cauchy distribution:
$$
  f(x, 2, 0) = f(x) = \frac{1}{\pi} \cdot \frac{1}{1 + x^2}
$$
The mean and the variance are not defined, because the integrals diverge. It
is better to use the mode and the full width at half maximum, which are $x_0$
and $\Gamma$ respectively.
In the *GSL* library the pdf is written in this form:
$$
  p(x) = \frac{1}{a \pi (1 + (x/a)^2)}
  \hspace{50pt} \Longrightarrow \hspace{50pt}
  a = \Gamma/2
$$
## Landau distribution
For a charged particle with $\beta = v/c$ traversing a thin layer of material
of thickness $d$, the energy loss $\Delta$ follows the Landau distribution:
![Landau distribution.](images/landau.png)
It has a very complicated form involving integrals, logarithms... In this
case too the algebraic moments cannot be defined, because the integral
diverges.
## Chi-squared distribution
![$\chi^2$ distribution: $n = 5$.](images/chi2.png)
Given $N$ quantities, each following its own standard Gaussian distribution,
the sum of their squares follows the $\chi^2$ distribution.
Formally it is defined as:
$$
  f(z, n) = \frac{1}{2^{n/2} \Gamma (n/2)} z^{n/2 - 1}e^{-z/2}
$$
\begin{align*}
  &E[z] = n \\
  &V[z] = 2n
\end{align*}
where $z$ is the variable and $n$ is the number of degrees of freedom.
When an experiment samples $y(x)$ and the data are then fitted with a
theoretical function $f(x)$, each value $y(x)$ is assumed to be distributed
as a Gaussian around its true value, which is assumed to be $f(x)$: the
residuals, namely the differences $R(x) = y(x) - f(x)$, are therefore still
Gaussian, but centered at zero. The chi-squared is defined as:
$$
  \sum_i \frac{[y(x_i) - f(x_i)]^2}{f(x_i)}
  \hspace{50pt} \text{or} \hspace{50pt}
  \sum_i \frac{[y(x_i) - f(x_i)]^2}{\sigma_i^2}
$$
It follows that this quantity indeed follows the chi-squared distribution.
In the *GSL* library the $\chi^2$ distribution corresponds to the gamma
distribution with $a = n/2$ and $b = 2$.
## Exponential distribution
![Exponential distribution: $\lambda = 3$.](images/exponential.png)
$$
  f(x, \lambda) = \lambda e^{-\lambda x}
$$
\begin{align*}
  &E[x] = \frac{1}{\lambda} \\
  &V[x] = \frac{1}{\lambda^2}
\end{align*}
## Student's t distribution
![Student's t distribution: $\nu = 3$.](images/student.png)
It is the distribution followed by the mean of a Gaussian population when it
is estimated from a small sample without knowing the standard deviation.
If $y_1$ follows a Gaussian and $y_2$ a $\chi^2$ with $\nu$ degrees of
freedom, then $x$ follows the Student's t distribution:
$$
  x = \frac{y_1}{\sqrt{\frac{y_2}{\nu}}}
$$
which is defined as:
$$
  f(x, \nu) = \frac{\Gamma \left( \frac{\nu + 1}{2} \right)}{\sqrt{\nu \pi}
  \Gamma \left( \frac{\nu}{2} \right)} \left( 1 + \frac{x^2}{\nu}
  \right)^{- \frac{\nu + 1}{2}}
$$
\begin{align*}
  &E[x] = 0 \\
  &V[x] =
  \begin{cases}
    \frac{\nu}{\nu - 2} \hspace{15pt} \nu > 2 \\
    \infty \hspace{30pt} \nu \leqslant 2
  \end{cases}
\end{align*}
## Fisher-Snedecor distribution
![Fisher distribution: $n = 3$, $m = 4$.](images/fischer.png)
Given two samples $\vec{x}$ and $\vec{y}$ of variables following their
respective Gaussians, the Fisher-Snedecor distribution can be used to compare
the two variances. If the first sample contains $n$ variables and the second
$m$, then the Fisher distribution with $n-1$ and $m-1$ degrees of freedom
gives the distribution of the ratio:
$$
  \frac{S^2_x / S^2_y}{\sigma^2_x / \sigma^2_y} =
  \frac{S^2_x / \sigma^2_x}{S^2_y / \sigma^2_y}
$$
with:
$$
  S^2_x = \frac{1}{n -1} \sum_{i = 1}^n (x_i - \bar{x})^2
  \hspace{50pt} \text{,} \hspace{50pt}
  S^2_y = \frac{1}{m -1} \sum_{i = 1}^m (y_i - \bar{y})^2
$$
which is therefore the ratio of two quantities distributed according to the
$\chi^2$.
The definition of the pdf is complicated...
## Characteristic function
The characteristic function of a variable $x$ distributed according to $f(x)$
is defined as the Fourier transform of the latter:
$$
  \hat{f}(k) = E[e^{ikx}] = \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx}
$$
As with any transform, all the information contained in the original function
is also contained in the characteristic function, since the former can be
recovered by computing the inverse transform:
$$
  f(x) = \frac{1}{2 \pi} \int\limits_{-\infty}^{+\infty} dk \hat{f}(k) e^{-ikx}
$$
The characteristic function is useful to simplify some computations. If
$x_1 \dots x_N$ are independent random variables:
\begin{align*}
  z = \sum_{i = 1}^N x_i
  \hspace{20pt} \Longrightarrow \hspace{20pt}
  \hat{f}_z(k) &= \int dx_1 \dots dx_N f_1(x_1)
    \dots f_N(x_N) e^{ik \sum_{i=1}^N x_i} = \\
  &= \int dx_1 f_1(x_1) e^{ikx_1} \dots \int dx_N f_N(x_N) e^{ikx_N} = \\
  &= \hat{f}_1(k) \dots \hat{f}_N(k)
\end{align*}
Moreover it also holds that:
$$
  \frac{d^m}{dk^m} \hat{f}(k) \big|_{k = 0} =
  \frac{d^m}{dk^m}
  \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} \big|_{k = 0} =
  i^m \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} x^m \big|_{k = 0} =
  i^m \mu_m = i^m E[x^m]
$$
which is the algebraic moment of order $m$.
For example, in the case of two independent Gaussian variables $x$ and $y$,
one can immediately see that their sum is a Gaussian with $\mu = \mu_x +
\mu_y$ and $\sigma^2 = \sigma_x^2 + \sigma_y^2$. The same holds for the
Poisson distribution.
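Indeed, the characteristic function of a Gaussian is $\hat{f}(k) =
e^{i \mu k - \sigma^2 k^2 / 2}$, so for the sum of the two variables:
$$
  \hat{f}_{x + y}(k) = \hat{f}_x(k) \, \hat{f}_y(k)
  = e^{i (\mu_x + \mu_y) k - (\sigma_x^2 + \sigma_y^2) k^2 / 2}
$$
which is again the characteristic function of a Gaussian with mean $\mu_x +
\mu_y$ and variance $\sigma_x^2 + \sigma_y^2$.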
It is also easy to see how the pdfs behave in the various limits discussed
above: sending $N \rightarrow \infty$ in the characteristic function of a
binomial while keeping the mean fixed yields the characteristic function of a
Poisson distribution. The central limit theorem can also be proved this way.

View File

@ -0,0 +1,281 @@
# BPH
## Descriptive statistics
Two types of data analysis can be distinguished: "model independent"
(descriptive statistics) and "model dependent", which relies on a theoretical
model. In this chapter we study the former.
Some typical topics of descriptive statistics are:
- tests to establish whether two datasets come from the same distribution
  $f(x)$;
- tests to establish the correlation between two datasets (hypothesis
  testing);
- methods to determine the moments of a distribution;
- methods for smoothing experimental data.
### Moments of a distribution
Defining the moments of a distribution makes sense when the events that make
it up tend to cluster around a central value.
If the data are discrete, the following definitions are used.
Sample mean: if $n_j$ is the frequency with which each value $x_j$ occurs:
$$
  \bar{x} = \frac{1}{N} \sum_{i = 1}^N x_i
  = \frac{1}{N} \sum_{j = 1}^{N'} n_j x_j
$$
Central moment of order $r$:
$$
  V_r = \frac{1}{N} \sum_{j=1}^{N'} n_j (x_j - \bar{x})^r
$$
There are also two other "central" values which, in the continuous case,
become:
median:
$$
  \int\limits_{-\infty}^{x_{\text{med}}} dx \, f(x) = \frac{1}{2}
$$
mode: the value at which $f(x)$ is maximum, namely the value that occurs with
the highest frequency.
If the pdf has very long tails, the integrals may not converge and these
values may be undefined. For this reason the median is a more robust estimator
of the central value than the mean.
The central moments describe how the data are spread around the central
value: how "dispersed" they are. The first of these is the variance (note the
Bessel correction, by which $N \rightarrow N -1$ in the denominator):
$$
  V = \frac{1}{N - 1} \sum_{i = 1}^N (x_i - \bar{x})^2
$$
The skewness (literally "asymmetry") describes how unevenly the values are
distributed around the mean:
$$
  \gamma = \frac{1}{\sigma^3} E[(x - \bar{x})^3]
$$
where $\sigma$ is the standard deviation.
![Skewness.](images/skewness.png){width=12cm}
How much a pdf is more or less peaked than a Gaussian is given by the
kurtosis ("curved", "arching"):
$$
  K = \frac{1}{\sigma^4} E[(x -\bar{x})^4] -3
$$
![Kurtosis.](images/kurtosis.png){width=8cm}
For roughly Gaussian distributions there is an estimate of the standard
deviations of these parameters:
\begin{align*}
  &V(\sigma^2) = \frac{2 \sigma^4}{N} \\
  &V(\gamma) \approx \frac{15}{N} \\
  &V(K) \approx \frac{96}{N}
\end{align*}
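In practice these estimators are already provided by GSL; a minimal sketch,
with a placeholder sample:
```c
#include <stdio.h>
#include <gsl/gsl_statistics.h>

int main(void) {
  // a small sample, just as a placeholder
  double x[] = { 2.1, 3.4, 2.9, 3.1, 2.5, 3.8, 2.7, 3.0 };
  size_t n = sizeof(x) / sizeof(x[0]);

  double mean = gsl_stats_mean(x, 1, n);
  // variance with the Bessel correction (divides by N-1)
  double var  = gsl_stats_variance(x, 1, n);
  double skew = gsl_stats_skew(x, 1, n);
  double kurt = gsl_stats_kurtosis(x, 1, n);  // excess kurtosis (K - 3)

  printf("mean = %g\nvariance = %g\nskewness = %g\nkurtosis = %g\n",
         mean, var, skew, kurt);
  return 0;
}
```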
### Data smoothing
Data smoothing becomes necessary when the data are corrupted by random noise.
Usually one averages over windows of contiguous data points. Averaging,
however, inevitably lowers the value at the peaks: most of the time the area
under a peak and its position are preserved, but not its height.
One of the most effective smoothing methods is the Savitzky-Golay filter.
The signal is analyzed in groups of points, each centered on $Y_i$, with $i$
running over the whole array. Call $Y_0$ the central point and $Y_N$ and
$Y_{-N}$ the endpoints. $Y_0$ is replaced by a value computed as explained
below. During this process the values $Y_i$ are not overwritten with the
smoothed values $f_i$: a parallel array is created, which will be the final
smoothed one.
The values $f_i$ are obtained by fitting the points in the window with a
polynomial of arbitrary degree $g$, $P_g(j)$; the polynomial is then
evaluated at zero and its value replaces $Y_0$.
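A minimal sketch of the idea, using the tabulated Savitzky-Golay convolution
coefficients for a 5-point window and a quadratic polynomial (the input data
here are placeholders, and the window edges are simply left untouched):
```c
#include <stdio.h>

/* Smooth `in` into `out` with a 5-point, degree-2 Savitzky-Golay filter.
 * The convolution coefficients (-3, 12, 17, 12, -3)/35 are the tabulated
 * ones for this window and degree. */
void savgol5(const double* in, double* out, size_t n) {
  const double c[] = { -3, 12, 17, 12, -3 };
  for (size_t i = 0; i < n; i++) {
    if (i < 2 || i + 2 >= n) { out[i] = in[i]; continue; }
    double s = 0;
    for (size_t j = 0; j < 5; j++)
      s += c[j] * in[i - 2 + j];
    out[i] = s / 35.0;
  }
}

int main(void) {
  double y[] = { 0, 1, 4, 9, 16, 25, 36, 49, 64, 81 };  // placeholder data
  double f[10];
  savgol5(y, f, 10);
  for (size_t i = 0; i < 10; i++)
    printf("%g -> %g\n", y[i], f[i]);
  return 0;
}
```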
### Hypothesis testing
Suppose we want to show that a certain random variable $x$ follows a pdf
$f(x)$: this is called the null hypothesis $H_0$. If $f(x)$ does not depend on
any parameter the hypothesis is said to be simple, otherwise it is called
composite. Besides the null hypothesis, one or more alternative hypotheses
$H_1$, $H_2$... may be considered.
Consider the simple case of a single alternative hypothesis $H_1$ which, in
turn, proposes a pdf. To quantify the agreement between the data and the null
hypothesis one builds a test statistic $t(x)$, namely a variable depending on
$\vec{x}$, introduced to decide whether the null hypothesis holds or not
(think of $t_{\text{cut}}$ or the discrepancy...), which in turn follows two
pdfs, one predicted by $H_0$ and one by $H_1$.
\begin{center}
\begin{tikzpicture}
\draw [thick, ->] (0,0) -- (12,0);
\draw [thick, ->] (0,0) -- (0,6);
\node [left] at (0,6) {g(t)};
\node [below] at (12,0) {t};
\draw [thick, dashed] (6,0) -- (6,6);
\node [below] at (6,0) {$t_{\text{cut}}$};
\draw [thick, blue] (0,0) to [out = 20, in = 180] (3,5)
to [out = 0, in = 180] (8,0);
\draw [thick, red] (4,0) to [out = 20, in = 180] (7,3)
to [out = 0, in = 180] (11,0);
\node [blue] at (2.5, 2) {$g(t \, | \, H_0)$};
\node [red] at (8, 1) {$g(t \, | \, H_1)$};
\end{tikzpicture}
\end{center}
The 'significance of the test' $\alpha$ is defined as (while $(1 -
\alpha)$ is the 'confidence level of the test', or 'efficiency'):
$$
  \alpha = \int\limits_{t_{\text{cut}}}^{+ \infty} dt \, g(t \, | \, H_0)
$$
while $\beta$ is the probability of an error of the second kind ($(1 -
\beta)$ is called the 'power', or 'purity', of the test):
$$
  \beta = \int\limits_{-\infty}^{t_{\text{cut}}} dt \, g(t \, | \, H_1)
$$
One defines:
- error of the first kind: rejecting $H_0$ when it is actually true (with
  probability $P_1$);
- error of the second kind: accepting $H_0$ when it is actually false (with
  probability $P_2$).
For $t < t_{\text{cut}}$, with $t_{\text{cut}}$ chosen arbitrarily, the null
hypothesis is accepted. It follows that $\alpha = P_1$ and $\beta = P_2$.
The best choice of $t_{\text{cut}}$ is the one giving the maximum purity for
a given efficiency. In the 1D case it is obtained automatically (see the
example below), otherwise it can be complicated.
Let us work out an example applying the Neyman-Pearson lemma.
Imagine having the values $\vec{x} = (x_1 ... x_N)$ drawn from a normal
distribution whose standard deviation $\sigma$ is known, and having to decide
between two mean values $\mu_0$ and $\mu_1$, that is:
$$
  H_0 = [\mu = \mu_0]
  \hspace{100pt}
  H_1 = [\mu = \mu_1]
$$
The pdfs predicted by $H_0$ and $H_1$ are then two Gaussians, each centered
on its own mean. According to the lemma above, we must compute the
likelihood, namely the product over all the measurements $x_i$ of the pdf
predicted by a hypothesis, evaluated at $x_i$:
$$
  L(\vec{x}, \mu, \sigma) = \prod_{i=1}^N N(x_i, \mu, \sigma) =
  \frac{1}{(\sigma \sqrt{2 \pi})^N} \prod_{i=1}^N
  e^{-\frac{(x_i - \mu)^2}{2 \sigma^2}}
$$
where $N(x_i, \mu, \sigma)$ denotes the normal pdf. This is, in other words,
the probability of having obtained those measurements under the hypothesis
considered. We would therefore like $L(H_0) \gg L(H_1)$. To this end one
looks at the ratio $r$ prescribed by the lemma:
$$
  r = \frac{L(\vec{x} \, | \, H_0)}{L(\vec{x} \, | \, H_1)}
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  \ln{r} = \ln{L(\vec{x}, \mu_0, \sigma)} - \ln{L(\vec{x}, \mu_1, \sigma)}
$$
which must in turn be very large. The region in which the null hypothesis is
accepted is in fact the one with $r > c$, where $c$ still has to be chosen.
$$
  \ln{r} = R(\vec{x}) > \ln{c}
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  \bar{x} > (\text{or} <) \, g(c) = t_{\text{cut}}
$$
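Indeed, writing out the two Gaussian log-likelihoods, the quadratic terms in
the $x_i$ cancel and $\ln{r}$ depends on the data only through the sample
mean:
$$
  \ln{r} = \frac{\mu_0 - \mu_1}{\sigma^2} \sum_{i=1}^N x_i
  + \frac{N (\mu_1^2 - \mu_0^2)}{2 \sigma^2}
$$
so the condition $r > c$ is equivalent to a cut on $\bar{x}$: $\bar{x} >
t_{\text{cut}}$ if $\mu_0 > \mu_1$, or $\bar{x} < t_{\text{cut}}$ otherwise.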
To choose $c$, one imposes:
$$
  P_1 = \alpha = Pr(\bar{x} > (\text{or} <) \, t_{\text{cut}} \,
  | \, H_0)
$$
So what can actually be chosen arbitrarily, in the end, is $\alpha$, which is
usually set to $5 \%$.
### Fisher linear discriminant
How can $f(t \, | \, H_0)$ and $f(t \, | \, H_1)$ be defined? One can make
some *ansatz* about the form of $t$. Fisher's model uses a linear function:
$$
  t = \sum_{i = 1}^N a_i x_i = \vec{a} \cdot \vec{x}
$$
where the vector $\vec{a}$ is to be determined. Define the set of means and
"variances" of the measured variables as follows: $\mu_{k, i}$ is the mean of
the $i$-th variable under the $k$-th hypothesis:
$$
  \mu_{k,i} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N
  \, x_i f(\vec{x} \, | \, H_k)
$$
where $k$ can therefore be 0 or 1, while:
$$
  (V_k)_{i,j} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N
  \, (x_i - \mu_{k,i})(x_j - \mu_{k,j}) f(\vec{x} \, | \, H_k)
$$
It can be shown that, for Gaussian pdfs, the best test statistic (namely the
one that maximizes $1 - \beta$ for a given $\alpha$) is the one with:
$$
  \vec{a} = W^{-1} (\vec{\mu}_0 - \vec{\mu}_1)
  \hspace{40pt} \text{with} \hspace{40pt}
  W_{i,j} = (V_0 + V_1)_{i,j}
$$
In general an offset is also introduced:
$$
  t = a_0 + \sum_{i = 1}^N a_i x_i
$$
### Neural networks
It can be shown that, when the Fisher linear discriminant is used, given the
data $\vec{x}$ the probability that $H_0$ is correct is:
$$
  P(H_0 | \vec{x}) = \frac{1}{1 + e^{-t}}
$$
![Logistic function.](images/logistic.png){width=6cm}
which is the logistic function. If the two pdfs $f(\vec{x} | H_0)$ and
$f(\vec{x} | H_1)$ are not Gaussian, the Fisher linear discriminant is no
longer optimal and $t(\vec{x})$ can be generalized with a special case of an
Artificial Neural Network (ANN).
Suppose we take:
$$
  t(\vec{x}) = s \left( a_0 + \sum_{i = 1}^N a_i x_i \right)
$$
where $s$ is called the activation function and $a_0$ the threshold. Since
the sigmoid is monotonic, this ANN is equivalent to a linear test.

58
misc/pdfs.c Normal file
View File

@ -0,0 +1,58 @@
// This program generates a collection of random numbers which follow the
// distribution whose name is given in input by the user when calling the
// program and outputs an histogram of the result. Maximum and minimum of
// the range must also be given. The random number distribution functions
// are taken from the GSL library at:
// https://www.gnu.org/software/gsl/doc/html/randist.html
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_histogram.h>
int main (int argc, char** argv)
{
    // A histogram with 100 bins covering the range [low, upp)
    // given on the command line is set:
    size_t bins = 100;
    int low = atoi(argv[2]);
    int upp = atoi(argv[3]);
    gsl_histogram * histo = gsl_histogram_alloc(bins);
    gsl_histogram_set_ranges_uniform(histo, low, upp);
    // The random number generator is set:
    gsl_rng* gen = gsl_rng_alloc(gsl_rng_taus);
    // 100'000 random numbers are generated and put into the histogram:
    size_t N = 100000;
    for (size_t i = 0; i < N; i++)
    {
        if (strcmp(argv[1], "binomial") == 0)
            gsl_histogram_increment(histo, gsl_ran_binomial(gen, 0.3, 10));
        if (strcmp(argv[1], "poisson") == 0)
            gsl_histogram_increment(histo, gsl_ran_poisson(gen, 1));
        if (strcmp(argv[1], "uniform") == 0)
            gsl_histogram_increment(histo, gsl_rng_uniform(gen)*100);
        if (strcmp(argv[1], "gaussian") == 0)
            gsl_histogram_increment(histo, gsl_ran_gaussian(gen, 5)+30);
        if (strcmp(argv[1], "wigner") == 0)
            gsl_histogram_increment(histo, gsl_ran_cauchy(gen, 10)+20);
        if (strcmp(argv[1], "landau") == 0)
            gsl_histogram_increment(histo, gsl_ran_landau(gen));
        if (strcmp(argv[1], "chi2") == 0)
            gsl_histogram_increment(histo, gsl_ran_gamma(gen, 5, 2));
        if (strcmp(argv[1], "exponential") == 0)
            gsl_histogram_increment(histo, gsl_ran_exponential(gen, 5));
        if (strcmp(argv[1], "student") == 0)
            gsl_histogram_increment(histo, gsl_ran_tdist(gen, 3));
        if (strcmp(argv[1], "fischer") == 0)
            gsl_histogram_increment(histo, gsl_ran_fdist(gen, 3, 4));
    }
    // The histogram is given in output:
    gsl_histogram_fprintf (stdout, histo, "%g", "%g");
    // The memory is freed.
    gsl_rng_free(gen);
    gsl_histogram_free(histo);
    return 0;
}

15
misc/plot Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
import sys
lows, upps, values = np.loadtxt(sys.stdin, unpack=True,)
dim = upps[1] - lows[1]
values = values/np.sum((values)*dim)
plt.bar(lows, align='edge', width=dim, height=values,
color='#dbbf0d', edgecolor='#595856')
plt.annotate("N = 100000\nbins=100", (15, 0.16))
plt.ylabel("pdf(x)")
plt.xlabel("x")
plt.show()

BIN
notes/images/conti-ex6.jpg Normal file

BIN
notes/images/dip.png Normal file

BIN
notes/images/gamma-area.png Normal file

17
notes/makefile Normal file
View File

@ -0,0 +1,17 @@
sections = $(shell find sections/ -name '*.md' | sort -V)
pandoc = \
@ pandoc $(1) \
--toc \
--standalone \
--pdf-engine=xelatex \
-F pandoc-crossref \
-o $(2).pdf
all: exercises.pdf
clean:
	rm exercises.pdf
exercises.pdf: $(sections) images
	$(call pandoc, $(sections), exercises)

50
notes/sections/0.md Normal file
View File

@ -0,0 +1,50 @@
---
title: Statistical Analysis
subtitle: Exercises
numbersections: true
documentclass: article
fontsize: 12pt
geometry:
- width=150mm
- top=20mm
- bottom=30mm
mainlang: english
otherlang: russian
header-includes: |
```{=latex}
% start new page with each section
\usepackage{etoolbox}
\usepackage{tikz}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{siunitx}
\pretocmd{\section}{\clearpage}{}{}
% strict figure placement
\usepackage{float}
\floatplacement{figure}{H}
% custom macros
%% "with" in formulas
\DeclareMathOperator*{\with}{%
\hspace{30pt} \text{with} \hspace{30pt}
}
%% "thus" in formulas
\DeclareMathOperator*{\thus}{%
\hspace{30pt} \Longrightarrow \hspace{30pt}
}
%% "et" in formulas
\DeclareMathOperator*{\et}{%
\hspace{30pt} \wedge \hspace{30pt}
}
% configure captions
\usepackage{caption}
\captionsetup{font={footnotesize,it}}
\captionsetup{width=11cm}
\usepackage{stmaryrd}
```
---

226
notes/sections/1.md Normal file
View File

@ -0,0 +1,226 @@
# Exercise 1
## Random numbers following the Landau distribution
The Landau distribution is a probability density function which can be defined
as follows:
$$
f(x) = \frac{1}{\pi} \int \limits_{0}^{+ \infty} dt \, e^{-t \ln(t) -xt} \sin (\pi t)
$$
![Landau distribution.](images/landau-small.pdf){width=50%}
The GNU Scientific Library (GSL) provides a number of functions for generating
random variates following tens of probability distributions. Thus, the function
for generating numbers from the Landau distribution, namely `gsl_ran_landau()`,
was used.
For the purpose of visualizing the resulting sample, the data was put into
an histogram and plotted with matplotlib. The result is shown in @fig:landau.
![Example of N points generated with the `gsl_ran_landau()`
function and plotted in a 100-bins histogram ranging from -10 to
80.](images/landau.png){#fig:landau}
## Randomness testing of the generated sample
### Kolmogorov-Smirnov test
In order to compare the sample with the Landau distribution, the
Kolmogorov-Smirnov (KS) test was applied. This test statistically quantifies the
distance between the cumulative distribution function of the Landau distribution
and the one of the sample. The null hypothesis is that the sample was
drawn from the reference distribution.
The KS statistic for a given cumulative distribution function $F(x)$ is:
$$
D_N = \text{sup}_x |F_N(x) - F(x)|
$$
where:
- $x$ runs over the sample,
- $F(x)$ is the Landau cumulative distribution and function
- $F_N(x)$ is the empirical cumulative distribution function of the sample.
If $N$ numbers have been generated, for every point $x$,
$F_N(x)$ is simply given by the number of points preceding the point (itself
included) normalized by $N$, once the sample is sorted in ascending order.
$F(x)$ was computed numerically from the Landau distribution with a maximum
relative error of $10^{-6}$, using the function `gsl_integration_qagiu()`,
found in GSL.
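As an illustration, the statistic can be computed from a sorted sample as
sketched below, where `cdf()` is a placeholder standing for the reference
cumulative (a standard normal here, only to keep the sketch self-contained):
```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// comparison function for qsort, ascending order
static int cmp(const void* a, const void* b) {
  double x = *(const double*)a, y = *(const double*)b;
  return (x > y) - (x < y);
}

// placeholder model CDF: replace with the cumulative under test
static double cdf(double x) { return 0.5 * (1 + erf(x / sqrt(2))); }

// KS statistic: sup over x of |F_N(x) - F(x)|, checking the empirical
// CDF both just before and just after each jump
double ks_statistic(double* sample, size_t n) {
  qsort(sample, n, sizeof(double), cmp);
  double D = 0;
  for (size_t i = 0; i < n; i++) {
    double F = cdf(sample[i]);
    double lo = fabs(F - (double)i / n);
    double hi = fabs((double)(i + 1) / n - F);
    if (lo > D) D = lo;
    if (hi > D) D = hi;
  }
  return D;
}

int main(void) {
  double sample[] = { -0.3, 0.1, 0.5, 1.2, -1.0, 0.7 };  // placeholder data
  printf("D = %g\n", ks_statistic(sample, 6));
  return 0;
}
```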
Under the null hypothesis, the distribution of $D_N$ is expected to
asymptotically approach a Kolmogorov distribution:
$$
\sqrt{N}D_N \xrightarrow{N \rightarrow + \infty} K
$$
where $K$ is the Kolmogorov variable, with cumulative
distribution function given by:
$$
P(K \leqslant K_0) = 1 - p = \frac{\sqrt{2 \pi}}{K_0}
\sum_{j = 1}^{+ \infty} e^{-(2j - 1)^2 \pi^2 / 8 K_0^2}
$$
Plugging the observed value $\sqrt{N}D_N$ in $K_0$, the $p$-value can be
computed. At the 95% confidence level (that is, accepting a 5% probability of
rejecting the null hypothesis when it is in fact true), the compatibility with
the Landau distribution cannot be excluded if $p > \alpha = 0.05$.
To approximate the series, the convergence was accelerated using the Levin
$u$-transform with the `gsl_sum_levin_utrunc_accel()` function. The algorithm
terminates when the difference between two successive extrapolations reaches a
minimum.
For $N = 1000$, the following results were obtained:
- $D = 0.020$
- $p = 0.79$
Hence, the data was reasonably sampled from a Landau distribution.
**Note**:
Contrary to what one would expect, the $\chi^2$ test on a histogram is not very
useful in this case. For the test to be significant, the data has to be binned
such that at least several points fall in each bin. However, it can be seen
(@fig:landau) that many bins are empty both on the right and left sides of the
distribution, so it would be necessary to fit only the region where the points
cluster or use very large bins in the others, making the $\chi^2$ test
unpractical.
### Parameters comparison
When a sample of points is generated in a given range, different tests can be
applied in order to check whether they follow an even distribution or not. The
idea which lies beneath most of them is to measure how far the parameters of
the distribution are from the ones measured in the sample.
The same principle can be used to verify if the generated sample effectively
follows the Landau distribution. Since it turns out to be a very pathological
PDF, only two parameters can be easily checked: the mode and the
full-width-half-maximum (FWHM).
![Landau distribution with emphatized mode and
FWHM = ($x_+ - x_-$).](images/landau.pdf)
\begin{figure}
\hypertarget{fig:parameters}{%
\begin{tikzpicture}[overlay]
\begin{scope}[shift={(0,0.4)}]
% Mode
\draw [thick, dashed] (7.57,3.1) -- (7.57,8.55);
\draw [thick, dashed] (1.9,8.55) -- (7.57,8.55);
\node [above right] at (7.6,3.1) {$m_e$};
\node [below right] at (1.9,8.55) {$f(m_e)$};
% FWHM
\draw [thick, dashed] (1.9,5.95) -- (9.05,5.95);
\draw [thick, dashed] (6.85,5.83) -- (6.85,3.1);
\draw [thick, dashed] (8.95,5.83) -- (8.95,3.1);
\node [below right] at (1.9,5.95) {$\frac{f(m_e)}{2}$};
\node [above right] at (6.85,3.1) {$x_-$};
\node [above right] at (8.95,3.1) {$x_+$};
\end{scope}
\end{tikzpicture}
}
\end{figure}
The mode of a set of data values is defined as the value that appears most
often, namely: it is the maximum of the PDF. Since there is no way to find
an analytic form for the mode of the Landau PDF, it was numerically estimated
through a minimization method (found in GSL, called method 'golden section')
with an arbitrary error of $10^{-2}$, obtaining:
- expected mode $= m_e = \SI{-0.2227830 \pm 0.0000001}{}$
The minimization algorithm begins with a bounded region known to contain a
minimum. The region is described by a lower bound $x_\text{min}$ and an upper
bound $x_\text{max}$, with an estimate of the location of the minimum $x_e$.
The value of the function at $x_e$ must be less than the value of the function
at the ends of the interval, in order to guarantee that a minimum is contained
somewhere within the interval.
$$
f(x_\text{min}) > f(x_e) < f(x_\text{max})
$$
On each iteration the interval is divided according to the golden section
(using the ratio $(3 - \sqrt{5})/2 \approx 0.3819660$) and the value of the
function at this new point $x'$ is calculated. If the new point is a better
estimate of the minimum, namely if $f(x') < f(x_e)$, then the current
estimate of the minimum is
updated.
The new point allows the size of the bounded interval to be reduced, by
choosing the most compact set of points which satisfies the constraint $f(a) >
f(x') < f(b)$ among $f(x_\text{min})$, $f(x_\text{max})$ and $f(x_e)$. The
interval is reduced until it encloses the true minimum to a desired tolerance.
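A minimal sketch of how such a minimization can be set up with GSL (the
bracketing interval, the initial guess and the tolerance are illustrative
choices, not necessarily the ones used for the quoted result):
```c
#include <stdio.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_errno.h>

// the mode of the pdf is the minimum of its negation
double neg_landau_pdf(double x, void* params) {
  (void)params;
  return -gsl_ran_landau_pdf(x);
}

int main(void) {
  gsl_function f = { .function = &neg_landau_pdf, .params = NULL };

  // golden-section minimizer with a guess and a bracketing interval
  gsl_min_fminimizer* s =
    gsl_min_fminimizer_alloc(gsl_min_fminimizer_goldensection);
  gsl_min_fminimizer_set(s, &f, -0.3, -2, 2);

  int status = GSL_CONTINUE;
  for (int i = 0; status == GSL_CONTINUE && i < 100; i++) {
    gsl_min_fminimizer_iterate(s);
    double a = gsl_min_fminimizer_x_lower(s);
    double b = gsl_min_fminimizer_x_upper(s);
    status = gsl_min_test_interval(a, b, 1e-2, 0);
  }

  printf("mode = %f\n", gsl_min_fminimizer_x_minimum(s));
  gsl_min_fminimizer_free(s);
  return 0;
}
```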
In the sample, on the other hand, once the data were binned, the mode can be
estimated as the central value of the bin with maximum events and the error
is the half width of the bins. In this case, with 40 bins between -20 and 20,
the following result was obtained:
- observed mode $= m_o = \SI{0 \pm 1}{}$
In order to compare the values $m_e$ and $m_o$, the following compatibility
t-test was applied:
$$
p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with
t = \frac{|m_e - m_o|}{\sqrt{\sigma_e^2 + \sigma_o^2}}
$$
where $\sigma_e$ and $\sigma_o$ are the absolute errors of $m_e$ and $m_o$
respectively. At 95% confidence level, the values are compatible if $p > 0.05$.
In this case:
$$
p = 0.82
$$
Thus, the observed value is compatible with the expected one.
---
The same approach was taken as regards the FWHM. It is defined as the distance
between the two points at which the function is equal to half of its maximum
value. Even in this case, there is no analytic expression for it, thus it
was computed numerically as follows.
First, some definitions must be given:
$$
f_{\text{max}} := f(m_e) \et \text{FWHM} = x_{+} - x_{-} \with
f(x_{\pm}) = \frac{f_{\text{max}}}{2}
$$
then the function $f'(x)$ was minimized using the same minimization method
used for finding $m_e$, dividing the range into $[x_\text{min}, m_e]$ and
$[m_e, x_\text{max}]$ (where $x_\text{min}$ and $x_\text{max}$ are the limits
in which the points have been sampled) in order to be able to find both the
minima of the function:
$$
f'(x) = |f(x) - \frac{f_{\text{max}}}{2}|
$$
resulting in:
$$
\text{expected FWHM} = w_e = \SI{4.0186457 \pm 0.0000001}{}
$$
On the other hand, the observed FWHM was computed as the difference between
the center of the bins with the values closer to $\frac{f_{\text{max}}}{2}$
and the error was taken as twice the width of the bins, obtaining:
$$
\text{observed FWHM} = w_o = \SI{4 \pm 2}{}
$$
These two values turn out to be compatible too, with:
$$
p = 0.99
$$

311
notes/sections/2.md Normal file
View File

@ -0,0 +1,311 @@
# Exercise 2
## Euler-Mascheroni constant
The Euler-Mascheroni constant is defined as the limiting difference between the
partial sums of the harmonic series and the natural logarithm:
$$
\gamma = \lim_{n \rightarrow +\infty} \left( \sum_{k=1}^{n} \frac{1}{k}
- \ln(n) \right)
$$ {#eq:gamma}
and represents the limiting blue area in @fig:gamma. The first 30 digits of
$\gamma$ are:
$$
\gamma = 0.57721\ 56649\ 01532\ 86060\ 65120\ 90082 \dots
$$ {#eq:exact}
In complex analysis, this constant is related to many functions and can be
evaluated through a variety of identities. In this work, five methods were
implemented and their results discussed. In fact, evaluating $\gamma$ with a
limited precision due to floating points number representation entails limited
precision on the estimation of $\gamma$ itself due to roundoff errors. All the
known methods involve sums, subtractions or products of very big or small
numbers, packed in series, partial sums or infinite products. Thus, the
efficiency of the methods lies on how quickly they converge to their limit.
![The area of the blue region converges to the EulerMascheroni
constant.](images/gamma-area.png){#fig:gamma width=7cm}
## Computing the constant
### Definition
First, in order to have a quantitative idea of how hard it is to reach a good
estimation of $\gamma$, it was naively computed using the definition given in
@eq:gamma. The difference was computed for increasing value of $n$, with
$n_{i+1} = 10 \cdot n_i$ and $n_1 = 20$, till the approximation starts getting
worse, namely:
$$
| \gamma(n_{i+1}) - \gamma | > | \gamma(n_i) - \gamma|
$$
and $\gamma (n_i)$ was selected as the result (see @tbl:1_results).
-----------------------------------------------
n sum $|\gamma(n)-\gamma|$
----------- ------------- ---------------------
\SI{2e1}{} \SI{2.48e-02}{}
\SI{2e2}{} \SI{2.50e-03}{}
\SI{2e3}{} \SI{2.50e-04}{}
\SI{2e4}{} \SI{2.50e-05}{}
\SI{2e5}{} \SI{2.50e-06}{}
\SI{2e6}{} \SI{2.50e-07}{}
\SI{2e7}{} \SI{2.50e-08}{}
\SI{2e8}{} \SI{2.50e-09}{}
\SI{2e9}{} \SI{2.55e-10}{}
\SI{2e10}{} \SI{2.42e-11}{}
\SI{2e11}{} \SI{1.44e-08}{}
-----------------------------------------------
Table: Partial results using the definition of $\gamma$ with double
precision. {#tbl:1_results}
The convergence is logarithmic: to fix the first $d$ decimal places about
$10^d$ terms are needed. The double precision runs out at the
10\textsuperscript{th} place, $n=\SI{2e10}{}$.
Since all the numbers are stored in double precision, there can be at best 15
correct digits, but only 10 were correctly computed: this means that once the
terms of the series become too small to change the partial sum at this
precision, the contribution of all the remaining terms is lost, leaving a
residual error $\propto 10^{-11}$.
--------- -----------------------
true: 0.57721\ 56649\ 01533
approx: 0.57721\ 56648\ 77325
diff: 0.00000\ 00000\ 24207
--------- -----------------------
Table: First method results. {#tbl:first}
### Alternative formula
As a first alternative, the constant was computed through the identity which
relates $\gamma$ to the $\Gamma$ function as follow:
$$
\gamma = \lim_{M \rightarrow + \infty} \sum_{k = 1}^{M}
\binom{M}{k} \frac{(-1)^k}{k} \ln(\Gamma(k + 1))
$$
Varying $M$ from 1 to 100, the best result was obtained for $M = 41$ (see
@tbl:second). It turned out badly: the convergence is worse than using the
definition itself. Only two decimal places were correctly computed.
--------- -----------------------
true: 0.57721\ 56649\ 01533
approx: 0.57225\ 72410\ 34058
diff: 0.00495\ 84238\ 67473
--------- -----------------------
Table: Second method results. {#tbl:second}
Here, the problem lies in the binomial term: computing the factorial of a
number greater than 18 goes over 15 places and so cannot be correctly
represented. Furthermore, the convergence (even if this is not a series
and, consequently, it is not properly a "convergence") is slowed down by
the logarithmic factor.
### Reciprocal $\Gamma$ function
A better result was found using the well known reciprocal $\Gamma$ function
formula:
$$
  \frac{1}{\Gamma(z)} = z e^{\gamma z} \prod_{k = 1}^{+ \infty}
\left( 1 + \frac{z}{k} \right) e^{-z/k}
$$
which gives:
$$
\gamma = - \frac{1}{z} \ln \left( z \Gamma(z)
\prod_{k = 1}^{+ \infty} \left( 1 + \frac{z}{k} \right) e^{-z/k} \right)
$$
The execution stops when there is no difference between two consecutive terms
of the infinite product (it happens for $k = 456565794$, meaning that for this
value of $k$, the term of the product is equal to 1). Different values of $z$
were checked, with $z_{i+1} = z_i + 0.01$, ranging from 0 to 20 and the best
result was found for $z = 9$. As can be seen in @tbl:3_results, it's only by
chance, since all $|\gamma(z) - \gamma |$ are of the same order of magnitude.
The best one is compared with the exact value of $\gamma$ in @tbl:third.
---------------------------------------------------------------
z $|\gamma(z) - \gamma |$ z $|\gamma(z) - \gamma |$
----- ------------------------ ------ ------------------------
1 \SI{9.712e-9}{} 8.95 \SI{9.770e-9}{}
3 \SI{9.320e-9}{} 8.96 \SI{9.833e-9}{}
5 \SI{9.239e-9}{} 8.97 \SI{9.622e-9}{}
7 \SI{9.391e-9}{} 8.98 \SI{9.300e-9}{}
9 \SI{8.482e-9}{} 8.99 \SI{9.059e-9}{}
11 \SI{9.185e-9}{} 9.00 \SI{8.482e-9}{}
13 \SI{9.758e-9}{} 9.01 \SI{9.564e-9}{}
15 \SI{9.747e-9}{} 9.02 \SI{9.260e-9}{}
17 \SI{9.971e-9}{} 9.03 \SI{9.264e-9}{}
19 \SI{10.084e-9}{} 9.04 \SI{9.419e-9}{}
---------------------------------------------------------------
Table: Differences between the obtained values of $\gamma$ and the exact
one. {#tbl:3_results}
--------- -----------------------
true: 0.57721\ 56649\ 01533
approx: 0.57721\ 56564\ 18607
diff: 0.00000\ 00084\ 82925
--------- -----------------------
Table: Third method results for z = 9.00. {#tbl:third}
This time, the convergence of the infinite product is fast enough to ensure the
$8^{th}$ place.
### Fastest convergence formula
The fastest known convergence belongs to the following formula:
(source: http://www.numberworld.org/y-cruncher/internals/formulas.html):
$$
\gamma = \frac{A(N)}{B(N)} -\frac{C(N)}{B^2(N)} - \ln(N)
$$ {#eq:faster}
with:
\begin{align*}
&A(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \cdot H(k)
\with H(k) = \sum_{j=1}^{k} \frac{1}{j} \\
&B(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \\
&C(N) = \frac{1}{4N} \sum_{k=0}^{2N}
  \frac{((2k)!)^3}{(k!)^4 \cdot (16N)^{2k}} \\
\end{align*}
The series $A$ and $B$ are computed till there is no difference between two
consecutive terms.
The number of desired correct decimals is given in input and $N$ is
consequently computed through a formula given in the same article
above-mentioned. Results are shown in @tbl:fourth.
--------- ------------------------------
true: 0.57721\ 56649\ 01532\ 75452
approx: 0.57721\ 56649\ 01532\ 86554
diff: 0.00000\ 00000\ 00000\ 11102
--------- ------------------------------
Table: Fourth method results. {#tbl:fourth}
Due to roundoff errors, the best result is obtained for $N = 10$. Since up to
15 places were correctly computed, an approximation of $\gamma$ better than the
one reached with the definition in @eq:gamma was obtained.
### Arbitrary precision
To overcome the issues related to the double representation, one can resort to a
representation with arbitrary precision. In the GMP library (which stands for
GNU Multiple Precision arithmetic), for example, real numbers can be
approximated by a ratio of two integers (fraction) with arbitrary precision:
this means a check is performed on every operation and in case of an integer
overflow, additional memory is requested and used to represent the larger result.
Additionally, the library automatically switches to the optimal algorithm
to compute an operation based on the size of the operands.
The terms in @eq:faster can therefore be computed with arbitrarily large
precision. Thus, a program that computes the Euler-Mascheroni constant within
a user controllable precision has been implemented. Unlike the previously
mentioned programs, this one was more carefully optimized.
The $A$ and $B$ series are computed up to an arbitrary limit $k_{\text{max}}$.
Different values of $k_{\text{max}}$ were tested but, obviously, they all
eventually reach a point where the approximation cannot guarantee the
requested precision; the solution turned out to be to let $k_{\text{max}}$ depend on
$N$. Consequently, $k_{\text{max}}$ was chosen to be $5N$, after it has been
verified to produce the correct digits up to 500 decimal places.
The GMP library offers functions to perform some operations such as addition,
multiplication, division, etc. however, the logarithm function is not
implemented. Thus, most of the code carries out the $\ln(N)$ computation.
First, it should be noted that the logarithm of only some special numbers can
be computed with arbitrary precision, namely the ones of which a converging
series is known. This forces $N$ to be rewritten in the following way:
$$
N = N_0 \cdot b^e \thus \ln(N) = \ln(N_0) + e \cdot \ln(b)
$$
Since a fast converging series for $\ln(2)$ is known $b = 2$ was chosen. As
well as for the scientific notation, in order to get the mantissa $1 \leqslant
N_0 < 2$, the number of binary digits of $N$ must be computed (conveniently, a
dedicated function `mpz_sizeinbase()` can be found in GMP). If the digits are $n$:
$$
  e = n - 1 \thus N_0 = \frac{N}{2^{n - 1}}
$$
Then, by defining:
$$
N_0 = \frac{1 + y}{1 - y} \thus y = \frac{N_0 - 1}{N_0 + 1} < 1
$$
and the following series (which is convergent for $y < 1$) can therefore be
used:
$$
\ln \left( \frac{1 + y}{1 - y} \right) =
2 \sum_{k = 0}^{+ \infty} \frac{y^{2k + 1}}{2k + 1}
$$
But when to stop computing the series?
Given a partial sum $S_k$ of the series, it is possible to know when a digit is
definitely correct, meaning that no matter how large $k$ can be,
it will not affect that decimal place. The key lies in the following concept.
Letting $S$ the value of the series:
$$
S_k + \frac{a_{k+1}}{1 - \frac{a_{k+1}}{a_k}} < S < S_k + a_k \frac{L}{1 -L}
$$
where $L$ is the limiting ratio of the series terms, which must be $< 1$ in
order for it to converge (in this case, it is easy to prove that $L = y^2$).
The width $\Delta S$ of the interval containing $S$, for a given $S_k$, gives
the precision of the partial sum with respect to $S$. By imposing $\Delta S <
1/10^D$ where $D$ is the correct decimal place required, the desired precision
is obtained. This is achieved by trials, checking at every step whether $\Delta
S$ is less or greater than $1/10^D$.
The same holds for $\ln(2)$, which is given by the following series:
$$
\log(2) = \sum_{k=1}^{+ \infty} \frac{1}{k \cdot 2^k}
$$
In this case the ratio is $L = 1/2$.
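As a minimal illustration of the approach, the following sketch sums the
$\ln(2)$ series with GMP rationals until the current term drops below
$10^{-D}$ (a simplified stopping rule based on the bound above, with $L =
1/2$; the value of $D$ is an arbitrary example, not the one used in the
actual program):
```c
#include <stdio.h>
#include <gmp.h>

int main(void) {
  const unsigned long D = 30;     // requested number of correct decimals
  mpq_t sum, term, thr;
  mpq_inits(sum, term, thr, NULL);

  // threshold 10^-D: with L = 1/2 the remainder of the series is smaller
  // than the last added term, so stopping here bounds the error by 10^-D
  mpz_set_ui(mpq_numref(thr), 1);
  mpz_ui_pow_ui(mpq_denref(thr), 10, D);

  // ln(2) = sum over k >= 1 of 1/(k*2^k), with exact rational arithmetic
  for (unsigned long k = 1; ; k++) {
    mpq_set_ui(term, 1, k);        // 1/k
    mpq_div_2exp(term, term, k);   // 1/(k*2^k)
    mpq_add(sum, sum, term);
    if (mpq_cmp(term, thr) < 0) break;
  }

  // convert to a big float only to print the decimal expansion
  mpf_t out;
  mpf_init2(out, 4 * D);
  mpf_set_q(out, sum);
  gmp_printf("ln(2) = %.30Ff\n", out);

  mpf_clear(out);
  mpq_clears(sum, term, thr, NULL);
  return 0;
}
```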

423
notes/sections/3.md Normal file
View File

@ -0,0 +1,423 @@
# Exercise 3 {#sec:3}
## Meson decay events generation
A number of $N = 50'000$ points on the unit sphere, each representing a
particle detection event, must be generated according to the angular
probability distribution function $F_0$:
\begin{align*}
F_0 (\theta, \phi) = \frac{3}{4 \pi} \Bigg[
\frac{1}{2} (1 - \alpha_0) + \frac{1}{2} (3\alpha_0 - 1) \cos^2(\theta)& \\
- \beta_0 \sin^2(\theta) \cos(2 \phi)
- \gamma_0 \sqrt{2} \sin(2 \theta) \cos(\phi)& \Bigg]
\end{align*}
where $\theta$ and $\phi$ are, respectively, the polar and azimuthal angles, and
$$
\alpha_0 = 0.65 \et \beta_0 = 0.06 \et \gamma_0 = -0.18
$$
To generate the points a *try and catch* method was employed:
1. generate a point $(\theta , \phi)$ uniformly on a unit sphere
2. generate a third value $Y$ uniformly in $[0, 1]$
3. if @eq:requirement is satisfied save the point
4. repeat from 1.
$$
Y < F_0(\theta, \phi)
$$ {#eq:requirement}
To increase the efficiency of the procedure, the $Y$ values were actually
generated in $[0, 0.2]$, since $\max F_0 = 0.179$ for the given parameters
(another option would have been generating the numbers in the range $[0, 1]$,
but then every value $Y > 0.179$ would always have been rejected, making the
procedure needlessly less efficient).
While $\phi$ can simply be generated uniformly between 0 and $2 \pi$, for
$\theta$ one has to be more careful: by generating evenly distributed numbers
between 0 and $\pi$, the points turn out to cluster on the poles. The point
$(\theta,\phi)$ is *not* uniform on the sphere but is uniform on a cylindrical
projection of it. (The difference can be appreciated in @fig:compare).
The proper distribution of $\theta$ is obtained by applying a transform
to a variable $x$ with uniform distribution in $[0, 1)$,
easily generated by the GSL function `gsl_rng_uniform()`.
<div id="fig:compare">
![Uniformly distributed points with $\theta$ evenly distributed between
0 and $\pi$.](images/histo-i-u.pdf){width=45%}
![Points uniformly distributed on a spherical
surface.](images/histo-p-u.pdf){width=45%}
![Sample generated according to $F$ with $\theta$ evenly distributed between
0 and $\pi$.](images/histo-i-F.pdf){width=45%}
![Sample generated according to $F$ with $\theta$ properly
distributed.](images/histo-p-F.pdf){width=45%}
Examples of samples. On the left, points with $\theta$ evenly distributed
between 0 and $\pi$; on the right, points with $\theta$ properly distributed.
</div>
The transformation can be found by imposing the angular PDF to be a constant:
\begin{align*}
\frac{d^2 P}{d \Omega^2} = \text{const} = \frac{1}{4 \pi}
\hspace{20pt} &\Longrightarrow \hspace{20pt}
d^2P = \frac{1}{4 \pi} \, d \Omega^2 = \frac{1}{4 \pi} \,
d \phi \sin(\theta) d \theta
\\
&\Longrightarrow \hspace{20pt}
\frac{dP}{d \theta} = \int_0^{2 \pi} d \phi \, \frac{1}{4 \pi} \sin(\theta)=
\frac{1}{4 \pi} \sin(\theta) 2 \pi = \frac{1}{2} \sin(\theta)
\end{align*}
\begin{align*}
\theta = \theta (x)
\hspace{20pt} &\Longrightarrow \hspace{20pt}
\frac{dP}{d \theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d \theta} \right|
= \left. \frac{dP}{dx} \middle/ \, \left| \frac{d \theta}{dx} \right| \right.
\\
&\Longrightarrow \hspace{20pt}
\frac{1}{2} \sin(\theta) = \left. 1 \middle/ \, \left|
\frac{d \theta}{dx} \right| \right.
\end{align*}
Since $\theta \in [0, \pi]$, then the absolute value symbol can be omitted:
\begin{align*}
\frac{d \theta}{dx} = \frac{2}{\sin(\theta)}
\hspace{20pt} &\Longrightarrow \hspace{20pt}
d \theta \sin(\theta) = 2 dx
\\
&\Longrightarrow \hspace{20pt}
- \cos (\theta') |_{0}^{\theta} = 2[x(\theta) - x(0)] = 2(x - 0) = 2x
\\
&\Longrightarrow \hspace{20pt}
- \cos(\theta) + 1 = 2x
\\
&\Longrightarrow \hspace{20pt}
\theta = \text{acos} (1 -2x)
\end{align*}
Since 1 is not in $[0 , 1)$, $\theta = \pi$ can't be generated. Being it just
a single point, the effect of this omission is negligible.
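Putting the two angles together, the generation of a single direction reduces
to a few lines (a sketch; the generator type and the number of printed points
are arbitrary):
```c
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>

int main(void) {
  gsl_rng* r = gsl_rng_alloc(gsl_rng_taus);
  for (int i = 0; i < 5; i++) {
    double x = gsl_rng_uniform(r);               // uniform in [0, 1)
    double theta = acos(1 - 2*x);                // inverse-transform for theta
    double phi = 2 * M_PI * gsl_rng_uniform(r);  // uniform in [0, 2*pi)
    printf("%f %f\n", theta, phi);
  }
  gsl_rng_free(r);
  return 0;
}
```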
\vspace{30pt}
## Parameter estimation
The sample must now be used to estimate the parameters $\alpha_0$,
$\beta_0$ and $\gamma_0$ of the angular distribution $F_0$.
### Maximum Likelihood method
The Maximum Likelihood method (MLM) is based on the observation that the best
estimate {$\bar{\alpha}$, $\bar{\beta}$, $\bar{\gamma}$} of the parameters
{$\alpha$, $\beta$, $\gamma$} should maximize the Likelihood function $L$
defined in @eq:Like0, where the index $i$ runs over the sample and $F$ is the
function $F_0$ with free parameters $\alpha$, $\beta$ and $\gamma$:
$$
L = \prod_i F(\theta_i, \phi_i) = \prod_i F_i
$$ {#eq:Like0}
The reason is that the points were generated according to the probability
distribution function $F$ with parameters $\alpha_0$, $\beta_0$ and $\gamma_0$,
thus the probability $P$ of sampling this special sample is, by definition, the
product of their probabilities $F_i$ and it must be maximum for {$\alpha$,
$\beta$, $\gamma$} = {$\alpha_0$, $\beta_0$, $\gamma_0$}. Thus, by viewing $F_i$
as a function of the parameters, by maximizing $P = L$ with respect
to $\alpha$, $\beta$ and $\gamma$, a good estimate should be found.
Instead of actually maximising $L$, the function $-\ln(L)$
was minimised, since minimisation is what is usually described in the
literature (and implemented in libraries), and taking the logarithm
simplifies the math:
$$
L = \prod_i F_i \thus - \ln(L) = - \sum_{i} \ln(F_i)
$$ {#eq:Like}
The problem of multidimensional minimisation was tackled using the GSL library
`gsl_multimin.h`, which implements a variety of methods, including the
conjugate gradient Fletcher-Reeves algorithm, used in the solution, which requires
the implementation of $-\ln(L)$ and its gradient.
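For this particular $F$ the gradient can be written down analytically (shown
here as a cross-check; it follows directly from differentiating $F$ with
respect to the free parameters):
\begin{align*}
  \frac{\partial (-\ln L)}{\partial \alpha}
    &= - \sum_i \frac{1}{F_i} \, \frac{3}{8 \pi}
       \left( 3 \cos^2(\theta_i) - 1 \right) \\
  \frac{\partial (-\ln L)}{\partial \beta}
    &= \sum_i \frac{1}{F_i} \, \frac{3}{4 \pi}
       \sin^2(\theta_i) \cos(2 \phi_i) \\
  \frac{\partial (-\ln L)}{\partial \gamma}
    &= \sum_i \frac{1}{F_i} \, \frac{3 \sqrt{2}}{4 \pi}
       \sin(2 \theta_i) \cos(\phi_i)
\end{align*}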
To minimise a function $f$, given an initial guess point $x_0$, the gradient
$\nabla f$ is used to find the initial direction $v_0$ (the steepest descent) along
which a line minimisation is performed: the minimum occurs where the
directional derivative along $v_0$ is zero:
$$
\frac{\partial f}{\partial v_0} = \nabla f \cdot v_0 = 0
$$
or, equivalently, at the point where the gradient is orthogonal to $v_0$.
After this first step the following procedure is iterated:
1. Find the steepest descent $v_n$.
2. Compute the *conjugate* direction: $w_n = v_n + \beta_n w_{n-1}$
3. Perform a line minimisation along $w_n$
4. Update the position $x_{n+1} = x_n + \alpha_n w_n$
where $\alpha_n$ is the line minimum and $\beta_n$ is given by the Fletcher-Reeves
formula:
$$
\beta = \frac{\|\nabla f(x_n)\|^2}{\|\nabla f(x_{n-1})\|^2}
$$
The accuracy of each line minimisation is controlled by the parameter `tol`,
meaning:
$$
w \cdot \nabla f < \text{tol} \, \|w\| \, \|\nabla f\|
$$
The minimisation is quite unstable and this forced the starting point to be
taken very close to the solution, namely:
$$
\alpha_{sp} = 0.79 \et
\beta_{sp} = 0.02 \et
\gamma_{sp} = -0.17
$$
The overall minimisation ends when the gradient module is smaller than $10^{-4}$.
The program took 25 of the above iterations to reach the result shown in @eq:Like_res.
The Cramér-Rao bound states that the covariance matrix of parameters estimated
by MLM is bounded from below by the inverse of the Hessian matrix of $-\ln(L)$
at the minimum. Thus, the Hessian matrix was computed
analytically and inverted by a Cholesky decomposition, which states that
every positive definite symmetric square matrix $H$ can be written as the
product of a lower triangular matrix $L$ and its transpose $L^T$:
$$
H = L \cdot L^T
$$
The Hessian matrix is indeed both symmetric and positive definite when
computed at a minimum. Once decomposed, the inverse is given by
$$
H^{-1} = (L \cdot L^T)^{-1} = (L^{-1})^T \cdot L^{-1}
$$
The inverse of $L$ is easily computed since it's a triangular matrix.
The GSL library provides two functions `gsl_linalg_cholesky_decomp()` and
`gsl_linalg_cholesky_invert()` to decompose and invert in-place a matrix.
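Assuming the $3 \times 3$ Hessian has already been computed and stored in a
`gsl_matrix`, a minimal sketch of this step could be:

```c
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>

/* Invert the (symmetric, positive definite) Hessian in place:
 * after these two calls `hessian` holds the covariance matrix. */
void covariance_from_hessian(gsl_matrix *hessian) {
  gsl_linalg_cholesky_decomp(hessian);   // H = L·Lᵀ
  gsl_linalg_cholesky_invert(hessian);   // H ← H⁻¹ = (L⁻¹)ᵀ·L⁻¹
}
```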
The result is shown below:
$$
\bar{\alpha_L} = 0.6541 \et
\bar{\beta_L} = 0.06125 \et
\bar{\gamma_L} = -0.1763
$$ {#eq:Like_res}
$$
\text{cov}_L = \begin{pmatrix}
9.225 \cdot 10^{-6} & -1.785 \cdot 10^{-6} & 2.41 \cdot 10^{-7} \\
-1.785 \cdot 10^{-6} & 4.413 \cdot 10^{-6} & 1.58 \cdot 10^{-6} \\
2.41 \cdot 10^{-7} & 1.58 \cdot 10^{-6} & 2.602 \cdot 10^{-6}
\end{pmatrix}
$$ {#eq:Like_cov}
Hence:
\begin{align*}
\alpha_L &= 0.654 \pm 0.003 \\
\beta_L &= 0.0613 \pm 0.0021 \\
\gamma_L &= -0.1763 \pm 0.0016
\end{align*}
### Least squares estimation
Another method that can be used to estimate {$\alpha$, $\beta$, $\gamma$} are
the least squares (LSQ), which is a multidimensional minimisation
specialised to the case of sum of squared functions called residuals.
The residuals can be anything in general but are usually interpreted as the
difference between an expected (fit) and an observed value. The least squares
then correspond to the expected values that best fit the observations.
To apply the LSQ method, the data must be properly binned, meaning that each
bin should contain a significant number of events (say greater than four or
five): in this case, 30 bins for $\theta$ and 60 for $\phi$ turned out
to be satisfactory. The expected values were given by the product of $N$, $F$
computed at the geometric center of the bin, and the solid angle covered by
the bin itself, namely:
$$
E_{\theta, \phi} = N F(\theta, \phi) \, \Delta \theta \, \Delta \phi \,
\sin{\theta}
$$
Once the data are binned, the number of events in each bin plays the role of the
observed value, while the expected one is given by the probability of finding
an event in that bin multiplied by the total number of points, $N$. Then the
$\chi^2$, defined as follows, must be minimized with respect to the three
parameters to be estimated:
$$
\chi^2 = \sum_i f_i^2 = \|\vec f\|^2 \with f_i = \frac{O_i - E_i}{\sqrt{E_i}}
$$
where the sum runs over the bins, $O_i$ are the observed values and $E_i$ the
expected ones. Besides the more generic `gsl_multimin.h` library, GSL provides
a dedicated one for least squares minimisation, `gsl_multifit_nlinear.h`. A
trust region method was used to minimize the $\chi^2$.
In such methods, the $\chi^2$, its gradient and its Hessian matrix are computed
at a sequence of points in parameter space until the minimum is approached with
the desired accuracy, as measured by suitable thresholds on the gradient and on
the step.
If {$x_1 \dots x_p$} are the parameters to be estimated, $\chi^2$ is first
computed at the current point $\vec{x}_k$, then its value at a point
$\vec{x}_k + \vec{\delta}$ is modelled by a function $m_k (\vec{\delta})$, which
is the second order Taylor expansion around the point $\vec{x}_k$, namely:
$$
\chi^2 (\vec{x}_k + \vec{\delta}) \sim m_k (\vec{\delta}) =
\chi^2 (\vec{x}_k) + \nabla_k^T \vec{\delta} + \frac{1}{2}
\vec{\delta}^T H_k \vec{\delta}
$$
where $\nabla_k$ and $H_k$ are the gradient and the Hessian matrix of $\chi^2$
at the point $\vec{x}_k$ and the superscript $T$ stands for the transpose. The
initial problem is then reduced to the so-called trust-region subproblem (TRS),
which is the minimisation of $m_k(\vec{\delta})$ in a region where
$m_k(\vec{\delta})$ should be a good approximation of $\chi^2 (\vec{x}_k
+ \vec{\delta})$, given by the condition:
$$
\|D_k \vec\delta\| < \Delta_k
$$
If $D_k = I$ the region is a hypersphere of radius $\Delta_k$
centered at $\vec{x}_k$, but it can be deformed according to $D_k$. This is
necessary when one or more parameters have very different scales.
Given an initial point $x_0$, radius $\Delta_0$, scale matrix $D_0$
and step $\vec\delta_0$, the LSQ algorithm consists of the following iteration:
1. Construct the function $m_k(\vec\delta)$.
2. Solve the TRS for step $\vec\delta_k$.
3. Check whether the step actually decreases $\chi^2$:
    1. if it does, increase the trust region radius, $\Delta_{k+1} =
       \alpha\Delta_k$, and move to the new position, $\vec x_{k+1} = \vec x_k +
       \vec \delta_k$;
    2. if it does not, decrease the radius, $\Delta_{k+1} = \Delta_k/\beta$.
4. Repeat
This method is advantageous compared to a general minimisation because
the TRS usually amounts to solving a linear system. There are many algorithms
to solve the problem; in this case the Levenberg-Marquardt one was used. It is based
on a theorem that proves the existence of a number $\mu_k$ such that
\begin{align*}
\Delta_k \mu_k = \|D_k \vec\delta_k\| &&
(H_k + \mu_k D_k^TD_k) \vec\delta_k = -\nabla_k
\end{align*}
Using the approximation[^1] $H\approx J^TJ$, obtained by computing the Hessian
of $\chi^2$ with the residuals expanded to first order, $\vec\delta_k$ can
be found by solving, in the least squares sense, the system
$$
\begin{cases}
J_k \vec{\delta_k} + \vec{f_k} = 0 \\
\sqrt{\mu_k} D_k \vec{\delta_k} = 0
\end{cases}
$$
[^1]: Here $J_{ij} = \partial f_i/\partial x_j$ is the Jacobian matrix of the
vector-valued function $\vec f(\vec x)$.
The algorithm terminates when one of the following conditions is satisfied:
1. $|\delta_i| \leq \text{xtol} (|x_i| + \text{xtol})$ for every component
of $\vec \delta$.
2. $\max_i |\nabla_i \cdot \max(x_i, 1)| \leq \text{gtol}\cdot \max(\chi^2, 1)$
3. $\|\vec f(\vec x + \vec\delta) - \vec f(\vec x)\| \leq \text{ftol}
   \cdot \max(\|\vec f(\vec x)\|, 1)$
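As an illustration, a rough sketch of the corresponding `gsl_multifit_nlinear.h`
setup is shown below; `residual_f` and `residual_df` are hypothetical routines
filling $\vec f$ and its Jacobian from the binned data, and `n_bins` is the
total number of bins.

```c
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_multifit_nlinear.h>

/* Hypothetical residuals f_i = (O_i - E_i)/√E_i and their Jacobian. */
int residual_f(const gsl_vector *par, void *data, gsl_vector *f);
int residual_df(const gsl_vector *par, void *data, gsl_matrix *J);

int lsq_fit(void *binned_data, size_t n_bins, gsl_vector *par) {
  gsl_multifit_nlinear_fdf fdf = {
    .f      = residual_f,
    .df     = residual_df,   // NULL would select finite differences
    .fvv    = NULL,          // no geodesic acceleration
    .n      = n_bins,
    .p      = 3,
    .params = binned_data
  };

  /* trust region method with the Levenberg-Marquardt TRS (the default) */
  gsl_multifit_nlinear_parameters opts =
    gsl_multifit_nlinear_default_parameters();
  gsl_multifit_nlinear_workspace *w = gsl_multifit_nlinear_alloc(
    gsl_multifit_nlinear_trust, &opts, n_bins, 3);
  gsl_multifit_nlinear_init(par, &fdf, w);

  /* iterate until one of the xtol/gtol/ftol conditions is met */
  int info;
  int status = gsl_multifit_nlinear_driver(
    100,                // max iterations
    1e-8, 1e-8, 1e-8,   // xtol, gtol, ftol
    NULL, NULL,         // no per-iteration callback
    &info, w);

  gsl_vector_memcpy(par, gsl_multifit_nlinear_position(w));
  gsl_multifit_nlinear_free(w);
  return status;
}
```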
These tolerances have all been set to \SI{1e-8}{}. The program converged in 7
iterations giving the results below. The covariance of the parameters can again
be estimated through the Hessian matrix at the minimum. The following results
were obtained:
$$
\bar{\alpha_{\chi}} = 0.6537 \et
\bar{\beta_{\chi}} = 0.05972 \et
\bar{\gamma_{\chi}} = -0.1736
$$ {#eq:lsq-res}
$$
\text{cov}_{\chi} = \begin{pmatrix}
9.244 \cdot 10^{-6} & -1.66 \cdot 10^{-6} & 2.383 \cdot 10^{-7} \\
-1.66 \cdot 10^{-6} & 4.495 \cdot 10^{-6} & 1.505 \cdot 10^{-6} \\
2.383 \cdot 10^{-7} & 1.505 \cdot 10^{-6} & 2.681 \cdot 10^{-6}
\end{pmatrix}
$$
Hence:
\begin{align*}
&\alpha_{\chi} = 0.654 \pm 0.003 \\
&\beta_{\chi} = 0.0597 \pm 0.0021 \\
&\gamma_{\chi} = -0.1736 \pm 0.0016
\end{align*}
### Results compatibility
In order to compare the values $x_L$ and $x_{\chi}$ obtained from both methods
with the correct ones (namely {$\alpha_0$, $\beta_0$, $\gamma_0$}), the
following compatibility t-test was applied:
$$
p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with
t = \frac{x_i - x_0}{\sqrt{\sigma_{x_i}^2 + \sigma_{x_0}^2}}
= \frac{x_i - x_0}{\sigma_{x_i}}
$$
where $i$ stands either for the MLM or the LSQ parameters, $\sigma_{x_i}$ is
the uncertainty of the value $x_i$ and $\sigma_{x_0} = 0$, since the exact
values carry no uncertainty. At 95% confidence level, the values are compatible
if $p > 0.05$.
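In code, this test reduces to a few lines; a minimal sketch using the standard
C `erf` (the absolute value makes it symmetric in $x_i$ and $x_0$):

```c
#include <math.h>

/* Two-sided p-value for the compatibility of an estimate x
 * (with uncertainty sigma) with the exact value x0. */
double compatibility(double x, double sigma, double x0) {
  double t = fabs(x - x0) / sigma;
  return 1 - erf(t / sqrt(2));
}
```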
Likelihood results:
----------------------------
par $p$-value
------------ ---------------
$\alpha_L$ 0.18
$\beta_L$ 0.55
$\gamma_L$ 0.023
----------------------------
Table: Likelihood results compatibility.
$\chi^2$ results:
---------------------------------
par $p$-value
----------------- ---------------
$\alpha_{\chi}$ 0.22
$\beta_{\chi}$ 0.89
$\gamma_{\chi}$ 0.0001
---------------------------------
Table: $\chi^2$ results compatibility.
It can be concluded that only the third parameter, $\gamma$, is not compatible
with the expected one in both cases. An in-depth analysis of the algebraic
arrangement of $F$ would be required in order to justify this outcome.
\vspace{30pt}
## Isotropic hypothesis testing
What if the probability distribution function were isotropic? Could it be
compatible with the found results? If $F$ were isotropic, then $\alpha_I$,
$\beta_I$ and $\gamma_I$ would be $1/3$, 0, and 0 respectively, since this
gives $F_I = 1/(4 \pi)$. The t-test gives a $p$-value of approximately zero for
all three parameters, meaning that there is no compatibility at all with this
hypothesis.
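As a rough check of the order of magnitude, taking the MLM estimate of
$\alpha$ alone:
$$
t = \frac{0.654 - 1/3}{0.003} \approx 107
\thus p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right) \approx 0
$$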
269
notes/sections/4.md Normal file
View File
@ -0,0 +1,269 @@
# Exercise 4
**Kinematic dip pdf derivation**
Consider a great number of non-interacting particles, each of which has a
random momentum $\vec{p}$ with modulus between 0 and $p_{\text{max}}$ and a
random direction with respect to a coordinate system {$\hat{x}$, $\hat{y}$,
$\hat{z}$}.
Once the polar angle $\theta$ is defined, the vertical and horizontal momentum
components of a particle, which will be referred to as $p_v$ and $p_h$, are the
ones shown in @fig:components.
If $\theta$ is evenly distributed on the sphere and the same holds for the
modulus $p$, which distribution will the average of the absolute value of
$p_v$ follow as a function of $p_h$?
\begin{figure}
\hypertarget{fig:components}{%
\centering
\begin{tikzpicture}
% Axes
\draw [thick, ->] (5,2) -- (5,8);
\draw [thick, ->] (5,2) -- (2,1);
\draw [thick, ->] (5,2) -- (8,1);
\node at (1.5,0.9) {$x$};
\node at (8.5,0.9) {$y$};
\node at (5,8.4) {$z$};
% Momentum
\definecolor{cyclamen}{RGB}{146, 24, 43}
\draw [ultra thick, ->, cyclamen] (5,2) -- (3.8,6);
\draw [thick, dashed, cyclamen] (3.8,0.8) -- (3.8,6);
\draw [thick, dashed, cyclamen] (5,7.2) -- (3.8,6);
\draw [ultra thick, ->, pink] (5,2) -- (5,7.2);
\draw [ultra thick, ->, pink] (5,2) -- (3.8,0.8);
\node at (4.8,1.1) {$\vec{p_h}$};
\node at (5.5,6.6) {$\vec{p_v}$};
\node at (3.3,5.5) {$\vec{p}$};
% Angle
\draw [thick, cyclamen] (4.4,4) to [out=80,in=210] (5,5);
\node at (4.7,4.2) {$\theta$};
\end{tikzpicture}
\caption{Momentum components.}\label{fig:components}
}
\end{figure}
The aim is to compute $\langle |p_v| \rangle (p_h) dp_h$.
Consider all the points with $p_h \in [p_h ; p_h + dp_h]$: the values of
$p_v$ that these points can assume depend on $\theta$ and on the total momentum
modulus $p$.
$$
\begin{cases}
p_h = p \sin{\theta} \\ p_v = p \cos{\theta}
\end{cases}
\thus |p_v| = p |\cos{\theta}| = p_h \frac{|\cos{\theta}|}{\sin{\theta}}
= |p_v| (\theta)
$$
It looks like the dependence on $p$ has disappeared, but obviously it has
not. In fact, it lies beneath the limits that must be put on the possible
values of $\theta$. For the sake of clarity, take a look at @fig:sphere (the
system is rotation-invariant, hence it can be drawn at a fixed azimuthal angle).
\begin{figure}
\hypertarget{fig:sphere}{%
\centering
\begin{tikzpicture}
% p_h slice
\definecolor{cyclamen}{RGB}{146, 24, 43}
\filldraw [cyclamen!15!white] (1.5,-3.15) -- (1.5,3.15) -- (1.75,3.05)
-- (2,2.85) -- (2,-2.85) -- (1.75,-3.05)
-- (1.5,-3.15);
\draw [cyclamen] (1.5,-3.15) -- (1.5,3.15);
\draw [cyclamen] (2,-2.9) -- (2,2.9);
\node [cyclamen, left] at (1.5,-0.3) {$p_h$};
\node [cyclamen, right] at (2,-0.3) {$p_h + dp_h$};
% Axes
\draw [thick, ->] (0,-4) -- (0,4);
\draw [thick, ->] (0,0) -- (4,0);
\node at (0.3,3.8) {$z$};
\node at (4,0.3) {$hd$};
% p_max semicircle
\draw [thick, cyclamen] (0,-3.5) to [out=0, in=-90] (3.5,0);
\draw [thick, cyclamen] (0,3.5) to [out=0, in=90] (3.5,0);
\node [cyclamen, left] at (-0.2,3.5) {$p_{\text{max}}$};
\node [cyclamen, left] at (-0.2,-3.5) {$-p_{\text{max}}$};
% Angles
\draw [thick, cyclamen, ->] (0,1.5) to [out=0, in=140] (0.55,1.2);
\node [cyclamen] at (0.4,2) {$\theta$};
\draw [thick, cyclamen] (0,0) -- (1.5,3.15);
\node [cyclamen, above right] at (1.5,3.15) {$\theta_x$};
\draw [thick, cyclamen] (0,0) -- (1.5,-3.15);
\node [cyclamen, below right] at (1.5,-3.15) {$\theta_y$};
% Vectors
\draw [ultra thick, cyclamen, ->] (0,0) -- (1.7,2.2);
\draw [ultra thick, cyclamen, ->] (0,0) -- (1.9,0.6);
\draw [ultra thick, cyclamen, ->] (0,0) -- (1.6,-2);
\end{tikzpicture}
\caption{Momentum space at fixed azimuthal angle ("$hd$" stands for
"horizontal direction"). Some vectors with $p_h \in [p_h, p_h +dp_h]$
are evidenced.}\label{fig:sphere}
}
\end{figure}
As can be seen, $\theta_x$ and $\theta_y$ are the minimum and maximum tilt
angles of these vectors respectively, because if a point had $p_h \in [p_h; p_h
+ dp_h]$ and $\theta < \theta_x$ or $\theta > \theta_y$, it would have $p >
p_{\text{max}}$. Therefore their values are easily computed as follows:
$$
p_h = p_{\text{max}} \sin{\theta_x} = p_{\text{max}} \sin{\theta_y}
\thus \sin{\theta_x} = \sin{\theta_y} = \frac{p_h}{p_{\text{max}}}
$$
Since the average value of a quantity is computed by integrating it over all
the possible values of the variables it depends on, weighted by their
probability, one gets:
$$
\langle |p_v| \rangle (p_h) dp_h = \int_{\theta_x}^{\theta_y}
d\theta P(\theta) \cdot P(p) \, dp \cdot |p_v| (\theta)
$$
where $d\theta P(\theta)$ is the probability of generating a point with $\theta
\in [\theta; \theta + d\theta]$ and $P(p) \, dp$ is the probability of
generating a point with $\vec{p}$ in the pink region in @fig:sphere, given a
fixed $\theta$.
The easiest to deduce is $P(p) \, dp$: since $p$ is evenly distributed, it
follows that:
$$
P(p) \, dp = \frac{1}{p_{\text{max}}} dp
$$
with:
$$
dp = p(p_h + dp_h) - p(p_h)
= \frac{p_h + dp_h}{\sin{\theta}} - \frac{p_h}{\sin{\theta}}
= \frac{dp_h}{\sin{\theta}}
$$
hence
$$
P(p) \, dp = \frac{1}{p_{\text{max}}} \cdot \frac{1}{\sin{\theta}} \, dp_h
$$
For $d\theta P(\theta)$, instead, the same considerations made in @sec:3
apply, from which:
$$
P(\theta) d\theta = \frac{1}{2} \sin{\theta} d\theta
$$
Ultimately, having found all the pieces, the integral must be computed:
\begin{align*}
\langle |p_v| \rangle (p_h) dp_h &= \int_{\theta_x}^{\theta_y}
d\theta \frac{1}{2} \sin{\theta} \cdot
\frac{1}{p_{\text{max}}} \frac{1}{\sin{\theta}} \, dp_h \cdot
p_h \frac{|\cos{\theta}|}{\sin{\theta}}
\\
&= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \int_{\theta_x}^{\theta_y}
d\theta \frac{|\cos{\theta}|}{\sin{\theta}}
\\
&= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \cdot \mathscr{O}
\end{align*}
Then, with a bit of math:
\begin{align*}
\mathscr{O} &= \int_{\theta_x}^{\theta_y} d\theta
\frac{|\cos{\theta}|}{\sin{\theta}}
\\
&= \int_{\theta_x}^{\frac{\pi}{2}} d\theta \frac{\cos{\theta}}{\sin{\theta}}
- \int_{\frac{\pi}{2}}^{\theta_y} d\theta \frac{\cos{\theta}}{\sin{\theta}}
\\
&= \left[ \ln{(\sin{\theta})} \right]_{\theta_x}^{\frac{\pi}{2}}
- \left[ \ln{(\sin{\theta})} \right]_{\frac{\pi}{2}}^{\theta_y}
\\
&= \ln{(1)} -\ln{ \left( \frac{p_h}{p_{\text{max}}} \right) }
- \ln{ \left( \frac{p_h}{p_{\text{max}}} \right) } + \ln{(1)}
\\
&= 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) }
\end{align*}
\newpage
Hence, in conclusion:
\begin{align*}
\langle |p_v| \rangle (p_h) dp_h &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}}
\cdot 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) }
\\
&= \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) }
\frac{p_h}{p_{\text{max}}} dp_h
\end{align*}
Namely:
\begin{figure}
\hypertarget{fig:plot}{%
\centering
\begin{tikzpicture}
\definecolor{cyclamen}{RGB}{146, 24, 43}
% Axis
\draw [thick, <->] (0,5) -- (0,0) -- (11,0);
\node [below right] at (11,0) {$p_h$};
\node [above left] at (0,5) {$\langle |p_v| \rangle$};
% Plot
\draw [domain=0.001:10, smooth, variable=\x,
cyclamen, ultra thick] plot ({\x},{12*ln(10/\x)*\x/10});
\node [cyclamen, below] at (10,0) {$p_{\text{max}}$};
\end{tikzpicture}
\caption{Plot of the expected distribution.}\label{fig:plot}
}
\end{figure}
**The code**
The same distribution should be found by generating and binning points in a
proper way.
A number of $N = 50'000$ points were generated as couples of values ($p$,
$\theta$), with $p$ evenly distributed between 0 and $p_{\text{max}}$ and
$\theta$ given by the same procedure described in @sec:3, namely:
$$
\theta = \text{acos}(1 - 2x)
$$
with $x$ uniformly distributed between 0 and 1.
The data binning turned out to be a bit challenging. Once the couple ($p$,
$\theta$) was generated and $p_h$ computed, the bin the latter falls into must
be found. If $n$ is the number of bins the range [0, $p_{\text{max}}$] is to
be divided into, then the width $w$ of each bin is given by:
$$
w = \frac{p_{\text{max}}}{n}
$$
and the $j^{\text{th}}$ bin $p_h$ falls into is:
$$
j = \text{floor} \left( \frac{p_h}{w} \right)
$$
where 'floor' is the function which gives the greatest integer less than or
equal to its argument, and the bins are counted starting from zero.
Then, a vector in which the $j^{\text{th}}$ entry contains the sum $S_j$ of all
the $|p_v|$s relative to each $p_h$ fallen into the $j^{\text{th}}$ bin and the
number num$_j$ of entries in the $j^{\text{th}}$ bin was reiteratively updated.
At the end, the average value of $|p_v|_j$ was computed as $S_j /
\text{num}_j$.
For the sake of clarity, for each generated couple, it works like this:
- the couple $[p; \theta]$ is generated;
- $p_h$ and $p_v$ are computed;
- the $j^{\text{th}}$ bin in which $p_h$ goes in is computed;
- num$_j$ is increased by 1;
- $S_j$ (which is zero at the beginning of everything) is increased by a factor
$|p_v|$.
At the end, $\langle |p_v| \rangle_j$ = $\langle |p_v| \rangle (p_h)$ where:
$$
p_h = j \cdot w + \frac{w}{2} = w \left( j + \frac{1}{2} \right)
$$
The following result was obtained:
![Histogram of the obtained distribution.](images/dip.pdf)
113
notes/sections/5.md Normal file
View File
@ -0,0 +1,113 @@
# Exercise 5
**Numerically compute an integral value via Monte Carlo approaches**
The integral to be evaluated is the following:
$$
I = \int\limits_0^1 dx \, e^x
$$
whose exact value is 1.7182818285...
The three most popular MC methods were applied: plain Monte Carlo, Miser and
Vegas (besides their popularity, these three methods were chosen for being
the only ones implemented in the GSL library).
## Plain Monte Carlo
When the integral $I$ of a function $f$ over a volume $V$ must be evaluated in
an $n$-dimensional space, the simplest MC approach is to sample $N$
points $x_i$ evenly distributed in $V$ and approximate $I$ as:
$$
I \sim I_N = \frac{V}{N} \sum_{i=1}^N f(x_i) = V \cdot \langle f \rangle
$$
with $I_N \rightarrow I$ for $N \rightarrow + \infty$ by the law of large
numbers. Hence, the variance can be simply estimated as:
$$
\sigma^2 =
\frac{1}{N-1} \sum_{i = 1}^N \left( f(x_i) - \langle f \rangle \right)^2
$$
thus, the error on $I_N$ decreases as $1/\sqrt{N}$. Unlike in deterministic
methods, this estimate of the error is not a strict error bound: random
sampling may miss important features of the integrand, which can result in
an underestimate of the error.
Since the proximity of $I_N$ to $I$ depends on $N$, the accuracy of the
method is determined by the number of function calls (as shown in @tbl:MC).
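A minimal sketch of the plain MC call with `gsl_monte_plain.h` follows; the
wrapper `expx` is just $e^x$ with the signature required by
`gsl_monte_function`, and the call count matches the first column of @tbl:MC.

```c
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_monte_plain.h>

/* integrand with the signature required by gsl_monte_function */
double expx(double *x, size_t dim, void *params) {
  return exp(x[0]);
}

int main(void) {
  double lower[] = {0}, upper[] = {1};       // integration region [0, 1]
  gsl_monte_function f = { &expx, 1, NULL }; // integrand, dimension, params

  gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
  gsl_monte_plain_state *s = gsl_monte_plain_alloc(1);

  double res, err;
  gsl_monte_plain_integrate(&f, lower, upper,
                            1,        // dimension
                            500000,   // function calls
                            r, s, &res, &err);
  printf("I = %.10f ± %.10f\n", res, err);

  gsl_monte_plain_free(s);
  gsl_rng_free(r);
  return 0;
}
```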
-----------------------------------------------------------------
500'000 calls 5'000'000 calls 50'000'000 calls
--------- ----------------- ------------------ ------------------
result 1.7166435813 1.7181231109 1.7183387184
$\sigma$ 0.0006955691 0.0002200309 0.0000695809
-----------------------------------------------------------------
Table: MC results and errors with different numbers of function
calls. {#tbl:MC}
## Miser
The MISER algorithm is based on recursive stratified sampling.
On each recursion step the integral and the error are estimated using a plain
Monte Carlo algorithm. If the error estimate is larger than the required
accuracy, the integration volume is divided into sub-volumes and the procedure
is recursively applied to sub-volumes.
This technique aims to reduce the overall integration error by concentrating
integration points in the regions of highest variance.
The idea of stratified sampling begins with the observation that for two
disjoint regions $a$ and $b$ with Monte Carlo estimates of the integral
$E_a (f)$ and $E_b (f)$ and variances $\sigma_a^2 (f)$ and $\sigma_b^2 (f)$,
the variance $V (f)$ of the combined estimate $E (f)$:
$$
E (f)= \frac {1}{2} \left( E_a (f) + E_b (f) \right)
$$
is given by,
$$
V(f) = \frac{\sigma_a^2 (f)}{4 N_a} + \frac{\sigma_b^2 (f)}{4 N_b}
$$
It can be shown that this variance is minimized by distributing the points
such that:
$$
\frac{N_a}{N_a + N_b} = \frac{\sigma_a}{\sigma_a + \sigma_b}
$$
Hence the smallest error estimate is obtained by allocating sample points in proportion
to the standard deviation of the function in each sub-region.
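The GSL interface for MISER, provided by `gsl_monte_miser.h`, mirrors the plain
MC one: only the state type changes. A minimal sketch, reusing the same kind of
`gsl_monte_function` as in the plain MC example above:

```c
#include <gsl/gsl_rng.h>
#include <gsl/gsl_monte_miser.h>

/* Integrate `f` over [0, 1] with MISER, with the same conventions
 * as the plain MC sketch above. */
void miser_integrate(gsl_monte_function *f, size_t calls,
                     double *res, double *err) {
  double lower[] = {0}, upper[] = {1};
  gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
  gsl_monte_miser_state *s = gsl_monte_miser_alloc(1);
  gsl_monte_miser_integrate(f, lower, upper, 1, calls, r, s, res, err);
  gsl_monte_miser_free(s);
  gsl_rng_free(r);
}
```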
---------------------------------------------------------
calls plain MC Miser Vegas
------------ -------------- -------------- --------------
500'000 1.7166435813 1.7182850738 1.7182818354
5'000'000 1.7181231109 1.7182819143 1.7182818289
50'000'000 1.7183387184 1.7182818221 1.7182818285
---------------------------------------------------------
Table: Results of the three methods. {#tbl:results}
---------------------------------------------------------
calls plain MC Miser Vegas
------------ -------------- -------------- --------------
500'000 0.0006955691 0.0000021829 0.0000000137
5'000'000 0.0002200309 0.0000001024 0.0000000004
50'000'000 0.0000695809 0.0000000049 0.0000000000
---------------------------------------------------------
Table: $\sigma$s of the three methods. {#tbl:sigmas}
85
notes/sections/6.md Normal file
View File
@ -0,0 +1,85 @@
# Exercise 6
**Generating points according to Fraunhofer diffraction**
The diffraction of a plane wave through a round slit must be simulated by
generating $N =$ 50'000 points according to the intensity distribution
$I(\theta)$ on a screen at a great distance $L$ from the slit itself:
$$
I(\theta) = \frac{E^2}{2} \left( \frac{2 \pi a^2 \cos{\theta}}{L}
\frac{J_1(x)}{x} \right)^2 \with x = k a \sin{\theta}
$$
where:
- $E$ is the electric field amplitude, default set $E = \SI{1e4}{V/m}$;
- $a$ is the radius of the slit aperture, default set $a = \SI{0.01}{m}$;
- $\theta$ is the angle specified in @fig:fenditure;
- $J_1$ is the Bessel function of first order;
- $k$ is the wavenumber, default set $k = \SI{1e-4}{m^{-1}}$;
- $L$ default set $L = \SI{1}{m}$.
\begin{figure}
\hypertarget{fig:fenditure}{%
\centering
\begin{tikzpicture}
\definecolor{cyclamen}{RGB}{146, 24, 43}
% Walls
\draw [thick] (-1,3) -- (1,3) -- (1,0.3) -- (1.2,0.3) -- (1.2,3)
-- (9,3);
\draw [thick] (-1,-3) -- (1,-3) -- (1,-0.3) -- (1.2,-0.3) -- (1.2,-3)
-- (9,-3);
\draw [thick] (10,3) -- (9.8,3) -- (9.8,-3) -- (10,-3);
% Lines
\draw [thick, gray] (0.7,0.3) -- (0.5,0.3);
\draw [thick, gray] (0.7,-0.3) -- (0.5,-0.3);
\draw [thick, gray] (0.6,0.3) -- (0.6,-0.3);
\draw [thick, gray] (1.2,0) -- (9.8,0);
\draw [thick, gray] (1.2,-0.1) -- (1.2,0.1);
\draw [thick, gray] (9.8,-0.1) -- (9.8,0.1);
\draw [thick, cyclamen] (1.2,0) -- (9.8,-2);
\draw [thick, cyclamen] (7,0) to [out=-90, in=50] (6.6,-1.23);
% Nodes
\node at (0,0) {$2a$};
\node at (5.5,0.4) {$L$};
\node [cyclamen] at (5.5,-0.4) {$\theta$};
\node [rotate=-90] at (10.2,0) {screen};
\end{tikzpicture}
\caption{Fraunhofer diffraction.}\label{fig:fenditure}
}
\end{figure}
Once again, $\theta$, which must be evenly distributed on the half sphere, can
be generated only as a function of a variable $x$ uniformly distributed between
0 and 1. Therefore:
\begin{align*}
\frac{d^2 P}{d\omega^2} = const = \frac{1}{2 \pi}
&\thus d^2 P = \frac{1}{2 \pi} d\omega^2 =
\frac{1}{2 \pi} d\phi \sin{\theta} d\theta \\
&\thus \frac{dP}{d\theta} = \int_0^{2 \pi} d\phi \frac{1}{2 \pi} \sin{\theta}
= \frac{1}{2 \pi} \sin{\theta} \, 2 \pi = \sin{\theta}
\end{align*}
\begin{align*}
\theta = \theta (x) &\thus
\frac{dP}{d\theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d\theta} \right|
= \left. \frac{dP}{dx} \middle/ \, \left| \frac{d\theta}{dx} \right| \right.
\\
&\thus \sin{\theta} = \left. 1 \middle/ \, \left|
\frac{d\theta}{dx} \right| \right.
\end{align*}
Since $\theta \in [0, \pi/2]$, then the absolute value symbol can be omitted:
\begin{align*}
\frac{d\theta}{dx} = \frac{1}{\sin{\theta}}
&\thus d\theta \sin(\theta) = dx
\\
&\thus - \cos (\theta') |_{0}^{\theta} = x(\theta) - x(0) = x - 0 = x
\\
&\thus - \cos(\theta) + 1 =x
\\
&\thus \theta = \text{acos} (1 -x)
\end{align*}
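A minimal sketch of this inversion, together with the intensity $I(\theta)$ it
would feed into (evaluated with `gsl_sf_bessel_J1`), is shown below; how the
generated points are then distributed according to $I(\theta)$ is beyond this
sketch.

```c
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_sf_bessel.h>

/* θ evenly distributed on the half sphere, from x uniform in [0, 1) */
double sample_theta(gsl_rng *r) {
  return acos(1 - gsl_rng_uniform(r));
}

/* I(θ) with the default parameters listed above
 * (note: J₁(x)/x → 1/2 for x → 0) */
double intensity(double theta) {
  double E = 1e4, a = 0.01, k = 1e-4, L = 1;
  double x = k * a * sin(theta);
  return 0.5 * pow(E, 2) *
    pow(2 * M_PI * a*a * cos(theta) / L * gsl_sf_bessel_J1(x) / x, 2);
}
```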
10
shell.nix Normal file
View File
@ -0,0 +1,10 @@
with import <nixpkgs> { };
stdenv.mkDerivation {
name = "jeypsi";
GMP_PATH = "${gmp}/lib";
buildInputs = with pkgs; [ gdb gsl gmp pkgconfig ];
shellHook = "exec fish";
}