commit cf27610d537e0a198c973b254dff836aa317af53 Author: rnhmjoj Date: Fri Mar 6 02:24:32 2020 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cca1883 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +bin/ +*.pdf +.stignore +todo diff --git a/ex-1/main.c b/ex-1/main.c new file mode 100644 index 0000000..4341cf2 --- /dev/null +++ b/ex-1/main.c @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Function that compare doubles for sorting: + * x > y ⇒ 1 + * x == y ⇒ 0 + * x < y ⇒ -1 + */ +int cmp_double (const void *xp, const void *yp) { + double x = *(double*)xp, + y = *(double*)yp; + return x > y ? 1 : -1; +} + + +/* This is a wrapper needed by `landau_cdf` because + * the numerical integration expects a function + * with parameters. + */ +double landau_pdf(double x, void* params) { + return gsl_ran_landau_pdf(x); +} + + +/* The cumulative function of the Landau distribution + * calculated by numerical integration. + */ +double landau_cdf(double x, void* params) { + // create the integrand + gsl_function pdf; + pdf.function = &landau_pdf; + pdf.params = NULL; + + // set up the integration + double res, err; + size_t iter = 500; + gsl_integration_workspace* w = + gsl_integration_workspace_alloc(iter); + + // clip values too small + if (x < -200) x = -200; + + // We integrate the pdf in [x, +∞) instead + // of (-∞, x] to avoid a singularity and + // then return 1 - P. + gsl_integration_qagiu( + &pdf, // integrand + x, // lower bound + 1e-10, 1e-6, // abs and rel error + iter, // max iteration + w, // "workspace" + &res, &err); // result, abs error + + // free the memory + gsl_integration_workspace_free(w); + + return 1 - res; +} + + +/* Another wrapper: this time it is needed by + * `landau_qdf` to solve the equation: + * `landau_cdf(x) = p0` for x + */ +double landau_cdf_root(double x, void* params) { + double p0 = *(double*)params; + return p0 - landau_cdf(x, NULL); +} + + +/* The quantile function (inverse CDF) of the Landau + * distribution calculated by numerical root method. + */ +double landau_qdf(double p0) { + // create function + gsl_function cdf; + cdf.function = &landau_cdf_root; + cdf.params = &p0; + + // use the Brent method + gsl_root_fsolver* s = + gsl_root_fsolver_alloc(gsl_root_fsolver_brent); + + // search interval + double low = -1000, // lower value + upp = 100000; // upper value + gsl_root_fsolver_set(s, &cdf, low, upp); + + // iterative search + size_t iter = 1000; // max iteration + int stat = GSL_CONTINUE; + for (size_t i=0; stat==GSL_CONTINUE && isum_plain); + fprintf(stderr, "err: %f\n", abserr); + + gsl_sum_levin_utrunc_free(s); + free(terms); + + return sqrt(2*M_PI)/x * sum; +} + + +/* This is a wrapper needed by `numeric_mode` because + * the minimization expects a function to be minimized and not + * maximized. + */ +double neg_landau_pdf(double x, void* params) { + return (-1) * gsl_ran_landau_pdf(x); +} + + +/* Compute numerically the mode of the Landau distribution. 
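+ * The mode is found by minimising `neg_landau_pdf` with the GSL golden-section minimiser, bracketed in [min, max] and iterated (at most 100 times) until the interval shrinks below the requested precision.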
+ */ +double numeric_mode (double min, double max) { + // create funtion + gsl_function pdf; + pdf.function = &neg_landau_pdf; + pdf.params = NULL; + + // initialize minimization + double guess = 0; + int iter = 0; + int max_iter = 100; + double prec = 1e-7; + int status; + const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection; + gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T); + gsl_min_fminimizer_set(s, &pdf, guess, min, max); + + // minimization + do { + iter++; + status = gsl_min_fminimizer_iterate (s); + guess = gsl_min_fminimizer_x_minimum (s); + min = gsl_min_fminimizer_x_lower (s); + max = gsl_min_fminimizer_x_upper (s); + status = gsl_min_test_interval (min, max, prec, prec); + + } while (status == GSL_CONTINUE && iter < max_iter); + + // Free memory + gsl_min_fminimizer_free (s); + return guess; +} + + +/* This is the function to be minimized in `numeric_FWHM`. + */ +double abs_landau_pdf(double x, void* params_) { + double* params = ((double *) params_); + return fabs(gsl_ran_landau_pdf(x) - params[0]); +} + + +/* Compute numerically the FWHM of the Landau distribution. + */ +double numeric_fwhm (double min_lim, double mode, double max_lim) { + // create funtion + gsl_function pdf; + pdf.function = &abs_landau_pdf; + double params [1]; + params[0]= gsl_ran_landau_pdf(mode)/2; + pdf.params = params; + + // initialize minimization for x₋ + double guess = mode - 1; + double min, max; + int iter = 0; + int max_iter = 100; + double prec = 1e-7; + int status; + const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection; + gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T); + gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode); + + // minimization + do { + iter++; + status = gsl_min_fminimizer_iterate (s); + guess = gsl_min_fminimizer_x_minimum (s); + min = gsl_min_fminimizer_x_lower (s); + max = gsl_min_fminimizer_x_upper (s); + status = gsl_min_test_interval (min, max, prec, prec); + } while (status == GSL_CONTINUE && iter < max_iter); + double x_low = guess; + + // initialize minimization for x₊ + guess = mode + 1; + gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim); + + // minimization + do { + iter++; + status = gsl_min_fminimizer_iterate (s); + guess = gsl_min_fminimizer_x_minimum (s); + min = gsl_min_fminimizer_x_lower (s); + max = gsl_min_fminimizer_x_upper (s); + status = gsl_min_test_interval (min, max, prec, prec); + + } while (status == GSL_CONTINUE && iter < max_iter); + double x_upp = guess; + + // Free memory + gsl_min_fminimizer_free (s); + return x_upp - x_low; +} + + +/* Here we generate random numbers in a uniform + * range and by using the quantile we map them + * to a Landau distribution. Then we generate an + * histogram to check the correctness. 
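+ * The sample is then compared with the exact distribution through a Kolmogorov-Smirnov test, and the observed mode and FWHM of the histogram are checked against the values computed numerically from the pdf.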
+ */ +int main(int argc, char** argv) { + // initialize an RNG + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // prepare histogram + size_t samples = 100000; + double* sample = calloc(samples, sizeof(double)); + size_t bins = 40; + double min = -20; + double max = 20; + gsl_histogram* hist = gsl_histogram_alloc(bins); + gsl_histogram_set_ranges_uniform(hist, min, max); + + /* sample points from the Landau distribution + * and fill the histogram + */ + double x; + for(size_t i=0; i D) + D = d; + } + + // Kolmogorov-Smirnov test + double beta = kolmogorov_cdf(D, samples); + fprintf(stderr, "\n# K-S test results\n"); + fprintf(stderr, "D: %f\n", D); + fprintf(stderr, "α: %g\n", 1 - beta); + + // Mode comparison + // + // find the bin with the maximum number of events + double mode_o, maxbin = 0; + double f_mode_o = 0; + double m1, m2 = 0; + for(size_t i=0; ibin[i]; + if (m1 > m2){ + m2 = m1; + maxbin = (double)i; + f_mode_o = hist->bin[i]; + } + } + fprintf(stderr, "\nstep:%.2f\n ", (max - min)/bins); + f_mode_o = f_mode_o/samples; + mode_o = min + (maxbin + 0.5)*(max - min)/bins; + + // print the results + double mode_e = numeric_mode(min, max); + fprintf(stderr, "\n# Numeric mode\n"); + fprintf(stderr, "expected mode: %.7f\n", mode_e); + fprintf(stderr, "observed mode: %.3f\n", mode_o); + + // FWHM comparison + // + // find the bins x₋ and x₊ + double half = f_mode_o*samples/2; + m2 = samples; + double x_low = 0; + double x_upp = 0; + double diff; + for(size_t i=0; ibin[i]; + diff = fabs(m1 - half); + if (diff < m2){ + m2 = diff; + x_low = (double)i; + } + } + m2 = samples; + for(size_t i=maxbin; ibin[i]; + diff = fabs(m1 - half); + if (diff < m2){ + m2 = diff; + x_upp = (double)i; + } + } + x_low = min + (x_low + 0.5)*(max - min)/bins; + x_upp = min + (x_upp + 0.5)*(max - min)/bins; + double fwhm_o = x_upp - x_low; + + // print the results + fprintf(stderr, "\n# Numeric FWHM\n"); + double fwhm_e = numeric_fwhm(min, mode_e, max); + fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e); + fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o); + + + // print the counts + // gsl_histogram_fprintf(stdout, hist, "%g", "%g"); + + // clean up and exit + gsl_histogram_free(hist); + gsl_rng_free(r); + free(sample); + return EXIT_SUCCESS; +} diff --git a/ex-1/pdf-plot.py b/ex-1/pdf-plot.py new file mode 100755 index 0000000..db0bbc7 --- /dev/null +++ b/ex-1/pdf-plot.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +x, y = loadtxt(sys.stdin, unpack=True) +title('Landau distribution', loc='right') +plot(x, y, color='#92182b') +show() diff --git a/ex-1/pdf.c b/ex-1/pdf.c new file mode 100644 index 0000000..6baea2f --- /dev/null +++ b/ex-1/pdf.c @@ -0,0 +1,18 @@ +#include +#include + +int main(int argc, char** argv) { + size_t n = 1000; + double + left = -10, + right = +10, + dx = (right - left)/n; + + double x = left; + for (size_t i = 0; i < n; i++) { + x += dx; + printf("%f %f\n", x, gsl_ran_landau_pdf(x)); + } + + return EXIT_SUCCESS; +} diff --git a/ex-1/plot.py b/ex-1/plot.py new file mode 100755 index 0000000..4d554db --- /dev/null +++ b/ex-1/plot.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +a, b, f = loadtxt(sys.stdin, unpack=True) +title('Landau distribution sampling', loc='right') +hist(a, np.insert(b, 0, a[0]), weights=f/sum(f)) +annotate(f'N={sum(f)}\nbins={len(f)}', (50, 0.2)) +show() diff --git a/ex-2/fancier.c b/ex-2/fancier.c new file mode 100644 index 0000000..c3c4fd1 --- /dev/null +++ b/ex-2/fancier.c @@ 
-0,0 +1,511 @@ +#include +#include +#include +#include +#include + +/* The Euler-Mascheroni constant is here computed to + * arbitrary precision using GMP rational numbers + * with the formula: + * + * γ = A(N)/B(N) - C(N)/B(N)² - log(N) + * + * with: + * + * A(N) = Σ_(k = 1)^(k_max) (N^k/k!) * H(k) + * B(N) = Σ_(k = 0)^(k_max) (N^k/k!) + * C(N) = 1/4N Σ_(k = 0)^(2N) ((2k)!)^3/((k!)^4*(16N))^2k + * H(k) = Σ_(j = 1)^(k) (1/k), the k-th harmonic number + * + * where N is computed from D as written below and k_max is + * currently 5*N, chosen in a completely arbitrary way. + * + * source: http://www.numberworld.org/y-cruncher/internals/formulas.html + * + */ + +/* `mpq_dec_str(z, n)` + * returns the decimal representation of the + * rational z as a string, up to n digits. + */ +char* mpq_dec_str(mpq_t z, size_t mantissa_size) { + mpz_t num, den, quot, rem; + mpz_inits(num, den, quot, rem, NULL); + + // calculate integral part + mpq_get_num(num, z); + mpq_get_den(den, z); + mpz_tdiv_qr(quot, rem, num, den); + + // store the integral part + size_t integral_size = mpz_sizeinbase(quot, 10); + char* repr = calloc(integral_size + 1 + mantissa_size + 1, sizeof(char)); + mpz_get_str(repr, 10, quot); + + // add decimal point + repr[integral_size] = '.'; + + // calculate the mantissa + mpz_t cur; + mpz_init(cur); + size_t digit; + for (size_t i = 0; i < mantissa_size; i++) { + // calculate i-th digit as: + // digit = (remainder * 10) / denominator + // new_ramainder = (remainder * 10) % denominator + mpz_set(cur, rem); + mpz_mul_ui(cur, rem, 10); + mpz_tdiv_qr(quot, rem, cur, den); + + // store digit in mantissa + digit = mpz_get_ui(quot); + sprintf(repr + integral_size + 1 + i, "%ld", digit); + } + + // free memory + mpz_clears(num, den, quot, rem, cur, NULL); + + return repr; +} + + +void mpq_fprintf(FILE *fp, size_t digits, char* fmt, ...) { + va_list args; + va_start(args, fmt); + + char* str; + char fmtc[2] = "%x"; + + for (char* p = fmt; *p; p++) { + if (*p != '%') { + fprintf(fp, "%c", *p); + continue; + } + switch (*++p) { + case 's': + str = va_arg(args, char*); + fputs(str, fp); + break; + case 'l': + if (*++p == 'd') + fprintf(fp, "%ld", va_arg(args, size_t)); + break; + case 'q': + str = mpq_dec_str(*va_arg(args, mpq_t*), digits); + fputs(str, fp); + free(str); + break; + default: + fmtc[1] = *p; + gmp_fprintf(fp, fmtc, *va_arg(args, mpq_t*)); + } + } + va_end(args); +} + + +/* `mpq_log2(z, D)` + * calculate log(2) to D decimal places + * using the series + * + * log(2) = Σ_{k=1} 1/(k 2^k) + * + * The error of the n-th partial sum is + * estimated as + * + * Δn = 1/(n(n+2) 2^(n-1)). + * + * So to fix the first D decimal places + * we evaluate up to n terms such that + * + * Δn < 1/10^D + */ +void mpq_log2(mpq_t rop, size_t digits) { + mpq_t sum, term; + mpq_inits(sum, term, NULL); + + fprintf(stderr, "[mpq_log2] decimal digits: %ld\n", digits); + + /* calculate n, the number of required terms */ + size_t n; + mpz_t temp, prec, width; mpz_inits(temp, prec, width, NULL); + // prec = 10^digits + mpz_ui_pow_ui(prec, 10, digits + 2); + + for (n = 1;; n++) { + // width = n*(n + 2)*2^(n - 1) + mpz_set_ui(width, n); + mpz_set_ui(temp, n); + mpz_add_ui(temp, temp, 2); + mpz_mul(width, width, temp); + mpz_sub_ui(temp, temp, 3); + mpz_mul_2exp(width, width, mpz_get_ui(temp)); + + if (mpz_cmp(width, prec) > 0) break; + } + fprintf(stderr, "[mpq_log2] series terms: %ld\n", n); + + /* calculate series */ + for (size_t i=1; i 50 ? 
50 : digits, + "[mpq_log2] log(2)=%q...\n", &sum); + + mpq_set(rop, sum); + + // free memory + mpq_clears(sum, term, NULL); + mpz_clears(temp, prec, width, NULL); +} + + +/* `mpq_log_ui(z, x, D)` + * calculates the logarithm of x up + * to D decimal places using the formula + * + * log(x) = log(a(x) * 2^(n-1)) + * = log(a) + (n-1) log(2) + * + * where 1 0) break; + } + + fprintf(stderr, "[mpq_log_ui] series terms: %ld\n", n); + fprintf(stderr, "[mpq_log_ui] decimal digits: %ld\n", digits); + + + /* calculate the series of + * log(A) = log((1+y)/(1-y)) = 2 Σ_{k=0} y^(2k+1)/(2k+1) + */ + mpq_t sum, term; mpq_inits(sum, term, NULL); + + for (size_t i = 0; i <= n; i++) { + // numerator: y^(2i+1) + mpq_set(temp, y); + mpz_pow_ui(mpq_numref(temp), mpq_numref(temp), 2*i + 1); + mpz_pow_ui(mpq_denref(temp), mpq_denref(temp), 2*i + 1); + mpq_canonicalize(temp); + mpq_set(term, temp); + + // denominator: 2i+1 + mpq_set_ui(temp, 2*i + 1, 1); + mpq_div(term, term, temp); + + // sum the term + mpq_add(sum, sum, term); + } + + // multiply by 2 + mpz_mul_ui(mpq_numref(sum), mpq_numref(sum), 2); + mpq_canonicalize(sum); + + // calculate log(2)*(m-1) + mpq_t log2; mpq_init(log2); + mpq_log2(log2, digits); + + mpz_mul_ui(mpq_numref(log2), mpq_numref(log2), m-1); + mpq_canonicalize(log2); + + // series += log(2)*(m-1) + mpq_add(sum, sum, log2); + mpq_set(rop, sum); + + mpq_fprintf(stderr, digits > 50 ? 50 : digits, + "[mpq_log_ui] log(%ld)=%q...\n", x, sum); + + // free memory + mpq_clears(a, y, temp, sum, term, log2, + y4, y2, y2n, t1, t2, NULL); + mpz_clears(xz, power, NULL); +} + + +/* A series */ +void a_series(mpq_t rop, size_t N) { + mpq_t sum, term, factq, harm, recip; + mpz_t fact, power, stop; + + mpq_inits(sum, term, factq, harm, recip, NULL); + mpz_inits(fact, power, stop, NULL); + mpz_set_ui(stop, N); + + for (size_t k = 0; k < 5*N; k++) { + mpz_fac_ui(fact, k); + mpz_pow_ui(power, stop, k); + + mpq_set_z(term, power); + mpq_set_z(factq, fact); + mpq_div(term, term, factq); + mpq_mul(term, term, term); + + if (k > 0) + mpq_set_ui(recip, 1, k); + mpq_add(harm, harm, recip); + mpq_mul(term, term, harm); + mpq_add(sum, sum, term); + } + + mpq_set(rop, sum); + + // free memory + mpq_clears(sum, term, factq, harm, recip, NULL); + mpz_clears(fact, power, stop, NULL); +} + + +/* B series */ +void b_series(mpq_t rop, size_t N){ + mpq_t sum, term, factq; + mpz_t fact, power, stop; + + mpq_inits(sum, term, factq, NULL); + mpz_inits(fact, power, stop, NULL); + mpz_set_ui(stop, N); + + for (size_t k = 0; k < 5*N; k++) { + mpz_fac_ui(fact, k); + mpz_pow_ui(power, stop, k); + + mpq_set_z(term, power); + mpq_set_z(factq, fact); + mpq_div(term, term, factq); + + mpq_mul(term, term, term); + mpq_add(sum, sum, term); + } + + mpq_set(rop, sum); + + // free memory + mpq_clears(sum, term, factq, NULL); + mpz_clears(fact, power, stop, NULL); +} + + +/* C series */ +void c_series(mpq_t rop, size_t N) { + mpq_t sum, term, stop2, fact2q; + mpz_t fact1, fact2, power, stop1; + + mpq_inits(sum, term, stop2, fact2q, NULL); + mpz_inits(fact1, fact2, power, stop1, NULL); + mpq_set_ui(stop2, 4*N, 1); + + for (size_t k = 0; k <= 2*N; k++) { + mpz_fac_ui(fact1, 2*k); + mpz_pow_ui(fact1, fact1, 3); + + mpz_fac_ui(fact2, k); + mpz_pow_ui(fact2, fact2, 4); + + mpz_set_ui(stop1, 16*N); + mpz_pow_ui(stop1, stop1, 2*k); + mpz_mul(fact2, fact2, stop1); + + mpq_set_z(term, fact1); + mpq_set_z(fact2q, fact2); + + mpq_div(term, term, fact2q); + mpq_add(sum, sum, term); + } + mpq_div(sum, sum, stop2); + + mpq_set(rop, sum); + + // free 
memory + mpq_clears(sum, term, stop2, fact2q, NULL); + mpz_clears(fact1, fact2, power, stop1, NULL); +} + + +int main(int argc, char** argv) { + /* if no argument is given, show the usage */ + if (argc != 2) { + fprintf(stderr, "usage: %s D\n", argv[0]); + fprintf(stderr, "Computes γ up to D decimal places.\n"); + return EXIT_FAILURE; + } + + mpq_t exact, indicator; + mpq_inits(exact, indicator, NULL); + + // value of γ up to 500 digits + mpq_set_str( + exact, + "47342467929426395252720948664321583815681737620205" + "78046578043524355342995304421105234825234769529727" + "60710541774258570118818437511973945213031180449905" + "77067110892216494927227098092412641670668896104172" + "59380467495735805599918127376547804186055507379270" + "09659478275476110597673948659342592173551684412254" + "47940426607883070678658320829056222506449877887289" + "02099638646695565284675455912794915138438516540912" + "22061900908485016328626399040675802962645141413722" + "567840383761005003563012969560659130635723002086605/" + "82018681765164048732047980836753451023877954010252" + "60062364748361667340168652059998708337602423525120" + "45225158774173869894826877890589130978987229877889" + "33367849273189687823618289122425446493605087108634" + "04387981302669131224273324182166778131513056804533" + "58955006355665628938266331979307689540884269372365" + "76288367811322713649805442241450184023209087215891" + "55369788474437679223152173114447113970483314961392" + "48250188991402851129033493732164230227458717486395" + "514436574417275149404197774547389507462779807727616", + 10); + + // number with decimal expansion equal to + // 0123456789 periodic: useful to count digits + mpq_set_str( + indicator, + "137174210/1111111111", + 10); + + // number of decimal places + size_t D = atol(argv[1]); + // number of terms + size_t N = floor(1.0/4 * log(10) * (double)D); + + /* calculate series */ + mpq_t A, B, C; + mpq_inits(A, B, C, NULL); + a_series(A, N); + b_series(B, N); + c_series(C, N); + + /* calculate γ... */ + mpq_t gamma, corr, logn; + mpq_inits(gamma, corr, logn, NULL); + + mpq_log_ui(logn, N, D); + mpq_div(gamma, A, B); + mpq_div(corr, C, B); + mpq_div(corr, corr, B); + mpq_sub(gamma, gamma, corr); + mpq_sub(gamma, gamma, logn); + + /* calculate difference */ + mpq_t diff; + mpq_init(diff); + mpq_sub(diff, gamma, exact); + + /* print results */ + size_t d = D+1 > 50 ? 50 : D+1; + + fprintf(stderr, "[main] N: %ld\n", N); + mpq_fprintf(stderr, d, "[main] approx:\t%q...\n", gamma); + mpq_fprintf(stderr, d, "[main] exact:\t%q...\n", exact); + mpq_fprintf(stderr, d, "[main] diff:\t%q...\n", diff); + mpq_fprintf(stderr, d, "[main] digits:\t%q\n", indicator); + + mpq_fprintf(stdout, D+1, "%q\n", gamma); + + // free memory + mpq_clears(A, B, C, gamma, corr, logn, + exact, indicator, diff, NULL); + + return EXIT_SUCCESS; +} diff --git a/ex-2/fancy.c b/ex-2/fancy.c new file mode 100644 index 0000000..103d4c6 --- /dev/null +++ b/ex-2/fancy.c @@ -0,0 +1,89 @@ +#include +#include +#include + +// The Euler-Mascheroni constant is computed through the formula: +// +// γ = A(N)/B(N) - ln(N) +// +// with: +// +// A(N) = Σ_(k = 1)^(k_max) (N^k/k!) * H(k) +// B(N) = Σ_(k = 0)^(k_max) (N^k/k!) +// H(k) = Σ_(j = 1)^(k) (1/k) +// +// where N is computed from D as written below and k_max is the value +// at which there is no difference between two consecutive terms of +// the sum because of double precision. 
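+// In the code below N is set to ⌊2 + D⋅ln(10)/4⌋, where D is the requested number of decimal places, and the correction term C(N)/B(N)² (see fancier.c) is subtracted as well.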
+// +// source: http://www.numberworld.org/y-cruncher/internals/formulas.html + +// Partial harmonic sum h +double harmonic_sum(double n) { + double sum = 0; + for (double k = 1; k < n+1; k++) { + sum += 1/k; + } + return sum; +} + +// A series +double a_series(int N) { + double sum = 0; + double prev = -1; + for (double k = 1; sum != prev; k++) { + prev = sum; + sum += pow(((pow(N, k))/(tgamma(k+1))), 2) * harmonic_sum(k); + } + return sum; +} + +// B series +double b_series(int N){ + double sum = 0; + double prev = -1; + for (double k = 0; sum != prev; k++) { + prev = sum; + sum += pow(((pow(N, k))/(tgamma(k+1))), 2); + } + return sum; +} + +double c_series(int N) { + double sum = 0; + for (double k = 0; k < N; k++) { + sum += pow(tgamma(2*k + 1), 3)/(pow(tgamma(k + 1), 4) * pow(16.0*N, (int)2*k)); + } + return sum/(4.0*N); +} + +// Takes in input the number D of desired correct decimals +// Best result obtained with D = 15, N = 10 + +int main(int argc, char** argv) { + double exact = + 0.57721566490153286060651209008240243104215933593992; + + // If no argument is given is input, an error signal is displayed + // and the program quits + + if (argc != 2) { + fprintf(stderr, "usage: %s D\n", argv[0]); + fprintf(stderr, "Computes γ up to D decimal places.\n"); + return EXIT_FAILURE; + } + + int N = floor(2.0 + 1.0/4 * log(10) * (double)atoi(argv[1])); + double A = a_series(N); + double B = b_series(N); + double C = c_series(N); + double gamma = A/B - C/(B*B) - log(N); + + printf("N: %d\n", N); + printf("approx:\t%.30f\n", gamma); + printf("true:\t%.30f\n", exact); + printf("diff:\t%.30f\n", fabs(gamma - exact)); + printf("\t 123456789 123456789 123456789\n"); + + return EXIT_SUCCESS; +} diff --git a/ex-2/limit.c b/ex-2/limit.c new file mode 100644 index 0000000..61c7d2e --- /dev/null +++ b/ex-2/limit.c @@ -0,0 +1,43 @@ +#include +#include +#include + +/* Compute the Euler_Mascheroni constant through the formula: + * + * γ = lim_{M → +inf} sum_{k = 1}^{M} bin{M, k} (-1)^k / k * ln(Γ(k+1)) + * + * Best result is obtained for M = 41. + * Values from 1 to 100 were checked. + * + */ + +double exact = 0.577215664901532860; + +int main() { + double best = 0, num = 0; + double sum = 0, prev; + + for (double M = 1; M < 60; M++) { + prev = sum; + sum = 0; + for (double k = 1; k <= M; k++) { + sum += (tgamma(M + 1))/(tgamma(k + 1)*tgamma(M - k + 1)) * pow(-1, k)/k + * log(tgamma (k + 1)); + } + printf("M: \t%d\t", (int)M); + printf("diff:\t%.15f\n", fabs(sum - exact)); + if ((fabs(sum - exact) < fabs(prev - exact)) && + (fabs(sum - exact) < fabs(best - exact))){ + best = sum; + num = M; + } + } + + printf("M: %d\n", (int)num); + printf("approx:\t%.15f\n", best); + printf("true:\t%.15f\n", exact); + printf("diff:\t%.15f\n", fabs(best - exact)); + printf("\t 123456789_12345\n"); + + return EXIT_SUCCESS; +} diff --git a/ex-2/naive.c b/ex-2/naive.c new file mode 100644 index 0000000..cfa1e1b --- /dev/null +++ b/ex-2/naive.c @@ -0,0 +1,57 @@ +#include +#include + +/* Naive approach using the definition + * of γ as the difference between log(n) + * and the Rieman sum of its derivative (1/n). + * + * The result is: + * + * approx: 0.57721 56648 77325 + * true: 0.57721 56649 01533 + * diff: 0.00000 00000 24207 + * + * The convergence is logarithmic and so + * slow that the double precision runs out + * at the 10th decimal digit. 
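+ * The program below increases n by a factor of 10 at each step and stops as soon as the difference from the exact value starts to grow because of round-off.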
+ * + */ + +double exact = 0.577215664901532860; + +double partial_sum(size_t n) { + double sum = 0; + for (double k = 1; k <= n; k++) { + sum += 1/k; + } + return sum - log(n); +} + +int main () { + size_t prev_n, n = 2; + double prev_diff, diff=1; + double approx; + + /* Compute the approximation + * and the difference from the true value. + * Stop when the difference starts increasing + * due to roundoff errors. + * + */ + + do { + prev_n = n; + n *= 10; + prev_diff = diff; + approx = partial_sum(n); + diff = fabs(exact - approx); + printf("n:\t%.0e\t", (double)n); + printf("diff:\t%e\n", diff); + } while (diff < prev_diff); + + printf("n:\t%.0e\n", (double)prev_n); + printf("approx:\t%.15f\n", approx); + printf("true:\t%.15f\n", exact); + printf("diff:\t%.15f\n", prev_diff); + printf("\t 123456789_12345\n"); +} diff --git a/ex-2/recip.c b/ex-2/recip.c new file mode 100644 index 0000000..130d79e --- /dev/null +++ b/ex-2/recip.c @@ -0,0 +1,63 @@ +#include +#include +#include + +/* Compute the Euler-Mascheroni constant through the formula + * of the reciprocal Γ function: + * + * 1/Γ(z) = z*e^{γz} pro_{k = 1}^{+ inf} ((1 + z/k) e^{- z/k}) + * + * Thus: + * + * γ = - 1/z * ln(z*Γ(z)*pro{k = 1}^{+ inf} (1 + z/k) e^{- z/k}) + * + * Product stops when there is no difference between two consecutive + * terms of the productory due to limited precision. + * + * Range of z given in input as min, max and step. + * + */ + +double exact = 0.577215664901532860; + +int main(int argc, char** argv) { + + double min = (double)atof(argv[1]); + double max = (double)atof(argv[2]); + double step = (double)atof(argv[3]); + + double prev_gamma, gamma = 0; + double Z; + double best = 0; + + + for (double z = min; z <= max; z += step) { + + prev_gamma = gamma; + + double pro = 1; + double prev = -1; + for (double k = 1; pro != prev; k++) { + prev = pro; + pro *= (1 + z/k) * exp(- z/k); + if (z == 9 && pro == prev) printf("k:\t%.0f\n", k); + } + double gamma = - 1/z * log(z*tgamma(z)*pro); + printf("z:\t%.2f\t", z); + printf("diff:\t%.20f\n", fabs(gamma - exact)); + + if ((fabs(gamma - exact) < fabs(prev_gamma - exact)) && + (fabs(gamma - exact) < fabs(best - exact))){ + best = gamma; + Z = z; + } + } + + printf("z:\t%.2f\n", Z); + printf("approx:\t%.20f\n", best); + printf("true:\t%.20f\n", exact); + printf("diff:\t%.20f\n", best - exact); + printf("\t 123456789 123456789 123456789\n"); + + return EXIT_SUCCESS; +} diff --git a/ex-3/chisquared.c b/ex-3/chisquared.c new file mode 100644 index 0000000..efe3051 --- /dev/null +++ b/ex-3/chisquared.c @@ -0,0 +1,292 @@ +#include "common.h" +#include "chisquared.h" +#include +#include + +/* Minimisation function + * + * The χ² is defined as + * + * χ² = |f|² + * = Σi fi² + * + * where fi = (Oi - Ei)/√Ei are the residuals. + * + * This function takes the parameters (α,β,γ) as + * a gsl_vector `par`, the sample `data` and a + * gsl_vector `f`, that will store the results + * `fi`. + */ +int chisquared_f(const gsl_vector *par, + void *data_, gsl_vector *f) { + struct hist data = *((struct hist*) data_); + + struct event event; + double max, min; + double delta_th, delta_ph; + double expected, observed; + + /* Loop over the (i,j) bins of the 2D histogram. + * The index k of the k-th component of f is actually: + * + * k = i * φbins + j + * + * Note: this is called the C-order. + */ + + for (size_t i = 0; i < data.h->nx; i++) + for (size_t j = 0; j < data.h->ny; j++) { + /* Get the bin ranges and width for θ,φ. 
+ * The event is supposed to happen in the + * midpoint of the range. + */ + gsl_histogram2d_get_xrange(data.h, i, &max, &min); + event.th = (max + min)/2; + delta_th = (max - min); + gsl_histogram2d_get_yrange(data.h, j, &max, &min); + event.ph = (max + min)/2; + delta_ph = (max - min); + + /* O = observed number of events in the k-th bin + * E = expected number of events in k-th bin + * + * E is given by: + * + * /---> total number of events + * / + * E = N F(α,β,γ; θ,φ) Δθ Δφ sin(θ) + * \__________________________/ + * | + * `-> probability + */ + observed = gsl_histogram2d_get(data.h, i, j); + expected = data.n * distr(par, &event) + * delta_th * delta_ph * sin(event.th); + + /* Return an error (invalid domain) if + * the current bin is empty. That would + * be division by zero. + */ + if (expected < 0) { + //fprintf(stderr, "[warning] bin %ld:%ld p<0 (%.2g, %.2g, %.2g)\n", + // i, j, gsl_vector_get(par, 0), + // gsl_vector_get(par, 1), + // gsl_vector_get(par, 2)); + expected = 1e-6; + } + + gsl_vector_set( + f, i * data.h->ny + j, + (observed - expected)/sqrt(expected)); + } + + return GSL_SUCCESS; +} + + +/* Jacobian function + * + * The gradient of the χ² function is: + * + * ∇χ² = ∇ |f|² + * = 2 J^T⋅f + * + * where J is the jacobian of the (vector) + * function f. Its entries are given by + * + * 2 Jij = 2 ∂fi/∂xj + * = -(Oi + Ei)/√Ei 1/Fi (∇Fi)j + * + */ +int chisquared_df(const gsl_vector *par, + void *data_, gsl_matrix *jac) { + + struct hist data = *((struct hist*) data_); + + struct event event; + double max, min; + double delta_th, delta_ph, prob; + double expected, observed; + gsl_vector *grad = gsl_vector_alloc(3); + + for (size_t i = 0; i < data.h->nx; i++) + for (size_t j = 0; j < data.h->ny; j++) { + /* Get the bin ranges for θ,φ. + * The event is supposed to happen in the + * midpoint of the range. + */ + gsl_histogram2d_get_xrange(data.h, i, &max, &min); + event.th = (max + min)/2; + delta_th = (max - min); + gsl_histogram2d_get_yrange(data.h, j, &max, &min); + event.ph = (max + min)/2; + delta_ph = (max - min); + + prob = distr(par, &event); + observed = gsl_histogram2d_get(data.h, i, j); + expected = data.n * prob * delta_th * delta_ph * sin(event.th); + + if (expected < 0) + expected = 1e-6; + if (observed == 0) + observed = 1; + + /* Compute the gradient of F(α,β,γ; θi,φi), + * then rescale it and set it to the i-th row + * of the jacobian. + */ + grad_distr(par, &event, grad); + gsl_vector_scale( + grad, + -0.5*(observed + expected)/sqrt(expected) * 1/prob); + gsl_matrix_set_row(jac, i * data.h->ny + j, grad); + + } + + // free memory + gsl_vector_free(grad); + + return GSL_SUCCESS; +} + + +/* This is a callback function called during + * the minimisation to show the current progress. + * It prints: + * 1. the condition number cond(J) of the jacobian + * 2. the reduced χ² value + * 3. the current parameters + */ +void callback(const size_t iter, void *params, + const gsl_multifit_nlinear_workspace *w) { + gsl_vector *f = gsl_multifit_nlinear_residual(w); + gsl_vector *x = gsl_multifit_nlinear_position(w); + + //if (iter % 4 != 0) + // return; + + /* Compute the condition number of the + * jacobian and the reduced χ² (χ²/d). + */ + double rcond, chi2; + int d = w->fdf->n - w->fdf->p; + gsl_multifit_nlinear_rcond(&rcond, w); + gsl_blas_ddot(f, f, &chi2); + + fprintf(stderr, "%2ld\t", iter); + vector_fprint(stderr, x); + fprintf( + stderr, "\tcond(J)=%.4g, χ²/d=%.4g\n\n", + 1.0/rcond, + chi2/d); +} + + +/* Minimum χ² estimation of of the parameters + * α,β,γ. 
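+ * The residuals fi = (Oi - Ei)/√Ei are minimised with the GSL trust-region nonlinear least-squares driver; the covariance of the estimates is obtained from the jacobian at the minimum.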
+ */ +min_result minChiSquared(struct hist data) { + /* Initialise the function to be minimised */ + gsl_multifit_nlinear_fdf chisquared; + chisquared.f = chisquared_f; // function + chisquared.df = chisquared_df; // gradient + chisquared.fvv = NULL; // something for geodesic accel. + chisquared.n = data.h->nx * data.h->ny; // numeber of data points + chisquared.p = 3; // numeber of data points + chisquared.params = &data; // histogram data + + /* Initialise the minimisation workspace */ + gsl_multifit_nlinear_parameters options = + gsl_multifit_nlinear_default_parameters(); + + gsl_multifit_nlinear_workspace *w = + gsl_multifit_nlinear_alloc( + gsl_multifit_nlinear_trust, // minimisation method + &options, // minimisation options + data.h->nx * data.h->ny, // number of data points + 3); // number of (unknown) params + + /* Set the starting point of the + * minimisation. + */ + gsl_vector *par = gsl_vector_alloc(3); + gsl_vector_set(par, 0, 0.50); // α + gsl_vector_set(par, 1, 0.01); // β + gsl_vector_set(par, 2, -0.10); // γ + gsl_multifit_nlinear_init(par, &chisquared, w); + + /* Configure the solver and run the minimisation + * using the high-level driver. + */ + fputs("\n# least χ²\n\n", stderr); + int status, info; + status = gsl_multifit_nlinear_driver( + 100, // max number of iterations + 1e-8, // tolerance for step test: |δ| ≤ xtol(|x| + xtol) + 1e-8, // tolerance for gradient test: max |∇i⋅ xi| ≤ gtol⋅xi + 1e-8, // tolerance for norm test + callback, // function called on each iteration + NULL, // callback parameters + &info, // stores convergence information + w); // minimisation workspace + + fprintf(stderr, "status: %s\n", gsl_strerror(status)); + if (status != GSL_SUCCESS) + fprintf(stderr, "info: %s\n", gsl_strerror(info)); + fprintf(stderr, "iterations: %ld\n", gsl_multifit_nlinear_niter(w)); + + /* Store results in the min_result type. + * Note: We allocate a new vector/matrix for the + * parameters because `w->x` will be freed + * along with the workspace `w`. + */ + min_result res; + res.par = gsl_vector_alloc(3); + res.err = gsl_vector_alloc(3); + res.cov = gsl_matrix_alloc(3, 3); + gsl_vector_memcpy(res.par, gsl_multifit_nlinear_position(w)); + + /* Compute the covariance of the fit parameters. + * The covariance Σ is estimated by + * + * Σ = H⁻¹ + * + * where H is the hessian of the χ², which is + * itself approximated by the jacobian as + * + * H = J^T⋅J + * + */ + gsl_matrix *jac = gsl_multifit_nlinear_jac(w); + gsl_multifit_nlinear_covar(jac, 0.0, res.cov); + + /* Compute the standard errors + * from the covariance matrix. + */ + gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov); + gsl_vector_memcpy(res.err, &diagonal.vector); + vector_map(&sqrt, res.err); + + /* Compute the reduced χ² */ + double chi2; + gsl_vector *f = gsl_multifit_nlinear_residual(w); + gsl_blas_ddot(f, f, &chi2); + chi2 /= chisquared.n - chisquared.p; + + /* Print the results */ + fputs("\n## results\n", stderr); + + fprintf(stderr, "\n* χ²/d: %.3f\n", chi2); + + fputs("\n* parameters:\n ", stderr); + vector_fprint(stderr, res.par); + + fputs("\n* covariance:\n", stderr); + matrix_fprint(stderr, res.cov); + + // free memory + gsl_multifit_nlinear_free(w); + gsl_vector_free(par); + + return res; +} diff --git a/ex-3/chisquared.h b/ex-3/chisquared.h new file mode 100644 index 0000000..dc0dafb --- /dev/null +++ b/ex-3/chisquared.h @@ -0,0 +1,17 @@ +#include "common.h" +#include + +/* Structure containing the histogram + * for the χ² minimisation. 
+ * n: number of events + * h: 2D histogram + */ +struct hist { + size_t n; + gsl_histogram2d *h; +}; + +/* Minimum χ² estimation of of the parameters + * α,β,γ. + */ +min_result minChiSquared(struct hist data); diff --git a/ex-3/common.c b/ex-3/common.c new file mode 100644 index 0000000..4211124 --- /dev/null +++ b/ex-3/common.c @@ -0,0 +1,126 @@ +#include "common.h" + +/* `vector_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void vector_fprint(FILE *fp, const gsl_vector *x) { + fprintf(fp, "["); + for (size_t i = 0; i < x->size; i++) { + fprintf(fp, i < x->size -1 ? "%10.4g, " : "%10.4g", + gsl_vector_get(x, i)); + } + fprintf(fp, "]\n"); +} + + +/* `matrix_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void matrix_fprint(FILE *fp, const gsl_matrix *x) { + gsl_vector row; + for (size_t i = 0; i < x->size1; i++) { + row = gsl_matrix_const_row(x, i).vector; + fputs(" ", fp); + vector_fprint(fp, &row); + } +} + + +/* `vector_map(f, v)` applies the function + * `f` to each element of the vector `v`, inplace. + * The function return a GSL exit code to either + * report success or a failure. + */ +int vector_map(double (*f)(double x), gsl_vector *v) { + double y; + for (size_t i = 0; i < v->size; i++ ) { + y = (*f)(gsl_vector_get(v, i)); + gsl_vector_set(v, i, y); + } + return GSL_SUCCESS; +} + + +/* Angular distribution function F(θ,φ; α,β,γ) + * Takes three parameters α,β,γ and the angles θ,φ. + */ +double distr(const gsl_vector *par, + const struct event *event){ + double a = gsl_vector_get(par, 0), + b = gsl_vector_get(par, 1), + c = gsl_vector_get(par, 2); + + double th = event->th, + ph = event->ph; + + double A = 3 /(4 * M_PI); + double B = 0.5 * (1 - a); + double C = 0.5 * (3 * a - 1) * pow(cos(th), 2); + double D = b * pow(sin(th), 2) * cos(2 * ph); + double E = sqrt(2) * c * sin(2 * th) * cos(ph); + + return A * (B + C - D - E); +} + +/* `grad_distr(par, event, grad)` + * computes the gradient of F(α,β,γ; α,β) as: + * + * ⎡ 3/2⋅cos²(θ) - 1/2 ⎤ + * ⎢ ⎥ + * ⎢ sin²(θ)⋅cos(2φ) ⎥ 3/(4π) + * ⎢ ⎥ + * ⎣ √2⋅sin(2θ)⋅cos(φ) ⎦ + * + * where `par` is [α,β,γ], `event` is the angle + * structure. The results is stored in the + * gsl_vector `grad` + */ +void grad_distr(const gsl_vector *par, + const struct event *event, + gsl_vector *grad) { + double th = event->th, + ph = event->ph; + + gsl_vector_set(grad, 0, 1.5*pow(cos(th), 2) - 0.5); + gsl_vector_set(grad, 1, -pow(sin(th), 2) * cos(2*ph)); + gsl_vector_set(grad, 2, -sqrt(2)*sin(2*th) * cos(ph)); + gsl_vector_scale(grad, 3.0/(4.0 * M_PI)); +} + + +/* Test the compatibility between the original + * parameters `def` and the estimates `est.par` + * with standard errors `est.err`. + */ +void compatibility(gsl_vector *def, + min_result est) { + + char* names[] = { "α", "β", "γ" }; + double a, b; + double t, sigma, alpha; + + fprintf(stderr, "| par | σ | α-value |\n"); + fprintf(stderr, "|-----|----------|----------|\n"); + for (size_t i = 0; i < def->size; i++) { + // discrepancy + a = gsl_vector_get(def, i); + b = gsl_vector_get(est.par, i); + sigma = gsl_vector_get(est.err, i); + + // normal t-statistic + t = fabs(a - b)/sigma; + // α-value / significance + alpha = 1 - erf(t/sqrt(2)); + + fprintf(stderr, "| %s | %-8.2g | %-8.2g |\n", + names[i], sigma, alpha); + } +} + + +/* Free a min_result structure. 
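+ * (Only the parameter vector and the covariance matrix are released here.)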
+ */ +void min_result_free(min_result x) { + gsl_vector_free(x.par); + gsl_matrix_free(x.cov); +} diff --git a/ex-3/common.h b/ex-3/common.h new file mode 100644 index 0000000..1ec5e57 --- /dev/null +++ b/ex-3/common.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include + +/* Structure that holds the θ,φ angles + * of a single event of the sample. + */ +struct event { + double th; + double ph; +}; + +/* Structure that contains the sample events + * and its size. + */ +struct sample { + const size_t size; + struct event *events; +}; + +/* This structure contains the result of a + * minimisation method: estimated parameters, + * standard error and their covariance. + */ +typedef struct { + gsl_vector *par; + gsl_vector *err; + gsl_matrix *cov; +} min_result; + + +/* `vector_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void vector_fprint(FILE *fp, const gsl_vector *x); + + +/* `matrix_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void matrix_fprint(FILE *fp, const gsl_matrix *x); + + +/* `vector_map(f, v)` applies the function + * `f` to each element of the vector `v`, inplace. + * The function return a GSL exit code to either + * report success or a failure. + */ +int vector_map(double (*f)(double x), gsl_vector *v); + + +/* Angular distribution function F(θ,φ; α,β,γ) + * Takes three parameters α,β,γ and the angles θ,φ. + */ +double distr(const gsl_vector *par, + const struct event *event); + + +/* `grad_distr(par, event, grad)` + * computes the gradient of F(α,β,γ; α,β) as: + * + * ⎡ 1/2 - 3/2⋅cos²(θ) ⎤ + * ⎢ ⎥ + * ⎢ sin²(θ)⋅cos(2φ) ⎥ 3/(4π) + * ⎢ ⎥ + * ⎣ √2⋅sin(2θ)⋅cos(φ) ⎦ + * + * where `par` is [α,β,γ], `event` is the angle + * structure. The results is stored in the + * gsl_vector `grad` + */ +void grad_distr(const gsl_vector *par, + const struct event *event, + gsl_vector *grad); + + +/* Test the compatibility between the original + * parameters `def` and the estimates `est.par` + * with standard errors `est.err`. + */ +void compatibility(gsl_vector *def, min_result est); + + +/* Free a min_result structure. + */ +void min_result_free(min_result x); diff --git a/ex-3/likelihood.c b/ex-3/likelihood.c new file mode 100644 index 0000000..53c137b --- /dev/null +++ b/ex-3/likelihood.c @@ -0,0 +1,239 @@ +#include "likelihood.h" +#include +#include +#include + +/* `f_logL(par, &sample)` + * gives the negative log-likelihood for the sample `sample`. + */ +double f_logL(const gsl_vector *par, void *sample_) { + struct sample sample = *((struct sample*) sample_); + double sum = 0; + + for (size_t i = 0; i < sample.size; i++) + sum += log(fabs(distr(par, &sample.events[i]))); + + /* Rescale the function to avoid + * round-off errors during minimisation + */ + sum *= 1e-5; + + return -sum; +} + + +/* `fdf_logL(par, &sample, fun, grad)` + * simultaneously computes the gradient and the value of + * the negative log-likelihood for the sample `sample` + * and stores the results in `fun` and `grad` respectively. 
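+ * The gradient of -log(L) is accumulated as the sum over the events of -∇F/F; both the value and the gradient are rescaled by 1e-5, as in `f_logL`, to limit round-off errors.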
+ */ +void fdf_logL(const gsl_vector *par, void *sample_, + double *fun, gsl_vector *grad) { + struct sample sample = *((struct sample*) sample_); + + double prob; + struct event *event; + gsl_vector *term = gsl_vector_alloc(3); + + // Note: fun/grad are *not* guaranteed to be 0 + gsl_vector_set_zero(grad); + *fun = 0; + + for (size_t i = 0; i < sample.size; i++) { + event = &sample.events[i]; + prob = distr(par, event); + + /* The gradient of log(F(α,β,γ)) is: + * 1/F(α,β,γ) ∇F(α,β,γ) + */ + grad_distr(par, event, term); + gsl_vector_scale(term, -1.0/prob); + gsl_vector_add(grad, term); + + // compute function + *fun += log(fabs(prob)); + } + + /* Rescale the function to avoid + * round-off errors during minimisation + */ + *fun = -*fun*1e-5; + gsl_vector_scale(grad, 1e-5); + + // free memory + gsl_vector_free(term); +} + + +/* `df_logL(par, &sample, grad)` + * gives the gradient of the negative log-likelihood + * for the sample `sample`. The result is stored in + * the `grad` gsl_vector. + */ +void df_logL(const gsl_vector *par, void *sample, gsl_vector *grad) { + double fun; + fdf_logL(par, sample, &fun, grad); +} + + +/* `hf_logL(par, &sample, hess)` + * gives the hessian matrix of the negative log-likelihood + * for the sample `sample`. The result is stored in + * the `hess` gsl_matrix. + */ +gsl_matrix* hf_logL(const gsl_vector *par, void *sample_) { + struct sample sample = *((struct sample*) sample_); + + gsl_vector *grad = gsl_vector_calloc(3); + gsl_matrix *term = gsl_matrix_calloc(3, 3); + gsl_matrix *hess = gsl_matrix_calloc(3, 3); + + double prob; + struct event *event; + for (size_t n = 0; n < sample.size; n++) { + /* Compute gradient and value of F */ + event = &sample.events[n]; + prob = distr(par, event); + grad_distr(par, event, grad); + + /* Compute the hessian matrix of -log(F): + * H_ij = 1/F² ∇F_i ∇F_j + */ + for (size_t i = 0; i < 3; i++) + for (size_t j = 0; j < 3; j++) { + gsl_matrix_set( + term, i, j, + gsl_vector_get(grad, i) * gsl_vector_get(grad, j)); + } + gsl_matrix_scale(term, 1.0/pow(prob, 2)); + gsl_matrix_add(hess, term); + } + + // free memory + gsl_vector_free(grad); + gsl_matrix_free(term); + + return hess; +} + + +/* Maximum likelihood estimation of the parameters + * α,β,γ. + */ +min_result maxLikelihood(struct sample *sample) { + /* Starting point: α,β,γ = "something close + * to the solution", because the minimisation + * seems pretty unstable. + */ + gsl_vector *par = gsl_vector_alloc(3); + gsl_vector_set(par, 0, 0.79); + gsl_vector_set(par, 1, 0.02); + gsl_vector_set(par, 2, -0.17); + + /* Initialise the minimisation */ + gsl_multimin_function_fdf likelihood; + likelihood.n = 3; + likelihood.f = f_logL; + likelihood.df = df_logL; + likelihood.fdf = fdf_logL; + likelihood.params = sample; + + /* The minimisation technique used + * is the conjugate gradient method. */ + const gsl_multimin_fdfminimizer_type *type = + gsl_multimin_fdfminimizer_conjugate_fr; + + gsl_multimin_fdfminimizer *m = + gsl_multimin_fdfminimizer_alloc(type, 3); + + gsl_multimin_fdfminimizer_set( + m, // minimisation technique + &likelihood, // function to minimise + par, // initial parameters/starting point + 1e-5, // first step length + 0.1); // accuracy + + size_t iter; + int status = GSL_CONTINUE; + + /* Iterate the minimisation until the gradient + * is smaller that 10^-4 or fail. 
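+ * (The tolerance passed to gsl_multimin_test_gradient below is 1e-5, with at most 100 iterations.)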
+ */ + fputs("\n# maximum likelihood\n\n", stderr); + for (iter = 0; status == GSL_CONTINUE && iter < 100; iter++) { + status = gsl_multimin_fdfminimizer_iterate(m); + + if (status) + break; + + status = gsl_multimin_test_gradient(m->gradient, 1e-5); + + /* Print the iteration, current parameters + * and gradient of the log-likelihood. + */ + if (iter % 5 == 0 || status == GSL_SUCCESS) { + fprintf(stderr, "%2ld\t", iter); + vector_fprint(stderr, m->x); + fputs("\t", stderr); + vector_fprint(stderr, m->gradient); + putc('\n', stderr); + } + } + fprintf(stderr, "status: %s\n", gsl_strerror(status)); + fprintf(stderr, "iterations: %ld\n", iter); + + /* Store results in the min_result type. + * Note: We allocate a new vector for the + * parameters because `m->x` will be freed + * along with m. + */ + min_result res; + res.par = gsl_vector_alloc(3); + res.err = gsl_vector_alloc(3); + res.cov = gsl_matrix_alloc(3, 3); + gsl_vector_memcpy(res.par, m->x); + + /* Compute the standard errors of the estimates. + * The Cramér–Rao bound states that the covariance + * matrix of the estimates is + * + * Σ ≥ I⁻¹ = (-H)⁻¹ + * + * where: + * - I is called the Fisher information, + * - H is the hessian of log(L) at + * the maximum likelihood. + */ + res.cov = hf_logL(m->x, sample); + + /* Invert H by Cholesky decomposition: + * + * H = LL^T ⇒ H⁻¹ = L^T⁻¹L⁻¹ + * + * Note: H is positive defined (because -logL is + * minimised) and symmetric so this is valid. + * + */ + gsl_linalg_cholesky_decomp(res.cov); + gsl_linalg_cholesky_invert(res.cov); + + /* Compute the standard errors + * from the covariance matrix. + */ + gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov); + gsl_vector_memcpy(res.err, &diagonal.vector); + vector_map(&sqrt, res.err); + + fputs("\n## results\n", stderr); + + fputs("\n* parameters:\n ", stderr); + vector_fprint(stderr, res.par); + + fputs("\n* covariance (Cramér–Rao lower bound):\n", stderr); + matrix_fprint(stderr, res.cov); + + // free memory + gsl_multimin_fdfminimizer_free(m); + + return res; +} diff --git a/ex-3/likelihood.h b/ex-3/likelihood.h new file mode 100644 index 0000000..7330ace --- /dev/null +++ b/ex-3/likelihood.h @@ -0,0 +1,6 @@ +#include "common.h" + +/* Maximum likelihood estimation of the parameters + * α,β,γ. 
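+ * The negative log-likelihood is minimised with the Fletcher-Reeves conjugate gradient method and the errors are estimated from the inverse hessian (Cramér-Rao bound).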
+ */ +min_result maxLikelihood(struct sample *sample); diff --git a/ex-3/main.c b/ex-3/main.c new file mode 100644 index 0000000..3d9c981 --- /dev/null +++ b/ex-3/main.c @@ -0,0 +1,158 @@ +#include +#include +#include "common.h" +#include "likelihood.h" +#include "chisquared.h" +#include +#include + +/* Options for the program */ +struct options { + int show_hist; // whether to print the 2D histogram + int only_hist; // whether to skip the minimisations + size_t num_events; // number of generated events + size_t bins_theta; // number of bins for θ + size_t bins_phi; // number of bins for φ +}; + + +int main(int argc, char **argv) { + /* Set default options */ + struct options opts; + opts.num_events = 50000; + opts.show_hist = 0; + opts.only_hist = 0; + opts.bins_theta = 30; + opts.bins_phi = 60; + + /* Process CLI arguments */ + for (size_t i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-i")) opts.show_hist = 1; + if (!strcmp(argv[i], "-I")) opts.only_hist = 1; + else if (!strcmp(argv[i], "-n")) opts.num_events = atol(argv[++i]); + else if (!strcmp(argv[i], "-t")) opts.bins_theta = atol(argv[++i]); + else if (!strcmp(argv[i], "-p")) opts.bins_phi = atol(argv[++i]); + else { + fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]); + fprintf(stderr, "\t-h\tShow this message.\n"); + fprintf(stderr, "\t-i\tPrint the histogram to stdout.\n"); + fprintf(stderr, "\t-I\tPrint *only* the histogram.\n"); + fprintf(stderr, "\t-n N\tThe number of events to generate.\n"); + fprintf(stderr, "\t-t N\tThe number of θ bins.\n"); + fprintf(stderr, "\t-p N\tThe number of φ bins.\n"); + return EXIT_FAILURE; + } + } + + /* Initialize an RNG. */ + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + /* Initialise the sample data structure with + * the number of events and allocate an array + * for the events. + */ + struct sample s = { opts.num_events, NULL }; + s.events = calloc(s.size, sizeof(struct event)); + + /* Prepare a 2D histogram of the sample + * with 30x60 bins. + */ + gsl_histogram2d* hist = + gsl_histogram2d_alloc(opts.bins_theta, opts.bins_phi); + // set the ranges θ∈[0, π] and φ∈[0, 2π] + gsl_histogram2d_set_ranges_uniform(hist, 0,M_PI, 0,2*M_PI); + + /* F(θ.φ) parameters we will generate + * a sample with, and later try to recover. + * Note: for this choice max F(θ,φ) ≈ 0.179. + */ + gsl_vector *def_par = gsl_vector_alloc(3); + gsl_vector_set(def_par, 0, 0.65); // α + gsl_vector_set(def_par, 1, 0.06); // β + gsl_vector_set(def_par, 2, -0.18); // γ + + /* Generate the points (θ, φ) on the unit sphere + * using the inverse transform: + * θ = acos(1 - 2X) + * φ = 2πY + * where X,Y are random uniform variables in [0,1). + * This ensures that θ,φ are uniformly distributed, then we + * proceed to reject the events as follow: + * 1. generate a y ∈ [0, max F(θ,φ)] uniformly + * 2. reject the sample if y > F(θ,φ) else save it. 
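+ * Here y is drawn in [0, 0.2], 0.2 being a safe upper bound for max F(θ,φ) ≈ 0.179 (see the note above).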
+ */ + fputs("# event sampling\n\n", stderr); + fprintf(stderr, "generating %ld events...", opts.num_events); + struct event *e; + for (size_t i = 0; i < s.size; i++){ + e = &s.events[i]; + do { + e->th = acos(1 - 2*gsl_rng_uniform(r)); + e->ph = 2 * M_PI * gsl_rng_uniform(r); + } while(0.2 * gsl_rng_uniform(r) > distr(def_par, e)); + + // update the histogram + gsl_histogram2d_increment(hist, e->th, e->ph); + } + fputs("done\n", stderr); + + // print histogram to stdout + if (opts.show_hist || opts.only_hist) { + fputs("\n# histogram\n", stderr); + gsl_histogram2d_fprintf(stdout, hist, "%g", "%g"); + } + + if (!opts.only_hist) { + /* Estimate the parameters α,β,γ by + * the maximum likelihood method. + */ + min_result like = maxLikelihood(&s); + + /* Estimate the parameters α,β,γ by + * a minimum χ² method. + */ + struct hist data; + data.h = hist; + data.n = s.size; + min_result chi2 = minChiSquared(data); + + /* Compare the results with the original ones + * and check whether they are compatible. + */ + fputs("\n# vector decay compatibility\n", stderr); + + fputs("\n## likelihood results\n\n", stderr); + compatibility(def_par, like); + + fputs("\n## χ² results\n\n", stderr); + compatibility(def_par, chi2); + + /* Test the isotropic decay hypothesys + * with the χ² results + */ + fputs("\n# isotropic hypotesys\n\n", stderr); + + /* If the decay is isotropic then F = 1/4π. + * Solving for α,β,γ yields α=1/3,β=γ=0. + */ + gsl_vector *iso_par = gsl_vector_alloc(3); + gsl_vector_set(iso_par, 0, 0.33); // α + gsl_vector_set(iso_par, 1, 0.00); // β + gsl_vector_set(iso_par, 2, 0.00); // γ + + compatibility(iso_par, chi2); + + // free memory + min_result_free(like); + min_result_free(chi2); + }; + + // free memory + gsl_rng_free(r); + free(s.events); + gsl_histogram2d_free(hist); + gsl_vector_free(def_par); + + return EXIT_SUCCESS; +} diff --git a/ex-3/plot.py b/ex-3/plot.py new file mode 100755 index 0000000..9ae01c4 --- /dev/null +++ b/ex-3/plot.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +from pylab import * +from mpl_toolkits.mplot3d import Axes3D +from matplotlib import cm +import sys + +xedges, yedges, counts = loadtxt(sys.stdin, unpack=True, usecols=[0,2,4]) +bins = (30, 60) +counts = counts.reshape(bins) + +plt.rcParams['font.size'] = 15 +suptitle('Angular decay distribution') +# subplot2grid((1, 3), (0, 0), colspan=2, aspect='equal') +xlabel(r'$\phi$ (radians)') +ylabel(r'$\theta$ (radians)') +pcolor(yedges[:bins[1]], xedges[::bins[1]], counts) +norm = colorbar(fraction=0.023, pad=0.04).norm + +show() + +# Ax = subplot2grid((1, 3), (0, 2), projection='3d') +# Θ, φ = mgrid[0:pi:bins[0]*1j, 0:2*pi:bins[1]*1j] +# X = 5 * sin(θ) * cos(φ) +# Y = 5 * sin(θ) * sin(φ) +# Z = 5 * cos(θ) +# Ax.plot_surface( +# x, y, z, rstride=1, cstride=1, +# facecolors=cm.viridis(norm(counts))) +# Axis('off') +# show() diff --git a/ex-3/results.md b/ex-3/results.md new file mode 100644 index 0000000..0c76533 --- /dev/null +++ b/ex-3/results.md @@ -0,0 +1,85 @@ +# event sampling + +generating 50000 events...done + +# maximum likelihood + + 0 [ 0.79, 0.02, -0.17] + [ 0.18, -0.06812, 0.001955] + + 5 [ 0.7894, 0.02022, -0.17] + [ 0.1789, -0.06748, 0.002162] + +10 [ 0.7709, 0.02725, -0.1702] + [ 0.1465, -0.04898, 0.007551] + +15 [ 0.6525, 0.06317, -0.1725] + [ -0.002576, -0.002313, 0.0161] + +20 [ 0.654, 0.06135, -0.1763] + [-2.098e-06, 0.0001837, 0.0001225] + +24 [ 0.6541, 0.06125, -0.1763] + [ -3.24e-06, -7.905e-07, -1.432e-06] + +status: success +iterations: 25 + +## results + +* parameters: + [ 0, -0.5106, 
1.089] + +* covariance (Cramér–Rao lower bound): + [ 6.604e-06, -3.474e-11, -1.798e-11] + [-3.474e-11, 1.227e-05, -1.267e-10] + [-1.798e-11, -1.267e-10, 9.564e-05] + +# least χ² + + 0 [ 0.5, 0.01, -0.1] + cond(J)=inf, χ²/d=3.32 + + 4 [ 0.6537, 0.05976, -0.1736] + cond(J)=2.725, χ²/d=1.017 + +status: success +iterations: 7 + +## results + +* χ²/d: 1.017 + +* parameters: + [ 0.6537, 0.05972, -0.1736] + +* covariance: + [ 0.00304, -1.66e-06, 2.383e-07] + [ -1.66e-06, 0.00212, 1.505e-06] + [ 2.383e-07, 1.505e-06, 0.001637] + +# vector decay compatibility + +## likelihood results + +| par | σ | α-value | +|-----|----------|----------| +| α | 6.6e-06 | 0 | +| β | 1.2e-05 | 0 | +| γ | 9.6e-05 | 0 | + +## χ² results + +| par | σ | α-value | +|-----|----------|----------| +| α | 0.003 | 0.22 | +| β | 0.0021 | 0.89 | +| γ | 0.0016 | 0.0001 | + +# isotropic hypotesys + +| par | σ | α-value | +|-----|----------|----------| +| α | 0.003 | 0 | +| β | 0.0021 | 0 | +| γ | 0.0016 | 0 | diff --git a/ex-4/dip.c b/ex-4/dip.c new file mode 100644 index 0000000..778ea9e --- /dev/null +++ b/ex-4/dip.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include + +int main(int argc, char **argv) + { + + // Set default options. + // + size_t N = 50000; // number of events. + size_t n = 50; // number of bins. + double p_max = 10; // maximum value of momentum module. + + // Process CLI arguments. + // + for (size_t i = 1; i < argc; i++) + { + if (!strcmp(argv[i], "-N")) N = atol(argv[++i]); + else if (!strcmp(argv[i], "-n")) n = atol(argv[++i]); + else if (!strcmp(argv[i], "-p")) p_max = atof(argv[++i]); + else + { + fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]); + fprintf(stderr, "\t-h\tShow this message.\n"); + fprintf(stderr, "\t-N integer\tThe number of events to generate.\n"); + fprintf(stderr, "\t-n integer\tThe number of bins of the histogram.\n"); + fprintf(stderr, "\t-p float\tThe maximum value of momentum.\n"); + return EXIT_FAILURE; + } + } + + // Initialize an RNG. + // + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // Generate the angle θ uniformly distributed on a sphere using the + // inverse transform: + // + // θ = acos(1 - 2X) + // + // where X is a random uniform variable in [0,1), and the module p of + // the vector: + // + // p² = p_v² + p_h² + // + // uniformly distributed between 0 and p_max. The two components are + // then computed as: + // + // p_v = p⋅cos(θ) + // p_h = p⋅sin(θ) + // + // The histogram will be updated this way. + // The j-th bin where p_h goes in is given by: + // + // step = p_max / n + // j = floor(p_h / step) + // + // Thus an histogram was created and a structure containing the number of + // entries in each bin and the sum of |p_v| in each of them is created and + // filled while generating the events. + // + struct bin + { + size_t amo; // Amount of events in the bin. + double sum; // Sum of |p_v|s of all the events in the bin. + }; + struct bin *histo = calloc(n, sizeof(struct bin)); + + // Some useful variables. + // + double step = p_max / n; + struct bin *b; + double theta; + double p; + double p_v; + double p_h; + size_t j; + + for (size_t i = 0; i < N; i++) + { + // Generate the event. + // + theta = acos(1 - 2*gsl_rng_uniform(r)); + p = p_max * gsl_rng_uniform(r); + + // Compute the components. + // + p_v = p * cos(theta); + p_h = p * sin(theta); + + // Update the histogram. 
+ // + j = floor(p_h / step); + b = &histo[j]; + b->amo++; + b->sum += fabs(p_v); + } + + // Compute the mean value of each bin and print it to stodut + // together with other useful things to make the histogram. + // + printf("bins: \t%ld\n", n); + printf("step: \t%.5f\n", step); + for (size_t i = 0; i < n; i++) + { + histo[i].sum = histo[i].sum / histo[i].amo; + printf("\n%.5f", histo[i].sum); + }; + + // free memory + gsl_rng_free(r); + free(histo); + + return EXIT_SUCCESS; + } diff --git a/ex-4/plot.py b/ex-4/plot.py new file mode 100755 index 0000000..9f9ecea --- /dev/null +++ b/ex-4/plot.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +import matplotlib.pyplot as plt +import numpy as np +import sys + + +bins = input() +step = input() +bins = int(bins.split("\t")[1]) +step = float(step.split("\t")[1]) +counts = np.loadtxt(sys.stdin) +edges = np.linspace(0, bins*step, bins+1) + +plt.rcParams['font.size'] = 17 + +plt.hist(edges[:-1], edges, weights=counts, + color='#eacf00', edgecolor='#3d3d3c') +plt.title('Vertical component distribution', loc='right') +plt.xlabel(r'$p_h$') +plt.ylabel(r'$\left<|p_v|\right>$') + +plt.show() diff --git a/ex-5/casino.c b/ex-5/casino.c new file mode 100644 index 0000000..961c494 --- /dev/null +++ b/ex-5/casino.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include + +int main(int argc, char** argv){ + size_t size = atoi(argv[1]); // size of the generated sample + + // Initialize an RNG. + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // Random points (x, y) generation: x in [0,1), y in [0,3) + // get if y < exp(x) + // leave if y > exp(x) + size_t in = 0; + size_t total = 0; + do{ + double x = gsl_rng_uniform(r); + double y = 3 * gsl_rng_uniform(r); + if (y <= exp(x)){ + in++; + } + total++; + } while (total < size); + + + // The integral is esitmated thorugh the proportion: + // integral : 3 = in : total + double integral = 3.0*(double)in/(double)total; + printf("∫exp(x)dx in [0,1]:\t%.07f\n", integral); + + // Free memory + gsl_rng_free(r); + return EXIT_SUCCESS; +} + diff --git a/ex-5/manual.c b/ex-5/manual.c new file mode 100644 index 0000000..16ba2f9 --- /dev/null +++ b/ex-5/manual.c @@ -0,0 +1,31 @@ +#include +#include +#include + +/* This program evaluates the integral of exp(x) in [0;1] through the + * trapezoidal rule. + * The integration step is given in input as d in step = 1e-d. + * + */ + +int main(int argc, char** argv){ + double left = 0; // left limit + double point = left; // points at the current position + double right = 1; // right limit + + double step = pow(10, -atoi(argv[1])); // step of integration + + double sum = 0; + + // Integral evaluation: stops when point goes over the right edge of the + // range. + do { + sum += step * 0.5 * (exp(point) + exp(point + step)); + point += step; + } while (point < right); + + // Print out the result + printf("∫exp(x)dx in [0,1]:\t%.7f\n", sum); + + return EXIT_SUCCESS; +} diff --git a/ex-5/trifecta.c b/ex-5/trifecta.c new file mode 100644 index 0000000..5cf3822 --- /dev/null +++ b/ex-5/trifecta.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + + +// Exact value of the integral. +double exact = 1.71828182845905; + + +// Function which must be passed to the gsl_function. +double function (double * x, size_t dim, void * params){ + // Avoid warning when no parameters are passed to the function. + (void)(params); + return exp(x[0]); +} + + +// Function which prints out the results when called. 
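+// It prints the estimate, its error and the exact value ∫exp(x)dx = e - 1 ≈ 1.71828182845905.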
+void results (char *title, double result, double error) +{ + printf ("---------------------------\n%s\n", title); + printf ("result: % .10f\n", result); + printf ("sigma: % .10f\n", error); + printf ("exact: % .10f\n", exact); +} + + +int main(){ + // Some useful variables. + double integral, error; + size_t calls = 50000000; + size_t dims = 1; + double lower[1] = {0}; // Integration lower limit + double upper[1] = {1}; // Integration upper limit + + // Initialize an RNG. + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // Define GSL function. + gsl_monte_function expo = {&function, dims, NULL}; + expo.f = &function; + expo.dim = 1; + expo.params = NULL; + + { + gsl_monte_vegas_state *s = gsl_monte_vegas_alloc (dims); + gsl_monte_vegas_integrate (&expo, lower, upper, dims, calls, + r, s, &integral, &error); + do { gsl_monte_vegas_integrate (&expo, lower, upper, dims, + calls/5, r, s, &integral, &error); + } while (fabs (gsl_monte_vegas_chisq (s) - 1.0) > 0.5); + + // Free memory. + gsl_monte_vegas_free(s); + + // Display results + results("Vegas", integral, error); + } + + { + gsl_monte_miser_state *s = gsl_monte_miser_alloc (dims); + gsl_monte_miser_integrate (&expo, lower, upper, dims, calls, + r, s, &integral, &error); + // Free memory. + gsl_monte_miser_free (s); + + // Display results + results ("Miser", integral, error); + } + + { + gsl_monte_plain_state *s = gsl_monte_plain_alloc (dims); + gsl_monte_plain_integrate (&expo, lower, upper, dims, calls, + r, s, &integral, &error); + // Display results + results ("Montecarlo", integral, error); + + // Free memory. + gsl_monte_plain_free (s); + } + + // Free memory. + gsl_rng_free(r); + + return EXIT_SUCCESS; +} diff --git a/ex-6/fft.c b/ex-6/fft.c new file mode 100644 index 0000000..8a5de4b --- /dev/null +++ b/ex-6/fft.c @@ -0,0 +1,196 @@ +#include "fft.h" +#include +#include + +/* These macros are used to extract the + * real and imaginary parts of a complex + * vector */ +#define REAL(a,stride,i) ((a)[2*(stride)*(i)]) +#define IMAG(a,stride,i) ((a)[2*(stride)*(i)+1]) + + +/* Computes the (forward) real DFT of a vector + * and returns the results as a complex vector + */ +gsl_vector_complex* vector_fft(gsl_vector *data) { + /* Create a copy of the input data to + * preserve it: GSL functions work in-place. + */ + gsl_vector *fdata = gsl_vector_alloc(data->size); + gsl_vector_memcpy(fdata, data); + + /* Compute the forward Fourier transform */ + gsl_fft_real_wavetable *rtable = + gsl_fft_real_wavetable_alloc(fdata->size); + gsl_fft_real_workspace *wspace = + gsl_fft_real_workspace_alloc(fdata->size); + + gsl_fft_real_transform( + fdata->data, // array of data to transform + fdata->stride, // stride of the array, i.e. # steps between elements + fdata->size, // number of elements + rtable, // wavetable (real) + wspace); // workspace + + /* The data in `data` is in the half-complex + * packed format, which reduces memory usage + * by using the fact that + * + * z_i = z̅_(n-i) + * + * We unpack it to the normal packing and use + * the array to define a gsl_vector_complex. + */ + gsl_vector_complex *cdata = gsl_vector_complex_calloc(fdata->size); + gsl_fft_halfcomplex_unpack( + fdata->data, + cdata->data, + cdata->stride, + fdata->size); + + // clear memory + gsl_fft_real_wavetable_free(rtable); + gsl_fft_real_workspace_free(wspace); + gsl_vector_free(fdata); + + return cdata; +} + + +/* Inverse function of gsl_fft_halfcomplex_unpack. 
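 * (GSL only ships the unpacking direction, so the packing routine
 * needed for the deconvolution below is implemented by hand here.)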
+ * `gsl_fft_halfcomplex_pack(c, hc, stride, n)` + * creates a halfcomplex packed version of a + * complex packaged array `hc` with stride `stride` + * and `n` elements. + */ +int gsl_fft_halfcomplex_pack( + const gsl_complex_packed_array c, + double hc[], + const size_t stride, const size_t n) { + + hc[0] = REAL(c, stride, 0); + + size_t i; + for (i = 1; i < n - i; i++) { + hc[(2 * i - 1) * stride] = REAL(c, stride, i); + hc[2 * i * stride] = IMAG(c, stride, i); + } + + if (i == n - i) + hc[(n - 1) * stride] = REAL(c, stride, i); + + return 0; +} + + +/* `fft_deconvolve(data, kernel)` tries to deconvolve + * `kernel` from `data` by factoring out the `kernel` + * in the Fourier space. + */ +gsl_histogram* fft_deconvolve( + gsl_histogram *data, + gsl_histogram *kernel) { + /* Size of the original data + * before being convoluted. + * + * Notation of sizes: + * - original: m + * - kernel: n + * - "full" convolution: m + n - 1 + */ + size_t orig_size = data->n - kernel->n + 1; + + /* Create vector views */ + gsl_vector vdata = + gsl_vector_view_array(data->bin, data->n).vector; + gsl_vector vkernel = + gsl_vector_view_array(kernel->bin, kernel->n).vector; + + /* 0-pad the kernel to make it the + * same size as the convoluted data, + * which is m+n-1: + * + * 1. create a new vector + * 2. create a subvector view to the center + * 3. copy the kernel to the view + */ + gsl_vector *vpadded = gsl_vector_calloc(data->n); + gsl_vector center = gsl_vector_subvector( + vpadded, // vector: padded kernel (n+m-1) + orig_size/2, // offset: half of original data size (m/2) + kernel->n).vector; // length: kernel size (n) + gsl_vector_memcpy(¢er, &vkernel); + + /* Compute the DFT of the data and + * divide it by the DFT of the kernel. + */ + gsl_vector_complex *fdata = vector_fft(&vdata); + gsl_vector_complex *fkernel = vector_fft(vpadded); + gsl_vector_complex_div(fdata, fkernel); + + /* Pack the result in the half-complex + * format before computing the inverse DFT + */ + double *res = calloc(fdata->size, sizeof(double)); + gsl_fft_halfcomplex_pack(fdata->data, res, + fdata->stride, fdata->size); + + /* Compute the inverse DFT of the result */ + gsl_fft_halfcomplex_wavetable *htable = + gsl_fft_halfcomplex_wavetable_alloc(data->n); + gsl_fft_real_workspace *wspace = + gsl_fft_real_workspace_alloc(data->n); + gsl_fft_halfcomplex_inverse( + res, // array of data to transform + fdata->stride, // stride of the array + fdata->size, // number of elements + htable, // wavetable (complex) + wspace); // workspace + + /* Restore the natural order of frequency + * in the result of the DFT (negative→positive). + * To do that roll over the array by a half: + * 1. create a temp array of half size + * 2. copy first half over to temp + * 3. copy second half to the first + * 4. copy back the first to the second half + */ + size_t half_size = data->n/2 * sizeof(double); + double *temp = malloc(half_size); + memcpy(temp, res, half_size); + memcpy(res, res + data->n/2, half_size); + memcpy(res + data->n/2, temp, half_size); + + /* Create a histogram with the same edges + * as `data`, but with the original size, + * to return the cleaned result + */ + gsl_histogram *hist = gsl_histogram_calloc(orig_size); + + /* Set the same bin edges as `data`*/ + double max = gsl_histogram_max(data); + double min = gsl_histogram_min(data); + gsl_histogram_set_ranges_uniform(hist, min, max); + + /* Remove the extra size of the convolution + * (where the overlap is partial) when copying + * over the result to the histogram. 
+ * This amounts to (n-1)/2 on each end of the + * vector. + */ + memcpy( + hist->bin, + res + (kernel->n - 1)/2, + orig_size * sizeof(double)); + + // free memory + gsl_vector_free(vpadded); + gsl_vector_complex_free(fdata); + gsl_vector_complex_free(fkernel); + gsl_fft_halfcomplex_wavetable_free(htable); + gsl_fft_real_workspace_free(wspace); + free(res); + free(temp); + + return hist; +} diff --git a/ex-6/fft.h b/ex-6/fft.h new file mode 100644 index 0000000..818ca66 --- /dev/null +++ b/ex-6/fft.h @@ -0,0 +1,15 @@ +#include +#include +#include + +/* Computes the (forward) real DFT of a vector + * and returns the results as a complex vector + */ +gsl_vector_complex* vector_fft(gsl_vector *data); + + +/* `fft_deconvolve(data, noise)` tries to deconvolve + * `noise` from `data` by factoring out the `noise` + * in the Fourier space. + */ +gsl_histogram* fft_deconvolve(gsl_histogram *data, gsl_histogram *noise); diff --git a/ex-6/main.c b/ex-6/main.c new file mode 100644 index 0000000..6005f52 --- /dev/null +++ b/ex-6/main.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include +#include "fft.h" +#include "rl.h" + +/* Program options */ +struct options { + int convolved; + int deconvolved; + int original; + size_t num_events; + size_t bins; + double sigma; + size_t rounds; + const char* mode; + double noise; +}; + + +/* Intensity function parameters */ +struct param { + double a; // aperture radius + double k; // wavenumber + double e; // wave amplitude + double l; // screen distance +}; + + +/* Intensity of the EM field at a large distance + * from a circular aperture diffraction of a plane + * wave. + * + * The intensity is given as a function of θ, + * the diffraction angle, all the other physical + * parameter are controlled by the structure `param`. + * + * See Fraunhöfer diffraction on "Hect - Optics" for + * a derivation of the formula. + */ +double intensity(double theta, struct param p) { + double x = p.k * p.a * sin(theta); + double R = p.l / cos(theta); + + /* The intensity function has an eliminable + * discontinuoty at the origin, which must + * be handled separately. + */ + if (fabs(x) < 1e-15) { + return 0.5 * pow(p.e/R * M_PI * pow(p.a, 2), 2); + } + + double y = 2*M_PI*pow(p.a, 2) * gsl_sf_bessel_J1(x) / x; + return 0.5 * pow(p.e * y/R, 2); +} + + +/* `gaussian_for(hist, sigma)` generates a histogram + * with the same bin width of `hist` but smaller, + * with a gaussian PDF with μ at the central bin + * and σ equal to `sigma`. + */ +gsl_histogram* gaussian_for(gsl_histogram *hist, double sigma) { + /* Set the size to 6% of the histogram. + * Always choose n even to correctly center the + * gaussian in the middle. + */ + size_t n = ((double) hist->n * 6) / 100; + n = n % 2 ? n+1 : n; + + /* Calculate the (single) bin width assuming + * the ranges are uniformely spaced + */ + double min, max; + gsl_histogram_get_range(hist, 0, &min, &max); + double dx = max - min; + gsl_histogram *res = gsl_histogram_alloc(n); + gsl_histogram_set_ranges_uniform(res, 0, dx * n); + + /* The histogram will be such that + * the zero falls in the central bin. + */ + long int offset = res->n/2; + + for (long int i = 0; i < (long int)res->n; i++) + res->bin[i] = gsl_ran_gaussian_pdf( + ((double)(i - offset) + 0.5) * dx, sigma * dx); + + return res; +} + + +/* Convolves two histograms while keeping the + * original bin edges. This is a simple wrapper + * function around `gsl_vector_convolve`. 
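 * The result has a->n + b->n - 1 bins (a "full" linear convolution)
 * squeezed into the same range as `a`, so its bins are narrower
 * than the original ones.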
+ */ +gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b) { + gsl_histogram *res = gsl_histogram_calloc(a->n + b->n - 1); + + /* Set the same edges as `a`*/ + double max = gsl_histogram_max(a); + double min = gsl_histogram_min(a); + gsl_histogram_set_ranges_uniform(res, min, max); + + /* Create vector views for everybody */ + gsl_vector_view va = gsl_vector_view_array(a->bin, a->n); + gsl_vector_view vb = gsl_vector_view_array(b->bin, b->n); + gsl_vector_view vres = gsl_vector_view_array(res->bin, res->n); + + gsl_vector_convolve(&va.vector, &vb.vector, &vres.vector); + + return res; +} + + +int main(int argc, char **argv) { + struct options opts; + /* Set default options */ + opts.convolved = 0; + opts.deconvolved = 0; + opts.original = 1; + opts.num_events = 50000; + opts.bins = 150; + opts.sigma = 0.8; + opts.rounds = 3; + opts.mode = "fft"; + opts.noise = 0; + + /* Process CLI arguments */ + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-c")) opts.convolved = 1; + else if (!strcmp(argv[i], "-d")) opts.deconvolved = 1; + else if (!strcmp(argv[i], "-o")) opts.original = 1; + else if (!strcmp(argv[i], "-e")) opts.num_events = atol(argv[++i]); + else if (!strcmp(argv[i], "-b")) opts.bins = atol(argv[++i]); + else if (!strcmp(argv[i], "-s")) opts.sigma = atof(argv[++i]); + else if (!strcmp(argv[i], "-r")) opts.rounds = atol(argv[++i]); + else if (!strcmp(argv[i], "-m")) opts.mode = argv[++i]; + else if (!strcmp(argv[i], "-n")) opts.noise = atof(argv[++i]); + else { + fprintf(stderr, "Usage: %s -[cdoebsh]\n", argv[0]); + fprintf(stderr, "\t-h\t\tShow this message.\n"); + fprintf(stderr, "\t-c\t\tPrint the convolved histogram to stdout.\n"); + fprintf(stderr, "\t-d\t\tAttempt and print the deconvolved histogram.\n"); + fprintf(stderr, "\t-o\t\tPrint the original histogram to stdout.\n"); + fprintf(stderr, "\t-e N\t\tThe number of events.\n"); + fprintf(stderr, "\t-b N\t\tThe number of θ bins.\n"); + fprintf(stderr, "\t-s SIGMA\tThe sigma of gaussian kernel.\n"); + fprintf(stderr, "\t-r N\t\tThe number of RL deconvolution rounds.\n"); + fprintf(stderr, "\t-n MU\t\tThe mean (μ) of Poisson noise to add to the convolution.\n"); + return EXIT_FAILURE; + } + } + + /* Initialize an RNG. */ + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + gsl_histogram* hist = gsl_histogram_alloc(opts.bins); + gsl_histogram_set_ranges_uniform(hist, 0, M_PI/2); + + double theta; + struct param p = { 0.01, 0.0001, 1e4, 1 }; + + /* Sample events following the intensity + * of a circular aperture diffraction I(θ) + * while producing a histogram. + */ + fputs("# event sampling\n", stderr); + fprintf(stderr, "max itensity: %.2f\n", intensity(0, p)); + fprintf(stderr, "1. generating %ld events...", opts.num_events); + for (size_t i = 0; i < opts.num_events; i++){ + do { + theta = acos(1 - gsl_rng_uniform(r)); + } while(intensity(0, p) * gsl_rng_uniform(r) > intensity(theta, p)); + gsl_histogram_increment(hist, theta); + } + fputs("done\n", stderr); + + /* Generate the gaussian kernel, convolve the + * sample histogram with it and try to deconvolve it. + */ + fputs("\n# convolution\n", stderr); + fputs("1. generating gaussian kernel...", stderr); + gsl_histogram *kernel = gaussian_for(hist, opts.sigma); + fputs("done\n", stderr); + + fputs("2. convolving...", stderr); + gsl_histogram *conv = histogram_convolve(hist, kernel); + fputs("done\n", stderr); + + /* Add Poisson noise with μ=opts.noise to + * the convolution. 
+ */ + if (opts.noise > 0) { + fputs("2.1 adding poisson noise...", stderr); + for (size_t i = 0; i < conv->n; i++) + conv->bin[i] += gsl_ran_poisson(r, opts.noise); + fputs("done\n", stderr); + } + + fprintf(stderr, "3. %s deconvolution...", opts.mode); + gsl_histogram *clean; + if (!strcmp(opts.mode, "fft")) + clean = fft_deconvolve(conv, kernel); + else if (!strcmp(opts.mode, "rl")) + clean = rl_deconvolve(conv, kernel, opts.rounds); + else { + fputs("\n\nerror: invalid mode. select either 'fft' or 'rl'\n", stderr); + return EXIT_FAILURE; + } + fputs("done\n", stderr); + + /* Print the selected histogram*/ + fputs("\n# histogram \n", stderr); + gsl_histogram *showing; + if (opts.convolved) showing = conv; + else if (opts.deconvolved) showing = clean; + else if (opts.original) showing = hist; + gsl_histogram_fprintf(stdout, showing, "%g", "%g"); + + // free memory + gsl_rng_free(r); + gsl_histogram_free(hist); + gsl_histogram_free(kernel); + gsl_histogram_free(conv); + gsl_histogram_free(clean); + + return EXIT_SUCCESS; +} diff --git a/ex-6/plot.py b/ex-6/plot.py new file mode 100755 index 0000000..eb57c84 --- /dev/null +++ b/ex-6/plot.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +a, b, f = loadtxt(sys.stdin, unpack=True) +suptitle('Fraunhofer diffraction') +title(sys.argv[1] if len(sys.argv) > 1 else "", loc='right') +hist(a, np.insert(b, 0, a[0]), weights=f/sum(f), + color='#dbbf0d', edgecolor='#595856', linewidth=0.5) +show() diff --git a/ex-6/rl.c b/ex-6/rl.c new file mode 100644 index 0000000..72cdb56 --- /dev/null +++ b/ex-6/rl.c @@ -0,0 +1,152 @@ +#include "rl.h" +#include +#include + + +/* `gsl_vector_convolv(a, b, res)` computes the linear + * convolution of two vectors. The resulting vector + * has size `a->n + b->n - 1` and is stored in `res`. + */ +int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res) { + size_t m = a->size; + size_t n = b->size; + + /* Vector views for the overlap dot product */ + gsl_vector_view va, vb; + + /* Reverse `b` before convolving */ + gsl_vector_reverse(b); + + double dot; + size_t start_a, start_b, many; + for (size_t i = 0; i < m + n - 1; i++) { + /* Calculate the starting indices + * of the two vectors a,b overlap. + */ + if (i < n) start_a = 0; + else start_a = i - (n - 1); + + if (i < n) start_b = (n - 1) - i; + else start_b = 0; + + /* Calculate how many elements overlap */ + if (i < n) many = i + 1; + else if (i >= m) many = n - (i - m) - 1; + else many = n; + + /* Take the dot product of the overlap + * and store it in the resulting histogram + */ + va = gsl_vector_subvector(a, start_a, many); + vb = gsl_vector_subvector(b, start_b, many); + gsl_blas_ddot(&va.vector, &vb.vector, &dot); + gsl_vector_set(res, i, dot); + } + + /* Put b back together */ + gsl_vector_reverse(b); + + return GSL_SUCCESS; +} + + +/* Performs the Richardson-Lucy deconvolution. + * In pseudo-python: + * + * def rl_deconvolve(data, kernel, rounds): + * est = np.full(data, 0.5) + * for _ in range(rounds): + * est_conv = convolve(est, kernel) + * est *= convolve(data / est_conv, reversed) + * + */ +gsl_histogram* rl_deconvolve( + gsl_histogram *data, + gsl_histogram *kernel, + size_t rounds) { + /* Size of the original data + * before being convoluted. 
+ * + * Notation of sizes: + * - original: m + * - kernel: n + * - "full" convolution: m + n - 1 + */ + size_t orig_size = data->n - kernel->n + 1; + + /* Create a histogram with the same edges + * as `data`, but with the original size, + * to return the cleaned result + */ + gsl_histogram *hist = gsl_histogram_calloc(orig_size); + + /* Set the same bin edges as `data`*/ + double max = gsl_histogram_max(data); + double min = gsl_histogram_min(data); + gsl_histogram_set_ranges_uniform(hist, min, max); + + /* Vector views of the result, kernel + * and data. These are used to perform + * vectorised operations on histograms. + */ + gsl_vector est = + gsl_vector_view_array(hist->bin, hist->n).vector; + gsl_vector vkernel = + gsl_vector_view_array(kernel->bin, kernel->n).vector; + gsl_vector vdata = + gsl_vector_view_array(data->bin, data->n).vector; + gsl_vector center; + + /* Create a flipped copy of the kernel */ + gsl_vector *vkernel_flip = gsl_vector_alloc(kernel->n); + gsl_vector_memcpy(vkernel_flip, &vkernel); + gsl_vector_reverse(vkernel_flip); + + /* More vectors to store partial + * results + */ + gsl_vector* est_conv = gsl_vector_alloc(data->n); + gsl_vector* rel_blur = gsl_vector_alloc(data->n); + + /* The zero-order estimate is simply + * all elements at 0.5 */ + gsl_vector_set_all(&est, 0.5); + + for (size_t iter = 0; iter < rounds; iter++) { + /* The current estimated convolution is the + * current estimate of the data with + * the kernel */ + gsl_vector_convolve(&est, &vkernel, est_conv); + + /* Divide the data by the estimated + * convolution to calculate the "relative blur". + */ + gsl_vector_memcpy(rel_blur, &vdata); + gsl_vector_div(rel_blur, est_conv); + + /* Set NaNs to zero */ + for (size_t i = 0; i < rel_blur->size; i++) + if (isnan(gsl_vector_get(rel_blur, i))) { + fprintf(stderr, "gotcha: %ld!!\n", i); + double y = (i > 0)? gsl_vector_get(rel_blur, i - 1) : 1; + gsl_vector_set(rel_blur, i, y); + } + + /* Convolve the blur by the kernel + * and multiply the current estimate + * of the data by it. + */ + center = gsl_vector_subvector(rel_blur, (kernel->n-1)/2, orig_size).vector; + gsl_vector_convolve(¢er, vkernel_flip, est_conv); + center = gsl_vector_subvector(est_conv, (kernel->n-1)/2, orig_size).vector; + gsl_vector_mul(&est, ¢er); + } + + // free memory + gsl_vector_free(est_conv); + gsl_vector_free(rel_blur); + gsl_vector_free(vkernel_flip); + + return hist; + +} diff --git a/ex-6/rl.h b/ex-6/rl.h new file mode 100644 index 0000000..1f63ae4 --- /dev/null +++ b/ex-6/rl.h @@ -0,0 +1,20 @@ +#include +#include + +/* `rl_deconvolve(data, noise, rounds)` + * tries to deconvolve `noise` from + * `data` in `rounds` rounds. + * */ +gsl_histogram* rl_deconvolve( + gsl_histogram *data, + gsl_histogram *noise, + size_t rounds); + +/* `convolve(a, b)` computes the linear convolution + * of two histograms. The convolted histogram + * has length `a->n + b->n - 1`. + */ +int gsl_vector_convolve( + gsl_vector *a, + gsl_vector *b, + gsl_vector *res); diff --git a/ex-7/main.c b/ex-7/main.c new file mode 100644 index 0000000..34a6232 --- /dev/null +++ b/ex-7/main.c @@ -0,0 +1,433 @@ +#include +#include +#include +#include +#include +#include + +/* Parameters for bivariate + * gaussian PDF + */ +struct par { + double x0; // x mean + double y0; // y mean + double sigma_x; // x standard dev + double sigma_y; // y standard dev + double rho; // correlation: cov(x,y)/σx⋅σy +}; + + +/* A sample of N 2D points is an + * N×2 matrix, with vectors as rows. 
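 * The parameters used to generate the sample are stored alongside
 * the points, so later functions can rebuild the covariance Σ and
 * the mean μ from them.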
+ */ +typedef struct { + struct par p; + gsl_matrix *data; +} sample_t; + + +/* Create a sample of `n` points */ +sample_t* sample_t_alloc(size_t n, struct par p) { + sample_t *x = malloc(sizeof(sample_t)); + x->p = p; + x->data = gsl_matrix_alloc(n, 2); + return x; +} + +/* Delete a sample */ +void sample_t_free(sample_t *x) { + gsl_matrix_free(x->data); + free(x); +} + + +/* `generate_normal(r, n, p)` will create + * a sample of `n` points, distributed + * according to a bivariate gaussian distribution + * of parameters `p`. + */ +sample_t* generate_normal( + gsl_rng *r, size_t n, struct par *p) { + sample_t *s = sample_t_alloc(n, *p); + + for (size_t i = 0; i < n; i++) { + /* Generate a vector (x,y) with + * a standard (μ = 0) bivariate + * gaussian PDF. + */ + double *x = gsl_matrix_ptr(s->data, i, 0); + double *y = gsl_matrix_ptr(s->data, i, 1); + gsl_ran_bivariate_gaussian( + r, + p->sigma_x, p->sigma_y, p->rho, + x, y); + + /* Shift the vector to (x₀,y₀) */ + *x += p->x0; + *y += p->y0; + } + + return s; +} + + +/* Builds the covariance matrix Σ + * from the standard parameters (σ, ρ) + * of a bivariate gaussian. + */ +gsl_matrix* normal_cov(struct par *p) { + double var_x = pow(p->sigma_x, 2); + double var_y = pow(p->sigma_y, 2); + double cov_xy = p->rho * p->sigma_x * p->sigma_y; + + gsl_matrix *cov = gsl_matrix_alloc(2, 2); + gsl_matrix_set(cov, 0, 0, var_x); + gsl_matrix_set(cov, 1, 1, var_y); + gsl_matrix_set(cov, 0, 1, cov_xy); + gsl_matrix_set(cov, 1, 0, cov_xy); + + return cov; +} + + +/* Builds the mean vector of + * a bivariate gaussian. + */ +gsl_vector* normal_mean(struct par *p) { + gsl_vector *mu = gsl_vector_alloc(2); + gsl_vector_set(mu, 0, p->x0); + gsl_vector_set(mu, 1, p->y0); + + return mu; +} + + +/* `fisher_proj(c1, c2)` computes the optimal + * projection map, which maximises the separation + * between the two classes. + * The projection vector w is given by + * + * w = Sw⁻¹ (μ₂ - μ₁) + * + * where Sw = Σ₁ + Σ₂ is the so-called within-class + * covariance matrix. + */ +gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) { + /* Construct the covariances of each class... */ + gsl_matrix *cov1 = normal_cov(&c1->p); + gsl_matrix *cov2 = normal_cov(&c2->p); + + /* and the mean values */ + gsl_vector *mu1 = normal_mean(&c1->p); + gsl_vector *mu2 = normal_mean(&c2->p); + + /* Compute the inverse of the within-class + * covariance Sw⁻¹. + * Note: by definition Σ is symmetrical and + * positive-definite, so Cholesky is appropriate. + */ + gsl_matrix_add(cov1, cov2); + gsl_linalg_cholesky_decomp(cov1); + gsl_linalg_cholesky_invert(cov1); + + /* Compute the difference of the means. */ + gsl_vector *diff = gsl_vector_alloc(2); + gsl_vector_memcpy(diff, mu2); + gsl_vector_sub(diff, mu1); + + /* Finally multiply diff by Sw. + * This uses the rather low-level CBLAS + * functions gsl_blas_dgemv: + * + * ___ double ___ 1 ___ nothing + * / / / + * dgemv computes y := α op(A)x + βy + * \ \__matrix-vector \____ 0 + * \__ A is symmetric + */ + gsl_vector *w = gsl_vector_alloc(2); + gsl_blas_dgemv( + CblasNoTrans, // do nothing on A + 1, // α = 1 + cov1, // matrix A + diff, // vector x + 0, // β = 0 + w); // vector y + + // free memory + gsl_matrix_free(cov1); + gsl_matrix_free(cov2); + gsl_vector_free(mu1); + gsl_vector_free(mu2); + gsl_vector_free(diff); + + return w; +} + + +/* Computes the determinant from the + * Cholesky decomposition of matrix. + * In this case it's simply the product + * of the diagonal elements, squared. 
+ */ +double gsl_linalg_cholesky_det(gsl_matrix *LL) { + gsl_vector diag = gsl_matrix_diagonal(LL).vector; + double det = 1; + for (size_t i = 0; i < LL->size1; i++) + det *= gsl_vector_get(&diag, i); + return det * det; +} + + +/* `fisher_cut(ratio, w, c1, c2)` computes + * the threshold (cut), on the line given by + * `w`, to discriminates the classes `c1`, `c2`; + * with `ratio` being the ratio of their prior + * probabilities. + * + * The cut is fixed by the condition of + * conditional probability being the + * same for each class: + * + * P(c₁|x) p(x|c₁)⋅p(c₁) + * ------- = --------------- = 1; + * P(c₂|x) p(x|c₁)⋅p(c₂) + * + * together with x = t⋅w. + * + * If p(x|c) is a bivariate normal PDF the + * solution is found to be: + * + * t = (b/a) + √((b/a)² - c/a); + * + * where + * + * 1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w) + * 2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂) + * 3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log|Σ₂|/log|Σ₁| - 2 log(α) + * 4. α = p(c₁)/p(c₂) + * + */ +double fisher_cut( + double ratio, + gsl_vector *w, + sample_t *c1, sample_t *c2) { + + /* Construct the covariances of each class... */ + gsl_matrix *cov1 = normal_cov(&c1->p); + gsl_matrix *cov2 = normal_cov(&c2->p); + + /* and the mean values */ + gsl_vector *mu1 = normal_mean(&c1->p); + gsl_vector *mu2 = normal_mean(&c2->p); + + /* Temporary vector/matrix for + * intermediate results. + */ + gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2); + gsl_vector *vtemp = gsl_vector_alloc(w->size); + + /* Invert Σ₁ and Σ₂ in-place: + * we only need the inverse matrices + * in the steps to follow. + */ + gsl_linalg_cholesky_decomp(cov1); + gsl_linalg_cholesky_decomp(cov2); + // store determinant for later + double det1 = gsl_linalg_cholesky_det(cov1); + double det2 = gsl_linalg_cholesky_det(cov2); + gsl_linalg_cholesky_invert(cov1); + gsl_linalg_cholesky_invert(cov2); + + /* Compute the first term: + * + * a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w) + * + */ + // mtemp = cov1 - cov2 + gsl_matrix_memcpy(mtemp, cov1); + gsl_matrix_sub(mtemp, cov2); + + // vtemp = mtemp ⋅ vtemp + gsl_vector_memcpy(vtemp, w); + gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp); + + // a = (w, vtemp) + double a; gsl_blas_ddot(w, vtemp, &a); + + /* Compute the second term: + * + * b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂) + * + */ + // vtemp = cov1 ⋅ mu1 + // vtemp = cov2 ⋅ mu2 - vtemp + gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp); + gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp); + + // b = (w, vtemp) + double b; gsl_blas_ddot(w, vtemp, &b); + + /* Compute the last term: + * + * c = log|Σ₂|/|Σ₁| + (μ₂, Σ₂⁻¹μ₂) - (μ₁, Σ₁⁻¹μ₁) + * + */ + double c, temp; + c = log(det1 / det2) - 2*log(ratio); + + gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, 0, vtemp); + gsl_blas_ddot(mu1, vtemp, &temp); + c += temp; + + gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp); + gsl_blas_ddot(mu2, vtemp, &temp); + c -= temp; + + /* To get the thresold value we have to + * multiply t by |w| if not normalised + */ + double norm; gsl_blas_ddot(w, w, &norm); + + // free memory + gsl_vector_free(mu1); + gsl_vector_free(mu2); + gsl_vector_free(vtemp); + gsl_matrix_free(cov1); + gsl_matrix_free(cov2); + gsl_matrix_free(mtemp); + + return ((b/a) + sqrt(pow(b/a, 2) - c/a)) * norm; +} + + +/* `fisher_cut2(ratio, w, c1, c2)` computes + * the threshold (cut), on the line given by + * `w`, to discriminates the classes `c1`, `c2`; + * with `ratio` being the ratio of their prior + * probabilities. 
+ * + * The cut is fixed by the condition of + * conditional probability being the + * same for each class: + * + * P(c₁|x) p(x|c₁)⋅p(c₁) + * ------- = --------------- = 1; + * P(c₂|x) p(x|c₁)⋅p(c₂) + * + * where p(x|c) is the probability for point x + * along the fisher projection line. If the classes + * are bivariate gaussian then p(x|c) is simply + * given by a normal distribution: + * + * Φ(μ=(w,μ), σ=(w,Σw)) + * + * The solution is then + * + * t = (b/a) + √((b/a)² - c/a); + * + * where + * + * 1. a = S₁² - S₂² + * 2. b = M₂S₁² - M₁S₂² + * 3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α) + * 4. α = p(c₁)/p(c₂) + * + */ +double fisher_cut2( + double ratio, + gsl_vector *w, + sample_t *c1, sample_t *c2) { + + gsl_vector *vtemp = gsl_vector_alloc(w->size); + + /* Construct the covariances of each class... */ + gsl_matrix *cov1 = normal_cov(&c1->p); + gsl_matrix *cov2 = normal_cov(&c2->p); + + /* and the mean values */ + gsl_vector *mu1 = normal_mean(&c1->p); + gsl_vector *mu2 = normal_mean(&c2->p); + + /* Project the distribution onto the + * w line to get a 1D gaussian + */ + // mean + double m1; gsl_blas_ddot(w, mu1, &m1); + double m2; gsl_blas_ddot(w, mu2, &m2); + + // variances + gsl_blas_dgemv(CblasNoTrans, 1, cov1, w, 0, vtemp); + double var1; gsl_blas_ddot(w, vtemp, &var1); + gsl_blas_dgemv(CblasNoTrans, 1, cov2, w, 0, vtemp); + double var2; gsl_blas_ddot(w, vtemp, &var2); + + double a = var1 - var2; + double b = m2*var1 + m1*var2; + double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio); + + // free memory + gsl_vector_free(mu1); + gsl_vector_free(mu2); + gsl_vector_free(vtemp); + gsl_matrix_free(cov1); + gsl_matrix_free(cov2); + + return (b/a) + sqrt(pow(b/a, 2) - c/a); +} + + +int main(int argc, char **args) { + // initialize RNG + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + /* Generate two classes of normally + * distributed 2D points with different + * paramters: signal and noise. + */ + struct par par_sig = { 0, 0, 0.3, 0.3, 0.5 }; + struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 }; + + // sample sizes + size_t nsig = 800; + size_t nnoise = 1000; + + sample_t *signal = generate_normal(r, nsig, &par_sig); + sample_t *noise = generate_normal(r, nnoise, &par_noise); + + /* Fisher linear discriminant + * + * First calculate the direction w onto + * which project the data points. Then the + * cut which determines the class for each + * projected point. + */ + gsl_vector *w = fisher_proj(signal, noise); + double t_cut = fisher_cut2(nsig / (double)nnoise, + w, signal, noise); + + fputs("# Linear Fisher discriminant\n\n", stderr); + fprintf(stderr, "* w: [%.3f, %.3f]\n", + gsl_vector_get(w, 0), + gsl_vector_get(w, 1)); + fprintf(stderr, "* t_cut: %.3f\n", t_cut); + + gsl_vector_fprintf(stdout, w, "%g"); + printf("%f\n", t_cut); + + /* Print data to stdout for plotting. + * Note: we print the sizes to be able + * to set apart the two matrices. 
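 * (ex-7/plot.py reads these counts back and uses them to split the
 * flattened stream into the signal and noise samples.)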
+ */ + printf("%ld %ld %d\n", nsig, nnoise, 2); + gsl_matrix_fprintf(stdout, signal->data, "%g"); + gsl_matrix_fprintf(stdout, noise->data, "%g"); + + // free memory + gsl_rng_free(r); + sample_t_free(signal); + sample_t_free(noise); + + return EXIT_SUCCESS; +} diff --git a/ex-7/plot.py b/ex-7/plot.py new file mode 100755 index 0000000..9d4515f --- /dev/null +++ b/ex-7/plot.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +w = loadtxt(sys.stdin, max_rows=2) +cut = float(input()) + +n, m, d = map(int, input().split()) +data = loadtxt(sys.stdin).reshape(n + m, d) +signal, noise = data[:n].T, data[n:].T + +figure() +scatter(*signal, c='xkcd:grey blue', label='signal') +scatter(*noise, c='xkcd:baby blue', label='noise') +scatter(*(cut * w/np.dot(w,w)), c='red') +arrow(*(-abs(w)), *4*abs(w)) +legend() + +figure() +sig_proj = np.dot(w, signal) +noise_proj = np.dot(w, noise) +hist(sig_proj, color='xkcd:grey blue') +hist(noise_proj, color='xkcd:baby blue') +axvline(cut, c='r') +show() diff --git a/makefile b/makefile new file mode 100644 index 0000000..3a93937 --- /dev/null +++ b/makefile @@ -0,0 +1,33 @@ +GMP_FLAGS = -L$(GMP_PATH) -lgmp +GSL_FLAGS = $(shell pkg-config --libs gsl) +CFLAGS := -Wall --debug $(GMP_FLAGS) $(GSL_FLAGS) + +CCOMPILE = \ + mkdir -p $(@D); \ + $(CC) $(CFLAGS) $^ -o $@ + +ex-1/bin/%: ex-1/%.c + $(CCOMPILE) + +ex-2/bin/%: ex-2/%.c + $(CCOMPILE) + +ex-3/bin/main: ex-3/common.c ex-3/likelihood.c ex-3/chisquared.c + $(CCOMPILE) + +ex-4/bin/main: ex-4/main.c + $(CCOMPILE) + +ex-5/bin/%: ex-5/%.c + $(CCOMPILE) + +ex-6/bin/main: ex-6/rl.c ex-6/fft.c + $(CCOMPILE) + +ex-7/main: ex-7/main.c + $(CCOMPILE) + +misc/pdfs: misc/pdfs.c + +clean: + rm -rf ex-*/bin diff --git a/misc/lessons/images/binomiale.png b/misc/lessons/images/binomiale.png new file mode 100644 index 0000000..c975953 Binary files /dev/null and b/misc/lessons/images/binomiale.png differ diff --git a/misc/lessons/images/chi2.png b/misc/lessons/images/chi2.png new file mode 100644 index 0000000..1d7190d Binary files /dev/null and b/misc/lessons/images/chi2.png differ diff --git a/misc/lessons/images/correlazione.png b/misc/lessons/images/correlazione.png new file mode 100644 index 0000000..d21310b Binary files /dev/null and b/misc/lessons/images/correlazione.png differ diff --git a/misc/lessons/images/exponential.png b/misc/lessons/images/exponential.png new file mode 100644 index 0000000..656b546 Binary files /dev/null and b/misc/lessons/images/exponential.png differ diff --git a/misc/lessons/images/fischer.png b/misc/lessons/images/fischer.png new file mode 100644 index 0000000..717b2f0 Binary files /dev/null and b/misc/lessons/images/fischer.png differ diff --git a/misc/lessons/images/gaussian.png b/misc/lessons/images/gaussian.png new file mode 100644 index 0000000..d543ba8 Binary files /dev/null and b/misc/lessons/images/gaussian.png differ diff --git a/misc/lessons/images/kurtosis.png b/misc/lessons/images/kurtosis.png new file mode 100644 index 0000000..018070f Binary files /dev/null and b/misc/lessons/images/kurtosis.png differ diff --git a/misc/lessons/images/landau.png b/misc/lessons/images/landau.png new file mode 100644 index 0000000..ea19913 Binary files /dev/null and b/misc/lessons/images/landau.png differ diff --git a/misc/lessons/images/logistic.png b/misc/lessons/images/logistic.png new file mode 100644 index 0000000..17e6d81 Binary files /dev/null and b/misc/lessons/images/logistic.png differ diff --git a/misc/lessons/images/poisson.png b/misc/lessons/images/poisson.png 
new file mode 100644 index 0000000..a7e84e7 Binary files /dev/null and b/misc/lessons/images/poisson.png differ diff --git a/misc/lessons/images/skewness.png b/misc/lessons/images/skewness.png new file mode 100644 index 0000000..43efb6d Binary files /dev/null and b/misc/lessons/images/skewness.png differ diff --git a/misc/lessons/images/student.png b/misc/lessons/images/student.png new file mode 100644 index 0000000..72a76af Binary files /dev/null and b/misc/lessons/images/student.png differ diff --git a/misc/lessons/images/uniform.png b/misc/lessons/images/uniform.png new file mode 100644 index 0000000..f573dac Binary files /dev/null and b/misc/lessons/images/uniform.png differ diff --git a/misc/lessons/images/wigner.png b/misc/lessons/images/wigner.png new file mode 100644 index 0000000..eecef49 Binary files /dev/null and b/misc/lessons/images/wigner.png differ diff --git a/misc/lessons/makefile b/misc/lessons/makefile new file mode 100644 index 0000000..86a1b30 --- /dev/null +++ b/misc/lessons/makefile @@ -0,0 +1,17 @@ +sections = $(shell find sections/ -name '*.md' | sort -V) + +pandoc = \ + @ pandoc $(1) \ + --toc \ + --standalone \ + --pdf-engine=xelatex \ + -F pandoc-crossref \ + -o $(2).pdf + +all: lessons.pdf + +clean: + rm lessons.pdf + +lessons.pdf: $(sections) images + $(call pandoc, $(sections), lessons) diff --git a/misc/lessons/sections/0.md b/misc/lessons/sections/0.md new file mode 100644 index 0000000..b5aa1d4 --- /dev/null +++ b/misc/lessons/sections/0.md @@ -0,0 +1,23 @@ +--- +title: Analisi statistica +subtitle: Riassunto delle slides +numbersections: true +documentclass: article +fontsize: 12pt + +geometry: + - width=150mm + - top=20mm + - bottom=30mm + +header-includes: | + ```{=latex} + % start new page with each section + \usepackage{etoolbox} + \usepackage{tikz} + \usepackage{amsmath} + \usepackage{amssymb} + \usepackage{siunitx} + \pretocmd{\section}{\clearpage}{}{} + ``` +--- diff --git a/misc/lessons/sections/modulo-1.md b/misc/lessons/sections/modulo-1.md new file mode 100644 index 0000000..92e409b --- /dev/null +++ b/misc/lessons/sections/modulo-1.md @@ -0,0 +1,140 @@ +# Calcolo numerico + +## Compilare un programma + +Per eseguire un programma, un computer fa questa cosa: legge le informazioni +dall'input e le mette nella memoria; poi da qui legge le istruzioni +sequenzialmente e se c'è qualche calcolo da fare, lo fa fare alla ALU +(aritmetic logic unit) e memorizza il risultato nella memoria; alla fine passa +tutto in output. +Per essere eseguibile, un programma deve essere scritto in codice macchina +e per questo si usano i linguaggi di programmazione che, attraverso il +compilatore, vengono tradotti in codice macchina: + +- codice sorgente, +- compilatore, +- codice oggetto, +- linker (aggiunge le librerie), +- codice eseguibile. + +## Errori di implementazione + +Quando si passa dal modello astratto a quello implementato, si distinguono +tre tipi di errori: + +- errori analitici: causati dalla necessità di una aritmetica discreta, + cioè il fatto che non si possa rappresentare una cosa continua e quindi + la si fa a punti; +- errori inerenti o algoritmici (o di round-off): causati dal numero finito di + cifre significative. Dipendono dall'algoritmo utilizzato, perché ci sono + metodi migliori di altri per evitare questo tipo di problemi. + +## Rappresentazione di un numero + +L'obiettivo è quello di tenere sotto controllo gli errori di round-off. 
+Per rappresentare un numero, esistono diverse rappresentazioni, tra cui quella +di Von Neumann: si dedicano $n$ cifre significative alla mantissa e poi un tot +anche alla caratteristica, cioè all'ordine di grandezza (che generalmente è in +base 2). +Con $n$ cifre significative in base 2, il numero più alto rappresentabile è +$2^n -1$ perché con due cifre (0 e 1), ci sono $2^n$ possibili numeri +rappresentabili e, se posti in ordine crescente, ognuno dista dal precedente +un'unità, con il minimo che vale 0 (tutti 0), per cui il numero più alto che si +può rappresentare è $2^n -1$: + +- 0 è l'1, +- 1 e il 2, +- ...$2^n -1$ è il $2^n$-esimo. + +questo numero corrisponde a un numero in base dieci di $m$ cifre significative, +per cui: + +$$ + m = \log(2^n -1) +$$ + +quindi per un numero con 24 bit per la mantissa, si hanno circa 7 cifre +significative in base 10. +In precisione semplice (floating point): + +- un bit per il segno, +- 23 bit per la mantissa (quindi 7 cifre significative), +- 8 bits per la caratteristica. + +In precisione doppia (duble) si raddoppiano i bite alla mantissa e le cifre +significative diventano 17, però il tempo per le moltiplicazioni è triplicato +e si occupa più memoria, quindi non è che convenga moltissimo. + +Esiste anche la rappresentazione fixed-point, in cui c'è un bit per il segno +e i restanti per il numero in sé, in cui da qualche parte c'è la virgola. +Nel caso del fixed pointd, l'errore relativo può variare moltissimo a seconda +del numero rappresentato, mentre per i float è lo stesso per ogni numero: la +densità relativa non è costante. +Quando un insieme di rappresentazione non è chiuso rispetto ad un'operazione, +il risultato presenta sicuramente errore di round-off e deve essere +approssimato. + + +## Errori algoritmici + +Per numeri troppo grandi o troppo piccoli che non si riesce a +rappresentare, si parla rispettivamente di overflow o underflow. Per questo +motivo, somme e soprattutto differenze di numeri grandi portano grandi errori +dovuti all'arrotondamento: bisogna cercare di evitarlo se possibile. Per +esempio, per calcolare un polinomio, conviene usare il metodi di +Ruffini-Horner perché riduce il numero di operazioni da effettuare: + +- metodo semplice: + + $$ + p(x) = a_0 + a_1x + a_2x^2 + ... +a_nx^n + $$ + + che sono n addizioni e n(n+1)/2 moltiplicazioni, quindi $\sim n^2$. +- metodo di R-H: + + $$ + p (x) = (((((a_n)x +a_{n-1})x +a_{n-2}x +... + $$ + + che sono n addizioni e n moltiplicazioni, quindi $\sim n$. + +Il problema si verifica anche se si sottraggono due numeri molto simili tra +loro, perché il risultato è un numero molto piccolo che potrebbe finire in +underflow (si parla di "cancellazione catastrofica"). + +\textcolor{orange}{oss:} Il costo computazionale della risoluzione di un +sistema lineare in $n$ equazioni è asintoticamente uguale al costo del +prodotto di due matrici $nxn$. Esistono algoritmi che non richiedono più di +$k \cdot n^{\alpha}$ operazioni, col il più piccolo valore noto di $\alpha$ +pari a 0.2375. + +Alcuni algoritmi sono definiti "instabili": accade quando gli errori di +round-off si accumulano fino a portare a risultati completamente errati. +Esistono problemi detti "mal condizionati" che con qualsiasi algoritmo danno +errori talmente elevati da rendere il risultato privo di significato. In +questi casi, piccole variazioni dei dati iniziali portano a grandi variazioni +nei risultati. 
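As a quick, self-contained illustration of catastrophic cancellation (a minimal sketch, independent of the exercises in this repository), the following C program evaluates $1 - \cos(x)$ for a small $x$ in two mathematically equivalent ways: the direct difference of two nearly equal numbers loses essentially all significant digits, while the rewritten form $2\sin^2(x/2)$ preserves the correct order of magnitude.

```c
#include <stdio.h>
#include <math.h>

int main(void) {
    double x = 1e-8;

    // Direct form: cos(x) agrees with 1 in ~16 digits, so the
    // subtraction cancels essentially every significant figure.
    double direct = 1.0 - cos(x);

    // Equivalent form with no subtraction of nearly equal numbers.
    double stable = 2.0 * pow(sin(x / 2.0), 2);

    printf("direct: %.17g\n", direct);   // typically prints 0
    printf("stable: %.17g\n", stable);   // ≈ 5e-17, the correct value
    return 0;
}
```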
Si chiama "numero di condizionamento del problema" il seguente +rapporto: + +$$ + \frac{\Delta r}{\Delta \alpha} = + \frac{\text{\% errore risultato}}{\text{\% errore dato iniziale}} +$$ + +che può quindi essere espresso in questo modo: + +$$ + \Delta \alpha = \frac{x + h - x}{x} = \frac{h}{x} + \hspace{100pt} + \Delta r = \frac{f(x + h)-f(x)}{f(x)} +$$ +$$ + \frac{\Delta r}{\Delta \alpha} = + \frac{f(x + h)-f(x)}{h} \cdot \frac{x}{f(x)} = + f'(x) \cdot \frac{x}{f(x)} +$$ + +Se questo numero è $<< 1$, allora il problema è poco sensibile ai dati +iniziali. + diff --git a/misc/lessons/sections/modulo-2-1.md b/misc/lessons/sections/modulo-2-1.md new file mode 100644 index 0000000..1b02c79 --- /dev/null +++ b/misc/lessons/sections/modulo-2-1.md @@ -0,0 +1,257 @@ +# Probabilità + +## Concetto di probabilità + +Consideriamo un insieme S di eventi detto "spazio campionario" e due eventi +casuali A e B. Essi sono soggetti agli assiomi di Kolmogorov: + +$\forall A \subseteq S, 0 \leqslant P(A) \leqslant 1$ +$P(S) = 1$ +$A \cap B = \varnothing \Longrightarrow P(A \cup B) = P(A) + P(B)$ + +Si definisce probabilità condizionata di A dato B: + +$$ + P(A|B) = \frac{P(A \cap B)}{P(B)} +$$ + +Se due eventi sono indipendenti, allora vale che: + +$$ + P(A \cap B) = P(A) \cdot P(B) \Longrightarrow + P(A|B) = \frac{P(A) \cdot P(B)}{P(B)} = P(A) +$$ + +Esistono diversi approcci per definire la probabilità: + +- approccio frequentista: + + $$ + P(A) = \lim_{n \rightarrow +\infty} \frac{\#A}{n} + $$ + + ma c'è il problema che è impossibile fare un numero infinito di tentativi. +- approccio bayesiano: + utilizzare il teorema di Bayes, per esempio: + + $$ + P(\text{teoria|dati}) \propto P(\text{dati|teoria}) \cdot P(\text{teoria}) + $$ + si tratta quindi di un approccio "soggettivo", almeno per l'ultimo termine + dell'equazione qua sopra, perché la probabilità della validità della teoria + può essere data sulla base di ragionamenti e osservazioni riguardanti il + fenomeno di cui si sta parlando. + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio di probabilità condizionata + +Si osservano dei decadimenti e se ne misurano indipendentemente $N_a$ e $N_b$; +in comune ne sono stati misurati $N_{ab}$. Possiamo stimare il numero totale +di eventi $N$ e l'efficienza totale $\epsilon$. 
+ +$$ + P(a) = \frac{N_a}{N} \hspace{100pt} P(b) = \frac{N_b}{N} +$$ +$$ + P(ab) = \frac{N_{ab}}{N} = P(a) \cdot P(b) = \frac{N_a \cdot N_b}{N^2} + \Longrightarrow N = \frac{N_a \cdot N_b}{N_{ab}} +$$ +$$ + \epsilon = P(a \cup b) = P(a) + P(b) -P(a \cap b) = + \frac{N_a + N_b - N_{ab}}{N} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +## Teorema di Bayes + +$$ + P(A|B) = \frac{P(A \cap B)}{P(B)} + \hspace{100pt} + P(B|A) = \frac{P(B \cap A)}{P(A)} +$$ +$$ + P(A|B) = \frac{P(B|A) \cdot P(A)}{P(B)} +$$ + +Dal teorema di Bayes è possibile dedurre il teorema della probabilità totale: +consideriamo un insieme $S$ diviso in sottoinsiemi disgiunti $A_i$ la cui +unione dà l'insieme di partenza: + +$$ + \cup_i A_i = S +$$ + +e consideriamo un insieme $B$ anch'esso interno ad $S$: + +$$ + B = B \cap S = B \cap (\cup_i A_i) = \cup_i (B \cap A_i) +$$ +$$ + \Longrightarrow P(B) = \sum_i P(B \cap A_i) +$$ +$$ + \Longrightarrow P(B) = \sum_i P(B|A_i)P(A_i) +$$ + +da cui, per uno specifico insieme $A_j$, attraverso il teorema di Bayes: + +$$ + P(A_j|B) = \frac{P(B|A_j) \cdot P(A_j)}{\sum_i P(B|A_i)P(A_i)} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio di probabilità totale: segnale e fondo + +Un rilevatore misura segnale e fondo con relative efficienze $P(R|S)$ e +$P(R|F)$. 
Se sono note a priori la probabilità di segnale e di fondo +$P(S)$ e $P(F)$, allora si può risalire alla probabilità, data una +misurazione, di aver misurato il segnale: + +$$ + P(S|R) = \frac{P(R|S) \cdot P(S)}{P(R|S) \cdot P(S) + P(R|F) \cdot P(F)} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio di probabilità totale: AIDS + +$$ + P(\text{AIDS}) = 0.001 \hspace{50pt} P(\text{no AIDS}) = 0.999 +$$ +$$ + P(\text{+|AIDS}) = 0.98 \hspace{50pt} P(\text{-|AIDS}) = 0.02 +$$ +$$ + P(\text{+|no AIDS}) = 0.03 \hspace{50pt} P(\text{-|no AIDS}) = 0.97 +$$ + +Quindi se il test è positivo, la probabilità di avere davvero preso l'AIDS è: + +$$ + P(\text{AIDS|+}) = \frac{P(\text{+|AIDS}) \cdot P(\text{AIDS})} + {P(\text{+|AIDS}) \cdot P(\text{AIDS}) + P(\text{+|no AIDS}) \cdot + P(\text{no AIDS})} = 0.032 +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Approccio bayesiano con una teoria + +Cominciamo com un esempio: con un esperimento è stata misurata la massa di +un elettrone e sono stati trovati i valori {$m_i$}, di cui il valore medio +è $m_e = \SI{520 \pm 10}{KeV}$. Si assume quindi che il valore vero sia +compreso tra \SI{510}{KeV} e \SI{530}{KeV} con una probabilità del 68% data +dal confidence level. 
Per cui, la probabilità che la massa vera sia proprio +\SI{520}{KeV} è data da: + +$$ + P(m_e|{m_i}) = \frac{}{} +$$ + +\textcolor{red}{**rivedere negli appunti questa formula**} + +Quando si fa una misurazione, si misurano $N$ valori e si calcolano il valore +medio $\bar{x}$ e la deviazione standard $\sigma$ e si dice che il risultato è: + +$$ + \mu = \bar{x} \pm \frac{\sigma}{\sqrt{N}} +$$ + +e di solito lo si interpreta dicendo che: + +$$ + P \left( \bar{x} - \frac{\sigma}{\sqrt{N}} \leqslant \mu \leqslant + \bar{x} + \frac{\sigma}{\sqrt{N}} \right) = 68\% +$$ + +ma in realtà quello che sappiamo è solo che: + +$$ + P \left( \mu - \frac{\sigma_{true}}{\sqrt{N}} \leqslant \bar{x} \leqslant + \mu + \frac{\sigma_{true}}{\sqrt{N}} \right) = 68\% +$$ + +**\textcolor{red}{WHAT?!?}** diff --git a/misc/lessons/sections/modulo-2-2.md b/misc/lessons/sections/modulo-2-2.md new file mode 100644 index 0000000..06bacc9 --- /dev/null +++ b/misc/lessons/sections/modulo-2-2.md @@ -0,0 +1,480 @@ +# Statistica + +## Distribuzioni di probabilità + +Una funzione di densità di probabilità $f$ è definita in modo che la probabilità +che una variabile $x$ sia compresa tra $x$ e $x + dx$ sia data da: + +$$ + P(x \subset [x, x + dx]) = f(x)dx +$$ + +dunque vale che: + +$$ + \int\limits_{- \infty}^{+ \infty} dx f(x) = 1 +$$ + +Si definisce funzione cumulante: + +$$ + F(x) = \int\limits_{- \infty}^x dx' f(x') +$$ + +e quantile di ordine $\alpha$ il valore di $x$ per cui $F(x) = \alpha$. +Nel caso multidimensionale in cui si abbiano due o più variabili, si parla di +joint pdf: + +$$ + f(x, y) \hspace{30pt} \Longrightarrow \hspace{30pt} + \int\limits_{- \infty}^{+ \infty} \int\limits_{- \infty}^{+ \infty} + dx dy f(x, y) = 1 +$$ + +e si definiscono due distribuzioni marginali: + +$$ + f_x (x) = \int\limits_{- \infty}^{+ \infty} dy f(x, y) + \hspace{50pt} + f_y (y) = \int\limits_{- \infty}^{+ \infty} dx f(y, x) +$$ + +dunque due variabili $x$ e $y$ sono indipendenti se $f(x) = f_x(x) \cdot +f_y(y)$. Ora, se $A$ è l'evento di probabilità $f_x(x)dx$, mentre $B$ ha +probabilità $f_y(y)dy$, allora si possono definire le pdf condizionali come +segue: + +$$ + P(B|A) = \frac{P (A \cap B)}{P(A)} = \frac{f(x, y)dxdy}{f_x(x)dx} + \hspace{20pt} \Longrightarrow \hspace{20pt} h(y|x) = \frac{f(x, y)}{f_x(x)} +$$ + +per cui il teorema di Bayes diventa: + +$$ + g(x|y) = \frac{h(y|x)f_x(x)}{f_y(y)} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Paradosso di Borel-Kolmogorov + +Si considerino dei punti distribuiti uniformemente sulla superficie del pianeta +Terra: ci si aspetterebbe che i punti siano uniformemente distribuiti anche +lungo un parallelo o un meridiano... ma consideriamo un meridiano: esso giace +per il 25% a nord del 45'esimo parallelo e quindi, secondo la logica di prima, +anche il 25% dei punti che si trovano su di esso. Però non è vero che il 45% +della superficie terrestre è al di sopra del 45'esimo parallelo! 
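A quick check of the numbers: on a sphere of radius $R$ the area above latitude $\theta_0$ is that of a spherical cap, $2\pi R^2(1 - \sin\theta_0)$, so the fraction of the surface above the 45th parallel is

$$
  \frac{2\pi R^2 (1 - \sin 45^\circ)}{4\pi R^2}
  = \frac{1 - \sqrt{2}/2}{2} \approx 0.15
$$

roughly 15%, and not the 25% that the naive "uniform along every meridian" argument would suggest.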
+Il paradosso è risolto perché non ci si può basare su un insieme di misura +nulla quale il meridiano (perché è unidimensionale). Lo si vede chiaramente +adottando la terminologia poc'anzi introdotta: +Se la distribuzione è uniforme, la probabilità di trovare un punto in una +certa superficie è dato dal rapporto tra l'angolo solido descritto da tale +superficie e l'angolo solido totale: + +$$ + f(\theta, \phi) d\theta d\phi= \frac{d\phi d\theta \cos(\theta)}{4 \pi} +$$ + +da cui è possibile determinare la due probabilità marginali: + +$$ + f_{\phi}(\phi) = \int\limits_{0}^{\pi} f(\theta, \phi) d\theta = + \int\limits_{0}^{\pi} \frac{\cos(\theta)}{4 \pi} = \frac{\cos(\theta)}{2} +$$ +$$ + f_{\theta}(\theta) = \int\limits_{0}^{2 \pi} f(\theta, \phi) d\phi = + \frac{1}{2 \pi} +$$ + +per cui si tratta di due costanti rispetto alle rispettive variabili. Da ciò +si può dunque dedurre che, mentre la densità lungo un parallelo è +effettivamente costante, lo stesso non si può dire riguardo a un meridiano. + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +Una funzione di una variabile casuale è essa stessa una variabile casuale. +Consideriamo la pdf $f(x)$ e una funzione $a(x)$ di cui si vuole trovare la pdf +$g(a)$. Nel caso in cui l'inversa di $a(x)$ sia univoca, definita $dS$ la +regione delle $x$ per cui $a \subset [a, a +da]$: + +$$ + g(a)da = \int\limits_{dS} dxf(x) + = \left| \int\limits_{x(a)}^{x(a +da)} f(x')dx' \right| + = \int\limits_{x(a) + \left| \frac{dx}{da} \right| da}^{x(a +da)} f(x')dx' +$$ + +Ovvero: + +$$ + g(a) = f(x(a)) \left| \frac{dx}{da} \right| +$$ + +e se $x(a)$ non è univoca, allora bisogna considerare tutti gli intervalli $dS$ +di $dx$ che corrispondono a $da$. +Nel caso di funzioni di $N$ variabili, siccome vale che: + +$$ + g(a')da' = \int \dots \int\limits_{dS} f(x_1 \dots x_N) dx_1 \dots dx_N +$$ + +con $dS$ regione dello spazio delle $x$ compreso tra le isosuperfici: + +$$ + a(\vec{x}) = a' \hspace{10pt} \wedge \hspace{10pt} a(\vec{x}) = a' + da' +$$ + +Nel caso in cui $z = x \cdot y$, si trova la convoluzione di Mellin: + +$$ + g(z)dz = \int\limits_{dS} dxdy f(x, y) + = \int\limits_{-\infty}^{+\infty} dx + \int\limits_{\frac{z}{x}}^{\frac{z + dz}{x}} dy f(x, y) +$$ + +**\textcolor{red}{Non ho capito questa parte...}** + +## Propagazione degli errori + +Consideriamo una variabile $x$ con pdf $f(x)$. Si definisce valore di +aspettazione o media (e lo si indica spesso con $\mu$): + +$$ + E[x] = \int dx f(x) x +$$ + +Nel caso di una variabile $y(x)$ con pdf $g(x)$, invece: + +$$ + E[y] = \int dy \cdot y \cdot g(y) = \int dx f(x) g(x) +$$ + +Mentre si definisce varianza (e la si indica spesso con $\sigma^2$, mentre +con deviazione standard si intende $\sigma$): + +$$ + V[x] = E[x - E[x]^2] = E[x^2] - \mu^2 +$$ + +Più in generale si definiscono 'momenti algebrici' $E[x^n] =\mu'_n$ con +$\mu'_1 = \mu$ e 'momenti centrali' $E[(x -\mu)^n] = \mu_n$ con $\mu_2 = +\sigma^2$. 
+Si definiscono inoltre due grandezze di correlazione. La covarianza: + +$$ + \text{cov} [x, y] = E[xy] - E[x]E[y] = E[xy] - \mu_x \mu_y +$$ + +che equivale a: + +\begin{align*} + \text{cov} [x, y] + &= E[(x -\mu_x)(y -\mu_y)] \\ + &= E[xy -x\mu_y -y\mu_x + \mu_x\mu_Y] \\ + &= E[xy] -\mu_y E[x] -\mu_x E[y] + \mu_x \mu_y \\ + &= E[xy] -\mu_y mu_x - \mu_x \mu_y + \mu_x \mu_y \\ + &= E[xy] - \mu_x \mu_y +\end{align*} + +Notare che se $x$ e $y$ sono indipendenti, allora $f(x, y) = f_x(x)f_y(y)$, +perciò: + +$$ + E[xy] = \int dx \int dy xy f(x, y) = \mu_x \mu_y + \hspace{20pt} \Longrightarrow \hspace{20pt} \text{cov} [x, y] = 0 +$$ + +e il coefficiente di correlazione: + +$$ + \rho_{xy} = \frac{\text{cov} [xy]}{\sigma_x \sigma_y} +$$ + +![Esempio di correlazione tra due +grandezze.](images/correlazione.png){width=70%} + +Anche se la $f(\vec{x})$ non è completamente nota, è comunque possibile stimare +il valore medio e la varianza di una grandezza $y(\vec{x})$ conoscendo solo le +stime di media e varianza della pdf. Espandiamo attraverso la serie di +Taylor: + +$$ + y(\vec{x}) = y(\vec{\mu}) + \sum_{i= 1}^N \left[ + \frac{\partial y}{\partial x_i} \right]_{\vec{x} + = \vec{\mu}} (x_i - \mu_i) +$$ +$$ + \Longrightarrow \hspace{20pt} E[y] = y(\vec{\mu}) + \Longleftarrow \hspace{20pt} E[x_i] = \mu_i +$$ + +Mentre per la varianza servono $E[y^2]$ ed $E[y]$. Sempre passando +attraverso uno sviluppo di Taylor attorno al valore medio: + +\begin{align*} + E[y^2] &= y^2(\vec{\mu}) + 2y(\vec{\mu}) \sum_{i = 1}^{N} + \left[ \frac{\partial y} {\partial x_i} \right]_{\vec{x} = \vec{\mu}} + E[x_i - \mu_i] \\ + &+ E \left[ \left( \sum_{i_1}^N + \left[ \frac{\partial y}{\partial x_i} \right]_{\vec{x} = \vec{\mu}} + (x_i - \mu_i) \right) \left( \sum_{j = 1}^N \left[ \frac{\partial y} + {\partial x_i} \right]_{\vec{x} = \vec{\mu}} (x_j - \mu_j) \right) \right] +\end{align*} + +Siccome il secondo termine si annulla sempre perché $E[x_i] = \mu_i$, allora +rimane che: + +$$ + V[y] = E[y^2] - E[y]^2 = \sigma_y^2 = \sum_{i,j = 1}^N \left[ + \frac{\partial y}{\partial x_i} + \frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij} +$$ + +Con $V_{ij}$ che è la matrice di covarianza, che ha come entrate: + +$$ + V_{ij} = E[(x_i - \mu_i)(x_j - \mu_j)] = \rho_{ij} \sigma_i \sigma_j +$$ + +e quindi, nel caso in cui le variabili siano scorrelate, si ottiene che: + +$$ + V_{ij} = \sigma_i^2 \delta_{ij} + \hspace{20pt} \Longrightarrow \hspace{20pt} + \sigma_y^2 = \sum_{i = 1}^N \left[ \frac{\partial y}{\partial x_i} + \right]_{\vec{x} = \vec{\mu}}^2 \sigma_i^2 +$$ + +Cioè dice quanto cambia la $y$ al variare del 'dato iniziale' $\vec{x}$. +Ma quindi, per quanto visto prima: + +$$ + \text{cov} [x_i, x_j] = E[(x_i - \mu_i)(x_j - \mu_j)] = V_{ij} +$$ + +Più in generale, date $\vec{y}$ variabili dipendenti da $\vec{x}$, vale che: + +$$ + U = AVA^T \hspace{30pt} \text{con} \hspace{30pt} A_{ij} = \left[ + \frac{\partial y_i}{\partial x_j} \right]_{\vec{x} = \vec{\mu}} + \hspace{30pt} \text{e con} \hspace{30pt} U_{kl} = \text{cov}[y_k, y_l] +$$ + +dove $U$ è detta matrice di covarianza delle $y$. +Attenzione: quanto detto fin'ora, che descrive in che modo gli errori di +$\vec{x}$ influenzano $y$, vale solo nel caso in cui $y$ sia lineare nelle $x$. +Quindi, in casi come $y(x) = 1/x$, non si può fare questo discorso. 
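More precisely, first-order propagation is still a useful approximation for smooth non-linear $y(\vec{x})$ as long as the spread of the inputs is small compared to the scale over which the derivatives vary. For $y = 1/x$, for instance, it gives

$$
  \sigma_y \simeq \left| \frac{dy}{dx} \right| \sigma_x = \frac{\sigma_x}{x^2}
$$

which is reliable only when $\sigma_x \ll |x|$ and loses all meaning as $x$ approaches zero.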
+ +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio + +Consideriamo: + +$$ + y = x_1 - x_2 +$$ +$$ + \text{con} \hspace{30pt} \mu_1 = \mu_2 = 10 \hspace{30pt} \wedge + \hspace{30pt} \sigma_1 = \sigma_2 = 1 +$$ + +allora abbiamo che $y = y(x_1, x_2)$, quindi: + +$$ + E[y] = y(\mu_1, \mu_2) = 10 - 10 = 0 +$$ +$$ + V[y] = \sum_{i, j = 1}^2 \left[ \frac{\partial y}{\partial x_i} + \frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij} = + 1 \cdot V_{11} + 1 \cdot V_{22} -1 \cdot 2 \cdot V_{12} +$$ + +Se le correlazioni sono nulle, allora $V_{12} = 0 \Longrightarrow V[y] = 2 +\Longrightarrow \sigma_y = 1.4$, se invece $x_1$ e $x_2$ sono correlate, nel +caso in cui il coefficiente di correlazione sia unitario si ha che $V[y] = +0$. Quindi la correlazione può cambiare di molto le cose. + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Errori sistematici + +Consideriamo due grandezze $x_1$ e $x_2$ con un errore sistematico in comune +$S$: + +\begin{align*} + &x_1 = x_{1_0} + x_{1_s} \\ + &x_2 = x_{2_0} + x_{2_s} +\end{align*} + +si avrà che i termini con pedice $0$ sono indipendenti tra loro, mentre gli +altri due saranno correlati. Dato che gli errori si sommano in quadratura, la +matrice di covarianza sarà quindi: + +$$ +\text{cov}[x_1, x_2] = S^2 \hspace{30pt} \Longrightarrow \hspace{30pt} +V = \begin{pmatrix} +\sigma_1^2 + S^2 & S^2\\ +S^2 & \sigma_2^2 + S^2 +\end{pmatrix} +$$ + +perché: + +\begin{align*} + \text{cov}[x_1, x_2] &= E[x_1 x_2] - E[x_1]E[x_2] = \\ + &= E[(x_{1_0} + x_{1_s})(x_{2_0} + x_{2_s})] - E[x_{1_0} + x_{1_s}] + E[x_{2_0} + x_{2_s}] = \\ + &= E[x_{1_0}x_{2_0}] + E[x_{1_0}x_{2_s}] + E[x_{1_s}x_{2_0}] + + E[x_{1_s}x_{2_s}] + \\ + &\hspace{13pt} - E[x_{1_0}]E[x_{2_0}] - E[x_{1_0}]E[x_{2_2}] + - E[x_{1_s}]E[x_{2_0}] - E[x_{1_s}]E[x_{2_s}] = \\ + &= \mu_1 \mu_2 + \mu_1E[x_{2_s}] + E[x_{1_s}]\mu_2 +E[x_{1_s}x_{2_s}] + \\ + &\hspace{13pt} - \mu_1 \mu_2 - \mu_1 E[x_{2_s}] - E[x_{1_s}] \mu_2 + - E[x_{1_s}] E[x_{2_s}] = \\ + &= E[x_{1_s} x_{2_s}] - E[x_{1_s}] E[x_{2_s}] = \text{cov}[x_{1_s}, x_{2_s}] +\end{align*} + +### Trasformazione ortogonale + +Può tornare utile fare un cambio di variabile che permetta di ottenere una +matrice di covarianza delle $y$ diagonale. 
+Consideriamo le solite variabili $Y_i$ legate linearmente alle $x_j$: + +$$ + y_i = \sum_j = A^i_j x_j + \hspace{30pt} \Longrightarrow \hspace{30pt} + U_{ij} = \sum_{k,l} A_{ik} V_{kl} A^T_{lj} +$$ + +Si tratta quindi di diagonalizzare la matrice $U$: la soluzione è semplice, +la matrice $A$ è quella formata dagli autovalori di $V$. Questo concetto è +utile nel caso della scelta delle coordinate da utilizzare. +Se immaginiamo di star utilizzando le coordinate polari $\vec{x} = (x, y)$, la +matrice di covarianza sarà: + +$$ +V = \begin{pmatrix} +\sigma_1^2 & \rho \sigma_1 \sigma_2 \\ +\rho \sigma_1 \sigma_2 & \sigma_2^2 +\end{pmatrix} +$$ + +Diagonaliziamola: prima di tutto troviamo gli autovalori della matrice $V - +\lambda I$: + +$$ + \begin{vmatrix} + \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\ + \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda + \end{vmatrix} + = (\sigma_1^2 - \lambda) (\sigma_2^2 - \lambda) - \rho^2 \sigma_1^2 + \sigma_2^2 = 0 +$$ +\begin{align*} + &\Longrightarrow \hspace{30pt} + \lambda^2 -(\sigma_1^2 + \sigma_2^2) \lambda + \sigma_1^2 \sigma_2^2 + (1 - \rho^2) = 0 \\ + &\Longrightarrow \hspace{30pt} + \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm + \sqrt{\sigma_1^4 + \sigma_2^4 +2 \sigma_1^2 \sigma_2^2 + - 4 \sigma_1^2 \sigma_2^2 + 4\rho^2 \sigma_1^2 \sigma_2^2}}{2} = \\ + &\Longrightarrow \hspace{30pt} + \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm + \sqrt{(\sigma_1^2 - \sigma_2^2)^2 +4 \rho^2 \sigma_1^2 \sigma_2^2}}{2} +\end{align*} + +e ora calcoliamo gli autovettori: + +$$ + (V - \lambda I)\vec{r} = 0 + \hspace{30pt} \Longrightarrow \hspace{30pt} + \begin{pmatrix} + \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\ + \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda + \end{pmatrix} + \begin{pmatrix} + r_1 \\ + r_2 + \end{pmatrix} + = + \begin{pmatrix} + r_1 \\ + r_2 + \end{pmatrix} +$$ +$$ + \Longrightarrow \hspace{30pt} + \begin{cases} + (\sigma_1^2 - \lambda)r_1 + \rho \sigma_1 \sigma_2 r_2 = r_1 \\ + \rho \sigma_1 \sigma_2 r_1 + (\sigma_2^2 - \lambda)r_2 = r_2 + \end{cases} + \hspace{30pt} \Longrightarrow \hspace{30pt} + r_1 = \frac{}{} r_2 +$$ + +eccetera eccetera... diff --git a/misc/lessons/sections/modulo-2-3.md b/misc/lessons/sections/modulo-2-3.md new file mode 100644 index 0000000..43272d2 --- /dev/null +++ b/misc/lessons/sections/modulo-2-3.md @@ -0,0 +1,466 @@ + Distribuzioni di probabilità + +## Distribuzione binomiale + +![Distribuzione binomiale: p = 0.3, n = 10, +N = 1000.](images/binomiale.png) + +Si considerino $N$ tentativi di un esperimento che può avere come esiti +soltanto successo o fallimento e che la probabilità di ogni successo sia $p$. +Definiamo $n$ il numero dei successi. Dunque la probabilità di ottenere $n$ +successi su $N$ tentativi totali è data da: + +$$ + P(N, n, p) = \binom{N}{n} p^n (1 - p)^{N -n} +$$ + +perché: + +- la probabilità che un successo si verifichi è $p$; +- la probabilità che $k$ successi si verifichino è data dal prodotto di tutte + le probabilità: $p^n$; +- lo stesso discorso vale per gli insuccessi: ognuno ha probabilità $(1_p)$ e + se ne verificano $N -n$; +- il termine binomiale rappresenta tutte le possibili permutazioni: il + concetto è semplice se si immagina di posizionare successi e fallimenti + all'interno di una griglia con $N$ possibili posizioni: un successo è un + pallino bianco e un fallimento è un pallino nero. In quanti posti posso + mettere il primo successo? $N$. E il secondo? $N -1$. E il terzo? 
E così + via, finché ho messo tutti i successi, che occupano $n$ posizioni, l'ultima + delle quali è stata scelta tra $N - (n -1)$ posizioni, per cui: + + $$ + N \cdot (N -1) \dots (N -n + 1) = + \frac{N \cdot (N -1) \dots (N -n + 1) \cdot 2 \cdot 1} + {(N -n) \cdot (N - n -1) \dots 2 \cdot 1} = \frac{N!}{(N -n)!} + $$ + + ma non bisogna considerare che poi tutte le posizioni delle palline nere + sono uguali, quindi va ulteriormente diviso per $n!$ per le stesse ragioni. + Da cui: + + $$ + \binom{N}{n} = \frac{N!}{n! (N -n)!} + $$ + +Per la normalizzazione, vale che: + +$$ + \sum_{n = 0}^N P(N, n, p) = 1 +$$ + +Possiamo definire un valore di aspettazione e una varianza: + +\begin{align*} + &E[n] = \sum_{n = 0}^N n P(N, n, p) = Np \\ + &V[n] = E[n^2] - E[n]^2 = Np(1 -p) +\end{align*} + +## Distribuzione multinomiale + +È la generalizzazione della pdf precedente nel caso in cui ci siano $m$ +possibili risultati, ciascuno con una probabilità $P_m$ di verificarsi. Per +esempio, è il caso di un istogramma riguardo al quale ci si domanda quale sia la +probabilità di trovarlo esattamente con quelle specifiche entrate. +La probabilità è conseguentemente data da: + +$$ + P(N, \vec{n}, \vec{p}) = \frac{N!}{n_1! n_2! \dots n_m!} + p_1^{n_1} p_2^{n_2} \dots p_m^{n_m} +$$ + +E come valore di aspettazione e deviazione standard si ottiene che: + +\begin{align*} + &E[n_i] = Np_i \\ + &V[n_i] = Np_i (1 -p_i) +\end{align*} + +### Legge dei grandi numeri + +La legge dei grandi numeri afferma che la media sperimentale di una variabile +$x$, per un numero di tentativi $N$ che tende all'infinito, si avvicina molto +alla media vera. Questa legge può essere utilizzata per stimare le probabilità +$P_i$ di una distribuzione muiltinomiale tramite le frequenze con cui i diversi +eventi si verificano. +Si consideri la frequenza $f_j$ con cui l'evento j-esimo si verifica, dato un +set di $N$ tentativi: + +$$ + f_j = \frac{1}{N} \sum_{i = 1}^N x_i = \frac{x_j}{N} +$$ + +dove $x_i$ è una variabile che vale 1 se l'evento j-esimo si è verificato e +vale 0 quando se ne è verificato un altro e $x_j$ è quindi il numero di volte +che l'evento j-esimo si è verificato. +A differenza di $P_j$, $f_j$ è una variabile casuale perché dipende da $x_j$ +che è la somma di variabili casuali. Definiamo valore medio: + +$$ + E(f_j) = \frac{E(x_j)}{N} = P_j +$$ + +e calcoliamo la varianza: + +$$ + V \left[ f_j \right] = V \left[ \frac{x_j}{N} \right] = + E \left[ \frac{x_j^2}{N^2} \right] - \left( E \left[ \frac{x_j}{N} + \right] \right)^2 = \frac{1}{N^2} V \left[ x_j \right] +$$ + +ora, $x_j$ è esattamente $n_j$ della multinomiale, perciò: + +$$ + V[x_j] = NP_j (1 - P_j) + \hspace{30pt} \Longrightarrow \hspace{30pt} + V[f_j] = \frac{1}{N} P_j (1 - P_j) \leqslant \frac{1}{N} +$$ + +## Distribuzione di Poisson + +![Distribuzione Poissoniana: $\nu$ = 1, +N = 1000.](images/poisson.png) + +Se si considera la distribuzione binomiale e ci si pone nel limite in cui il +numero di tentativi ripetuti tenda all'infinito e che la probabilità di +successo tenda a zero (con il vincolo che $N \cdot p = cost = \nu$), si ottiene +la distribuzione di Poisson: + +$$ + P(N, n, \nu) = \frac{\nu^n}{n!} e^{-\nu} +$$ + +con: + +\begin{align*} + &E[n] = \nu \\ + &V[n] = \nu +\end{align*} + +dove $\nu = NP = \frac{}{}$ numero medio di successi. 
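+Schematicamente, il limite si ricava ponendo $p = \nu/N$ nella binomiale:
+
+$$
+ P(N, n, p) = \frac{N!}{n! (N -n)!} \frac{\nu^n}{N^n}
+ \left( 1 - \frac{\nu}{N} \right)^{N -n}
+ \xrightarrow{N \rightarrow + \infty} \frac{\nu^n}{n!} e^{-\nu}
+$$
+
+dato che $N!/[(N -n)! \, N^n] \rightarrow 1$ e
+$(1 - \nu/N)^{N -n} \rightarrow e^{-\nu}$.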
+Quando $N$ è talmente grande da non essere definito (come nel caso in cui si
+osservino i decadimenti di un atomo e dunque i tentativi sono le osservazioni,
+che sono continue), non è più possibile definire una probabilità di
+successo per ogni evento (perché sarebbe nulla, da cui il motivo per cui
+la Poissoniana è definita con questi due limiti), e quindi $\nu$ va definita in
+un altro modo. Infatti la distribuzione di Poisson, trattandosi di un limite in
+$N$ e $p$, non dipende più esplicitamente da queste due grandezze.
+Nel caso in cui si osservi il decadimento di un atomo, si è soliti procedere in
+questo modo: si suddivide il tempo di osservazione in intervalli (il che
+significa aver suddiviso gli infiniti tentativi in sottoinsiemi di infiniti
+tentativi) e si misura quante volte in ognuno di questi intervalli si verifica
+un successo. L'esperimento è ora praticamente suddiviso in più esperimenti
+minori da cui è possibile dedurre un numero medio frequentistico di successi.
+Per esempio:
+
+----------------------------------------------------------
+# successi        0     1     2     3    4    5    6    7
+------------- ------ ----- ----- ---- ---- --- ----- -----
+# intervalli   1042   860   307    78   15    3    0    0
+
+Poisson        1064   823   318    82   16    2  0.3  0.3
+----------------------------------------------------------
+
+Table: Decadimento di un atomo. Il tempo di osservazione è stato suddiviso in
+intervalli e per ogni intervallo è stato contato il numero di successi
+osservati.
+
+\newpage
+
+Il numero medio di eventi è:
+
+$$
+ \frac{1042 \cdot 0 + 860 \cdot 1 + 307 \cdot 2 + 78 \cdot 3 + 15 \cdot 4
+ + 3 \cdot 5 + 0 \cdot 6 + 0 \cdot 7}{1042 + 860 + 307 + 78 + 15 + 3
+ + 0 + 0} = 0.77
+$$
+
+Da cui è possibile calcolare i valori attesi riportati nella riga "Poisson"
+della tabella precedente.
+
+## Distribuzione uniforme
+
+![Distribuzione uniforme: $a = 0$, $b = 100$.](images/uniform.png)
+
+Una pdf di numeri che hanno tutti uguale probabilità di verificarsi è detta
+uniforme:
+
+$$
+ P (x, a, b) = \begin{cases}
+ \frac{1}{b - a} \hspace{30pt} a \leqslant x \leqslant b \\
+ 0 \hspace{42pt} \text{altrove}
+ \end{cases}
+$$
+
+con:
+
+\begin{align*}
+ E[x] = \frac{1}{2} (a + b) \\
+ V[x] = \frac{1}{12} (b - a)^2
+\end{align*}
+
+Se una variabile è distribuita secondo una pdf $f(x)$, la sua cumulativa è
+uniformemente distribuita. Intuitivamente è semplice perché basta vederla in
+questo modo: si immagini il grafico della pdf; ogni volta che si estrae un
+numero, questo cadrà in un punto casuale nell'area al di sotto della pdf,
+lasciando uno spazio casuale alla sua sinistra (che è il valore della
+cumulativa).
+
+## Distribuzione Gaussiana e CLT
+
+![Distribuzione Gaussiana: $\mu = 30$,
+$\sigma = 5$.](images/gaussian.png)
+
+La distribuzione Gaussiana (o normale) è definita come:
+
+$$
+ P (x, \mu, \sigma) = \frac{1}{\sqrt{2 \pi} \sigma}
+ e^{-\frac{(x - \mu)^2}{2 \sigma^2}}
+$$
+
+con:
+
+\begin{align*}
+ E[x] = \mu \\
+ V[x] = \sigma^2
+\end{align*}
+
+La error function, legata alla cumulativa di questa pdf, è molto utile in
+laboratorio e i suoi valori sono tabulati.
+Il teorema centrale del limite afferma che, date $n$ variabili casuali
+indipendenti, ciascuna con una propria pdf di varianza finita $\sigma_i^2$, nel
+limite in cui $n \rightarrow + \infty$ la somma di queste variabili segue un
+andamento gaussiano con valore medio la somma dei valori medi e varianza la
+somma delle varianze.
+Ciò può essere sfruttato per generare numeri casuali distribuiti secondo una
+distribuzione normale.
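+Per esempio, uno schizzo minimale (puramente indicativo) che sfrutta questa
+idea sommando 12 numeri uniformi in $[0, 1)$ (la somma ha media 6 e varianza
+$12 \cdot 1/12 = 1$, quindi sottraendo 6 si ottiene circa una normale
+standard):
+
+```c
+// Schizzo: generazione approssimata di numeri gaussiani tramite il CLT.
+#include <stdio.h>
+#include <gsl/gsl_rng.h>
+
+int main(void) {
+    gsl_rng *gen = gsl_rng_alloc(gsl_rng_taus);
+    for (int n = 0; n < 10; n++) {
+        double sum = 0;
+        for (int i = 0; i < 12; i++)
+            sum += gsl_rng_uniform(gen);    // uniforme in [0, 1)
+        printf("%f\n", sum - 6);            // ~ N(0, 1)
+    }
+    gsl_rng_free(gen);
+    return 0;
+}
+```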
+ + +Per grandi valori di $\mu$ (vale a dire qualche unità), la distribuzione di +Poisson tende a quella Gaussiana con $\mu = \nu$ e $\sigma = \sqrt{\nu}$. +Analogamente per $N \rightarrow + \infty$ la binomiale tende alla Gaussiana +con $\mu = Np$ e $\sigma = \sqrt{Np (1 - p)}$. + +## Distribuzione Gaussiana multivariata + +Nel caso multidimensionale, la pdf per il vettore $\vec{x} = {x_1 ... n_n}$ è +data da: + +$$ + f(\vec{x}, \vec{\mu}, V) = \frac{1}{(2 \pi)^{N/2} \mid V \mid^{1/2}} + \exp \left[ - \frac{1}{2} (\vec{x} - \vec{\mu})^t V^{-1} (\vec{x} + - \vec{\mu}) \right] +$$ + +con $E[x_i] = \mu_i$ e $\text{cov}[x_i, x_j] = V_{ij}$ + +## Media pesata + +Quando si hanno misure con diversi errori, vanno combinate attraverso il +concetto di media pesata: + +$$ + E[x] = \frac{\sum_{i = 1}^N \frac{x_i}{\sigma_i^2}}{\sum_{i = 1}^N + \frac{1}{\sigma_i^2}} +$$ +$$ + V[x] = \frac{1}{\sum_{i = 1}^N \frac{1}{\sigma_i^2}} +$$ + +Ma non ha senso mediare valori che non sono compatibili! + +## Distribuzione di Breit-Wigner + +![Distribuzione di Breit-Wigner: $x_0 = 20$, +$\Gamma = 10$.](images/wigner.png) + +Esistono alcune distribuzioni che hanno momenti non ben definiti e che per +questo si dicono "patologiche". Un esempio è la distribuzione di Breit-Wigner: + +$$ + f (x, \Gamma, x_o) = \frac{1}{\pi} \cdot + \frac{\Gamma/2}{\Gamma^2/4 + (x - x_0)^2} +$$ + +Un caso particolare è quello in cui $x_0 = 0$ e $\Gamma = 2$, caso in cui è +detta distribuzione di Cauchy: + +$$ + f(x, 2, 0) = f(x) = \frac{1}{\pi} \cdot \frac{1}{1 + x^2} +$$ + +Il valore medio e la varianza non sono definiti perché l'integrale è +divergente. Conviene usare la moda e l'ampiezza a mezza altezza, che sono +rispettivamente $x_0$ e $\Gamma$. +Nella libreria *GSL*, la pdf è scritta in questo modo: + +$$ + p(x) = \frac{1}{a \pi (1 + (x/a))^2} + \hspace{50pt} \Longrightarrow \hspace{50pt} + a = \Gamma/2 +$$ + +## Distribuzione di Landau + +Per una particella carica con $\beta = v/c$ che attraversa un materiale sottile +di spessore $d$, la perdita di energia $\Delta$ segue la distribuzione di +Landau: + +![Distribuzione di Landau.](images/landau.png) + +Ha una forma complicatissima che racchiude integrali, logaritmi... Anche in +questo caso non si possono definire i momenti algebrici perché l'integrale +diverge. + +## Distribuzione del chi-quadro + +![Distribuzione del $\chi^2$: $n = 5$.](images/chi2.png) + +Date $N$ grandezze distribuite ciascuna con una propria distribuzione +Gaussiana, la somma dei loro quadrati segue la distribuzione $\chi^2$. +Formalmente è definita così: + +$$ + f(z, n) \frac{1}{2^{n/2} \Gamma (n/2)} z^{n/2 - 1}e^{-z/2} +$$ + +\begin{align*} + &E[z] = n \\ + &V[z] = 2n +\end{align*} + +dove $z$ è la variabile e $n$ è il numero di fradi di libertà. +Quando si fa un esperimento e si campiona $y(x)$ e poi si fittano i dati +trovati con una funzione teorica $f(x)$, ciascun valore $y(x)$ si assume +distribuito come una gaussiana attorno al suo valore vero, che assumiamo +essere $f(x)$: dunque i residui, che sono la differenza $R(x)= y(x) - f(x)$, +sono ancora una gaussiana, ma centrata in zero. Il chi quadro è definito come: + +$$ + \sum_i \frac{[y(x_i) - f(x_i)]^2}{f(x_i)} + \hspace{50pt} \text{oppure} \hspace{50pt} + \sum_i \frac{[y(x_i) - f(x_i)]^2}{\sigma_i^2} +$$ + +Ne consegue che il chi quadro segua appunto la distribuzione del chi quadro. + +Nella libreria *GSL* la distribuzione $\chi^2$ corrisponde alla distribuzione +gamma con $a = n/2$ e $b = 2$. 
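+Per esempio, per campionare una variabile $\chi^2$ con $n$ gradi di libertà si
+può quindi usare `gsl_ran_gamma()`; quella che segue è solo una piccola
+funzione di comodo (il nome è di fantasia):
+
+```c
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+// chi quadro con n gradi di libertà = Gamma(a = n/2, b = 2)
+double sample_chi2(const gsl_rng *gen, int n) {
+    return gsl_ran_gamma(gen, n / 2.0, 2.0);
+}
+```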
+ +## Distribuzione esponenziale + +![Distribuzione del esponenziale: $\lambda = 3$.](images/exponential.png) + +$$ + f(x, \lambda) = \lambda e^{-\lambda x} +$$ + +\begin{align*} + &E[z] = \frac{1}{\lambda} \\ + &V[z] = \frac{1}{\lambda^2} +\end{align*} + +## Distribuzione t di Student + +![Distribuzione t di Student: $\nu = 3$.](images/student.png) + +È la distribuzione seguita dalla media di una popolazione gaussiana quando +la si stima con un piccolo campione e senza conoscere la deviazione standard. +Se $y_1$ è distribuita come una Gaussiana e $y_2$ come un $\chi^2$, se $\nu$ +sono i gradi di libertà, allora $x$ segue la t di Student: + +$$ + x = \frac{y_1}{\sqrt{\frac{y_2}{\nu}}} +$$ + +che è così definita: + +$$ + f(x, \nu) = \frac{\Gamma \left( \frac{\nu + 1}{2} \right)}{\sqrt{\nu \pi} + \Gamma \left( \frac{\nu}{2} \right)} \left( 1 + \frac{x^2}{\nu} + \right)^{- \frac{\nu + 1}{2}} +$$ + +\begin{align*} + &E[z] = 0 \\ + &V[z] = + \begin{cases} + \frac{\nu}{\nu - 2} \hspace{15pt} \nu \greater 2 \\ + \infty \hspace{30pt} \nu \leqslant 2 + \end{cases} +\end{align*} + + +## Distribuzione di Fischer-Snedecor + +![Distribuzione di Fischer: $n = 3$, $m = 4$.](images/fischer.png) + +Se si hanno due campioni $\vec{x}$ e $\vec{y}$ di variabili che seguono le +rispettive Gaussiane, si può usare la distribuzione di Fisher-Snedecor per +comparare le due varianze. Se nel primo caso le variabili sono $n$ e nel +secondo sono $m$, allora la distribuzione di Fisher con gradi di libertà +$n-1$ e $m-1$ dà la distribuzione del rapporto: + +$$ + \frac{S^2_x / S^2_y}{\sigma^2_x / \sigma^2_y} = + \frac{S^2_x / \sigma^2_x}{S^2_y / \sigma^2_y} +$$ + +con: + +$$ + S^2_x = \frac{1}{n -1} \sum_{i = 1}^n (x_i - \mu_i)^2 + \hspace{50pt} \text{,} \hspace{50pt} + S^2_y = \frac{1}{m -1} \sum_{i = 1}^m (y_i - \mu_i)^2 +$$ + +che quindi è il rapporto di due grandezze distribuite secondo il $chi^2$. +La definizione della pdf è complicata... + +## Funzione caratteristica + +Si definisce funzione caratteristica di una variabile $x$ distribuita secondo +una $f(x)$, la trasformata di Fourier di quest'ultima: + +$$ + \hat{f}(k) = E[e^{ikx}] = \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} +$$ + +come per ogni trasformata, tutte le informazioni contenute nella funzione +originaria sono contenute anche nella funzione caratteristica, perché per +tornare alla prima è sufficiente calcolare la trasformata inversa: + +$$ + f(x) = \frac{1}{2 \pi} \int\limits_{-\infty}^{+\infty} dx \hat{f}(k) e^{-ikx} +$$ + +la funzione caratteristica è utile per semplificare alcuni conti. Se +$x_1 \dots x_N$ sono variabili casuali indipendenti: + +\begin{align*} + z = \sum_{i = 1}^N x_i + \hspace{20pt} \Longrightarrow \hspace{20pt} + \hat{f}_z(k) &= \int dx_1 \dots dx_N f_1(x_1) + \dots f_N(x_N) e^{ik \sum_{i=1}^N x_i} = \\ + &= \int dx_1 f_1(x_1) e^{ikx_1} \dots \int dx_N f_N(x_N) e^{ikx_N} = \\ + &= \hat{f}_1(k) \dots \hat{f}_N(k) +\end{align*} + +Inoltre vale anche che: + +$$ + \frac{d^m}{dk^m} \hat{f}(k) \big|_{k = 0} = + \frac{d^m}{dk^m} + \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} \big|_{k = 0} = + i^m \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} x^m \big|_{k = 0} = + i^m \mu_m = i^m E[x^m] +$$ + +che è il momento algebrico di ordine $m$. +Per esempio, nel caso di due variabili indipendenti $x$ e $y$ gaussiane, si +può notare subito che la loro somma è una gaussiana con $\mu = \mu_x + \mu_y$ e +$\sigma^2 = \sigma_x^2 + \sigma_y^2$. Analogamente per la Poissoniana. 
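+Infatti, per una gaussiana la funzione caratteristica è
+
+$$
+ \hat{f}(k) = e^{i \mu k - \sigma^2 k^2 / 2}
+ \hspace{20pt} \Longrightarrow \hspace{20pt}
+ \hat{f}_x(k) \, \hat{f}_y(k)
+ = e^{i (\mu_x + \mu_y) k - (\sigma_x^2 + \sigma_y^2) k^2 / 2}
+$$
+
+che è ancora della stessa forma, con $\mu = \mu_x + \mu_y$ e
+$\sigma^2 = \sigma_x^2 + \sigma_y^2$.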
+Inoltre è facile osservare quale sia il comportamento delle pdf nei vari limiti +che abbiamo visto in precedenza: se si manda $N \rightarrow \infty$ mantenendo +il valore medio costante nella funzione caratteristica di una binomiale, si +ottiene la funzione caratteristica di una Poissoniana. Anche il teorema +centrale del limite si può dimostrare in questo modo. diff --git a/misc/lessons/sections/modulo-3.md b/misc/lessons/sections/modulo-3.md new file mode 100644 index 0000000..4b8d693 --- /dev/null +++ b/misc/lessons/sections/modulo-3.md @@ -0,0 +1,281 @@ +# BPH + +## Statistica descrittiva + +Si possono distinguere due tipi di analisi dei dati: "model independent" +(statistica descrittiva) e "model dependent", che si basano su un modello +teorico. In questo capitolo studiamo quelli del primo tipo. +Alcuni argomenti tipici di statistica descrittiva sono: + +- test per stabilire se due datasets provengono dalla stessa distribuzione + $f(x)$; +- test per stabilire la correlazione tra due datasets (test di ipotesi); +- metodi per determinare i momenti di una distribuzione; +- metodi per lo smoothing dei dati sperimentali. + +### Momenti di una distribuzione + +Definire i momenti di una distribuzione ha senso quando gli eventi che la +costituiscono hanno la tendenza ad agglomerarsi attorno ad un valore centrale. +Se i dati sono discreti, si usano le seguenti definizioni: + +Media campionaria: se $n_i$ è la frequenza con cui si presenta ciascun valore +$x_j$: + +$$ + \bar{x} = \frac{1}{N} \sum_{i = 1}^N x_i + = \frac{1}{N} \sum_{j = 1}^{N'} n_j x_j +$$ + +Momento centrale di ordine $r$: + +$$ + V_r = \frac{1}{N} \sum_{j=1}^{N'} n_j^r (x_j - \bar{x})^r +$$ + +Esistono anche altri due valori "centrali", che nel caso continuo diventano: +mediana: + +$$ + \int\limits_{-\infty}^{x_{\text{med}}} dx \, f(x) = \frac{1}{2} +$$ + +moda: valore per cui $f(x)$ è massima, ovvero valore che si ripete con +maggiore frequenza. +Se la pdf ha code molto estese, è possibile che gli integrali non convergano +e questi valori non siano definiti. Per questo motivo la mediana è uno +stimatore del valore centrale più robusto della media. + +I momenti centrali definiscono il modo in cui i dati si distribuiscono attorno +al valore centrale: quanto sono "diffusi". Il primo è la varianza (si noti la +correzione di Bessel per cui $N \rightarrow N -1$ al denominatore): + +$$ + V = \frac{1}{N - 1} \sum_{i = 1}^N (x_i - \bar{x})^2 +$$ + +La skewness (letteralmente "asimmetria") descrive quanto i valori siano +distribuiti in modo disuniforme attorno al valore medio: + +$$ + \gamma = \frac{1}{\sigma^3} E[(x - \bar{x})^3] +$$ + +dove $\sigma$ è la deviazione standard. + +![Skewness.](images/skewness.png){width=12cm} + +Quanto una pdf è più o meno piccata rispetto ad una gaussuana è dato dalla +kurtosis ("curved", "arching"): + +$$ + K = \frac{1}{\sigma^4} E[(x -\bar{x})^4] -3 +$$ + +![Kurtosis.](images/kurtosis.png){width=8cm} + +Esiste una stima per le deviazioni standard di questi parametri nel caso di +distribizioni circa gaussiane: + +\begin{align*} + &V(\sigma^2) = \frac{2 \sigma^4}{N} \\ + &V(\gamma) \approx \frac{15}{N} \\ + &V(K) \approx \frac{96}{N} +\end{align*} + +### Smoothing dei dati + +Lo smoothing dei dati si rende necessario quando i dati sono corrotti da un +rumore casuale. Solitamente si attua una media su finestre che inglobano dati +contigui. 
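+Uno schizzo elementare (solo indicativo) di media mobile su una finestra di
+$2k + 1$ punti potrebbe essere:
+
+```c
+#include <stddef.h>
+
+// Media mobile: out[i] = media di y su una finestra di 2k+1 punti
+// centrata in i; ai bordi la finestra viene semplicemente ristretta.
+void moving_average(const double *y, double *out, size_t n, size_t k) {
+    for (size_t i = 0; i < n; i++) {
+        size_t lo = (i > k) ? i - k : 0;
+        size_t hi = (i + k < n) ? i + k : n - 1;
+        double sum = 0;
+        for (size_t j = lo; j <= hi; j++)
+            sum += y[j];
+        out[i] = sum / (hi - lo + 1);
+    }
+}
+```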
Fare una media, però, significa abbassare inevitabilmente il valore +nei picchi, perché la maggior parte delle volte conservano l'area al di sotto +del picco e la posizione, ma non l'altezza. +Uno dei più efficienti metodi di smoothing è il filtro di Savitsky-Golay. + +Il segnale viene analizzato a gruppi di punti incentrati ciascuno in $Y_i$, con +$i$ che scorre su tutto l'array. Chiamiamo $y_0$ il punto centrale e $Y_N$ e +$Y_{-N}$ gli estremi. $Y_0$ viene sostituito con un valore calcolato in un modo +spiegato di seguito. Durante questo processo, i valori di $Y_i$ non vengono +sostituiti con $f_i$, bensì si crea un array parallelo che sarà poi quello +definitivo smoothato. +I valori di $Y_i$ si ottengono tramite un fit sui punti della finestra con +un polinomio di grado arbitrario $g$: $P_g(j)$. Il polinomio viene poi +valutato in zero e sostituito al valore di $y_0$. + +### Test di ipotesi + +Supponiamo di voler dimostrare che una certa variabile casuale $x$ segua una +pdf $f(x)$: questa è detta ipotesi nulla $H_0$. Se $f(x)$ non dipende da alcun +parametro, si parla di ipotesi semplice, altrimenti di dice composta. Oltre +alla ipotesi nulla si possono avere una o più ipotesi alternative $H_1$, +$H_2$... +Consideriamo il semplice caso in cui abbiamo una sola ipotesi alternativa +$H_1$ che proponga a sua volta una pdf. Per valutare l'accordo tra i dati e +un'ipotesi nulla si costruisce una statistica di test $t(x)$, che è una +variabile che dipende da $\vec{x}$ che definisco per determinare se l'ipotesi +nulla sia vera oppure no (vedi $t_{\text{cut}}$ oppure la discrepanza...) e che +segue a sua volta due pdf, una prevista da $H_0$ e una da $H_1$. + +\begin{center} +\begin{tikzpicture} + + \draw [thick, ->] (0,0) -- (12,0); + \draw [thick, ->] (0,0) -- (0,6); + \node [left] at (0,6) {g(t)}; + \node [below] at (12,0) {t}; + \draw [thick, dashed] (6,0) -- (6,6); + \node [below] at (6,0) {$t_{\text{cut}}$}; + \draw [thick, blue] (0,0) to [out = 20, in = 180] (3,5) + to [out = 0, in = 180] (8,0); + \draw [thick, red] (4,0) to [out = 20, in = 180] (7,3) + to [out = 0, in = 180] (11,0); + \node [blue] at (2.5, 2) {$g(t \, | \, H_0)$}; + \node [red] at (8, 1) {$g(t \, | \, H_1)$}; + +\end{tikzpicture} +\end{center} + +Si definisce 'significanza del criterio di test' $\alpha$ (mentre $(1 - +\alpha)$ è il 'livello di confidenza del criterio di test', o 'efficienza'): + +$$ + \alpha = \int\limits_{t_{\text{cut}}}^{+ \infty} dt \, g(t \, | \, H_0) +$$ + +mentre $\beta$ è chiamato 'potenza del test' (mentre $(1 - \beta)$ è +detto 'purezza'): + +$$ + \beta = \int\limits_{-\infty}^{t_{\text{cut}}} dt \, g(t \, | \, H_1) +$$ + +Si chiamano: + +- errore di prima specie: rigezione di $H_0$ qualora questa sia vera (con + relativa probabilità $P_1$); +- errore di seconda specie: accettazion di $H_0$ qualora questa sia falsa + (con relativa probabilità $P_2$); + +Per $t < t_{\text{cut}}$ deciso arbitrariamente, imponiamo che l'ipotesi +nulla sia verificata. Ne consgue che $\alpha = P_1$ e $\beta = P_2$. +La scelta migliore di $y_{\text{cut}}$ è quella che dà la massima purezza data +una certa efficienza. Nel caso 1D lo si ottiene automaticamente (vedi esempio), +altrimenti può essere complicato. + +Facciamo un esempio in cui applichiamo il lemma di Neyman-Pearson. +Immaginiamo di avere i valori $\vec{x} = (x_1 ... 
x_N)$ che appartengono ad +una distribuzione normale la cui varianza $\sigma$ è nota e si deve distinguere +tra due valori medi $\mu_0$ e $\mu_1$, cioé: + +$$ + H_0 = [\mu = \mu_0] + \hspace{100pt} + H_1 = [\mu = \mu_1] +$$ + +A questo punto le pdf previste da $H_0$ e $H_1$ sono due gaussiane centrate +ciascuna nel proprio valore medio. Secondo il lemma di cui sopra, dobbiamo +calcolare la Likelihood, che è la produttoria su tutte le misure effettuate +$x_i$ della pdf prevista di un'ipotesi calcolata in $x_i$: + +$$ + L(\vec{x}, \mu, \sigma) = \frac{1}{(\sigma \sqrt{2 \pi})^N} \Pi_{i=1}^N + N(x_i, \nu, \sigma) +$$ + +dove con $N$ si indica la distribuzione normale. Si tratta, cioè, della +probabilità di avere ottenuto quelle misure secondo l'ipotesi considerata. +Vorremo, quindi, che $L(H_0) >> L(H_1)$. A questo scopo si guarda $r$, +parametro previsto dal lemma, che vale: + +$$ + r = \frac{(L(\vec{x}) \, | \, H_0)}{(L(\vec{x}) \, | \, H_1)} + \hspace{30pt} \Longrightarrow \hspace{30pt} + \ln{r} = \ln{L(\vec{x}, \mu_0, \sigma)} - \ln{L(\vec{x}, \mu_0, \sigma)} +$$ + +Che deve essere a sua volta molto grande. La regione in cui si deve accettare +l'ipotesi nulla è infatti quella con $r > c$, dove $c$ deve ancora essere +valutato. + +$$ + \ln{r} = R(\vec{x}) > \ln{c} + \hspace{30pt} \Longrightarrow \hspace{30pt} + \vec{x} > (\text{oppure} <) \, g(c) = t_{\text{cut}} +$$ + +Per scegliere $k$, si impone che: + +$$ + P_1 = \alpha = Pr(\vec{x} > (\text{oppure} <) \, t_{\text{cut}} \, + | \, H_0) +$$ + +Quindi ciò che può essere scelto arbitrariamente, alla fine dei conti, è +$\alpha$, che solitamente si impone $= 5 \%$. + +### Discriminante lineare di Fisher + +In che modo si possono definire $f(t \, | \, H_0)$ e $f(t \, | \, H_1)$? Si +possono fare degli *ansatz* riguardo alla forma di $t$. Il modello di Fischer +utilizza una funzione lineare: + +$$ + t = \sum_{i = 1}^N a_i x_i = \vec{a} \cdot \vec{x} +$$ + +dove il vettore $\vec{a}$ è da determinare. Definiamo l'insieme dei valori medi +e delle "varianze" delle variabili misurate come segue: $\mu_{k, i}$ è il valore +medio della variabile $i$-esima secondo l'ipotesi $k$-esima: + +$$ + \mu_{k,i} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N + \, x_i f(\vec{x} \, | \, H_k) +$$ + +dove $k$ può quindi essere 0 o 1; mentre: + +$$ + (V_k)_{i,j} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N + \, (x_i - \mu_{k,i})(x_j - \mu_{k,j}) f(\vec{x} \, | \, H_k) +$$ + +Si può dimostrare che, per funzioni +gaussiane, la migliore statistica di test (ovvero che massimizza $1 - \beta$ +per un dato $a$) è quella per cui: + +$$ + \vec{a} = \frac{1}{w} (\vec{\nu}_0 - \vec{\nu}_1) + \hspace{40pt} \text{con} \hspace{40pt} + W_{i,j} = (V_0 + V_i)_{i,j} +$$ + +In genere si introduce anche un offset: + +$$ + t = a_0 + \sum_{i = 1}^N a_i x_i +$$ + +### Reti neuronali + +Si può dimostrare che se si usa il discriminante lineare di Fisher, allora dati +i dati $\vec{x}$, la probabilità che sia giusta $H_0$ è: + +$$ + P(H_0 | \vec{x}) = frac{1}{1 + e^{-t}} +$$ + +![Logistic function.](images/logistic.png){width=6cm} + +che è la funzione logistica. Se le due pdf $f(\vec{x} | H_0)$ e $f(\vec{x} | +H_1)$ non sono gaussiane, allora il discriminante lineare di Fisher non è più +ottimale e si può generalizzare $t(\vec{x})$ con un caso speciale di Artificial +Neural Network (ANN). +Supponiamo di prendere + +$$ + t(\vec{x}) = s_0 \left( a_0 \sum_{i = 1}^N a_i x_i \right) +$$ + +con $s$ detta funzione di attivazione e $a_0$ detta soglia. 
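+Assumendo la forma con offset $t(\vec{x}) = s(a_0 + \sum_i a_i x_i)$ e la
+funzione logistica come $s$, un singolo "neurone" di questo tipo si può
+abbozzare così (schema puramente indicativo):
+
+```c
+#include <math.h>
+#include <stddef.h>
+
+// t(x) = s(a0 + somma_i a_i x_i), con s funzione logistica.
+double neuron(const double *a, const double *x, size_t n, double a0) {
+    double lin = a0;
+    for (size_t i = 0; i < n; i++)
+        lin += a[i] * x[i];
+    return 1.0 / (1.0 + exp(-lin));
+}
+```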
Siccome la sigmoide +è monotona, questa ANN è equivalente ad un test lineare. diff --git a/misc/pdfs.c b/misc/pdfs.c new file mode 100644 index 0000000..9bbbe60 --- /dev/null +++ b/misc/pdfs.c @@ -0,0 +1,58 @@ +// This program generates a collection of random numbers which follow the +// distribution whose name is given in input by the user when calling the +// program and outputs an histogram of the result. Maximum and minimum of +// the range must also be given. The random number distribution functions +// are taken from the GSL library at: +// https://www.gnu.org/software/gsl/doc/html/randist.html + +#include +#include +#include +#include +#include + +int main (int argc, char** argv) + { + // An histogram with 100 bins which range from 0 to 100 is set: + size_t bins = 100; + int low = atoi(argv[2]); + int upp = atoi(argv[3]); + gsl_histogram * histo = gsl_histogram_alloc(bins); + gsl_histogram_set_ranges_uniform(histo, low, upp); + + // The random number generator is set: + gsl_rng* gen = gsl_rng_alloc(gsl_rng_taus); + // 100'000 random numbers are generated and put into the histogram: + size_t N = 100000; + for (size_t i = 0; i < N; i++) + { + if (strcmp(argv[1], "binomial") == 0) + gsl_histogram_increment(histo, gsl_ran_binomial(gen, 0.3, 10)); + if (strcmp(argv[1], "poisson") == 0) + gsl_histogram_increment(histo, gsl_ran_poisson(gen, 1)); + if (strcmp(argv[1], "uniform") == 0) + gsl_histogram_increment(histo, gsl_rng_get(gen)*100); + if (strcmp(argv[1], "gaussian") == 0) + gsl_histogram_increment(histo, gsl_ran_gaussian(gen, 5)+30); + if (strcmp(argv[1], "wigner") == 0) + gsl_histogram_increment(histo, gsl_ran_cauchy(gen, 10)+20); + if (strcmp(argv[1], "landau") == 0) + gsl_histogram_increment(histo, gsl_ran_landau(gen)); + if (strcmp(argv[1], "chi2") == 0) + gsl_histogram_increment(histo, gsl_ran_gamma(gen, 5, 2)); + if (strcmp(argv[1], "exponential") == 0) + gsl_histogram_increment(histo, gsl_ran_exponential(gen, 5)); + if (strcmp(argv[1], "student") == 0) + gsl_histogram_increment(histo, gsl_ran_tdist(gen, 3)); + if (strcmp(argv[1], "fischer") == 0) + gsl_histogram_increment(histo, gsl_ran_fdist(gen, 3, 4)); + } + // The histogram is given in output: + gsl_histogram_fprintf (stdout, histo, "%g", "%g"); + + // The memory is freed. 
+ gsl_rng_free(gen); + gsl_histogram_free(histo); + + return 0; + } diff --git a/misc/plot b/misc/plot new file mode 100755 index 0000000..bb70e3f --- /dev/null +++ b/misc/plot @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +import matplotlib.pyplot as plt +import numpy as np +import sys + +lows, upps, values = np.loadtxt(sys.stdin, unpack=True,) +dim = upps[1] - lows[1] +values = values/np.sum((values)*dim) +plt.bar(lows, align='edge', width=dim, height=values, + color='#dbbf0d', edgecolor='#595856') +plt.annotate("N = 100000\nbins=100", (15, 0.16)) +plt.ylabel("pdf(x)") +plt.xlabel("x") +plt.show() diff --git a/notes/images/conti-ex6.jpg b/notes/images/conti-ex6.jpg new file mode 100644 index 0000000..8088b6a Binary files /dev/null and b/notes/images/conti-ex6.jpg differ diff --git a/notes/images/dip.png b/notes/images/dip.png new file mode 100644 index 0000000..8b64727 Binary files /dev/null and b/notes/images/dip.png differ diff --git a/notes/images/gamma-area.png b/notes/images/gamma-area.png new file mode 100644 index 0000000..7f01fa5 Binary files /dev/null and b/notes/images/gamma-area.png differ diff --git a/notes/images/landau-hist.png b/notes/images/landau-hist.png new file mode 100644 index 0000000..9c4b3b7 Binary files /dev/null and b/notes/images/landau-hist.png differ diff --git a/notes/makefile b/notes/makefile new file mode 100644 index 0000000..cb0f269 --- /dev/null +++ b/notes/makefile @@ -0,0 +1,17 @@ +sections = $(shell find sections/ -name '*.md' | sort -V) + +pandoc = \ + @ pandoc $(1) \ + --toc \ + --standalone \ + --pdf-engine=xelatex \ + -F pandoc-crossref \ + -o $(2).pdf + +all: exercises.pdf + +clean: + rm exercises.pdf + +exercises.pdf: $(sections) images + $(call pandoc, $(sections), exercises) diff --git a/notes/sections/0.md b/notes/sections/0.md new file mode 100644 index 0000000..f42dc91 --- /dev/null +++ b/notes/sections/0.md @@ -0,0 +1,50 @@ +--- +title: Statistical Analysis +subtitle: Exercises +numbersections: true +documentclass: article +fontsize: 12pt + +geometry: + - width=150mm + - top=20mm + - bottom=30mm + +mainlang: english +otherlang: russian + +header-includes: | + ```{=latex} + % start new page with each section + \usepackage{etoolbox} + \usepackage{tikz} + \usepackage{amsmath} + \usepackage{amssymb} + \usepackage{siunitx} + \pretocmd{\section}{\clearpage}{}{} + + % strict figure placement + \usepackage{float} + \floatplacement{figure}{H} + + % customer macros + %% "with" in formulas + \DeclareMathOperator*{\with}{% + \hspace{30pt} \text{with} \hspace{30pt} + } + %% "thus" in formulas + \DeclareMathOperator*{\thus}{% + \hspace{30pt} \Longrightarrow \hspace{30pt} + } + %% "et" in formulas + \DeclareMathOperator*{\et}{% + \hspace{30pt} \wedge \hspace{30pt} + } + + % configure captions + \usepackage{caption} + \captionsetup{font={footnotesize,it}} + \captionsetup{width=11cm} + \usepackage{stmaryrd} + ``` +--- diff --git a/notes/sections/1.md b/notes/sections/1.md new file mode 100644 index 0000000..8667d41 --- /dev/null +++ b/notes/sections/1.md @@ -0,0 +1,226 @@ +# Exercise 1 + +## Random numbers following the Landau distribution + +The Landau distribution is a probability density function which can be defined +as follows: + +$$ + f(x) = \int \limits_{0}^{+ \infty} dt \, e^{-t log(t) -xt} \sin (\pi t) +$$ + +![Landau distribution.](images/landau-small.pdf){width=50%} + +The GNU Scientific Library (GSL) provides a number of functions for generating +random variates following tens of probability distributions. 
Thus, the function +for generating numbers from the Landau distribution, namely `gsl_ran_landau()`, +was used. +For the purpose of visualizing the resulting sample, the data was put into +an histogram and plotted with matplotlib. The result is shown in @fig:landau. + +![Example of N points generated with the `gsl_ran_landau()` +function and plotted in a 100-bins histogram ranging from -10 to +80.](images/landau.png){#fig:landau} + + +## Randomness testing of the generated sample + +### Kolmogorov-Smirnov test + +In order to compare the sample with the Landau distribution, the +Kolmogorov-Smirnov (KS) test was applied. This test statistically quantifies the +distance between the cumulative distribution function of the Landau distribution +and the one of the sample. The null hypothesis is that the sample was +drawn from the reference distribution. +The KS statistic for a given cumulative distribution function $F(x)$ is: + +$$ + D_N = \text{sup}_x |F_N(x) - F(x)| +$$ + +where: + + - $x$ runs over the sample, + - $F(x)$ is the Landau cumulative distribution and function + - $F_N(x)$ is the empirical cumulative distribution function of the sample. + +If $N$ numbers have been generated, for every point $x$, +$F_N(x)$ is simply given by the number of points preceding the point (itself +included) normalized by $N$, once the sample is sorted in ascending order. +$F(x)$ was computed numerically from the Landau distribution with a maximum +relative error of $10^{-6}$, using the function `gsl_integration_qagiu()`, +found in GSL. + +Under the null hypothesis, the distribution of $D_N$ is expected to +asymptotically approach a Kolmogorov distribution: + +$$ + \sqrt{N}D_N \xrightarrow{N \rightarrow + \infty} K +$$ + +where $K$ is the Kolmogorov variable, with cumulative +distribution function given by: + +$$ + P(K \leqslant K_0) = 1 - p = \frac{\sqrt{2 \pi}}{K_0} + \sum_{j = 1}^{+ \infty} e^{-(2j - 1)^2 \pi^2 / 8 K_0^2} +$$ + +Plugging the observed value $\sqrt{N}D_N$ in $K_0$, the $p$-value can be +computed. At 95% confidence level (which is the probability of confirming the +null hypothesis when correct) the compatibility with the Landau distribution +cannot be disproved if $p > α = 0.05$. +To approximate the series, the convergence was accelerated using the Levin +$u$-transform with the `gsl_sum_levin_utrunc_accel()` function. The algorithm +terminates when the difference between two successive extrapolations reaches a +minimum. + +For $N = 1000$, the following results were obtained: + + - $D = 0.020$ + - $p = 0.79$ + +Hence, the data was reasonably sampled from a Landau distribution. + +**Note**: +Contrary to what one would expect, the $\chi^2$ test on a histogram is not very +useful in this case. For the test to be significant, the data has to be binned +such that at least several points fall in each bin. However, it can be seen +(@fig:Landau) that many bins are empty both in the right and left side of the +distribution, so it would be necessary to fit only the region where the points +cluster or use very large bins in the others, making the $\chi^2$ test +unpractical. + + +### Parameters comparison + +When a sample of points is generated in a given range, different tests can be +applied in order to check whether they follow an even distribution or not. The +idea which lies beneath most of them is to measure how far the parameters of +the distribution are from the ones measured in the sample. 
+The same principle can be used to verify if the generated sample effectively +follows the Landau distribution. Since it turns out to be a very pathological +PDF, only two parameters can be easily checked: the mode and the +full-width-half-maximum (FWHM). + +![Landau distribution with emphatized mode and + FWHM = ($x_+ - x_-$).](images/landau.pdf) + +\begin{figure} +\hypertarget{fig:parameters}{% +\begin{tikzpicture}[overlay] + \begin{scope}[shift={(0,0.4)}] + % Mode + \draw [thick, dashed] (7.57,3.1) -- (7.57,8.55); + \draw [thick, dashed] (1.9,8.55) -- (7.57,8.55); + \node [above right] at (7.6,3.1) {$m_e$}; + \node [below right] at (1.9,8.55) {$f(m_e)$}; + % FWHM + \draw [thick, dashed] (1.9,5.95) -- (9.05,5.95); + \draw [thick, dashed] (6.85,5.83) -- (6.85,3.1); + \draw [thick, dashed] (8.95,5.83) -- (8.95,3.1); + \node [below right] at (1.9,5.95) {$\frac{f(m_e)}{2}$}; + \node [above right] at (6.85,3.1) {$x_-$}; + \node [above right] at (8.95,3.1) {$x_+$}; + \end{scope} +\end{tikzpicture} +} +\end{figure} + +The mode of a set of data values is defined as the value that appears most +often, namely: it is the maximum of the PDF. Since there is no way to find +an analytic form for the mode of the Landau PDF, it was numerically estimated +through a minimization method (found in GSL, called method 'golden section') +with an arbitrary error of $10^{-2}$, obtaining: + + - expected mode $= m_e = \SI{-0.2227830 \pm 0.0000001}{}$ + +The minimization algorithm begins with a bounded region known to contain a +minimum. The region is described by a lower bound $x_\text{min}$ and an upper +bound $x_\text{max}$, with an estimate of the location of the minimum $x_e$. +The value of the function at $x_e$ must be less than the value of the function +at the ends of the interval, in order to guarantee that a minimum is contained +somewhere within the interval. + +$$ + f(x_\text{min}) > f(x_e) < f(x_\text{max}) +$$ + +On each iteration the interval is divided in a golden section (using the ratio +($3 - \sqrt{5}/2 \approx 0.3819660$ and the value of the function at this new +point $x'$ is calculated. If the new point is a better estimate of the minimum, +namely is $f(x') < f(x_e)$, then the current estimate of the minimum is +updated. +The new point allows the size of the bounded interval to be reduced, by choosing +the most compact set of points which satisfies the constraint $f(a) > f(x') < +f(b)$ between $f(x_\text{min})$, $f(x_\text{min})$ and $f(x_e)$. The interval is +reduced until it encloses the true minimum to a desired tolerance. + +In the sample, on the other hand, once the data were binned, the mode can be +estimated as the central value of the bin with maximum events and the error +is the half width of the bins. In this case, with 40 bins between -20 and 20, +the following result was obtained: + + - observed mode $= m_o = \SI{0 \pm 1}{}$ + +In order to compare the values $m_e$ and $x_0$, the following compatibility +t-test was applied: + +$$ + p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with + t = \frac{|m_e - m_o|}{\sqrt{\sigma_e^2 + \sigma_o^2}} +$$ + +where $\sigma_e$ and $\sigma_o$ are the absolute errors of $m_e$ and $m_o$ +respectively. At 95% confidence level, the values are compatible if $p > 0.05$. +In this case: + +$$ + p = 0.82 +$$ + +Thus, the observed value is compatible with the expected one. + +--- + +The same approach was taken as regards the FWHM. It is defined as the distance +between the two points at which the function assumes half times the maximum +value. 
Even in this case, there is no analytic expression for it, so it
+was computed numerically as follows.
+First, some definitions must be given:
+
+$$
+ f_{\text{max}} := f(m_e) \et \text{FWHM} = x_{+} - x_{-} \with
+ f(x_{\pm}) = \frac{f_{\text{max}}}{2}
+$$
+
+then the function $f'(x)$ was minimized using the same minimization method
+used for finding $m_e$, dividing the range into $[x_\text{min}, m_e]$ and
+$[m_e, x_\text{max}]$ (where $x_\text{min}$ and $x_\text{max}$ are the limits
+in which the points have been sampled) in order to find both minima of the
+function:
+
+$$
+ f'(x) = \left| f(x) - \frac{f_{\text{max}}}{2} \right|
+$$
+
+resulting in:
+
+$$
+ \text{expected FWHM} = w_e = \SI{4.0186457 \pm 0.0000001}{}
+$$
+
+On the other hand, the observed FWHM was computed as the difference between
+the centers of the bins with the values closest to $\frac{f_{\text{max}}}{2}$
+and the error was taken as twice the width of the bins, obtaining:
+
+$$
+ \text{observed FWHM} = w_o = \SI{4 \pm 2}{}
+$$
+
+These two values turn out to be compatible too, with:
+
+$$
+ p = 0.99
+$$
+
diff --git a/notes/sections/2.md b/notes/sections/2.md
new file mode 100644
index 0000000..5ecbe83
--- /dev/null
+++ b/notes/sections/2.md
@@ -0,0 +1,311 @@
+# Exercise 2
+
+## Euler-Mascheroni constant
+
+The Euler-Mascheroni constant is defined as the limiting difference between the
+partial sums of the harmonic series and the natural logarithm:
+
+$$
+ \gamma = \lim_{n \rightarrow +\infty} \left( \sum_{k=1}^{n} \frac{1}{k}
+ - \ln(n) \right)
+$$ {#eq:gamma}
+
+and represents the limiting blue area in @fig:gamma. The first 30 digits of
+$\gamma$ are:
+
+$$
+ \gamma = 0.57721\ 56649\ 01532\ 86060\ 65120\ 90082 \dots
+$$ {#eq:exact}
+
+In complex analysis, this constant is related to many functions and can be
+evaluated through a variety of identities. In this work, five methods were
+implemented and their results discussed. In fact, evaluating $\gamma$ with the
+limited precision of floating-point number representation entails roundoff
+errors, which limit the precision of the estimate of $\gamma$ itself. All the
+known methods involve sums, subtractions or products of very big or small
+numbers, packed in series, partial sums or infinite products. Thus, the
+efficiency of the methods lies in how quickly they converge to their limit.
+
+![The area of the blue region converges to the Euler–Mascheroni
+constant.](images/gamma-area.png){#fig:gamma width=7cm}
+
+## Computing the constant
+
+### Definition
+
+First, in order to have a quantitative idea of how hard it is to reach a good
+estimation of $\gamma$, it was naively computed using the definition given in
+@eq:gamma. The difference was computed for increasing values of $n$, with
+$n_{i+1} = 10 \cdot n_i$ and $n_1 = 20$, until the approximation starts getting
+worse, namely:
+
+$$
+ | \gamma(n_{i+1}) - \gamma | > | \gamma(n_i) - \gamma|
+$$
+
+and $\gamma (n_i)$ was selected as the result (see @tbl:1_results).
+ +----------------------------------------------- +n sum $|\gamma(n)-\gamma|$ +----------- ------------- --------------------- +\SI{2e1}{} \SI{2.48e-02}{} + +\SI{2e2}{} \SI{2.50e-03}{} + +\SI{2e3}{} \SI{2.50e-04}{} + +\SI{2e4}{} \SI{2.50e-05}{} + +\SI{2e5}{} \SI{2.50e-06}{} + +\SI{2e6}{} \SI{2.50e-07}{} + +\SI{2e7}{} \SI{2.50e-08}{} + +\SI{2e8}{} \SI{2.50e-09}{} + +\SI{2e9}{} \SI{2.55e-10}{} + +\SI{2e10}{} \SI{2.42e-11}{} + +\SI{2e11}{} \SI{1.44e-08}{} +----------------------------------------------- + +Table: Partial results using the definition of $\gamma$ with double +precision. {#tbl:1_results} + +The convergence is logarithmic: to fix the first $d$ decimal places about +$10^d$ terms are needed. The double precision runs out at the +10\textsuperscript{th} place, $n=\SI{2e10}{}$. +Since all the number are given with double precision, there can be at best 15 +correct digits but only 10 were correctly computed: this means that when the +terms of the series start being smaller than the smallest representable double, +the sum of all the remaining terms give a number $\propto 10^{-11}$. + + +--------- ----------------------- +true: 0.57721\ 56649\ 01533 + +approx: 0.57721\ 56648\ 77325 + +diff: 0.00000\ 00000\ 24207 +--------- ----------------------- + +Table: First method results. {#tbl:first} + +### Alternative formula + +As a first alternative, the constant was computed through the identity which +relates $\gamma$ to the $\Gamma$ function as follow: + +$$ + \gamma = \lim_{M \rightarrow + \infty} \sum_{k = 1}^{M} + \binom{M}{k} \frac{(-1)^k}{k} \ln(\Gamma(k + 1)) +$$ + +Varying $M$ from 1 to 100, the best result was obtained for $M = 41$ (see +@tbl:second). It went sour: the convergence is worse than using the definition +itself. Only two places were correctly computed. + +--------- ----------------------- +true: 0.57721\ 56649\ 01533 + +approx: 0.57225\ 72410\ 34058 + +diff: 0.00495\ 84238\ 67473 +--------- ----------------------- + +Table: Second method results. {#tbl:second} + +Here, the problem lies in the binomial term: computing the factorial of a +number greater than 18 goes over 15 places and so cannot be correctly +represented. Furthermore, the convergence (even if this is not a series +and, consequently, it is not properly a "convergence") is slowed down by +the logarithmic factor. + +### Reciprocal $\Gamma$ function + +A better result was found using the well known reciprocal $\Gamma$ function +formula: + +$$ + \frac{1}{\Gamma(z)} = z e^{yz} \prod_{k = 1}^{+ \infty} + \left( 1 + \frac{z}{k} \right) e^{-z/k} +$$ + +which gives: + +$$ + \gamma = - \frac{1}{z} \ln \left( z \Gamma(z) + \prod_{k = 1}^{+ \infty} \left( 1 + \frac{z}{k} \right) e^{-z/k} \right) +$$ + +The execution stops when there is no difference between two consecutive therms +of the infinite product (it happens for $k = 456565794$, meaning that for this +value of $k$, the term of the product is equal to 1). Different values of $z$ +were checked, with $z_{i+1} = z_i + 0.01$, ranging from 0 to 20 and the best +result was found for $z = 9$. As can be seen in @tbl:3_results, it's only by +chance, since all $|\gamma(z) - \gamma |$ are of the same order of magnitude. +The best one is compared with the exact value of $\gamma$ in @tbl:third. 
+ +--------------------------------------------------------------- + z $|\gamma(z) - \gamma |$ z $|\gamma(z) - \gamma |$ +----- ------------------------ ------ ------------------------ + 1 \SI{9.712e-9}{} 8.95 \SI{9.770e-9}{} + + 3 \SI{9.320e-9}{} 8.96 \SI{9.833e-9}{} + + 5 \SI{9.239e-9}{} 8.97 \SI{9.622e-9}{} + + 7 \SI{9.391e-9}{} 8.98 \SI{9.300e-9}{} + + 9 \SI{8.482e-9}{} 8.99 \SI{9.059e-9}{} + + 11 \SI{9.185e-9}{} 9.00 \SI{8.482e-9}{} + + 13 \SI{9.758e-9}{} 9.01 \SI{9.564e-9}{} + + 15 \SI{9.747e-9}{} 9.02 \SI{9.260e-9}{} + + 17 \SI{9.971e-9}{} 9.03 \SI{9.264e-9}{} + + 19 \SI{10.084e-9}{} 9.04 \SI{9.419e-9}{} +--------------------------------------------------------------- + +Table: Differences between the obtained values of $\gamma$ and the exact +one. {#tbl:3_results} + +--------- ----------------------- +true: 0.57721\ 56649\ 01533 + +approx: 0.57721\ 56564\ 18607 + +diff: 0.00000\ 00084\ 82925 +--------- ----------------------- + +Table: Third method results for z = 9.00. {#tbl:third} + +This time, the convergence of the infinite product is fast enough to ensure the +$8^{th}$ place. + +### Fastest convergence formula + +The fastest known convergence belongs to the following formula: +(source: http://www.numberworld.org/y-cruncher/internals/formulas.html): + +$$ + \gamma = \frac{A(N)}{B(N)} -\frac{C(N)}{B^2(N)} - \ln(N) +$$ {#eq:faster} + +with: + +\begin{align*} + &A(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \cdot H(k) + \with H(k) = \sum_{j=1}^{k} \frac{1}{j} \\ + &B(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \\ + &C(N) = \frac{1}{4N} \sum_{k=0}^{2N} + \frac{((2k)!)^3}{(k!)^4 \cdot (16k)^2k} \\ +\end{align*} + +The series $A$ and $B$ are computed till there is no difference between two +consecutive terms. +The number of desired correct decimals is given in input and $N$ is +consequently computed through a formula given in the same article +above-mentioned. Results are shown in @tbl:fourth. + +--------- ------------------------------ +true: 0.57721\ 56649\ 01532\ 75452 + +approx: 0.57721\ 56649\ 01532\ 86554 + +diff: 0.00000\ 00000\ 00000\ 11102 +--------- ------------------------------ + +Table: Fourth method results. {#tbl:fourth} + +Due to roundoff errors, the best results is obtained for $N = 10$. Since up to +15 places were correctly computed, an approximation of $\gamma$ better than the +one reached with the definition in @eq:gamma was obtained. + +### Arbitrary precision + +To overcome the issues related to the double representation, one can resort to a +representation with arbitrary precision. In the GMP library (which stands for +GNU Multiple Precision arithmetic), for example, real numbers can be +approximated by a ratio of two integers (fraction) with arbitrary precision: +this means a check is performed on every operation and in case of an integer +overflow, additional memory is requested and used to represent the larger result. +Additionally, the library automatically switches to the optimal algorithm +to compute an operation based on the size of the operands. + +The terms in @eq:faster can therefore be computed with arbitrarily large +precision. Thus, a program that computes the Euler-Mascheroni constant within +a user controllable precision has been implemented. Unlike the previously +mentioned programs, this one was more carefully optimized. + +The $A$ and $B$ series are computed up to an arbitrary limit $k_{\text{max}}$. 
+Different values of $k_{\text{max}}$ were tested but, obviously, they all +eventually reach a point where the approximation cannot guarantee the +requested precision; the solution turned out to be to let $k_{\text{max}}$ depends on +$N$. Consequently, $k_{\text{max}}$ was chosen to be $5N$, after it has been +verified to produce the correct digits up to 500 decimal places. + +The GMP library offers functions to perform some operations such as addition, +multiplication, division, etc. however, the logarithm function is not +implemented. Thus, most of the code carries out the $\ln(N)$ computation. +First, it should be noted that the logarithm of only some special numbers can +be computed with arbitrary precision, namely the ones of which a converging +series is known. This forces $N$ to be rewritten in the following way: + +$$ + N = N_0 \cdot b^e \thus \ln(N) = \ln(N_0) + e \cdot \ln(b) +$$ + +Since a fast converging series for $\ln(2)$ is known $b = 2$ was chosen. As +well as for the scientific notation, in order to get the mantissa $1 \leqslant +N_0 < 2$, the number of binary digits of $N$ must be computed (conveniently, a +dedicated function `mpz_sizeinbase()` can be found in GMP). If the digits are $n$: + +$$ + e = b - 1 \thus N_0 = \frac{N}{2^{n - 1}} +$$ + +Then, by defining: + +$$ + N_0 = \frac{1 + y}{1 - y} \thus y = \frac{N_0 - 1}{N_0 + 1} < 1 +$$ + +and the following series (which is convergent for $y < 1$) can therefore be +used: + +$$ + \ln \left( \frac{1 + y}{1 - y} \right) = + 2 \sum_{k = 0}^{+ \infty} \frac{y^{2k + 1}}{2k + 1} +$$ + +But when to stop computing the series? +Given a partial sum $S_k$ of the series, it is possible to know when a digit is +definitely correct, meaning that no matter how large $k$ can be, +it will not affect that decimal place. The key lies in the following concept. +Letting $S$ the value of the series: + +$$ + S_k + \frac{a_{k+1}}{1 - \frac{a_{k+1}}{a_k}} < S < S_k + a_k \frac{L}{1 -L} +$$ + +where $L$ is the limiting ratio of the series terms, which must be $< 1$ in +order for it to converge (in this case, it is easy to prove that $L = y^2$). +The width $\Delta S$ of the interval containing $S$, for a given $S_k$, gives +the precision of the partial sum with respect to $S$. By imposing $\Delta S < +1/10^D$ where $D$ is the correct decimal place required, the desired precision +is obtained. This is achieved by trials, checking at every step whether $\Delta +S$ is less or greater than $1/10^D$. + +The same holds for $\ln(2)$, which is given by the following series: + +$$ + \log(2) = \sum_{k=1}^{+ \infty} \frac{1}{k \cdot 2^k} +$$ + +In this case the ratio is $L = 1/2$. diff --git a/notes/sections/3.md b/notes/sections/3.md new file mode 100644 index 0000000..a9ddb55 --- /dev/null +++ b/notes/sections/3.md @@ -0,0 +1,423 @@ +# Exercise 3 {#sec:3} + +## Meson decay events generation + +A number of $N = 50'000$ points on the unit sphere, each representing a +particle detection event, must be generated according to then angular +probability distribution function $F_0$: + +\begin{align*} + F_0 (\theta, \phi) = \frac{3}{4 \pi} \Bigg[ + \frac{1}{2} (1 - \alpha_0) + \frac{1}{2} (3\alpha_0 - 1) \cos^2(\theta)& \\ + - \beta_0 \sin^2(\theta) \cos(2 \phi) + - \gamma_0 \sqrt{2} \sin(2 \theta) \cos(\phi)& \Bigg] +\end{align*} + +where $\theta$ and $\phi$ are, respectively, the polar and azimuthal angles, and + +$$ + \alpha_0 = 0.65 \et \beta_0 = 0.06 \et \gamma_0 = -0.18 +$$ + +To generate the points a *try and catch* method was employed: + +1. 
generate a point $(\theta, \phi)$ uniformly on the unit sphere;
+2. generate a third value $Y$ uniformly in $[0, 1]$;
+3. if @eq:requirement is satisfied, save the point;
+4. repeat from 1.
+
+$$
+  Y < F_0(\theta, \phi)
+$$ {#eq:requirement}
+
+To increase the efficiency of the procedure, the $Y$ values were actually
+generated in $[0, 0.2]$, since $\max F_0 = 0.179$ for the given parameters.
+Another option would have been to generate the numbers in the range $[0, 1]$,
+but every point with $Y > 0.179$ would have been rejected with probability 1,
+wasting function calls without changing the resulting distribution.
+
+While $\phi$ can simply be generated uniformly between 0 and $2 \pi$, for
+$\theta$ one has to be more careful: by generating evenly distributed numbers
+between 0 and $\pi$, the points turn out to cluster at the poles. Such a point
+$(\theta, \phi)$ is *not* uniform on the sphere but is uniform on a cylindrical
+projection of it (the difference can be appreciated in @fig:compare).
+
+The proper distribution of $\theta$ is obtained by applying a transform to a
+variable $x$ with uniform distribution in $[0, 1)$, easily generated by the
+GSL function `gsl_rng_uniform()`; a sketch of the whole sampling procedure is
+given after the figure.
+
+<div id="fig:compare">
+
+![Uniformly distributed points with $\theta$ evenly distributed between
+0 and $\pi$.](images/histo-i-u.pdf){width=45%}
+![Points uniformly distributed on a spherical
+surface.](images/histo-p-u.pdf){width=45%}
+
+![Sample generated according to $F$ with $\theta$ evenly distributed between
+0 and $\pi$.](images/histo-i-F.pdf){width=45%}
+![Sample generated according to $F$ with $\theta$ properly
+distributed.](images/histo-p-F.pdf){width=45%}
+
+Examples of samples. On the left, points with $\theta$ evenly distributed
+between 0 and $\pi$; on the right, points with $\theta$ properly distributed.
+</div>
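+
+In practice, the generation loop looks like the following minimal sketch (the
+transform $\theta = \text{acos}(1 - 2x)$ used for the polar angle is derived
+right below; the helper `F0` and the way accepted points are stored are
+illustrative choices, not the actual implementation):
+
+```c
+#include <math.h>
+#include <gsl/gsl_rng.h>
+
+/* Angular distribution F_0 with the given parameters. */
+double F0(double theta, double phi) {
+    double a = 0.65, b = 0.06, g = -0.18;
+    return 3.0/(4.0*M_PI) * (0.5*(1 - a)
+         + 0.5*(3*a - 1) * pow(cos(theta), 2)
+         - b * pow(sin(theta), 2) * cos(2*phi)
+         - g * sqrt(2) * sin(2*theta) * cos(phi));
+}
+
+int main(void) {
+    gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
+    size_t n = 0;
+    while (n < 50000) {
+        double phi   = 2*M_PI * gsl_rng_uniform(r);     // uniform in [0, 2pi)
+        double theta = acos(1 - 2*gsl_rng_uniform(r));  // uniform on the sphere
+        double Y     = 0.2 * gsl_rng_uniform(r);        // uniform in [0, 0.2]
+        if (Y < F0(theta, phi)) {
+            /* accept: store (theta, phi) */
+            n++;
+        }
+    }
+    gsl_rng_free(r);
+    return 0;
+}
+```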
+ +The transformation can be found by imposing the angular PDF to be a constant: + +\begin{align*} + \frac{d^2 P}{d \Omega^2} = \text{const} = \frac{1}{4 \pi} + \hspace{20pt} &\Longrightarrow \hspace{20pt} + d^2P = \frac{1}{4 \pi} \, d \Omega^2 = \frac{1}{4 \pi} \, + d \phi \sin(\theta) d \theta + \\ + &\Longrightarrow \hspace{20pt} + \frac{dP}{d \theta} = \int_0^{2 \pi} d \phi \, \frac{1}{4 \pi} \sin(\theta)= + \frac{1}{4 \pi} \sin(\theta) 2 \pi = \frac{1}{2} \sin(\theta) +\end{align*} +\begin{align*} + \theta = \theta (x) + \hspace{20pt} &\Longrightarrow \hspace{20pt} + \frac{dP}{d \theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d \theta} \right| + = \left. \frac{dP}{dx} \middle/ \, \left| \frac{d \theta}{dx} \right| \right. + \\ + &\Longrightarrow \hspace{20pt} + \frac{1}{2} \sin(\theta) = \left. 1 \middle/ \, \left| + \frac{d \theta}{dx} \right| \right. +\end{align*} + +Since $\theta \in [0, \pi]$, then the absolute value symbol can be omitted: + +\begin{align*} + \frac{d \theta}{dx} = \frac{2}{\sin(\theta)} + \hspace{20pt} &\Longrightarrow \hspace{20pt} + d \theta \sin(\theta) = 2 dx + \\ + &\Longrightarrow \hspace{20pt} + - \cos (\theta') |_{0}^{\theta} = 2[x(\theta) - x(0)] = 2(x - 0) = 2x + \\ + &\Longrightarrow \hspace{20pt} + - \cos(\theta) + 1 = 2x + \\ + &\Longrightarrow \hspace{20pt} + \theta = \text{acos} (1 -2x) +\end{align*} + +Since 1 is not in $[0 , 1)$, $\theta = \pi$ can't be generated. Being it just +a single point, the effect of this omission is negligible. + +\vspace{30pt} + + +## Parameter estimation + +The sample must now be used to estimate the parameters $\alpha_0$, +$\beta_0$ and $\gamma_0$ of the angular distribution $F_0$. + + +### Maximum Likelihood method + +The Maximum Likelihood method (MLM) is based on the observation that the best +estimate {$\bar{\alpha}$, $\bar{\beta}$, $\bar{\gamma}$} of the parameters +{$\alpha$, $\beta$, $\gamma$} should maximize the Likelihood function $L$ +defined in @eq:Like0, where the index $i$ runs over the sample and $F$ is the +function $F_0$ with free parameters $\alpha$, $\beta$ and $\gamma$: + +$$ + L = \prod_i F(\theta_i, \phi_i) = \prod_i F_i +$$ {#eq:Like0} + +The reason is that the points were generated according to the probability +distribution function $F$ with parameters $\alpha_0$, $\beta_0$ and $\gamma_0$, +thus the probability $P$ of sampling this special sample is, by definition, the +product of their probabilities $F_i$ and it must be maximum for {$\alpha$, +$\beta$, $\gamma$} = {$\alpha_0$, $\beta_0$, $\gamma_0$}. Thus, by viewing $F_i$ +as a function of the parameters, by maximizing $P = L$ with respect +to $\alpha$, $\beta$ and $\gamma$, a good estimate should be found. +Instead of actually maximising $L$, the function $-\ln(L)$ +was minimised, as minimisation methods as usually described in literature +and $\ln$ since it simplifies the math: + +$$ + L = \prod_i F_i \thus - \ln(L) = - \sum_{i} \ln(F_i) +$$ {#eq:Like} + + +The problem of multidimensional minimisation was tackled using the GSL library +`gsl_multimin.h`, which implements a variety of methods, including the +conjugate gradient Fletcher-Reeves algorithm, used in the solution, which requires +the implementation of $-\ln(L)$ and its gradient. 
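+
+A minimal sketch of these two ingredients is reported below; the sample arrays
+`theta` and `phi`, the array size and the function names are illustrative
+assumptions (not the actual implementation), and the derivatives are simply
+those of $F$ with respect to each parameter:
+
+```c
+#include <math.h>
+#include <gsl/gsl_vector.h>
+#include <gsl/gsl_multimin.h>
+
+#define N 50000
+double theta[N], phi[N];    /* the sample, filled elsewhere */
+
+/* F with free parameters p = (alpha, beta, gamma). */
+double F(double th, double ph, const gsl_vector *p) {
+    double a = gsl_vector_get(p, 0),
+           b = gsl_vector_get(p, 1),
+           g = gsl_vector_get(p, 2);
+    return 3.0/(4.0*M_PI) * (0.5*(1 - a) + 0.5*(3*a - 1)*pow(cos(th), 2)
+         - b*pow(sin(th), 2)*cos(2*ph) - g*sqrt(2)*sin(2*th)*cos(ph));
+}
+
+/* -ln(L) */
+double nll(const gsl_vector *p, void *data) {
+    double sum = 0;
+    for (size_t i = 0; i < N; i++)
+        sum -= log(F(theta[i], phi[i], p));
+    return sum;
+}
+
+/* Gradient of -ln(L): each component is -sum_i (dF_i/dx) / F_i. */
+void nll_df(const gsl_vector *p, void *data, gsl_vector *grad) {
+    double da = 0, db = 0, dg = 0;
+    for (size_t i = 0; i < N; i++) {
+        double th = theta[i], ph = phi[i], f = F(th, ph, p);
+        da += -3.0/(8.0*M_PI) * (3*pow(cos(th), 2) - 1)   / f;
+        db +=  3.0/(4.0*M_PI) * pow(sin(th), 2)*cos(2*ph) / f;
+        dg +=  3.0/(4.0*M_PI) * sqrt(2)*sin(2*th)*cos(ph) / f;
+    }
+    gsl_vector_set(grad, 0, da);
+    gsl_vector_set(grad, 1, db);
+    gsl_vector_set(grad, 2, dg);
+}
+
+/* Combined callback required by gsl_multimin_function_fdf. */
+void nll_fdf(const gsl_vector *p, void *data, double *f, gsl_vector *grad) {
+    *f = nll(p, data);
+    nll_df(p, data, grad);
+}
+```
+
+These three callbacks are what `gsl_multimin_fdfminimizer_set()` expects,
+wrapped in a `gsl_multimin_function_fdf` with `n = 3`; the Fletcher-Reeves
+minimiser is selected with `gsl_multimin_fdfminimizer_conjugate_fr`.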
+ +To minimise a function $f$, given an initial guess point $x_0$, the gradient +$\nabla f$ is used to find the initial direction $v_0$ (the steepest descent) along +which a line minimisation is performed: the minimum occurs where the +directional derivative along $v_0$ is zero: +$$ + \frac{\partial f}{\partial v_0} = \nabla f \cdot v_0 = 0 +$$ +or, equivalently, at the point where the gradient is orthogonal to $v_0$. +After this first step the following procedure is iterated: + + 1. Find the steepest descent $v_n$. + 2. Compute the *conjugate* direction: $w_n = v_n + \beta_n w_{n-1}$ + 3. Perform a line minimisation along $w_n$ + 4. Update the position $x_{n+1} = x_n + \alpha_n w_n$ + +where $\alpha_n$ is line minimum and $\beta_n$ is given by the Fletcher-Reeves +formula: +$$ + \beta = \frac{\|\nabla f(x_n)\|^2}{\|\nabla f(x_{n-1})\|^2} +$$ + +The accuracy of each line minimisation is controlled the parameter `tol`, +meaning: + +$$ + w \cdot \nabla f < \text{tol} \, \|w\| \, \|\nabla f\| +$$ + +The minimisation is quite unstable and this forced the starting point to be +taken very close to the solution, namely: + +$$ + \alpha_{sp} = 0.79 \et + \beta_{sp} = 0.02 \et + \gamma_{sp} = -0.17 +$$ + +The overall minimisation ends when the gradient module is smaller than $10^{-4}$. +The program took 25 of the above iterations to reach the result shown in @eq:Like_res. + +The Cramér-Rao bound states that the covariance matrix of parameters estimated +by MLM is greater than the of the inverse of the Hessian matrix of $-\log(L)$ +at the minimum. Thus, the Hessian matrix was computed +analytically and inverted by a Cholesky decomposition, which states that +every positive definite symmetric square matrix $H$ can be written as the +product of a lower triangular matrix $L$ and its transpose $L^T$: + +$$ + H = L \cdot L^T +$$ + +The Hessian matrix is, indeed, both symmetric and positive definite, when +computed in a minimum. Once decomposed the inverse is given by + +$$ + H^{-1} = (L \cdot L^T)^{-1} = (L^{-1})^T \cdot L^{-1} +$$ +The inverse of $L$ is easily computed since it's a triangular matrix. + +The GSL library provides two functions `gsl_linalg_cholesky_decomp()` and +`gsl_linalg_cholesky_invert()` to decompose and invert in-place a matrix. +The result is shown below: + +$$ + \bar{\alpha_L} = 0.6541 \et + \bar{\beta_L} = 0.06125 \et + \bar{\gamma_L} = -0.1763 +$$ {#eq:Like_res} + +$$ +\text{cov}_L = \begin{pmatrix} + 9.225 \cdot 10^{-6} & -1.785 \cdot 10^{-6} & 2.41 \cdot 10^{-7} \\ +-1.785 \cdot 10^{-6} & 4.413 \cdot 10^{-6} & 1.58 \cdot 10^{-6} \\ + 2.41 \cdot 10^{-7} & 1.58 \cdot 10^{-6} & 2.602 \cdot 10^{-6} +\end{pmatrix} +$$ {#eq:Like_cov} + +Hence: +\begin{align*} + \alpha_L &= 0.654 \pm 0.003 \\ + \beta_L &= 0.0613 \pm 0.0021 \\ + \gamma_L &= -0.1763 \pm 0.0016 +\end{align*} + + +### Least squares estimation + +Another method that can be used to estimate {$\alpha$, $\beta$, $\gamma$} are +the least squares (LSQ), which is a multidimensional minimisation +specialised to the case of sum of squared functions called residuals. +The residuals can be anything in general but are usually interpreted as the +difference between an expected (fit) and an observed value. The least squares +then correspond to the expected values that best fit the observations. 
+ +To apply the LSQ method, the data must be properly binned, meaning that each +bin should contain a significant number of events (say greater than four or +five): in this case, 30 bins for $\theta$ and 60 for $\phi$ turned out +to be satisfactory. The expected values were given as the product of $N$, $F$ +computed in the geometric center of the bin and the solid angle enclosed by the +bin itself, namely: + +$$ + E_{\theta, \phi} = N F(\theta, \phi) \, \Delta \theta \, \Delta \phi \, + \sin{\theta} +$$ + +Once the data are binned, the number of events in each bin plays the role of the +observed value, while the expected one is given by the probability of finding +an event in that bin multiplied by the total number of points, $N$. Then, the +$\chi^2$, defined as follow, must be minimized with respect to the three +parameters to be estimated: + +$$ + \chi^2 = \sum_i f_i^2 = \|\vec f\|^2 \with f_i = \frac{O_i - E_i}{\sqrt{E_i}} +$$ + +where the sum runs over the bins, $O_i$ are the observed values and $E_i$ the +expected ones. Besides the more generic `gsl_multimin.h` library, GSL provides a +special one for least square minimisation, called `gsl_multifit_nlinear.h`. A +trust region method was used to minimize the $\chi^2$. +In such methods, the $\chi^2$, its gradient and its Hessian matrix are computed +in a series of points in the parameters space till the gradient and the matrix +reach given proper values, meaning that the minimum has been well approached, +where "well" is related to that values. +If {$x_1 \dots x_p$} are the parameters which must be estimated, first $\chi^2$ +is computed in the starting point $\vec{x_k}$, then its value in a point +$\vec{x}_k + \vec{\delta}$ is modelled by a function $m_k (\vec{\delta})$ which +is the second order Taylor expansion around the point $\vec{x}_k$, namely: + +$$ + \chi^2 (\vec{x}_k + \vec{\delta}) \sim m_k (\vec{\delta}) = + \chi^2 (\vec{x}_k) + \nabla_k^T \vec{\delta} + \frac{1}{2} + \vec{\delta}^T H_k \vec{\delta} +$$ + +where $\nabla_k$ and $H_k$ are the gradient and the Hessian matrix of $\chi^2$ +in the point $\vec{x}_k$ and the superscript $T$ stands for the transpose. The +initial problem is reduced the so called trust-region subproblem (TRS), which +is the minimisation of $m_k(\vec{\delta})$ in a region where +$m_k(\vec{\delta})$ should be a good approximation of $\chi^2 (\vec{x}_k ++ \vec{\delta})$, given by the condition: +$$ + \|D_k \vec\delta\| < \Delta_k +$$ +If $D_k = I$ the region is a hypersphere of radius $\Delta_k$ +centered at $\vec{x}_k$ but can be deformed according to $D_k$. This is +necessary in the case one or more parameters have scale very different scales. + +Given an initial point $x_0$, radius $\Delta_0$, scale matrix $D_0$ +and step $\vec\delta_0$ the LSQ algorithm consist in the following iteration: + + 1. Construct the function $m_k(\vec\delta)$. + 2. Solve the TRS for step $\vec\delta_k$. + 3. Check whether the solution actually decreases $\chi^2$ + 1. If positive increase the trust region radius $\Delta_{k+1} = + \alpha\Delta_k$ and shift the position $\vec x_{k+1} = \vec x_k + + \vec \delta_k$. + 2. If negative decrease the radius $\Delta_{k+1} = \Delta_k/\beta$. + 4. Repeat + +This method is advantageous compared to a general minimisation because +the TRS usually amounts to solving a linear system. There are many algorithms +to solve the problem, in this case the Levenberg-Marquardt was used. 
It is based +on a theorem that proves the existence of a number $\mu_k$ such that +\begin{align*} + \Delta_k \mu_k = \|D_k \vec\delta_k\| && + (H_k + \mu_k D_k^TD_k) \vec\delta_k = -\nabla_k +\end{align*} +Using the approximation[^1] $H\approx J^TJ$, obtained by computing the Hessian +of the first-order Taylor expansion of $\chi^2$, $\vec\delta_k$ can +be found by solving the system + +$$ +\begin{cases} + J_k \vec{\delta_k} + \vec{f_k} = 0 \\ + \sqrt{\mu_k} D_k \vec{\delta_k} = 0 +\end{cases} +$$ + +[^1]: Here $J_{ij} = \partial f_i/\partial x_j$ is the Jacobian matrix of the +vector-valued function $\vec f(\vec x)$. + +The algorithm terminates if on of the following condition are satisfied: + + 1. $|\delta_i| \leq \text{xtol} (|x_i| + \text{xtol})$ for every component + of $\vec \delta$. + 2. $\max_i |\nabla_i \cdot \max(x_i, 1)| \leq \text{gtol}\cdot \max(\chi^2, 1)$ + 3. $\|\vec f(\vec x+ \vec\delta) - \vec f(\vec x)|| \leq \text{ftol} + \cdot \max(\|\vec f(\vec x)\|, 1)$ + +These tolerance have all been set to \SI{1e-8}{}. The program converged in 7 +iterations giving the results below. The covariance of the parameters can again +been estimated through the Hessian matrix at the minimum. The following results +were obtained: + +$$ + \bar{\alpha_{\chi}} = 0.6537 \et + \bar{\beta_{\chi}} = 0.05972 \et + \bar{\gamma_{\chi}} = -0.1736 +$$ {#eq:lsq-res} + +$$ +\text{cov}_{\chi} = \begin{pmatrix} + 9.244 \cdot 10^{-6} & -1.66 \cdot 10^{-6} & 2.383 \cdot 10^{-7} \\ +-1.66 \cdot 10^{-6} & 4.495 \cdot 10^{-6} & 1.505 \cdot 10^{-6} \\ + 2.383 \cdot 10^{-7} & 1.505 \cdot 10^{-6} & 2.681 \cdot 10^{-6} +\end{pmatrix} +$$ + +Hence: +\begin{align*} + &\alpha_{\chi} = 0.654 \pm 0.003 \\ + &\beta_{\chi} = 0.0597 \pm 0.0021 \\ + &\gamma_{\chi} = -0.1736 \pm 0.0016 +\end{align*} + + +### Results compatibility + +In order to compare the values $x_L$ and $x_{\chi}$ obtained from both methods +with the correct ones (namely {$\alpha_0$, $\beta_0$, $\gamma_0$}), the +following compatibility t-test was applied: + +$$ + p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with + t = \frac{x_i - x_0}{\sqrt{\sigma_{x_i}^2 + \sigma_{x_0}^2}} + = \frac{x_i - x_0}{\sigma_{x_i}} +$$ + +where $i$ stands either for the MLM or LSQ parameters and $\sigma_{x_i}$ is the +uncertainty of the value $x_i$. At 95% confidence level, the values are +compatible if $p > 0.05$. + +Likelihood results: + +---------------------------- + par $p$-value +------------ --------------- + $\alpha_L$ 0.18 + + $\beta_L$ 0.55 + + $\gamma_L$ 0.023 +---------------------------- + +Table: Likelihood results compatibility. + +$\chi^2$ results: + +--------------------------------- + par $p$-value +----------------- --------------- + $\alpha_{\chi}$ 0.22 + + $\beta_{\chi}$ 0.89 + + $\gamma_{\chi}$ 0.0001 +--------------------------------- + +Table: $\chi^2$ results compatibility. + +It can be concluded that only the third parameter, $\gamma$ is not compatible +with the expected one in both cases. An in-depth analysis of the algebraic +arrangement of $F$ would be required in order to justify this outcome. + +\vspace{30pt} + +## Isotropic hypothesis testing + +What if the probability distribution function was isotropic? Could it be +compatible with the found results? If $F$ was isotropic, then $\alpha_I$, +$\beta_I$ and $\gamma_I$ would be $1/3$, 0, and 0 respectively, since this +gives $F_I = 1/{4 \pi}$. The t-test gives a $p$-value approximately zero for all +the three parameters, meaning that there is no compatibility at all with this +hypothesis. 
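+
+Both the table entries above and this last check come down to the same few
+lines of code; a minimal sketch (assuming the reference values $x_0$ are taken
+as exact, so that $\sigma_{x_0} = 0$ as in the formula above):
+
+```c
+#include <math.h>
+
+/* Two-sided p-value of the compatibility test:
+ * x is the estimate, sigma its uncertainty, x0 the reference value. */
+double p_value(double x, double sigma, double x0) {
+    double t = fabs(x - x0) / sigma;
+    return 1 - erf(t / sqrt(2));
+}
+```
+
+For instance, `p_value(-0.1763, 0.0016, -0.18)` gives a value of the order of
+the 0.02 quoted for $\gamma_L$.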
diff --git a/notes/sections/4.md b/notes/sections/4.md new file mode 100644 index 0000000..29b162c --- /dev/null +++ b/notes/sections/4.md @@ -0,0 +1,269 @@ +# Exercise 4 + +**Kinematic dip pdf derivation** + +Consider a great number of non-interacting particles, each of which with a +random momentum $\vec{p}$ with module between 0 and $p_{\text{max}}$ randomly +angled with respect to a coordinate system {$\hat{x}$, $\hat{y}$, $\hat{z}$}. +Once the polar angle $\theta$ is defined, the momentum vertical and horizontal +components of a particle, which will be referred as $p_v$ and $p_h$, are the +ones shown in @fig:components. +If $\theta$ is evenly distributed on the sphere and the same holds for the +module $p$, which distribution will the average value of the absolute value of +$p_v$ follow as a function of $p_h$? + +\begin{figure} +\hypertarget{fig:components}{% +\centering +\begin{tikzpicture} + % Axes + \draw [thick, ->] (5,2) -- (5,8); + \draw [thick, ->] (5,2) -- (2,1); + \draw [thick, ->] (5,2) -- (8,1); + \node at (1.5,0.9) {$x$}; + \node at (8.5,0.9) {$y$}; + \node at (5,8.4) {$z$}; + % Momentum + \definecolor{cyclamen}{RGB}{146, 24, 43} + \draw [ultra thick, ->, cyclamen] (5,2) -- (3.8,6); + \draw [thick, dashed, cyclamen] (3.8,0.8) -- (3.8,6); + \draw [thick, dashed, cyclamen] (5,7.2) -- (3.8,6); + \draw [ultra thick, ->, pink] (5,2) -- (5,7.2); + \draw [ultra thick, ->, pink] (5,2) -- (3.8,0.8); + \node at (4.8,1.1) {$\vec{p_h}$}; + \node at (5.5,6.6) {$\vec{p_v}$}; + \node at (3.3,5.5) {$\vec{p}$}; + % Angle + \draw [thick, cyclamen] (4.4,4) to [out=80,in=210] (5,5); + \node at (4.7,4.2) {$\theta$}; +\end{tikzpicture} +\caption{Momentum components.}\label{fig:components} +} +\end{figure} + +The aim is to compute $\langle |p_v| \rangle (p_h) dp_h$. + +Consider all the points with $p_h \in [p_h ; p_h - dp_h]$: the values of +$p_v$ that these points can assume depend on $\theta$ and the total momentum +length $p$. + +$$ + \begin{cases} + p_h = p \sin{\theta} \\ p_v = p \cos{\theta} + \end{cases} + \thus |p_v| = p |\cos{\theta}| = p_h \frac{|\cos{\theta}|}{\sin{\theta}} + = |p_v| (\theta) +$$ + +It looks like the dependence on $p$ has disappeared, but obviously it has +not. In fact, it lies beneath the limits that one has to put on the possible +values of $\theta$. For the sake of clarity, take a look at @fig:sphere (the +system is rotation-invariant, hence it can be drown at a fixed azimuthal angle). 
+ +\begin{figure} +\hypertarget{fig:sphere}{% +\centering +\begin{tikzpicture} + % p_h slice + \definecolor{cyclamen}{RGB}{146, 24, 43} + \filldraw [cyclamen!15!white] (1.5,-3.15) -- (1.5,3.15) -- (1.75,3.05) + -- (2,2.85) -- (2,-2.85) -- (1.75,-3.05) + -- (1.5,-3.15); + \draw [cyclamen] (1.5,-3.15) -- (1.5,3.15); + \draw [cyclamen] (2,-2.9) -- (2,2.9); + \node [cyclamen, left] at (1.5,-0.3) {$p_h$}; + \node [cyclamen, right] at (2,-0.3) {$p_h + dp_h$}; + % Axes + \draw [thick, ->] (0,-4) -- (0,4); + \draw [thick, ->] (0,0) -- (4,0); + \node at (0.3,3.8) {$z$}; + \node at (4,0.3) {$hd$}; + % p_max semicircle + \draw [thick, cyclamen] (0,-3.5) to [out=0, in=-90] (3.5,0); + \draw [thick, cyclamen] (0,3.5) to [out=0, in=90] (3.5,0); + \node [cyclamen, left] at (-0.2,3.5) {$p_{\text{max}}$}; + \node [cyclamen, left] at (-0.2,-3.5) {$-p_{\text{max}}$}; + % Angles + \draw [thick, cyclamen, ->] (0,1.5) to [out=0, in=140] (0.55,1.2); + \node [cyclamen] at (0.4,2) {$\theta$}; + \draw [thick, cyclamen] (0,0) -- (1.5,3.15); + \node [cyclamen, above right] at (1.5,3.15) {$\theta_x$}; + \draw [thick, cyclamen] (0,0) -- (1.5,-3.15); + \node [cyclamen, below right] at (1.5,-3.15) {$\theta_y$}; + % Vectors + \draw [ultra thick, cyclamen, ->] (0,0) -- (1.7,2.2); + \draw [ultra thick, cyclamen, ->] (0,0) -- (1.9,0.6); + \draw [ultra thick, cyclamen, ->] (0,0) -- (1.6,-2); +\end{tikzpicture} +\caption{Momentum space at fixed azimuthal angle ("$hd$" stands for +"horizontal direction"). Some vectors with $p_h \in [p_h, p_h +dp_h]$ +are evidenced.}\label{fig:sphere} +} +\end{figure} + +As can be seen, $\theta_x$ and $\theta_y$ are the minimum and maximum tilts +angles of these vectors respectively, because if a point had $p_h \in [p_h; p_h ++ dp_h]$ and $\theta < \theta_x$ or $\theta > \theta_y$, it would have $p > +p_{\text{max}}$. Therefore their values are easily computed as follow: + +$$ + p_h = p_{\text{max}} \sin{\theta_x} = p_{\text{max}} \sin{\theta_y} + \thus \sin{\theta_x} = \sin{\theta_y} = \frac{p_h}{p_{\text{max}}} +$$ + +Since the average value of a quantity is computed by integrating it over all the +possible quantities it depends on weighted on their probability, one gets: + +$$ + \langle |p_v| \rangle (p_h) dp_h = \int_{\theta_x}^{\theta_y} + d\theta P(\theta) \cdot P(p) \, dp \cdot |p_v| (\theta) +$$ + +where $d\theta P(\theta)$ is the probability of generating a point with $\theta +\in [\theta; \theta + d\theta]$ and $P(p) \, dp$ is the probability of +generating a point with $\vec{p}$ in the pink region in @fig:sphere, given a +fixed $\theta$. 
+The easiest to deduce is $P(p) \, dp$: since $p$ is evenly distributed, it +follows that: + +$$ + P(p) \, dp = \frac{1}{p_{\text{max}}} dp +$$ + +with: + +$$ + dp = p(p_h + dp_h) - p(p_h) + = \frac{p_h + dp_h}{\sin{\theta}} - \frac{p_h}{\sin{\theta}} + = \frac{dp_h}{\sin{\theta}} +$$ + +hence + +$$ + P(p) \, dp = \frac{1}{p_{\text{max}}} \cdot \frac{1}{\sin{\theta}} \, dp_h +$$ + +For $d\theta P(\theta)$, instead, one has to do the same considerations done +in @sec:3, from which: + +$$ + P(\theta) d\theta = \frac{1}{2} \sin{\theta} d\theta +$$ + +Ultimately, having found all the pieces, the integral must be computed: + +\begin{align*} + \langle |p_v| \rangle (p_h) dp_h &= \int_{\theta_x}^{\theta_y} + d\theta \frac{1}{2} \sin{\theta} \cdot + \frac{1}{p_{\text{max}}} \frac{1}{\sin{\theta}} \, dp_h \cdot + p_h \frac{|\cos{\theta}|}{\sin{\theta}} + \\ + &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \int_{\theta_x}^{\theta_y} + d\theta \frac{|\cos{\theta}|}{\sin{\theta}} + \\ + &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \cdot \mathscr{O} +\end{align*} + +Then, with a bit of math: + +\begin{align*} + \mathscr{O} &= \int_{\theta_x}^{\theta_y} d\theta + \frac{|\cos{\theta}|}{\sin{\theta}} + \\ + &= \int_{\theta_x}^{\frac{\pi}{2}} d\theta \frac{\cos{\theta}}{\sin{\theta}} + - \int_{\frac{\pi}{2}}^{\theta_y} d\theta \frac{\cos{\theta}}{\sin{\theta}} + \\ + &= \left[ \ln{(\sin{\theta})} \right]_{\theta_x}^{\frac{\pi}{2}} + - \left[ \ln{(\sin{\theta})} \right]_{\frac{\pi}{2}}^{\theta_y} + \\ + &= \ln{(1)} -\ln{ \left( \frac{p_h}{p_{\text{max}}} \right) } + - \ln{ \left( \frac{p_h}{p_{\text{max}}} \right) } + \ln{(1)} + \\ + &= 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) } +\end{align*} + +\newpage +Hence, in conclusion: + +\begin{align*} + \langle |p_v| \rangle (p_h) dp_h &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} + \cdot 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) } + \\ + &= \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) } + \frac{p_h}{p_{\text{max}}} dp_h +\end{align*} + +Namely: + +\begin{figure} +\hypertarget{fig:plot}{% +\centering +\begin{tikzpicture} + \definecolor{cyclamen}{RGB}{146, 24, 43} + % Axis + \draw [thick, <->] (0,5) -- (0,0) -- (11,0); + \node [below right] at (11,0) {$p_h$}; + \node [above left] at (0,5) {$\langle |p_v| \rangle$}; + % Plot + \draw [domain=0.001:10, smooth, variable=\x, + cyclamen, ultra thick] plot ({\x},{12*ln(10/\x)*\x/10}); + \node [cyclamen, below] at (10,0) {$p_{\text{max}}$}; +\end{tikzpicture} +\caption{Plot of the expected distribution.}\label{fig:plot} +} +\end{figure} + +**The code** + +The same distribution should be found by generating and binning points in a +proper way. +A number of $N = 50'000$ points were generated as a couple of values ($p$, +$\theta$), with $p$ evenly distrinuted between 0 and $p_{\text{max}}$ and +$\theta$ given by the same procedure described in @sec:3, namely: + +$$ + \theta = \text{acos}(1 - 2x) +$$ + +with $x$ uniformely distributed between 0 and 1. +The data binning turned out to be a bit challenging. Once $p$ was sorted and +$p_h$ was computed, the bin in which the latter goes in must be found. 
If $n$ is the number of bins into which the range $[0, p_{\text{max}}]$ is
+divided, then the width $w$ of each bin is given by:
+
+$$
+  w = \frac{p_{\text{max}}}{n}
+$$
+
+and the $j^{\text{th}}$ bin in which $p_h$ falls is:
+
+$$
+  j = \text{floor} \left( \frac{p_h}{w} \right)
+$$
+
+where 'floor' is the function which gives the greatest integer less than or
+equal to its argument, and the bins are counted starting from zero.
+Then, a vector was iteratively updated in which the $j^{\text{th}}$ entry
+contains the sum $S_j$ of all the $|p_v|$s whose $p_h$ fell into the
+$j^{\text{th}}$ bin, together with the number num$_j$ of entries in the
+$j^{\text{th}}$ bin.
+At the end, the average value of $|p_v|_j$ was computed as $S_j /
+\text{num}_j$.
+For the sake of clarity, for each generated couple, it works like this:
+
+ - the couple $[p; \theta]$ is generated;
+ - $p_h$ and $p_v$ are computed;
+ - the $j^{\text{th}}$ bin in which $p_h$ falls is found;
+ - num$_j$ is increased by 1;
+ - $S_j$ (which is zero at the beginning) is increased by $|p_v|$.
+
+At the end, $\langle |p_v| \rangle_j = \langle |p_v| \rangle (p_h)$ where:
+
+$$
+  p_h = j \cdot w + \frac{w}{2} = w \left( j + \frac{1}{2} \right)
+$$
+
+The following result was obtained:
+
+![Histogram of the obtained distribution.](images/dip.pdf)
diff --git a/notes/sections/5.md b/notes/sections/5.md
new file mode 100644
index 0000000..bfa3971
--- /dev/null
+++ b/notes/sections/5.md
@@ -0,0 +1,113 @@
+# Exercise 5
+
+**Numerically compute an integral value via Monte Carlo approaches**
+
+The integral to be evaluated is the following:
+
+$$
+  I = \int\limits_0^1 dx \, e^x
+$$
+
+whose exact value is 1.7182818285...
+The three most popular MC methods were applied: plain Monte Carlo, Miser and
+Vegas (besides their popularity, these three methods were chosen for being
+the only ones implemented in the GSL library).
+
+## Plain Monte Carlo
+
+When an integral $I$ of a function $f$ over a volume $V$ of an
+$n$-dimensional space must be evaluated, the simplest MC approach is to sample
+$N$ points $x_i$ evenly distributed in $V$ and approximate $I$ as:
+
+$$
+  I \sim I_N = \frac{V}{N} \sum_{i=1}^N f(x_i) = V \cdot \langle f \rangle
+$$
+
+with $I_N \rightarrow I$ for $N \rightarrow + \infty$ by the law of large
+numbers. Hence, the variance can be estimated as:
+
+$$
+  \sigma^2 =
+  \frac{1}{N-1} \sum_{i = 1}^N \left( f(x_i) - \langle f \rangle \right)^2
+$$
+
+thus, the error on $I_N$ decreases as $1/\sqrt{N}$. Unlike in deterministic
+methods, the estimate of the error is not a strict error bound: random sampling
+may not uncover all the important features of the integrand, which can result
+in an underestimate of the error.
+Since the proximity of $I_N$ to $I$ is related to $N$, the accuracy of the
+method is determined by the number of function calls (as can be seen in
+@tbl:MC).
+
+-----------------------------------------------------------------
+            500'000 calls     5'000'000 calls    50'000'000 calls
+--------- ----------------- ------------------ ------------------
+result      1.7166435813       1.7181231109       1.7183387184
+
+$\sigma$    0.0006955691       0.0002200309       0.0000695809
+-----------------------------------------------------------------
+
+Table: MC results and errors with different numbers of function
+calls. {#tbl:MC}
+
+## Miser
+
+The MISER algorithm is based on recursive stratified sampling.
+On each recursion step the integral and the error are estimated using a plain
+Monte Carlo algorithm.
If the error estimate is larger than the required
+accuracy, the integration volume is divided into sub-volumes and the procedure
+is recursively applied to each sub-volume.
+This technique aims to reduce the overall integration error by concentrating
+integration points in the regions of highest variance.
+The idea of stratified sampling begins with the observation that for two
+disjoint regions $a$ and $b$ with Monte Carlo estimates of the integral
+$E_a (f)$ and $E_b (f)$ and variances $\sigma_a^2 (f)$ and $\sigma_b^2 (f)$,
+the variance $V (f)$ of the combined estimate $E (f)$:
+
+$$
+  E (f)= \frac {1}{2} \left( E_a (f) + E_b (f) \right)
+$$
+
+is given by:
+
+$$
+  V(f) = \frac{\sigma_a^2 (f)}{4 N_a} + \frac{\sigma_b^2 (f)}{4 N_b}
+$$
+
+It can be shown that this variance is minimized by distributing the points
+such that:
+
+$$
+  \frac{N_a}{N_a + N_b} = \frac{\sigma_a}{\sigma_a + \sigma_b}
+$$
+
+Hence, the smallest error estimate is obtained by allocating sample points in
+proportion to the standard deviation of the function in each sub-region.
+
+---
+
+---------------------------------------------------------
+   calls       plain MC        Miser          Vegas
+------------ -------------- -------------- --------------
+   500'000    1.7166435813   1.7182850738   1.7182818354
+
+ 5'000'000    1.7181231109   1.7182819143   1.7182818289
+
+50'000'000    1.7183387184   1.7182818221   1.7182818285
+---------------------------------------------------------
+
+Table: Results of the three methods. {#tbl:results}
+
+---------------------------------------------------------
+   calls       plain MC        Miser          Vegas
+------------ -------------- -------------- --------------
+   500'000    0.0006955691   0.0000021829   0.0000000137
+
+ 5'000'000    0.0002200309   0.0000001024   0.0000000004
+
+50'000'000    0.0000695809   0.0000000049   0.0000000000
+---------------------------------------------------------
+
+Table: $\sigma$s of the three methods. {#tbl:sigmas}
+
diff --git a/notes/sections/6.md b/notes/sections/6.md
new file mode 100644
index 0000000..91a585b
--- /dev/null
+++ b/notes/sections/6.md
@@ -0,0 +1,85 @@
+# Exercise 6
+
+**Generating points according to Fraunhofer diffraction**
+
+The diffraction of a plane wave through a round slit must be simulated by
+generating $N = 50'000$ points according to the intensity distribution
+$I(\theta)$ on a screen at a great distance $L$ from the slit itself:
+
+$$
+  I(\theta) = \frac{E^2}{2} \left( \frac{2 \pi a^2 \cos{\theta}}{L}
+  \frac{J_1(x)}{x} \right)^2 \with x = k a \sin{\theta}
+$$
+
+where:
+
+- $E$ is the electric field amplitude, set by default to $E = \SI{1e4}{V/m}$;
+- $a$ is the radius of the slit aperture, set by default to $a = \SI{0.01}{m}$;
+- $\theta$ is the angle specified in @fig:fenditure;
+- $J_1$ is the Bessel function of the first kind of order one;
+- $k$ is the wavenumber, set by default to $k = \SI{1e-4}{m^{-1}}$;
+- $L$ is set by default to $L = \SI{1}{m}$.
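+
+Before dealing with the generation itself, a minimal sketch of $I(\theta)$
+with the default parameters is reported below; the function name `intensity`
+is an illustrative choice, and the $x \to 0$ limit $J_1(x)/x \to 1/2$ is
+handled explicitly to avoid a division by zero at $\theta = 0$:
+
+```c
+#include <math.h>
+#include <gsl/gsl_sf_bessel.h>
+
+/* Fraunhofer intensity I(theta) with the default parameters. */
+double intensity(double theta) {
+    const double E = 1e4, a = 0.01, k = 1e-4, L = 1;
+    double x = k * a * sin(theta);
+    double j = (x != 0) ? gsl_sf_bessel_J1(x) / x : 0.5;  // limit for x -> 0
+    double A = 2*M_PI * a*a * cos(theta) / L * j;
+    return 0.5 * E*E * A*A;
+}
+```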
+ +\begin{figure} +\hypertarget{fig:fenditure}{% +\centering +\begin{tikzpicture} + \definecolor{cyclamen}{RGB}{146, 24, 43} + % Walls + \draw [thick] (-1,3) -- (1,3) -- (1,0.3) -- (1.2,0.3) -- (1.2,3) + -- (9,3); + \draw [thick] (-1,-3) -- (1,-3) -- (1,-0.3) -- (1.2,-0.3) -- (1.2,-3) + -- (9,-3); + \draw [thick] (10,3) -- (9.8,3) -- (9.8,-3) -- (10,-3); + % Lines + \draw [thick, gray] (0.7,0.3) -- (0.5,0.3); + \draw [thick, gray] (0.7,-0.3) -- (0.5,-0.3); + \draw [thick, gray] (0.6,0.3) -- (0.6,-0.3); + \draw [thick, gray] (1.2,0) -- (9.8,0); + \draw [thick, gray] (1.2,-0.1) -- (1.2,0.1); + \draw [thick, gray] (9.8,-0.1) -- (9.8,0.1); + \draw [thick, cyclamen] (1.2,0) -- (9.8,-2); + \draw [thick, cyclamen] (7,0) to [out=-90, in=50] (6.6,-1.23); + % Nodes + \node at (0,0) {$2a$}; + \node at (5.5,0.4) {$L$}; + \node [cyclamen] at (5.5,-0.4) {$\theta$}; + \node [rotate=-90] at (10.2,0) {screen}; +\end{tikzpicture} +\caption{Fraunhofer diffraction.}\label{fig:fenditure} +} +\end{figure} + +Once again, $\theta$, which must be evenly distributed on half sphere, can be +generated only as a function of a variable $x$ uniformely distributed between +0 and 1. Therefore: + +\begin{align*} + \frac{d^2 P}{d\omega^2} = const = \frac{1}{2 \pi} + &\thus d^2 P = \frac{1}{2 \pi} d\omega^2 = + \frac{1}{2 \pi} d\phi \sin{\theta} d\theta \\ + &\thus \frac{dP}{d\theta} = \int_0^{2 \pi} d\phi \frac{1}{2 \pi} \sin{\theta} + = \frac{1}{2 \pi} \sin{\theta} \, 2 \pi = \sin{\theta} +\end{align*} + +\begin{align*} + \theta = \theta (x) &\thus + \frac{dP}{d\theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d\theta} \right| + = \left. \frac{dP}{dx} \middle/ \, \left| \frac{d\theta}{dx} \right| \right. + \\ + &\thus \sin{\theta} = \left. 1 \middle/ \, \left| + \frac{d\theta}{dx} \right| \right. +\end{align*} + +Since $\theta \in [0, \pi/2]$, then the absolute value symbol can be omitted: + +\begin{align*} + \frac{d\theta}{dx} = \frac{1}{\sin{\theta}} + &\thus d\theta \sin(\theta) = dx + \\ + &\thus - \cos (\theta') |_{0}^{\theta} = x(\theta) - x(0) = x - 0 = x + \\ + &\thus - \cos(\theta) + 1 =x + \\ + &\thus \theta = \text{acos} (1 -x) +\end{align*} diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..c3feca5 --- /dev/null +++ b/shell.nix @@ -0,0 +1,10 @@ +with import { }; + +stdenv.mkDerivation { + name = "jeypsi"; + + GMP_PATH = "${gmp}/lib"; + + buildInputs = with pkgs; [ gdb gsl gmp pkgconfig ]; + shellHook = "exec fish"; +}