initial commit

Michele Guerini Rocco 2020-03-06 02:24:32 +01:00
commit cf27610d53
69 changed files with 6867 additions and 0 deletions

4
.gitignore vendored Normal file

@ -0,0 +1,4 @@
bin/
*.pdf
.stignore
todo

365
ex-1/main.c Normal file

@ -0,0 +1,365 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_sum.h>
#include <gsl/gsl_roots.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_integration.h>
/* Function that compares doubles for sorting:
 *   x > y  →  1
 *   x == y →  0
 *   x < y  → -1
 */
int cmp_double (const void *xp, const void *yp) {
  double x = *(double*)xp,
         y = *(double*)yp;
  return (x > y) - (x < y);
}
/* This is a wrapper needed by `landau_cdf` because
* the numerical integration expects a function
* with parameters.
*/
double landau_pdf(double x, void* params) {
return gsl_ran_landau_pdf(x);
}
/* The cumulative function of the Landau distribution
* calculated by numerical integration.
*/
double landau_cdf(double x, void* params) {
// create the integrand
gsl_function pdf;
pdf.function = &landau_pdf;
pdf.params = NULL;
// set up the integration
double res, err;
size_t iter = 500;
gsl_integration_workspace* w =
gsl_integration_workspace_alloc(iter);
// clip values too small
if (x < -200) x = -200;
// We integrate the pdf in [x, +∞) instead
// of (-∞, x] to avoid a singularity and
// then return 1 - P.
gsl_integration_qagiu(
&pdf, // integrand
x, // lower bound
1e-10, 1e-6, // abs and rel error
iter, // max iteration
w, // "workspace"
&res, &err); // result, abs error
// free the memory
gsl_integration_workspace_free(w);
return 1 - res;
}
/* Another wrapper: this time it is needed by
* `landau_qdf` to solve the equation:
* `landau_cdf(x) = p0` for x
*/
double landau_cdf_root(double x, void* params) {
double p0 = *(double*)params;
return p0 - landau_cdf(x, NULL);
}
/* The quantile function (inverse CDF) of the Landau
* distribution calculated by numerical root method.
*/
double landau_qdf(double p0) {
// create function
gsl_function cdf;
cdf.function = &landau_cdf_root;
cdf.params = &p0;
// use the Brent method
gsl_root_fsolver* s =
gsl_root_fsolver_alloc(gsl_root_fsolver_brent);
// search interval
double low = -1000, // lower value
upp = 100000; // upper value
gsl_root_fsolver_set(s, &cdf, low, upp);
// iterative search
size_t iter = 1000; // max iteration
int stat = GSL_CONTINUE;
for (size_t i=0; stat==GSL_CONTINUE && i<iter; i++) {
stat = gsl_root_fsolver_iterate(s);
low = gsl_root_fsolver_x_lower(s);
upp = gsl_root_fsolver_x_upper(s);
stat = gsl_root_test_interval(low, upp, 0, 0.01);
}
return gsl_root_fsolver_root(s);
}
/* Kolmogorov distribution CDF
* for sample size n and statistic D
*/
double kolmogorov_cdf(double D, int n) {
double x = sqrt(n) * D;
// trick to reduce estimate error
x += 1/(6 * sqrt(n)) + (x - 1)/(4 * n);
  // calculate the first n_terms of the series
  // Σ_{k=1} exp(-(2k - 1)²π²/(8x²))
size_t n_terms = 30;
double *terms = calloc(n_terms, sizeof(double));
for (size_t k=1; k<n_terms; k++) {
terms[k] = exp(-pow((2*(double)k - 1)*M_PI/x, 2) / 8);
}
// do a transform to accelerate the convergence
double sum, abserr;
gsl_sum_levin_utrunc_workspace* s = gsl_sum_levin_utrunc_alloc(n_terms);
gsl_sum_levin_utrunc_accel(terms, n_terms, s, &sum, &abserr);
fprintf(stderr, "\n# Kolmogorov CDF\n");
fprintf(stderr, "accel sum: %f\n", sum);
fprintf(stderr, "plain sum: %f\n", s->sum_plain);
fprintf(stderr, "err: %f\n", abserr);
gsl_sum_levin_utrunc_free(s);
free(terms);
return sqrt(2*M_PI)/x * sum;
}
/* This is a wrapper needed by `numeric_mode` because
* the minimization expects a function to be minimized and not
* maximized.
*/
double neg_landau_pdf(double x, void* params) {
return (-1) * gsl_ran_landau_pdf(x);
}
/* Compute numerically the mode of the Landau distribution.
*/
double numeric_mode (double min, double max) {
  // create function
gsl_function pdf;
pdf.function = &neg_landau_pdf;
pdf.params = NULL;
// initialize minimization
double guess = 0;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min, max);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
// Free memory
gsl_min_fminimizer_free (s);
return guess;
}
/* This is the function to be minimized in `numeric_FWHM`.
*/
double abs_landau_pdf(double x, void* params_) {
double* params = ((double *) params_);
return fabs(gsl_ran_landau_pdf(x) - params[0]);
}
/* Compute numerically the FWHM of the Landau distribution.
*/
double numeric_fwhm (double min_lim, double mode, double max_lim) {
  // create function
gsl_function pdf;
pdf.function = &abs_landau_pdf;
double params [1];
params[0]= gsl_ran_landau_pdf(mode)/2;
pdf.params = params;
// initialize minimization for x₋
double guess = mode - 1;
double min, max;
int iter = 0;
int max_iter = 100;
double prec = 1e-7;
int status;
const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection;
gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T);
gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_low = guess;
  // initialize minimization for x₊
  guess = mode + 1;
  iter = 0;
  gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim);
// minimization
do {
iter++;
status = gsl_min_fminimizer_iterate (s);
guess = gsl_min_fminimizer_x_minimum (s);
min = gsl_min_fminimizer_x_lower (s);
max = gsl_min_fminimizer_x_upper (s);
status = gsl_min_test_interval (min, max, prec, prec);
} while (status == GSL_CONTINUE && iter < max_iter);
double x_upp = guess;
// Free memory
gsl_min_fminimizer_free (s);
return x_upp - x_low;
}
/* Here we sample random numbers from the Landau
 * distribution, fill a histogram with them and then
 * check the correctness of the sampling with a
 * Kolmogorov-Smirnov test and by comparing the observed
 * mode and FWHM with the expected ones.
 */
int main(int argc, char** argv) {
// initialize an RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// prepare histogram
size_t samples = 100000;
double* sample = calloc(samples, sizeof(double));
size_t bins = 40;
double min = -20;
double max = 20;
gsl_histogram* hist = gsl_histogram_alloc(bins);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* sample points from the Landau distribution
* and fill the histogram
*/
double x;
for(size_t i=0; i<samples; i++) {
x = gsl_ran_landau(r);
sample[i] = x;
gsl_histogram_increment(hist, x);
}
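  /* Note (illustrative sketch): the loop above samples the Landau
   * distribution directly with gsl_ran_landau(). An equivalent, much
   * slower, inverse-transform version using the numerical quantile
   * landau_qdf() defined above would be:
   *
   *   for (size_t i = 0; i < samples; i++)
   *     sample[i] = landau_qdf(gsl_rng_uniform_pos(r));
   *
   * gsl_rng_uniform_pos() avoids u = 0, where the quantile diverges.
   */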
// sort the sample
qsort(sample, samples, sizeof(double), &cmp_double);
// calculate Kolmogorov-Smirnov D
double D = 0;
double d;
for(size_t i=0; i<samples; i++) {
d = fabs(landau_cdf(sample[i], NULL) - ((double)i+1)/samples);
if (d > D)
D = d;
}
// Kolmogorov-Smirnov test
double beta = kolmogorov_cdf(D, samples);
fprintf(stderr, "\n# K-S test results\n");
fprintf(stderr, "D: %f\n", D);
fprintf(stderr, "α: %g\n", 1 - beta);
// Mode comparison
//
// find the bin with the maximum number of events
double mode_o, maxbin = 0;
double f_mode_o = 0;
double m1, m2 = 0;
for(size_t i=0; i<bins; i++) {
m1 = hist->bin[i];
if (m1 > m2){
m2 = m1;
maxbin = (double)i;
f_mode_o = hist->bin[i];
}
}
fprintf(stderr, "\nstep:%.2f\n ", (max - min)/bins);
f_mode_o = f_mode_o/samples;
mode_o = min + (maxbin + 0.5)*(max - min)/bins;
// print the results
double mode_e = numeric_mode(min, max);
fprintf(stderr, "\n# Numeric mode\n");
fprintf(stderr, "expected mode: %.7f\n", mode_e);
fprintf(stderr, "observed mode: %.3f\n", mode_o);
// FWHM comparison
//
// find the bins x₋ and x₊
double half = f_mode_o*samples/2;
m2 = samples;
double x_low = 0;
double x_upp = 0;
double diff;
for(size_t i=0; i<maxbin; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_low = (double)i;
}
}
m2 = samples;
for(size_t i=maxbin; i<bins; i++) {
m1 = hist->bin[i];
diff = fabs(m1 - half);
if (diff < m2){
m2 = diff;
x_upp = (double)i;
}
}
x_low = min + (x_low + 0.5)*(max - min)/bins;
x_upp = min + (x_upp + 0.5)*(max - min)/bins;
double fwhm_o = x_upp - x_low;
// print the results
fprintf(stderr, "\n# Numeric FWHM\n");
double fwhm_e = numeric_fwhm(min, mode_e, max);
fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e);
fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o);
// print the counts
// gsl_histogram_fprintf(stdout, hist, "%g", "%g");
// clean up and exit
gsl_histogram_free(hist);
gsl_rng_free(r);
free(sample);
return EXIT_SUCCESS;
}

9
ex-1/pdf-plot.py Executable file

@ -0,0 +1,9 @@
#!/usr/bin/env python
from pylab import *
import sys
x, y = loadtxt(sys.stdin, unpack=True)
title('Landau distribution', loc='right')
plot(x, y, color='#92182b')
show()

18
ex-1/pdf.c Normal file

@ -0,0 +1,18 @@
#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_randist.h>
int main(int argc, char** argv) {
size_t n = 1000;
double
left = -10,
right = +10,
dx = (right - left)/n;
double x = left;
for (size_t i = 0; i < n; i++) {
x += dx;
printf("%f %f\n", x, gsl_ran_landau_pdf(x));
}
return EXIT_SUCCESS;
}

10
ex-1/plot.py Executable file

@ -0,0 +1,10 @@
#!/usr/bin/env python
from pylab import *
import sys
a, b, f = loadtxt(sys.stdin, unpack=True)
title('Landau distribution sampling', loc='right')
hist(a, np.insert(b, 0, a[0]), weights=f/sum(f))
annotate(f'N={sum(f)}\nbins={len(f)}', (50, 0.2))
show()

511
ex-2/fancier.c Normal file

@ -0,0 +1,511 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <math.h>
#include <gmp.h>
/* The Euler-Mascheroni constant is here computed to
* arbitrary precision using GMP rational numbers
* with the formula:
*
* γ = A(N)/B(N) - C(N)/B(N)² - log(N)
*
* with:
*
 *   A(N) = Σ_(k = 1)^(k_max) (N^k/k!)² H(k)
 *   B(N) = Σ_(k = 0)^(k_max) (N^k/k!)²
 *   C(N) = 1/(4N) Σ_(k = 0)^(2N) ((2k)!)³ / ((k!)⁴ (16N)^(2k))
 *   H(k) = Σ_(j = 1)^(k) 1/j, the k-th harmonic number
*
* where N is computed from D as written below and k_max is
* currently 5*N, chosen in a completely arbitrary way.
*
* source: http://www.numberworld.org/y-cruncher/internals/formulas.html
*
*/
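/* Worked example (for orientation only): asking for D = 20 decimal
 * places gives N = floor(1/4 ⋅ ln(10) ⋅ 20) = 11, and the A, B series
 * below are summed up to k_max = 5N = 55 terms.
 */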
/* `mpq_dec_str(z, n)`
* returns the decimal representation of the
* rational z as a string, up to n digits.
*/
char* mpq_dec_str(mpq_t z, size_t mantissa_size) {
mpz_t num, den, quot, rem;
mpz_inits(num, den, quot, rem, NULL);
// calculate integral part
mpq_get_num(num, z);
mpq_get_den(den, z);
mpz_tdiv_qr(quot, rem, num, den);
// store the integral part
size_t integral_size = mpz_sizeinbase(quot, 10);
char* repr = calloc(integral_size + 1 + mantissa_size + 1, sizeof(char));
mpz_get_str(repr, 10, quot);
// add decimal point
repr[integral_size] = '.';
// calculate the mantissa
mpz_t cur;
mpz_init(cur);
size_t digit;
for (size_t i = 0; i < mantissa_size; i++) {
// calculate i-th digit as:
// digit = (remainder * 10) / denominator
    // new_remainder = (remainder * 10) % denominator
mpz_set(cur, rem);
mpz_mul_ui(cur, rem, 10);
mpz_tdiv_qr(quot, rem, cur, den);
// store digit in mantissa
digit = mpz_get_ui(quot);
sprintf(repr + integral_size + 1 + i, "%ld", digit);
}
// free memory
mpz_clears(num, den, quot, rem, cur, NULL);
return repr;
}
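/* For example, calling mpq_dec_str() on z = 1/3 with
 * mantissa_size = 5 returns the string "0.33333".
 */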
void mpq_fprintf(FILE *fp, size_t digits, char* fmt, ...) {
va_list args;
va_start(args, fmt);
char* str;
  char fmtc[3] = "%x";
for (char* p = fmt; *p; p++) {
if (*p != '%') {
fprintf(fp, "%c", *p);
continue;
}
switch (*++p) {
case 's':
str = va_arg(args, char*);
fputs(str, fp);
break;
case 'l':
if (*++p == 'd')
fprintf(fp, "%ld", va_arg(args, size_t));
break;
case 'q':
str = mpq_dec_str(*va_arg(args, mpq_t*), digits);
fputs(str, fp);
free(str);
break;
default:
fmtc[1] = *p;
gmp_fprintf(fp, fmtc, *va_arg(args, mpq_t*));
}
}
va_end(args);
}
/* `mpq_log2(z, D)`
* calculate log(2) to D decimal places
* using the series
*
* log(2) = Σ_{k=1} 1/(k 2^k)
*
* The error of the n-th partial sum is
* estimated as
*
* Δn = 1/(n(n+2) 2^(n-1)).
*
* So to fix the first D decimal places
* we evaluate up to n terms such that
*
* Δn < 1/10^D
*/
void mpq_log2(mpq_t rop, size_t digits) {
mpq_t sum, term;
mpq_inits(sum, term, NULL);
fprintf(stderr, "[mpq_log2] decimal digits: %ld\n", digits);
/* calculate n, the number of required terms */
size_t n;
mpz_t temp, prec, width; mpz_inits(temp, prec, width, NULL);
  // prec = 10^(digits + 2): two extra guard digits
  mpz_ui_pow_ui(prec, 10, digits + 2);
for (n = 1;; n++) {
// width = n*(n + 2)*2^(n - 1)
mpz_set_ui(width, n);
mpz_set_ui(temp, n);
mpz_add_ui(temp, temp, 2);
mpz_mul(width, width, temp);
mpz_sub_ui(temp, temp, 3);
mpz_mul_2exp(width, width, mpz_get_ui(temp));
if (mpz_cmp(width, prec) > 0) break;
}
fprintf(stderr, "[mpq_log2] series terms: %ld\n", n);
/* calculate series */
for (size_t i=1; i<n+1; i++) {
mpq_set_ui(term, 1, 1);
mpz_ui_pow_ui(mpq_denref(term), 2, i);
mpz_mul_ui(mpq_denref(term), mpq_denref(term), i);
mpq_add(sum, sum, term);
}
mpq_fprintf(stderr, digits > 50 ? 50 : digits,
"[mpq_log2] log(2)=%q...\n", &sum);
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, NULL);
mpz_clears(temp, prec, width, NULL);
}
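/* For instance, requesting 10 decimal digits sets prec = 10^12
 * (guard digits included) and the loop above stops at roughly
 * n = 31 series terms.
 */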
/* `mpq_log_ui(z, x, D)`
* calculates the logarithm of x up
* to D decimal places using the formula
*
* log(x) = log(a(x) * 2^(n-1))
* = log(a) + (n-1) log(2)
*
* where 1<a<2 and n is the number of binary
* digits of x.
*
* log(a) is the computed from
*
* log((1+y)/(1-y)) = 2Σ_{k=0} y^(2k+1)/(2k+1)
*
* where y = (a-1)/(a+1).
*
* The error of the n-th partial sum is
* estimated as:
*
* Δ(n, y) = 4y^(2n + 3) /
* (4n^2y^4 - 8n^2y^2 + 4n^2 + 4ny^4 - 12ny^2 + 8n + y^4 - 4y^2 + 3)
*
* So to fix the first D decimal places
* we evaluate up to n terms such that
*
* Δ(n, y) < 1/10^D
*/
void mpq_log_ui(mpq_t rop, size_t x, size_t digits) {
/* calculate `a` such that x = a⋅2^(m-1) */
mpq_t a; mpq_init(a);
mpz_t xz, power; mpz_inits(xz, power, NULL);
mpz_set_ui(xz, x);
// m = digits in base 2 of x
size_t m = mpz_sizeinbase(xz, 2);
mpz_ui_pow_ui(power, 2, m - 1);
mpq_set_num(a, xz);
mpq_set_den(a, power);
fprintf(stderr, "[mpq_log_ui] binary digits: %ld\n", m);
fprintf(stderr, "[mpq_log_ui] a=%g\n", mpq_get_d(a));
/* calculate y = (a-1)/(a+1) */
mpq_t y, temp; mpq_inits(y, temp, NULL);
mpq_set(temp, a);
// decrement: a/b -= b/b
mpz_submul_ui(mpq_numref(temp),
mpq_denref(temp), 1);
mpq_set(y, temp);
mpq_set(temp, a);
// increment: a/b += b/b
mpz_addmul_ui(mpq_numref(temp),
mpq_denref(temp), 1);
mpq_div(y, y, temp);
fprintf(stderr, "[mpq_log_ui] y=%g\n", mpq_get_d(y));
/* calculate number of required terms:
* find smallest n such that Δ(n, y) < 1/10^digits.
*/
mpq_t y4, y2, y2n, t1, t2;
mpq_inits(y4, y2, y2n, t1, t2, NULL);
// y4 = y^4
mpz_pow_ui(mpq_numref(y4), mpq_numref(y), 4);
mpz_pow_ui(mpq_denref(y4), mpq_denref(y), 4);
// y2 = y^2
mpz_pow_ui(mpq_numref(y2), mpq_numref(y), 2);
mpz_pow_ui(mpq_denref(y2), mpq_denref(y), 2);
size_t n;
size_t p1, p2, p3;
for(n = 0;; n++) {
// polynomials in n
p1 = 4*pow(n, 2) + 4*n + 1;
p2 = 8*pow(n, 2) + 12*n - 4;
p3 = 4*pow(n, 2) + 3;
// t1 = y4 * p1
mpz_mul_ui(mpq_numref(t1), mpq_numref(y4), p1);
mpq_canonicalize(t1);
// t2 = y2 * p2
mpz_mul_ui(mpq_numref(t2), mpq_numref(y2), p2);
mpq_canonicalize(t2);
// t1 -= t2
mpq_sub(t1, t1, t2);
// t1 += p3
mpz_addmul_ui(mpq_numref(t1), mpq_denref(t1), p3);
// y2n = 4*y^(2n + 3)
mpq_set(y2n, y);
mpz_pow_ui(mpq_numref(y2n), mpq_numref(y2n), 2*n+3);
mpz_pow_ui(mpq_denref(y2n), mpq_denref(y2n), 2*n+3);
mpz_mul_ui(mpq_numref(y2n), mpq_numref(y2n), 4);
mpq_canonicalize(y2n);
// t2 = 10^digits * y2n
mpq_set_ui(t2, 1, 1);
mpz_ui_pow_ui(mpq_numref(t2), 10, digits + 1);
mpq_mul(t2, t2, y2n);
if (mpq_cmp(t1, t2) > 0) break;
}
fprintf(stderr, "[mpq_log_ui] series terms: %ld\n", n);
fprintf(stderr, "[mpq_log_ui] decimal digits: %ld\n", digits);
/* calculate the series of
* log(A) = log((1+y)/(1-y)) = 2 Σ_{k=0} y^(2k+1)/(2k+1)
*/
mpq_t sum, term; mpq_inits(sum, term, NULL);
for (size_t i = 0; i <= n; i++) {
// numerator: y^(2i+1)
mpq_set(temp, y);
mpz_pow_ui(mpq_numref(temp), mpq_numref(temp), 2*i + 1);
mpz_pow_ui(mpq_denref(temp), mpq_denref(temp), 2*i + 1);
mpq_canonicalize(temp);
mpq_set(term, temp);
// denominator: 2i+1
mpq_set_ui(temp, 2*i + 1, 1);
mpq_div(term, term, temp);
// sum the term
mpq_add(sum, sum, term);
}
// multiply by 2
mpz_mul_ui(mpq_numref(sum), mpq_numref(sum), 2);
mpq_canonicalize(sum);
// calculate log(2)*(m-1)
mpq_t log2; mpq_init(log2);
mpq_log2(log2, digits);
mpz_mul_ui(mpq_numref(log2), mpq_numref(log2), m-1);
mpq_canonicalize(log2);
// series += log(2)*(m-1)
mpq_add(sum, sum, log2);
mpq_set(rop, sum);
mpq_fprintf(stderr, digits > 50 ? 50 : digits,
"[mpq_log_ui] log(%ld)=%q...\n", x, sum);
// free memory
mpq_clears(a, y, temp, sum, term, log2,
y4, y2, y2n, t1, t2, NULL);
mpz_clears(xz, power, NULL);
}
/* A series */
void a_series(mpq_t rop, size_t N) {
mpq_t sum, term, factq, harm, recip;
mpz_t fact, power, stop;
mpq_inits(sum, term, factq, harm, recip, NULL);
mpz_inits(fact, power, stop, NULL);
mpz_set_ui(stop, N);
for (size_t k = 0; k < 5*N; k++) {
mpz_fac_ui(fact, k);
mpz_pow_ui(power, stop, k);
mpq_set_z(term, power);
mpq_set_z(factq, fact);
mpq_div(term, term, factq);
mpq_mul(term, term, term);
if (k > 0)
mpq_set_ui(recip, 1, k);
mpq_add(harm, harm, recip);
mpq_mul(term, term, harm);
mpq_add(sum, sum, term);
}
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, factq, harm, recip, NULL);
mpz_clears(fact, power, stop, NULL);
}
/* B series */
void b_series(mpq_t rop, size_t N){
mpq_t sum, term, factq;
mpz_t fact, power, stop;
mpq_inits(sum, term, factq, NULL);
mpz_inits(fact, power, stop, NULL);
mpz_set_ui(stop, N);
for (size_t k = 0; k < 5*N; k++) {
mpz_fac_ui(fact, k);
mpz_pow_ui(power, stop, k);
mpq_set_z(term, power);
mpq_set_z(factq, fact);
mpq_div(term, term, factq);
mpq_mul(term, term, term);
mpq_add(sum, sum, term);
}
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, factq, NULL);
mpz_clears(fact, power, stop, NULL);
}
/* C series */
void c_series(mpq_t rop, size_t N) {
mpq_t sum, term, stop2, fact2q;
mpz_t fact1, fact2, power, stop1;
mpq_inits(sum, term, stop2, fact2q, NULL);
mpz_inits(fact1, fact2, power, stop1, NULL);
mpq_set_ui(stop2, 4*N, 1);
for (size_t k = 0; k <= 2*N; k++) {
mpz_fac_ui(fact1, 2*k);
mpz_pow_ui(fact1, fact1, 3);
mpz_fac_ui(fact2, k);
mpz_pow_ui(fact2, fact2, 4);
mpz_set_ui(stop1, 16*N);
mpz_pow_ui(stop1, stop1, 2*k);
mpz_mul(fact2, fact2, stop1);
mpq_set_z(term, fact1);
mpq_set_z(fact2q, fact2);
mpq_div(term, term, fact2q);
mpq_add(sum, sum, term);
}
mpq_div(sum, sum, stop2);
mpq_set(rop, sum);
// free memory
mpq_clears(sum, term, stop2, fact2q, NULL);
mpz_clears(fact1, fact2, power, stop1, NULL);
}
int main(int argc, char** argv) {
/* if no argument is given, show the usage */
if (argc != 2) {
fprintf(stderr, "usage: %s D\n", argv[0]);
fprintf(stderr, "Computes γ up to D decimal places.\n");
return EXIT_FAILURE;
}
mpq_t exact, indicator;
mpq_inits(exact, indicator, NULL);
// value of γ up to 500 digits
mpq_set_str(
exact,
"47342467929426395252720948664321583815681737620205"
"78046578043524355342995304421105234825234769529727"
"60710541774258570118818437511973945213031180449905"
"77067110892216494927227098092412641670668896104172"
"59380467495735805599918127376547804186055507379270"
"09659478275476110597673948659342592173551684412254"
"47940426607883070678658320829056222506449877887289"
"02099638646695565284675455912794915138438516540912"
"22061900908485016328626399040675802962645141413722"
"567840383761005003563012969560659130635723002086605/"
"82018681765164048732047980836753451023877954010252"
"60062364748361667340168652059998708337602423525120"
"45225158774173869894826877890589130978987229877889"
"33367849273189687823618289122425446493605087108634"
"04387981302669131224273324182166778131513056804533"
"58955006355665628938266331979307689540884269372365"
"76288367811322713649805442241450184023209087215891"
"55369788474437679223152173114447113970483314961392"
"48250188991402851129033493732164230227458717486395"
"514436574417275149404197774547389507462779807727616",
10);
// number with decimal expansion equal to
// 0123456789 periodic: useful to count digits
mpq_set_str(
indicator,
"137174210/1111111111",
10);
// number of decimal places
size_t D = atol(argv[1]);
// number of terms
size_t N = floor(1.0/4 * log(10) * (double)D);
/* calculate series */
mpq_t A, B, C;
mpq_inits(A, B, C, NULL);
a_series(A, N);
b_series(B, N);
c_series(C, N);
/* calculate γ... */
mpq_t gamma, corr, logn;
mpq_inits(gamma, corr, logn, NULL);
mpq_log_ui(logn, N, D);
mpq_div(gamma, A, B);
mpq_div(corr, C, B);
mpq_div(corr, corr, B);
mpq_sub(gamma, gamma, corr);
mpq_sub(gamma, gamma, logn);
/* calculate difference */
mpq_t diff;
mpq_init(diff);
mpq_sub(diff, gamma, exact);
/* print results */
size_t d = D+1 > 50 ? 50 : D+1;
fprintf(stderr, "[main] N: %ld\n", N);
mpq_fprintf(stderr, d, "[main] approx:\t%q...\n", gamma);
mpq_fprintf(stderr, d, "[main] exact:\t%q...\n", exact);
mpq_fprintf(stderr, d, "[main] diff:\t%q...\n", diff);
mpq_fprintf(stderr, d, "[main] digits:\t%q\n", indicator);
mpq_fprintf(stdout, D+1, "%q\n", gamma);
// free memory
mpq_clears(A, B, C, gamma, corr, logn,
exact, indicator, diff, NULL);
return EXIT_SUCCESS;
}

89
ex-2/fancy.c Normal file

@ -0,0 +1,89 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
// The Euler-Mascheroni constant is computed through the formula:
//
//   γ = A(N)/B(N) - C(N)/B(N)² - ln(N)
//
// with:
//
//   A(N) = Σ_(k = 1)^(k_max) (N^k/k!)² H(k)
//   B(N) = Σ_(k = 0)^(k_max) (N^k/k!)²
//   C(N) = 1/(4N) Σ_k ((2k)!)³ / ((k!)⁴ (16N)^(2k))
//   H(k) = Σ_(j = 1)^(k) 1/j
//
// where N is computed from D as written below and k_max is the index
// at which the partial sums stop changing in double precision.
//
// source: http://www.numberworld.org/y-cruncher/internals/formulas.html
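// Worked example: D = 15 gives N = floor(2 + 1/4 ⋅ ln(10) ⋅ 15) = 10,
// the configuration noted below as giving the best result.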
// Partial harmonic sum h
double harmonic_sum(double n) {
double sum = 0;
for (double k = 1; k < n+1; k++) {
sum += 1/k;
}
return sum;
}
// A series
double a_series(int N) {
double sum = 0;
double prev = -1;
for (double k = 1; sum != prev; k++) {
prev = sum;
sum += pow(((pow(N, k))/(tgamma(k+1))), 2) * harmonic_sum(k);
}
return sum;
}
// B series
double b_series(int N){
double sum = 0;
double prev = -1;
for (double k = 0; sum != prev; k++) {
prev = sum;
sum += pow(((pow(N, k))/(tgamma(k+1))), 2);
}
return sum;
}
double c_series(int N) {
double sum = 0;
for (double k = 0; k < N; k++) {
sum += pow(tgamma(2*k + 1), 3)/(pow(tgamma(k + 1), 4) * pow(16.0*N, (int)2*k));
}
return sum/(4.0*N);
}
// Takes as input the number D of desired correct decimals.
// Best result obtained with D = 15, N = 10.
int main(int argc, char** argv) {
double exact =
0.57721566490153286060651209008240243104215933593992;
  // If no argument is given, an error message is displayed
  // and the program quits.
if (argc != 2) {
fprintf(stderr, "usage: %s D\n", argv[0]);
fprintf(stderr, "Computes γ up to D decimal places.\n");
return EXIT_FAILURE;
}
int N = floor(2.0 + 1.0/4 * log(10) * (double)atoi(argv[1]));
double A = a_series(N);
double B = b_series(N);
double C = c_series(N);
double gamma = A/B - C/(B*B) - log(N);
printf("N: %d\n", N);
printf("approx:\t%.30f\n", gamma);
printf("true:\t%.30f\n", exact);
printf("diff:\t%.30f\n", fabs(gamma - exact));
printf("\t 123456789 123456789 123456789\n");
return EXIT_SUCCESS;
}

43
ex-2/limit.c Normal file

@ -0,0 +1,43 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/* Compute the Euler-Mascheroni constant through the formula:
 *
 *   γ = lim_{M → +∞} Σ_{k = 1}^{M} binom(M, k) (-1)^k/k ⋅ ln(Γ(k+1))
 *
 * Best result is obtained for M = 41.
* Values from 1 to 100 were checked.
*
*/
double exact = 0.577215664901532860;
int main() {
double best = 0, num = 0;
double sum = 0, prev;
for (double M = 1; M < 60; M++) {
prev = sum;
sum = 0;
for (double k = 1; k <= M; k++) {
sum += (tgamma(M + 1))/(tgamma(k + 1)*tgamma(M - k + 1)) * pow(-1, k)/k
* log(tgamma (k + 1));
}
printf("M: \t%d\t", (int)M);
printf("diff:\t%.15f\n", fabs(sum - exact));
if ((fabs(sum - exact) < fabs(prev - exact)) &&
(fabs(sum - exact) < fabs(best - exact))){
best = sum;
num = M;
}
}
printf("M: %d\n", (int)num);
printf("approx:\t%.15f\n", best);
printf("true:\t%.15f\n", exact);
printf("diff:\t%.15f\n", fabs(best - exact));
printf("\t 123456789_12345\n");
return EXIT_SUCCESS;
}

57
ex-2/naive.c Normal file

@ -0,0 +1,57 @@
#include <stdio.h>
#include <math.h>
/* Naive approach using the definition
 * of γ as the limit of the difference between the
 * partial sums of the harmonic series (a Riemann sum
 * of 1/x) and log(n).
*
* The result is:
*
* approx: 0.57721 56648 77325
* true: 0.57721 56649 01533
* diff: 0.00000 00000 24207
*
* The convergence is logarithmic and so
* slow that the double precision runs out
* at the 10th decimal digit.
*
*/
double exact = 0.577215664901532860;
double partial_sum(size_t n) {
double sum = 0;
for (double k = 1; k <= n; k++) {
sum += 1/k;
}
return sum - log(n);
}
int main () {
size_t prev_n, n = 2;
double prev_diff, diff=1;
double approx;
/* Compute the approximation
* and the difference from the true value.
* Stop when the difference starts increasing
* due to roundoff errors.
*
*/
do {
prev_n = n;
n *= 10;
prev_diff = diff;
approx = partial_sum(n);
diff = fabs(exact - approx);
printf("n:\t%.0e\t", (double)n);
printf("diff:\t%e\n", diff);
} while (diff < prev_diff);
printf("n:\t%.0e\n", (double)prev_n);
printf("approx:\t%.15f\n", approx);
printf("true:\t%.15f\n", exact);
printf("diff:\t%.15f\n", prev_diff);
printf("\t 123456789_12345\n");
}

63
ex-2/recip.c Normal file

@ -0,0 +1,63 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
/* Compute the Euler-Mascheroni constant through the formula
* of the reciprocal Γ function:
*
 *   1/Γ(z) = z e^{γz} ∏_{k = 1}^{+∞} (1 + z/k) e^{-z/k}
 *
 * Thus:
 *
 *   γ = -1/z ⋅ ln(z Γ(z) ∏_{k = 1}^{+∞} (1 + z/k) e^{-z/k})
 *
 * The product stops when two consecutive partial products
 * are equal, due to limited precision.
*
* Range of z given in input as min, max and step.
*
*/
double exact = 0.577215664901532860;
int main(int argc, char** argv) {
double min = (double)atof(argv[1]);
double max = (double)atof(argv[2]);
double step = (double)atof(argv[3]);
double prev_gamma, gamma = 0;
double Z;
double best = 0;
for (double z = min; z <= max; z += step) {
prev_gamma = gamma;
double pro = 1;
double prev = -1;
for (double k = 1; pro != prev; k++) {
prev = pro;
pro *= (1 + z/k) * exp(- z/k);
if (z == 9 && pro == prev) printf("k:\t%.0f\n", k);
}
    // assign to the outer gamma so that prev_gamma tracks it
    gamma = - 1/z * log(z*tgamma(z)*pro);
printf("z:\t%.2f\t", z);
printf("diff:\t%.20f\n", fabs(gamma - exact));
if ((fabs(gamma - exact) < fabs(prev_gamma - exact)) &&
(fabs(gamma - exact) < fabs(best - exact))){
best = gamma;
Z = z;
}
}
printf("z:\t%.2f\n", Z);
printf("approx:\t%.20f\n", best);
printf("true:\t%.20f\n", exact);
printf("diff:\t%.20f\n", best - exact);
printf("\t 123456789 123456789 123456789\n");
return EXIT_SUCCESS;
}

292
ex-3/chisquared.c Normal file

@ -0,0 +1,292 @@
#include "common.h"
#include "chisquared.h"
#include <gsl/gsl_multifit_nlinear.h>
#include <gsl/gsl_blas.h>
/* Minimisation function
*
* The χ² is defined as
*
* χ² = |f|²
* = Σi fi²
*
 * where fi = (Oi - Ei)/√Ei are the residuals.
*
* This function takes the parameters (α,β,γ) as
* a gsl_vector `par`, the sample `data` and a
* gsl_vector `f`, that will store the results
* `fi`.
*/
int chisquared_f(const gsl_vector *par,
void *data_, gsl_vector *f) {
struct hist data = *((struct hist*) data_);
struct event event;
double max, min;
double delta_th, delta_ph;
double expected, observed;
/* Loop over the (i,j) bins of the 2D histogram.
* The index k of the k-th component of f is actually:
*
* k = i * φbins + j
*
* Note: this is called the C-order.
*/
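  /* For instance, with the 30×60 binning used by default in main.c
   * the residual vector f has 1800 components, and bin (i=2, j=5)
   * maps to k = 2⋅60 + 5 = 125.
   */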
for (size_t i = 0; i < data.h->nx; i++)
for (size_t j = 0; j < data.h->ny; j++) {
/* Get the bin ranges and width for θ,φ.
* The event is supposed to happen in the
* midpoint of the range.
*/
gsl_histogram2d_get_xrange(data.h, i, &max, &min);
event.th = (max + min)/2;
delta_th = (max - min);
gsl_histogram2d_get_yrange(data.h, j, &max, &min);
event.ph = (max + min)/2;
delta_ph = (max - min);
/* O = observed number of events in the k-th bin
* E = expected number of events in k-th bin
*
* E is given by:
*
* /---> total number of events
* /
* E = N F(α,β,γ; θ,φ) Δθ Δφ sin(θ)
* \__________________________/
* |
* `-> probability
*/
observed = gsl_histogram2d_get(data.h, i, j);
expected = data.n * distr(par, &event)
* delta_th * delta_ph * sin(event.th);
      /* Guard against a vanishing or negative expected
       * count: that would lead to a division by zero below,
       * so clamp it to a small positive value.
       */
      if (expected <= 0) {
        //fprintf(stderr, "[warning] bin %ld:%ld p<0 (%.2g, %.2g, %.2g)\n",
        //        i, j, gsl_vector_get(par, 0),
        //        gsl_vector_get(par, 1),
        //        gsl_vector_get(par, 2));
        expected = 1e-6;
      }
gsl_vector_set(
f, i * data.h->ny + j,
(observed - expected)/sqrt(expected));
}
return GSL_SUCCESS;
}
/* Jacobian function
*
 * The gradient of the χ² function is:
 *
 *   ∇χ² = ∇|f|² = 2 J^T f
 *
 * where J is the jacobian of the (vector)
 * function f. Its entries are given by
 *
 *   Jij = ∂fi/∂xj
 *       = -(Oi + Ei)/(2√Ei) ⋅ 1/Fi ⋅ ∂Fi/∂xj
*
*/
int chisquared_df(const gsl_vector *par,
void *data_, gsl_matrix *jac) {
struct hist data = *((struct hist*) data_);
struct event event;
double max, min;
double delta_th, delta_ph, prob;
double expected, observed;
gsl_vector *grad = gsl_vector_alloc(3);
for (size_t i = 0; i < data.h->nx; i++)
for (size_t j = 0; j < data.h->ny; j++) {
/* Get the bin ranges for θ,φ.
* The event is supposed to happen in the
* midpoint of the range.
*/
gsl_histogram2d_get_xrange(data.h, i, &max, &min);
event.th = (max + min)/2;
delta_th = (max - min);
gsl_histogram2d_get_yrange(data.h, j, &max, &min);
event.ph = (max + min)/2;
delta_ph = (max - min);
prob = distr(par, &event);
observed = gsl_histogram2d_get(data.h, i, j);
expected = data.n * prob * delta_th * delta_ph * sin(event.th);
if (expected < 0)
expected = 1e-6;
if (observed == 0)
observed = 1;
/* Compute the gradient of F(α,β,γ; θi,φi),
* then rescale it and set it to the i-th row
* of the jacobian.
*/
grad_distr(par, &event, grad);
gsl_vector_scale(
grad,
-0.5*(observed + expected)/sqrt(expected) * 1/prob);
gsl_matrix_set_row(jac, i * data.h->ny + j, grad);
}
// free memory
gsl_vector_free(grad);
return GSL_SUCCESS;
}
/* This is a callback function called during
* the minimisation to show the current progress.
* It prints:
* 1. the condition number cond(J) of the jacobian
* 2. the reduced χ² value
* 3. the current parameters
*/
void callback(const size_t iter, void *params,
const gsl_multifit_nlinear_workspace *w) {
gsl_vector *f = gsl_multifit_nlinear_residual(w);
gsl_vector *x = gsl_multifit_nlinear_position(w);
//if (iter % 4 != 0)
// return;
/* Compute the condition number of the
* jacobian and the reduced χ² (χ²/d).
*/
double rcond, chi2;
int d = w->fdf->n - w->fdf->p;
gsl_multifit_nlinear_rcond(&rcond, w);
gsl_blas_ddot(f, f, &chi2);
fprintf(stderr, "%2ld\t", iter);
vector_fprint(stderr, x);
fprintf(
stderr, "\tcond(J)=%.4g, χ²/d=%.4g\n\n",
1.0/rcond,
chi2/d);
}
/* Minimum χ² estimation of the parameters
* α,β,γ.
*/
min_result minChiSquared(struct hist data) {
/* Initialise the function to be minimised */
gsl_multifit_nlinear_fdf chisquared;
chisquared.f = chisquared_f; // function
chisquared.df = chisquared_df; // gradient
  chisquared.fvv = NULL;                  // second directional derivative (for geodesic accel.), unused
  chisquared.n = data.h->nx * data.h->ny; // number of data points
  chisquared.p = 3;                       // number of parameters
chisquared.params = &data; // histogram data
/* Initialise the minimisation workspace */
gsl_multifit_nlinear_parameters options =
gsl_multifit_nlinear_default_parameters();
gsl_multifit_nlinear_workspace *w =
gsl_multifit_nlinear_alloc(
gsl_multifit_nlinear_trust, // minimisation method
&options, // minimisation options
data.h->nx * data.h->ny, // number of data points
3); // number of (unknown) params
/* Set the starting point of the
* minimisation.
*/
gsl_vector *par = gsl_vector_alloc(3);
gsl_vector_set(par, 0, 0.50); // α
gsl_vector_set(par, 1, 0.01); // β
gsl_vector_set(par, 2, -0.10); // γ
gsl_multifit_nlinear_init(par, &chisquared, w);
/* Configure the solver and run the minimisation
* using the high-level driver.
*/
fputs("\n# least χ²\n\n", stderr);
int status, info;
status = gsl_multifit_nlinear_driver(
100, // max number of iterations
1e-8, // tolerance for step test: |δ| ≤ xtol(|x| + xtol)
1e-8, // tolerance for gradient test: max |∇i⋅ xi| ≤ gtol⋅xi
1e-8, // tolerance for norm test
callback, // function called on each iteration
NULL, // callback parameters
&info, // stores convergence information
w); // minimisation workspace
fprintf(stderr, "status: %s\n", gsl_strerror(status));
if (status != GSL_SUCCESS)
fprintf(stderr, "info: %s\n", gsl_strerror(info));
fprintf(stderr, "iterations: %ld\n", gsl_multifit_nlinear_niter(w));
/* Store results in the min_result type.
* Note: We allocate a new vector/matrix for the
* parameters because `w->x` will be freed
* along with the workspace `w`.
*/
min_result res;
res.par = gsl_vector_alloc(3);
res.err = gsl_vector_alloc(3);
res.cov = gsl_matrix_alloc(3, 3);
gsl_vector_memcpy(res.par, gsl_multifit_nlinear_position(w));
/* Compute the covariance of the fit parameters.
 * The covariance Σ is estimated by
 *
 *   Σ = H⁻¹
 *
 * where H is the hessian of the χ², which is
 * itself approximated by the jacobian as
 *
 *   H = J^T J
*
*/
gsl_matrix *jac = gsl_multifit_nlinear_jac(w);
gsl_multifit_nlinear_covar(jac, 0.0, res.cov);
/* Compute the standard errors
* from the covariance matrix.
*/
gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov);
gsl_vector_memcpy(res.err, &diagonal.vector);
vector_map(&sqrt, res.err);
/* Compute the reduced χ² */
double chi2;
gsl_vector *f = gsl_multifit_nlinear_residual(w);
gsl_blas_ddot(f, f, &chi2);
chi2 /= chisquared.n - chisquared.p;
/* Print the results */
fputs("\n## results\n", stderr);
fprintf(stderr, "\n* χ²/d: %.3f\n", chi2);
fputs("\n* parameters:\n ", stderr);
vector_fprint(stderr, res.par);
fputs("\n* covariance:\n", stderr);
matrix_fprint(stderr, res.cov);
// free memory
gsl_multifit_nlinear_free(w);
gsl_vector_free(par);
return res;
}

17
ex-3/chisquared.h Normal file

@ -0,0 +1,17 @@
#include "common.h"
#include <gsl/gsl_histogram2d.h>
/* Structure containing the histogram
* for the χ² minimisation.
* n: number of events
* h: 2D histogram
*/
struct hist {
size_t n;
gsl_histogram2d *h;
};
/* Minimum χ² estimation of the parameters
* α,β,γ.
*/
min_result minChiSquared(struct hist data);

126
ex-3/common.c Normal file

@ -0,0 +1,126 @@
#include "common.h"
/* `vector_fprint(fp, x)` prints to file `fp`
* the components of a gsl_vector.
*/
void vector_fprint(FILE *fp, const gsl_vector *x) {
fprintf(fp, "[");
for (size_t i = 0; i < x->size; i++) {
fprintf(fp, i < x->size -1 ? "%10.4g, " : "%10.4g",
gsl_vector_get(x, i));
}
fprintf(fp, "]\n");
}
/* `matrix_fprint(fp, x)` prints to file `fp`
 * the components of a gsl_matrix.
*/
void matrix_fprint(FILE *fp, const gsl_matrix *x) {
gsl_vector row;
for (size_t i = 0; i < x->size1; i++) {
row = gsl_matrix_const_row(x, i).vector;
fputs(" ", fp);
vector_fprint(fp, &row);
}
}
/* `vector_map(f, v)` applies the function
* `f` to each element of the vector `v`, inplace.
 * The function returns a GSL exit code reporting
 * either success or failure.
*/
int vector_map(double (*f)(double x), gsl_vector *v) {
double y;
for (size_t i = 0; i < v->size; i++ ) {
y = (*f)(gsl_vector_get(v, i));
gsl_vector_set(v, i, y);
}
return GSL_SUCCESS;
}
/* Angular distribution function F(θ,φ; α,β,γ)
* Takes three parameters α,β,γ and the angles θ,φ.
*/
double distr(const gsl_vector *par,
const struct event *event){
double a = gsl_vector_get(par, 0),
b = gsl_vector_get(par, 1),
c = gsl_vector_get(par, 2);
double th = event->th,
ph = event->ph;
double A = 3 /(4 * M_PI);
double B = 0.5 * (1 - a);
double C = 0.5 * (3 * a - 1) * pow(cos(th), 2);
double D = b * pow(sin(th), 2) * cos(2 * ph);
double E = sqrt(2) * c * sin(2 * th) * cos(ph);
return A * (B + C - D - E);
}
/* `grad_distr(par, event, grad)`
 * computes the gradient of F(θ,φ; α,β,γ) with respect
 * to the parameters α,β,γ:
 *
 *        ⎡  3/2 cos²(θ) - 1/2  ⎤
 *   ∇F = ⎢  -sin²(θ) cos(2φ)   ⎥ ⋅ 3/(4π)
 *        ⎣  -√2 sin(2θ) cos(φ) ⎦
 *
 * where `par` is [α,β,γ] and `event` is the angle
 * structure. The result is stored in the
 * gsl_vector `grad`.
*/
void grad_distr(const gsl_vector *par,
const struct event *event,
gsl_vector *grad) {
double th = event->th,
ph = event->ph;
gsl_vector_set(grad, 0, 1.5*pow(cos(th), 2) - 0.5);
gsl_vector_set(grad, 1, -pow(sin(th), 2) * cos(2*ph));
gsl_vector_set(grad, 2, -sqrt(2)*sin(2*th) * cos(ph));
gsl_vector_scale(grad, 3.0/(4.0 * M_PI));
}
/* Test the compatibility between the original
* parameters `def` and the estimates `est.par`
* with standard errors `est.err`.
*/
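/* Worked example (cf. ex-3/results.md): a = 0.65, b = 0.6537 and
 * σ = 0.003 give t ≈ 1.23 and a significance α ≈ 0.22.
 */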
void compatibility(gsl_vector *def,
min_result est) {
char* names[] = { "α", "β", "γ" };
double a, b;
double t, sigma, alpha;
fprintf(stderr, "| par | σ | α-value |\n");
fprintf(stderr, "|-----|----------|----------|\n");
for (size_t i = 0; i < def->size; i++) {
// discrepancy
a = gsl_vector_get(def, i);
b = gsl_vector_get(est.par, i);
sigma = gsl_vector_get(est.err, i);
// normal t-statistic
t = fabs(a - b)/sigma;
// α-value / significance
alpha = 1 - erf(t/sqrt(2));
fprintf(stderr, "| %s | %-8.2g | %-8.2g |\n",
names[i], sigma, alpha);
}
}
/* Free a min_result structure.
*/
void min_result_free(min_result x) {
  gsl_vector_free(x.par);
  gsl_vector_free(x.err);
  gsl_matrix_free(x.cov);
}

88
ex-3/common.h Normal file

@ -0,0 +1,88 @@
#pragma once
#include <math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
/* Structure that holds the θ,φ angles
* of a single event of the sample.
*/
struct event {
double th;
double ph;
};
/* Structure that contains the sample events
* and its size.
*/
struct sample {
const size_t size;
struct event *events;
};
/* This structure contains the result of a
* minimisation method: estimated parameters,
* standard error and their covariance.
*/
typedef struct {
gsl_vector *par;
gsl_vector *err;
gsl_matrix *cov;
} min_result;
/* `vector_fprint(fp, x)` prints to file `fp`
* the components of a gsl_vector.
*/
void vector_fprint(FILE *fp, const gsl_vector *x);
/* `matrix_fprint(fp, x)` prints to file `fp`
 * the components of a gsl_matrix.
*/
void matrix_fprint(FILE *fp, const gsl_matrix *x);
/* `vector_map(f, v)` applies the function
* `f` to each element of the vector `v`, inplace.
 * The function returns a GSL exit code reporting
 * either success or failure.
*/
int vector_map(double (*f)(double x), gsl_vector *v);
/* Angular distribution function F(θ,φ; α,β,γ)
* Takes three parameters α,β,γ and the angles θ,φ.
*/
double distr(const gsl_vector *par,
const struct event *event);
/* `grad_distr(par, event, grad)`
 * computes the gradient of F(θ,φ; α,β,γ) with respect
 * to the parameters α,β,γ:
 *
 *        ⎡  3/2 cos²(θ) - 1/2  ⎤
 *   ∇F = ⎢  -sin²(θ) cos(2φ)   ⎥ ⋅ 3/(4π)
 *        ⎣  -√2 sin(2θ) cos(φ) ⎦
 *
 * where `par` is [α,β,γ] and `event` is the angle
 * structure. The result is stored in the
 * gsl_vector `grad`.
*/
void grad_distr(const gsl_vector *par,
const struct event *event,
gsl_vector *grad);
/* Test the compatibility between the original
* parameters `def` and the estimates `est.par`
* with standard errors `est.err`.
*/
void compatibility(gsl_vector *def, min_result est);
/* Free a min_result structure.
*/
void min_result_free(min_result x);

239
ex-3/likelihood.c Normal file

@ -0,0 +1,239 @@
#include "likelihood.h"
#include <gsl/gsl_errno.h>
#include <gsl/gsl_multimin.h>
#include <gsl/gsl_linalg.h>
/* `f_logL(par, &sample)`
* gives the negative log-likelihood for the sample `sample`.
*/
double f_logL(const gsl_vector *par, void *sample_) {
struct sample sample = *((struct sample*) sample_);
double sum = 0;
for (size_t i = 0; i < sample.size; i++)
sum += log(fabs(distr(par, &sample.events[i])));
/* Rescale the function to avoid
* round-off errors during minimisation
*/
sum *= 1e-5;
return -sum;
}
/* `fdf_logL(par, &sample, fun, grad)`
* simultaneously computes the gradient and the value of
* the negative log-likelihood for the sample `sample`
* and stores the results in `fun` and `grad` respectively.
*/
void fdf_logL(const gsl_vector *par, void *sample_,
double *fun, gsl_vector *grad) {
struct sample sample = *((struct sample*) sample_);
double prob;
struct event *event;
gsl_vector *term = gsl_vector_alloc(3);
// Note: fun/grad are *not* guaranteed to be 0
gsl_vector_set_zero(grad);
*fun = 0;
for (size_t i = 0; i < sample.size; i++) {
event = &sample.events[i];
prob = distr(par, event);
    /* The gradient of log(F(α,β,γ)) is:
     *   ∇log(F) = 1/F(α,β,γ) ⋅ ∇F(α,β,γ)
     */
grad_distr(par, event, term);
gsl_vector_scale(term, -1.0/prob);
gsl_vector_add(grad, term);
// compute function
*fun += log(fabs(prob));
}
/* Rescale the function to avoid
* round-off errors during minimisation
*/
*fun = -*fun*1e-5;
gsl_vector_scale(grad, 1e-5);
// free memory
gsl_vector_free(term);
}
/* `df_logL(par, &sample, grad)`
* gives the gradient of the negative log-likelihood
* for the sample `sample`. The result is stored in
* the `grad` gsl_vector.
*/
void df_logL(const gsl_vector *par, void *sample, gsl_vector *grad) {
double fun;
fdf_logL(par, sample, &fun, grad);
}
/* `hf_logL(par, &sample)`
 * returns the hessian matrix of the negative log-likelihood
 * for the sample `sample`, as a newly allocated gsl_matrix
 * (to be freed by the caller).
 */
gsl_matrix* hf_logL(const gsl_vector *par, void *sample_) {
struct sample sample = *((struct sample*) sample_);
gsl_vector *grad = gsl_vector_calloc(3);
gsl_matrix *term = gsl_matrix_calloc(3, 3);
gsl_matrix *hess = gsl_matrix_calloc(3, 3);
double prob;
struct event *event;
for (size_t n = 0; n < sample.size; n++) {
/* Compute gradient and value of F */
event = &sample.events[n];
prob = distr(par, event);
grad_distr(par, event, grad);
    /* Compute the hessian matrix of -log(F):
     *   H_ij = 1/F² ⋅ ∂F/∂x_i ⋅ ∂F/∂x_j
     * (exact here, since F is linear in the parameters
     * and its second derivatives vanish)
     */
for (size_t i = 0; i < 3; i++)
for (size_t j = 0; j < 3; j++) {
gsl_matrix_set(
term, i, j,
gsl_vector_get(grad, i) * gsl_vector_get(grad, j));
}
gsl_matrix_scale(term, 1.0/pow(prob, 2));
gsl_matrix_add(hess, term);
}
// free memory
gsl_vector_free(grad);
gsl_matrix_free(term);
return hess;
}
/* Maximum likelihood estimation of the parameters
* α,β,γ.
*/
min_result maxLikelihood(struct sample *sample) {
/* Starting point: α,β,γ = "something close
* to the solution", because the minimisation
* seems pretty unstable.
*/
gsl_vector *par = gsl_vector_alloc(3);
gsl_vector_set(par, 0, 0.79);
gsl_vector_set(par, 1, 0.02);
gsl_vector_set(par, 2, -0.17);
/* Initialise the minimisation */
gsl_multimin_function_fdf likelihood;
likelihood.n = 3;
likelihood.f = f_logL;
likelihood.df = df_logL;
likelihood.fdf = fdf_logL;
likelihood.params = sample;
/* The minimisation technique used
* is the conjugate gradient method. */
const gsl_multimin_fdfminimizer_type *type =
gsl_multimin_fdfminimizer_conjugate_fr;
gsl_multimin_fdfminimizer *m =
gsl_multimin_fdfminimizer_alloc(type, 3);
gsl_multimin_fdfminimizer_set(
m, // minimisation technique
&likelihood, // function to minimise
par, // initial parameters/starting point
1e-5, // first step length
0.1); // accuracy
size_t iter;
int status = GSL_CONTINUE;
  /* Iterate the minimisation until the norm of the
   * gradient is smaller than 10⁻⁵ or the iteration fails.
   */
fputs("\n# maximum likelihood\n\n", stderr);
for (iter = 0; status == GSL_CONTINUE && iter < 100; iter++) {
status = gsl_multimin_fdfminimizer_iterate(m);
if (status)
break;
status = gsl_multimin_test_gradient(m->gradient, 1e-5);
/* Print the iteration, current parameters
* and gradient of the log-likelihood.
*/
if (iter % 5 == 0 || status == GSL_SUCCESS) {
fprintf(stderr, "%2ld\t", iter);
vector_fprint(stderr, m->x);
fputs("\t", stderr);
vector_fprint(stderr, m->gradient);
putc('\n', stderr);
}
}
fprintf(stderr, "status: %s\n", gsl_strerror(status));
fprintf(stderr, "iterations: %ld\n", iter);
/* Store results in the min_result type.
* Note: We allocate a new vector for the
* parameters because `m->x` will be freed
* along with m.
*/
  min_result res;
  res.par = gsl_vector_alloc(3);
  res.err = gsl_vector_alloc(3);
  // res.cov is allocated by hf_logL() below
  gsl_vector_memcpy(res.par, m->x);
/* Compute the standard errors of the estimates.
 * The Cramér-Rao bound states that the covariance
 * matrix of the estimates is bounded below by
 *
 *   Σ ≥ I⁻¹ = (-H)⁻¹
 *
 * where:
 *   - I is the Fisher information,
 *   - H is the hessian of log(L) at
 *     the maximum likelihood.
*/
res.cov = hf_logL(m->x, sample);
  /* Invert H by Cholesky decomposition:
   *
   *   H = LL^T  ⇒  H⁻¹ = (L^T)⁻¹ L⁻¹
   *
   * Note: H is positive definite (because -logL is
   * minimised) and symmetric, so this is valid.
   *
   */
gsl_linalg_cholesky_decomp(res.cov);
gsl_linalg_cholesky_invert(res.cov);
/* Compute the standard errors
* from the covariance matrix.
*/
gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov);
gsl_vector_memcpy(res.err, &diagonal.vector);
vector_map(&sqrt, res.err);
fputs("\n## results\n", stderr);
fputs("\n* parameters:\n ", stderr);
vector_fprint(stderr, res.par);
  fputs("\n* covariance (Cramér-Rao lower bound):\n", stderr);
matrix_fprint(stderr, res.cov);
// free memory
gsl_multimin_fdfminimizer_free(m);
return res;
}

6
ex-3/likelihood.h Normal file
View File

@ -0,0 +1,6 @@
#include "common.h"
/* Maximum likelihood estimation of the parameters
* α,β,γ.
*/
min_result maxLikelihood(struct sample *sample);

158
ex-3/main.c Normal file

@ -0,0 +1,158 @@
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "likelihood.h"
#include "chisquared.h"
#include <gsl/gsl_rng.h>
#include <gsl/gsl_histogram2d.h>
/* Options for the program */
struct options {
int show_hist; // whether to print the 2D histogram
int only_hist; // whether to skip the minimisations
size_t num_events; // number of generated events
size_t bins_theta; // number of bins for θ
size_t bins_phi; // number of bins for φ
};
int main(int argc, char **argv) {
/* Set default options */
struct options opts;
opts.num_events = 50000;
opts.show_hist = 0;
opts.only_hist = 0;
opts.bins_theta = 30;
opts.bins_phi = 60;
/* Process CLI arguments */
for (size_t i = 1; i < argc; i++) {
    if      (!strcmp(argv[i], "-i")) opts.show_hist = 1;
    else if (!strcmp(argv[i], "-I")) opts.only_hist = 1;
else if (!strcmp(argv[i], "-n")) opts.num_events = atol(argv[++i]);
else if (!strcmp(argv[i], "-t")) opts.bins_theta = atol(argv[++i]);
else if (!strcmp(argv[i], "-p")) opts.bins_phi = atol(argv[++i]);
else {
fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]);
fprintf(stderr, "\t-h\tShow this message.\n");
fprintf(stderr, "\t-i\tPrint the histogram to stdout.\n");
fprintf(stderr, "\t-I\tPrint *only* the histogram.\n");
fprintf(stderr, "\t-n N\tThe number of events to generate.\n");
fprintf(stderr, "\t-t N\tThe number of θ bins.\n");
fprintf(stderr, "\t-p N\tThe number of φ bins.\n");
return EXIT_FAILURE;
}
}
/* Initialize an RNG. */
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
/* Initialise the sample data structure with
* the number of events and allocate an array
* for the events.
*/
struct sample s = { opts.num_events, NULL };
s.events = calloc(s.size, sizeof(struct event));
  /* Prepare a 2D histogram of the sample
   * with bins_theta × bins_phi bins (30×60 by default).
   */
gsl_histogram2d* hist =
gsl_histogram2d_alloc(opts.bins_theta, opts.bins_phi);
// set the ranges θ∈[0, π] and φ∈[0, 2π]
gsl_histogram2d_set_ranges_uniform(hist, 0,M_PI, 0,2*M_PI);
  /* F(θ,φ) parameters we will generate
   * a sample with, and later try to recover.
   * Note: for this choice max F(θ,φ) ≈ 0.179.
   */
*/
gsl_vector *def_par = gsl_vector_alloc(3);
gsl_vector_set(def_par, 0, 0.65); // α
gsl_vector_set(def_par, 1, 0.06); // β
gsl_vector_set(def_par, 2, -0.18); // γ
/* Generate the points (θ, φ) on the unit sphere
* using the inverse transform:
* θ = acos(1 - 2X)
* φ = 2πY
* where X,Y are random uniform variables in [0,1).
 * This ensures that the points are uniformly distributed on the
 * unit sphere; we then accept/reject the events as follows:
 *   1. generate y ∈ [0, max F(θ,φ)] uniformly
 *   2. reject the sample if y > F(θ,φ), else save it.
*/
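  /* Illustrative note: since F is normalised over the sphere, the
   * mean acceptance probability is (1/4π)/0.2 ≈ 0.40, i.e. about
   * 2.5 proposals per accepted event on average.
   */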
fputs("# event sampling\n\n", stderr);
fprintf(stderr, "generating %ld events...", opts.num_events);
struct event *e;
for (size_t i = 0; i < s.size; i++){
e = &s.events[i];
do {
e->th = acos(1 - 2*gsl_rng_uniform(r));
e->ph = 2 * M_PI * gsl_rng_uniform(r);
} while(0.2 * gsl_rng_uniform(r) > distr(def_par, e));
// update the histogram
gsl_histogram2d_increment(hist, e->th, e->ph);
}
fputs("done\n", stderr);
// print histogram to stdout
if (opts.show_hist || opts.only_hist) {
fputs("\n# histogram\n", stderr);
gsl_histogram2d_fprintf(stdout, hist, "%g", "%g");
}
if (!opts.only_hist) {
/* Estimate the parameters α,β,γ by
* the maximum likelihood method.
*/
min_result like = maxLikelihood(&s);
/* Estimate the parameters α,β,γ by
* a minimum χ² method.
*/
struct hist data;
data.h = hist;
data.n = s.size;
min_result chi2 = minChiSquared(data);
/* Compare the results with the original ones
* and check whether they are compatible.
*/
fputs("\n# vector decay compatibility\n", stderr);
fputs("\n## likelihood results\n\n", stderr);
compatibility(def_par, like);
fputs("\n## χ² results\n\n", stderr);
compatibility(def_par, chi2);
    /* Test the isotropic decay hypothesis
     * with the χ² results
     */
    fputs("\n# isotropic hypothesis\n\n", stderr);
/* If the decay is isotropic then F = 1/4π.
* Solving for α,β,γ yields α=1/3,β=γ=0.
*/
gsl_vector *iso_par = gsl_vector_alloc(3);
gsl_vector_set(iso_par, 0, 0.33); // α
gsl_vector_set(iso_par, 1, 0.00); // β
gsl_vector_set(iso_par, 2, 0.00); // γ
compatibility(iso_par, chi2);
// free memory
min_result_free(like);
min_result_free(chi2);
};
// free memory
gsl_rng_free(r);
free(s.events);
gsl_histogram2d_free(hist);
gsl_vector_free(def_par);
return EXIT_SUCCESS;
}

31
ex-3/plot.py Executable file

@ -0,0 +1,31 @@
#!/usr/bin/env python
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import sys
xedges, yedges, counts = loadtxt(sys.stdin, unpack=True, usecols=[0,2,4])
bins = (30, 60)
counts = counts.reshape(bins)
plt.rcParams['font.size'] = 15
suptitle('Angular decay distribution')
# subplot2grid((1, 3), (0, 0), colspan=2, aspect='equal')
xlabel(r'$\phi$ (radians)')
ylabel(r'$\theta$ (radians)')
pcolor(yedges[:bins[1]], xedges[::bins[1]], counts)
norm = colorbar(fraction=0.023, pad=0.04).norm
show()
# Ax = subplot2grid((1, 3), (0, 2), projection='3d')
# Θ, φ = mgrid[0:pi:bins[0]*1j, 0:2*pi:bins[1]*1j]
# X = 5 * sin(θ) * cos(φ)
# Y = 5 * sin(θ) * sin(φ)
# Z = 5 * cos(θ)
# Ax.plot_surface(
# x, y, z, rstride=1, cstride=1,
# facecolors=cm.viridis(norm(counts)))
# Axis('off')
# show()

85
ex-3/results.md Normal file

@ -0,0 +1,85 @@
# event sampling
generating 50000 events...done
# maximum likelihood
0 [ 0.79, 0.02, -0.17]
[ 0.18, -0.06812, 0.001955]
5 [ 0.7894, 0.02022, -0.17]
[ 0.1789, -0.06748, 0.002162]
10 [ 0.7709, 0.02725, -0.1702]
[ 0.1465, -0.04898, 0.007551]
15 [ 0.6525, 0.06317, -0.1725]
[ -0.002576, -0.002313, 0.0161]
20 [ 0.654, 0.06135, -0.1763]
[-2.098e-06, 0.0001837, 0.0001225]
24 [ 0.6541, 0.06125, -0.1763]
[ -3.24e-06, -7.905e-07, -1.432e-06]
status: success
iterations: 25
## results
* parameters:
[ 0, -0.5106, 1.089]
* covariance (Cramér-Rao lower bound):
[ 6.604e-06, -3.474e-11, -1.798e-11]
[-3.474e-11, 1.227e-05, -1.267e-10]
[-1.798e-11, -1.267e-10, 9.564e-05]
# least χ²
0 [ 0.5, 0.01, -0.1]
cond(J)=inf, χ²/d=3.32
4 [ 0.6537, 0.05976, -0.1736]
cond(J)=2.725, χ²/d=1.017
status: success
iterations: 7
## results
* χ²/d: 1.017
* parameters:
[ 0.6537, 0.05972, -0.1736]
* covariance:
[ 0.00304, -1.66e-06, 2.383e-07]
[ -1.66e-06, 0.00212, 1.505e-06]
[ 2.383e-07, 1.505e-06, 0.001637]
# vector decay compatibility
## likelihood results
| par | σ | α-value |
|-----|----------|----------|
| α | 6.6e-06 | 0 |
| β | 1.2e-05 | 0 |
| γ | 9.6e-05 | 0 |
## χ² results
| par | σ | α-value |
|-----|----------|----------|
| α | 0.003 | 0.22 |
| β | 0.0021 | 0.89 |
| γ | 0.0016 | 0.0001 |
# isotropic hypothesis
| par | σ | α-value |
|-----|----------|----------|
| α | 0.003 | 0 |
| β | 0.0021 | 0 |
| γ | 0.0016 | 0 |

117
ex-4/dip.c Normal file

@ -0,0 +1,117 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_rng.h>
int main(int argc, char **argv)
{
// Set default options.
//
size_t N = 50000; // number of events.
size_t n = 50; // number of bins.
double p_max = 10; // maximum value of momentum module.
// Process CLI arguments.
//
for (size_t i = 1; i < argc; i++)
{
if (!strcmp(argv[i], "-N")) N = atol(argv[++i]);
else if (!strcmp(argv[i], "-n")) n = atol(argv[++i]);
else if (!strcmp(argv[i], "-p")) p_max = atof(argv[++i]);
else
{
      fprintf(stderr, "Usage: %s -[hNnp]\n", argv[0]);
fprintf(stderr, "\t-h\tShow this message.\n");
fprintf(stderr, "\t-N integer\tThe number of events to generate.\n");
fprintf(stderr, "\t-n integer\tThe number of bins of the histogram.\n");
fprintf(stderr, "\t-p float\tThe maximum value of momentum.\n");
return EXIT_FAILURE;
}
}
// Initialize an RNG.
//
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// Generate the angle θ uniformly distributed on a sphere using the
// inverse transform:
//
// θ = acos(1 - 2X)
//
// where X is a random uniform variable in [0,1), and the module p of
// the vector:
//
// p² = p_v² + p_h²
//
// uniformly distributed between 0 and p_max. The two components are
// then computed as:
//
// p_v = p⋅cos(θ)
// p_h = p⋅sin(θ)
//
// The histogram will be updated this way.
// The j-th bin where p_h goes in is given by:
//
// step = p_max / n
// j = floor(p_h / step)
//
  // Thus the histogram is stored as an array of structures, each
  // holding the number of entries in a bin and the sum of the |p_v|
  // of those entries; it is filled while generating the events.
//
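  // For example, with the defaults p_max = 10 and n = 50 the bin
  // width is step = 0.2, so an event with p_h = 3.07 falls into
  // bin j = floor(3.07/0.2) = 15.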
struct bin
{
size_t amo; // Amount of events in the bin.
double sum; // Sum of |p_v|s of all the events in the bin.
};
struct bin *histo = calloc(n, sizeof(struct bin));
// Some useful variables.
//
double step = p_max / n;
struct bin *b;
double theta;
double p;
double p_v;
double p_h;
size_t j;
for (size_t i = 0; i < N; i++)
{
// Generate the event.
//
theta = acos(1 - 2*gsl_rng_uniform(r));
p = p_max * gsl_rng_uniform(r);
// Compute the components.
//
p_v = p * cos(theta);
p_h = p * sin(theta);
// Update the histogram.
//
j = floor(p_h / step);
b = &histo[j];
b->amo++;
b->sum += fabs(p_v);
}
  // Compute the mean value of each bin and print it to stdout
// together with other useful things to make the histogram.
//
printf("bins: \t%ld\n", n);
printf("step: \t%.5f\n", step);
for (size_t i = 0; i < n; i++)
{
    histo[i].sum = histo[i].amo > 0 ? histo[i].sum / histo[i].amo : 0;
printf("\n%.5f", histo[i].sum);
};
// free memory
gsl_rng_free(r);
free(histo);
return EXIT_SUCCESS;
}

23
ex-4/plot.py Executable file

@ -0,0 +1,23 @@
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
import sys
bins = input()
step = input()
bins = int(bins.split("\t")[1])
step = float(step.split("\t")[1])
counts = np.loadtxt(sys.stdin)
edges = np.linspace(0, bins*step, bins+1)
plt.rcParams['font.size'] = 17
plt.hist(edges[:-1], edges, weights=counts,
color='#eacf00', edgecolor='#3d3d3c')
plt.title('Vertical component distribution', loc='right')
plt.xlabel(r'$p_h$')
plt.ylabel(r'$\left<|p_v|\right>$')
plt.show()

37
ex-5/casino.c Normal file

@ -0,0 +1,37 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <gsl/gsl_rng.h>
int main(int argc, char** argv){
size_t size = atoi(argv[1]); // size of the generated sample
// Initialize an RNG.
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
  // Generate random points (x, y) with x in [0,1), y in [0,3):
  // accept the point if y ≤ exp(x), reject it otherwise.
size_t in = 0;
size_t total = 0;
do{
double x = gsl_rng_uniform(r);
double y = 3 * gsl_rng_uniform(r);
if (y <= exp(x)){
in++;
}
total++;
} while (total < size);
  // The integral is estimated through the proportion:
  //   integral : 3 = in : total
double integral = 3.0*(double)in/(double)total;
printf("∫exp(x)dx in [0,1]:\t%.07f\n", integral);
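  // A rough statistical error estimate for this hit-or-miss scheme:
  // the acceptance probability is p = (e-1)/3 ≈ 0.57, so the error
  // is about 3·sqrt(p(1-p)/total), e.g. ≈ 1.5e-3 for 10⁶ points.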
// Free memory
gsl_rng_free(r);
return EXIT_SUCCESS;
}

31
ex-5/manual.c Normal file

@ -0,0 +1,31 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* This program evaluates the integral of exp(x) in [0,1] through the
 * trapezoidal rule.
 * The exponent d of the integration step is given as input: step = 10^(-d).
*
*/
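/* The error of the composite trapezoidal rule is bounded by
 * (b-a)⋅h²/12 ⋅ max|f''|, which for f = exp on [0,1] is at most
 * e⋅h²/12 ≈ 0.23⋅h², i.e. about 2.3e-7 for d = 3 (h = 10⁻³).
 */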
int main(int argc, char** argv){
double left = 0; // left limit
double point = left; // points at the current position
double right = 1; // right limit
double step = pow(10, -atoi(argv[1])); // step of integration
double sum = 0;
// Integral evaluation: stops when point goes over the right edge of the
// range.
do {
sum += step * 0.5 * (exp(point) + exp(point + step));
point += step;
} while (point < right);
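// Note: the composite trapezoidal rule has a global error of order step²,
// so d digits in the step give roughly 2d correct decimal digits here.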
// Print out the result
printf("∫exp(x)dx in [0,1]:\t%.7f\n", sum);
return EXIT_SUCCESS;
}

89
ex-5/trifecta.c Normal file

@ -0,0 +1,89 @@
#include <gsl/gsl_monte_plain.h>
#include <gsl/gsl_monte_miser.h>
#include <gsl/gsl_monte_vegas.h>
#include <gsl/gsl_monte.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Exact value of the integral.
double exact = 1.71828182845905;
// Integrand, to be passed through a gsl_monte_function.
double function (double * x, size_t dim, void * params){
// Avoid warning when no parameters are passed to the function.
(void)(params);
return exp(x[0]);
}
// Function which prints out the results when called.
void results (char *title, double result, double error)
{
printf ("---------------------------\n%s\n", title);
printf ("result: % .10f\n", result);
printf ("sigma: % .10f\n", error);
printf ("exact: % .10f\n", exact);
}
int main(){
// Some useful variables.
double integral, error;
size_t calls = 50000000;
size_t dims = 1;
double lower[1] = {0}; // Integration lower limit
double upper[1] = {1}; // Integration upper limit
// Initialize an RNG.
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
// Define GSL function.
gsl_monte_function expo = {&function, dims, NULL};
{
gsl_monte_vegas_state *s = gsl_monte_vegas_alloc (dims);
gsl_monte_vegas_integrate (&expo, lower, upper, dims, calls,
r, s, &integral, &error);
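// The first call warms up the Vegas grid; then keep iterating with
// fewer calls until the χ² per d.o.f. of the combined results is
// close to 1, i.e. the iterations are mutually consistent
// (the convergence check suggested by the GSL manual).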
do { gsl_monte_vegas_integrate (&expo, lower, upper, dims,
calls/5, r, s, &integral, &error);
} while (fabs (gsl_monte_vegas_chisq (s) - 1.0) > 0.5);
// Free memory.
gsl_monte_vegas_free(s);
// Display results
results("Vegas", integral, error);
}
{
gsl_monte_miser_state *s = gsl_monte_miser_alloc (dims);
gsl_monte_miser_integrate (&expo, lower, upper, dims, calls,
r, s, &integral, &error);
// Free memory.
gsl_monte_miser_free (s);
// Display results
results ("Miser", integral, error);
}
{
gsl_monte_plain_state *s = gsl_monte_plain_alloc (dims);
gsl_monte_plain_integrate (&expo, lower, upper, dims, calls,
r, s, &integral, &error);
// Display results
results ("Montecarlo", integral, error);
// Free memory.
gsl_monte_plain_free (s);
}
// Free memory.
gsl_rng_free(r);
return EXIT_SUCCESS;
}

196
ex-6/fft.c Normal file

@ -0,0 +1,196 @@
#include "fft.h"
#include <string.h>
#include <gsl/gsl_fft_halfcomplex.h>
/* These macros are used to extract the
* real and imaginary parts of a complex
* vector */
#define REAL(a,stride,i) ((a)[2*(stride)*(i)])
#define IMAG(a,stride,i) ((a)[2*(stride)*(i)+1])
/* Computes the (forward) real DFT of a vector
* and returns the results as a complex vector
*/
gsl_vector_complex* vector_fft(gsl_vector *data) {
/* Create a copy of the input data to
* preserve it: GSL functions work in-place.
*/
gsl_vector *fdata = gsl_vector_alloc(data->size);
gsl_vector_memcpy(fdata, data);
/* Compute the forward Fourier transform */
gsl_fft_real_wavetable *rtable =
gsl_fft_real_wavetable_alloc(fdata->size);
gsl_fft_real_workspace *wspace =
gsl_fft_real_workspace_alloc(fdata->size);
gsl_fft_real_transform(
fdata->data, // array of data to transform
fdata->stride, // stride of the array, i.e. # steps between elements
fdata->size, // number of elements
rtable, // wavetable (real)
wspace); // workspace
/* The data in `fdata` is now in the half-complex
* packed format, which reduces memory usage
* by using the fact that
*
* z_i = z̅_(n-i)
*
* We unpack it to the normal packing and use
* the array to define a gsl_vector_complex.
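*
* For example, with n = 6 the packed layout is
* [r₀, r₁, i₁, r₂, i₂, r₃]: the remaining terms are
* redundant because of the symmetry above.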
*/
gsl_vector_complex *cdata = gsl_vector_complex_calloc(fdata->size);
gsl_fft_halfcomplex_unpack(
fdata->data,
cdata->data,
cdata->stride,
fdata->size);
// clear memory
gsl_fft_real_wavetable_free(rtable);
gsl_fft_real_workspace_free(wspace);
gsl_vector_free(fdata);
return cdata;
}
/* Inverse function of gsl_fft_halfcomplex_unpack.
* `gsl_fft_halfcomplex_pack(c, hc, stride, n)`
* creates a halfcomplex packed version of a
* complex packaged array `hc` with stride `stride`
* and `n` elements.
*/
int gsl_fft_halfcomplex_pack(
const gsl_complex_packed_array c,
double hc[],
const size_t stride, const size_t n) {
hc[0] = REAL(c, stride, 0);
size_t i;
for (i = 1; i < n - i; i++) {
hc[(2 * i - 1) * stride] = REAL(c, stride, i);
hc[2 * i * stride] = IMAG(c, stride, i);
}
if (i == n - i)
hc[(n - 1) * stride] = REAL(c, stride, i);
return 0;
}
/* `fft_deconvolve(data, kernel)` tries to deconvolve
* `kernel` from `data` by factoring out the `kernel`
* in the Fourier space.
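*
* Since `data` is (original ⊛ kernel), the convolution theorem gives
* DFT(data) = DFT(original)⋅DFT(kernel), so the original can be
* recovered as IDFT(DFT(data)/DFT(kernel)).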
*/
gsl_histogram* fft_deconvolve(
gsl_histogram *data,
gsl_histogram *kernel) {
/* Size of the original data
* before being convoluted.
*
* Notation of sizes:
* - original: m
* - kernel: n
* - "full" convolution: m + n - 1
*/
size_t orig_size = data->n - kernel->n + 1;
/* Create vector views */
gsl_vector vdata =
gsl_vector_view_array(data->bin, data->n).vector;
gsl_vector vkernel =
gsl_vector_view_array(kernel->bin, kernel->n).vector;
/* 0-pad the kernel to make it the
* same size as the convoluted data,
* which is m+n-1:
*
* 1. create a new vector
* 2. create a subvector view to the center
* 3. copy the kernel to the view
*/
gsl_vector *vpadded = gsl_vector_calloc(data->n);
gsl_vector center = gsl_vector_subvector(
vpadded, // vector: padded kernel (n+m-1)
orig_size/2, // offset: half of original data size (m/2)
kernel->n).vector; // length: kernel size (n)
gsl_vector_memcpy(&center, &vkernel);
/* Compute the DFT of the data and
* divide it by the DFT of the kernel.
*/
gsl_vector_complex *fdata = vector_fft(&vdata);
gsl_vector_complex *fkernel = vector_fft(vpadded);
gsl_vector_complex_div(fdata, fkernel);
/* Pack the result in the half-complex
* format before computing the inverse DFT
*/
double *res = calloc(fdata->size, sizeof(double));
gsl_fft_halfcomplex_pack(fdata->data, res,
fdata->stride, fdata->size);
/* Compute the inverse DFT of the result */
gsl_fft_halfcomplex_wavetable *htable =
gsl_fft_halfcomplex_wavetable_alloc(data->n);
gsl_fft_real_workspace *wspace =
gsl_fft_real_workspace_alloc(data->n);
gsl_fft_halfcomplex_inverse(
res, // array of data to transform
fdata->stride, // stride of the array
fdata->size, // number of elements
htable, // wavetable (complex)
wspace); // workspace
/* Restore the natural order of frequency
* in the result of the DFT (negative → positive).
* To do that roll over the array by a half:
* 1. create a temp array of half size
* 2. copy first half over to temp
* 3. copy second half to the first
* 4. copy back the first to the second half
*/
size_t half_size = data->n/2 * sizeof(double);
double *temp = malloc(half_size);
memcpy(temp, res, half_size);
memcpy(res, res + data->n/2, half_size);
memcpy(res + data->n/2, temp, half_size);
/* Create a histogram with the same edges
* as `data`, but with the original size,
* to return the cleaned result
*/
gsl_histogram *hist = gsl_histogram_calloc(orig_size);
/* Set the same bin edges as `data`*/
double max = gsl_histogram_max(data);
double min = gsl_histogram_min(data);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* Remove the extra size of the convolution
* (where the overlap is partial) when copying
* over the result to the histogram.
* This amounts to (n-1)/2 on each end of the
* vector.
*/
memcpy(
hist->bin,
res + (kernel->n - 1)/2,
orig_size * sizeof(double));
// free memory
gsl_vector_free(vpadded);
gsl_vector_complex_free(fdata);
gsl_vector_complex_free(fkernel);
gsl_fft_halfcomplex_wavetable_free(htable);
gsl_fft_real_workspace_free(wspace);
free(res);
free(temp);
return hist;
}

15
ex-6/fft.h Normal file

@ -0,0 +1,15 @@
#include <gsl/gsl_fft_real.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_vector.h>
/* Computes the (forward) real DFT of a vector
* and returns the results as a complex vector
*/
gsl_vector_complex* vector_fft(gsl_vector *data);
/* `fft_deconvolve(data, noise)` tries to deconvolve
* `noise` from `data` by factoring out the `noise`
* in the Fourier space.
*/
gsl_histogram* fft_deconvolve(gsl_histogram *data, gsl_histogram *noise);

232
ex-6/main.c Normal file

@ -0,0 +1,232 @@
#include <math.h>
#include <string.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sf.h>
#include "fft.h"
#include "rl.h"
/* Program options */
struct options {
int convolved;
int deconvolved;
int original;
size_t num_events;
size_t bins;
double sigma;
size_t rounds;
const char* mode;
double noise;
};
/* Intensity function parameters */
struct param {
double a; // aperture radius
double k; // wavenumber
double e; // wave amplitude
double l; // screen distance
};
/* Intensity of the EM field at a large distance
* from a circular aperture diffraction of a plane
* wave.
*
* The intensity is given as a function of θ,
* the diffraction angle, all the other physical
* parameter are controlled by the structure `param`.
*
* See Fraunhofer diffraction in "Hecht - Optics" for
* a derivation of the formula.
*/
double intensity(double theta, struct param p) {
double x = p.k * p.a * sin(theta);
double R = p.l / cos(theta);
/* The intensity function has a removable
* discontinuity at the origin, which must
* be handled separately.
*/
if (fabs(x) < 1e-15) {
return 0.5 * pow(p.e/R * M_PI * pow(p.a, 2), 2);
}
double y = 2*M_PI*pow(p.a, 2) * gsl_sf_bessel_J1(x) / x;
return 0.5 * pow(p.e * y/R, 2);
}
/* `gaussian_for(hist, sigma)` generates a histogram
* with the same bin width of `hist` but smaller,
* with a gaussian PDF with μ at the central bin
* and σ equal to `sigma`.
*/
gsl_histogram* gaussian_for(gsl_histogram *hist, double sigma) {
/* Set the size to 6% of the histogram.
* Always choose n even to correctly center the
* gaussian in the middle.
*/
size_t n = ((double) hist->n * 6) / 100;
n = n % 2 ? n+1 : n;
/* Calculate the (single) bin width assuming
* the ranges are uniformly spaced
*/
double min, max;
gsl_histogram_get_range(hist, 0, &min, &max);
double dx = max - min;
gsl_histogram *res = gsl_histogram_alloc(n);
gsl_histogram_set_ranges_uniform(res, 0, dx * n);
/* The histogram will be such that
* the zero falls in the central bin.
*/
long int offset = res->n/2;
for (long int i = 0; i < (long int)res->n; i++)
res->bin[i] = gsl_ran_gaussian_pdf(
((double)(i - offset) + 0.5) * dx, sigma * dx);
return res;
}
/* Convolves two histograms while keeping the
* original bin edges. This is a simple wrapper
* function around `gsl_vector_convolve`.
*/
gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b) {
gsl_histogram *res = gsl_histogram_calloc(a->n + b->n - 1);
/* Set the same edges as `a`*/
double max = gsl_histogram_max(a);
double min = gsl_histogram_min(a);
gsl_histogram_set_ranges_uniform(res, min, max);
/* Create vector views for everybody */
gsl_vector_view va = gsl_vector_view_array(a->bin, a->n);
gsl_vector_view vb = gsl_vector_view_array(b->bin, b->n);
gsl_vector_view vres = gsl_vector_view_array(res->bin, res->n);
gsl_vector_convolve(&va.vector, &vb.vector, &vres.vector);
return res;
}
int main(int argc, char **argv) {
struct options opts;
/* Set default options */
opts.convolved = 0;
opts.deconvolved = 0;
opts.original = 1;
opts.num_events = 50000;
opts.bins = 150;
opts.sigma = 0.8;
opts.rounds = 3;
opts.mode = "fft";
opts.noise = 0;
/* Process CLI arguments */
for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-c")) opts.convolved = 1;
else if (!strcmp(argv[i], "-d")) opts.deconvolved = 1;
else if (!strcmp(argv[i], "-o")) opts.original = 1;
else if (!strcmp(argv[i], "-e")) opts.num_events = atol(argv[++i]);
else if (!strcmp(argv[i], "-b")) opts.bins = atol(argv[++i]);
else if (!strcmp(argv[i], "-s")) opts.sigma = atof(argv[++i]);
else if (!strcmp(argv[i], "-r")) opts.rounds = atol(argv[++i]);
else if (!strcmp(argv[i], "-m")) opts.mode = argv[++i];
else if (!strcmp(argv[i], "-n")) opts.noise = atof(argv[++i]);
else {
fprintf(stderr, "Usage: %s -[cdoebsh]\n", argv[0]);
fprintf(stderr, "\t-h\t\tShow this message.\n");
fprintf(stderr, "\t-c\t\tPrint the convolved histogram to stdout.\n");
fprintf(stderr, "\t-d\t\tAttempt and print the deconvolved histogram.\n");
fprintf(stderr, "\t-o\t\tPrint the original histogram to stdout.\n");
fprintf(stderr, "\t-e N\t\tThe number of events.\n");
fprintf(stderr, "\t-b N\t\tThe number of θ bins.\n");
fprintf(stderr, "\t-s SIGMA\tThe sigma of gaussian kernel.\n");
fprintf(stderr, "\t-r N\t\tThe number of RL deconvolution rounds.\n");
fprintf(stderr, "\t-n MU\t\tThe mean (μ) of Poisson noise to add to the convolution.\n");
return EXIT_FAILURE;
}
}
/* Initialize an RNG. */
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
gsl_histogram* hist = gsl_histogram_alloc(opts.bins);
gsl_histogram_set_ranges_uniform(hist, 0, M_PI/2);
double theta;
struct param p = { 0.01, 0.0001, 1e4, 1 };
/* Sample events following the intensity
* of a circular aperture diffraction I(θ)
* while producing a histogram.
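*
* θ is proposed as acos(1 - u), with u uniform in [0,1): this draws
* the angle uniformly in solid angle (pdf ∝ sin θ on [0, π/2]), and
* the rejection step then accepts it with probability I(θ)/I(0).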
*/
fputs("# event sampling\n", stderr);
fprintf(stderr, "max itensity: %.2f\n", intensity(0, p));
fprintf(stderr, "1. generating %ld events...", opts.num_events);
for (size_t i = 0; i < opts.num_events; i++){
do {
theta = acos(1 - gsl_rng_uniform(r));
} while(intensity(0, p) * gsl_rng_uniform(r) > intensity(theta, p));
gsl_histogram_increment(hist, theta);
}
fputs("done\n", stderr);
/* Generate the gaussian kernel, convolve the
* sample histogram with it and try to deconvolve it.
*/
fputs("\n# convolution\n", stderr);
fputs("1. generating gaussian kernel...", stderr);
gsl_histogram *kernel = gaussian_for(hist, opts.sigma);
fputs("done\n", stderr);
fputs("2. convolving...", stderr);
gsl_histogram *conv = histogram_convolve(hist, kernel);
fputs("done\n", stderr);
/* Add Poisson noise with μ=opts.noise to
* the convolution.
*/
if (opts.noise > 0) {
fputs("2.1 adding poisson noise...", stderr);
for (size_t i = 0; i < conv->n; i++)
conv->bin[i] += gsl_ran_poisson(r, opts.noise);
fputs("done\n", stderr);
}
fprintf(stderr, "3. %s deconvolution...", opts.mode);
gsl_histogram *clean;
if (!strcmp(opts.mode, "fft"))
clean = fft_deconvolve(conv, kernel);
else if (!strcmp(opts.mode, "rl"))
clean = rl_deconvolve(conv, kernel, opts.rounds);
else {
fputs("\n\nerror: invalid mode. select either 'fft' or 'rl'\n", stderr);
return EXIT_FAILURE;
}
fputs("done\n", stderr);
/* Print the selected histogram*/
fputs("\n# histogram \n", stderr);
gsl_histogram *showing;
if (opts.convolved) showing = conv;
else if (opts.deconvolved) showing = clean;
else if (opts.original) showing = hist;
gsl_histogram_fprintf(stdout, showing, "%g", "%g");
// free memory
gsl_rng_free(r);
gsl_histogram_free(hist);
gsl_histogram_free(kernel);
gsl_histogram_free(conv);
gsl_histogram_free(clean);
return EXIT_SUCCESS;
}

11
ex-6/plot.py Executable file

@ -0,0 +1,11 @@
#!/usr/bin/env python
from pylab import *
import sys
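# Each input row, as written by gsl_histogram_fprintf, holds:
# lower bin edge, upper bin edge, bin count.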
a, b, f = loadtxt(sys.stdin, unpack=True)
suptitle('Fraunhofer diffraction')
title(sys.argv[1] if len(sys.argv) > 1 else "", loc='right')
hist(a, np.insert(b, 0, a[0]), weights=f/sum(f),
color='#dbbf0d', edgecolor='#595856', linewidth=0.5)
show()

152
ex-6/rl.c Normal file

@ -0,0 +1,152 @@
#include "rl.h"
#include <gsl/gsl_blas.h>
#include <math.h>
/* `gsl_vector_convolve(a, b, res)` computes the linear
* convolution of two vectors. The resulting vector
* has size `a->n + b->n - 1` and is stored in `res`.
*/
int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res) {
size_t m = a->size;
size_t n = b->size;
/* Vector views for the overlap dot product */
gsl_vector_view va, vb;
/* Reverse `b` before convolving */
gsl_vector_reverse(b);
double dot;
size_t start_a, start_b, many;
for (size_t i = 0; i < m + n - 1; i++) {
/* Calculate the starting indices
* of the two vectors a,b overlap.
*/
if (i < n) start_a = 0;
else start_a = i - (n - 1);
if (i < n) start_b = (n - 1) - i;
else start_b = 0;
/* Calculate how many elements overlap */
if (i < n) many = i + 1;
else if (i >= m) many = n - (i - m) - 1;
else many = n;
/* Take the dot product of the overlap
* and store it in the resulting histogram
*/
va = gsl_vector_subvector(a, start_a, many);
vb = gsl_vector_subvector(b, start_b, many);
gsl_blas_ddot(&va.vector, &vb.vector, &dot);
gsl_vector_set(res, i, dot);
}
/* Put b back together */
gsl_vector_reverse(b);
return GSL_SUCCESS;
}
/* Performs the Richardson-Lucy deconvolution.
* In pseudo-python:
*
* def rl_deconvolve(data, kernel, rounds):
*     est = np.full(len(data), 0.5)
*     for _ in range(rounds):
*         est_conv = convolve(est, kernel)
*         est *= convolve(data / est_conv, kernel[::-1])
*     return est
*
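* In formulas, each round updates the estimate as
*
*     est ← est ⋅ [ (data / (est ⊛ kernel)) ⊛ kernel* ]
*
* where ⊛ denotes convolution and kernel* is the flipped kernel.
*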
*/
gsl_histogram* rl_deconvolve(
gsl_histogram *data,
gsl_histogram *kernel,
size_t rounds) {
/* Size of the original data
* before being convoluted.
*
* Notation of sizes:
* - original: m
* - kernel: n
* - "full" convolution: m + n - 1
*/
size_t orig_size = data->n - kernel->n + 1;
/* Create a histogram with the same edges
* as `data`, but with the original size,
* to return the cleaned result
*/
gsl_histogram *hist = gsl_histogram_calloc(orig_size);
/* Set the same bin edges as `data`*/
double max = gsl_histogram_max(data);
double min = gsl_histogram_min(data);
gsl_histogram_set_ranges_uniform(hist, min, max);
/* Vector views of the result, kernel
* and data. These are used to perform
* vectorised operations on histograms.
*/
gsl_vector est =
gsl_vector_view_array(hist->bin, hist->n).vector;
gsl_vector vkernel =
gsl_vector_view_array(kernel->bin, kernel->n).vector;
gsl_vector vdata =
gsl_vector_view_array(data->bin, data->n).vector;
gsl_vector center;
/* Create a flipped copy of the kernel */
gsl_vector *vkernel_flip = gsl_vector_alloc(kernel->n);
gsl_vector_memcpy(vkernel_flip, &vkernel);
gsl_vector_reverse(vkernel_flip);
/* More vectors to store partial
* results
*/
gsl_vector* est_conv = gsl_vector_alloc(data->n);
gsl_vector* rel_blur = gsl_vector_alloc(data->n);
/* The zero-order estimate is simply
* all elements at 0.5 */
gsl_vector_set_all(&est, 0.5);
for (size_t iter = 0; iter < rounds; iter++) {
/* The current estimated convolution is the
* current estimate of the data with
* the kernel */
gsl_vector_convolve(&est, &vkernel, est_conv);
/* Divide the data by the estimated
* convolution to calculate the "relative blur".
*/
gsl_vector_memcpy(rel_blur, &vdata);
gsl_vector_div(rel_blur, est_conv);
/* Set NaNs to zero */
for (size_t i = 0; i < rel_blur->size; i++)
if (isnan(gsl_vector_get(rel_blur, i))) {
fprintf(stderr, "gotcha: %ld!!\n", i);
double y = (i > 0)? gsl_vector_get(rel_blur, i - 1) : 1;
gsl_vector_set(rel_blur, i, y);
}
/* Convolve the blur by the kernel
* and multiply the current estimate
* of the data by it.
*/
center = gsl_vector_subvector(rel_blur, (kernel->n-1)/2, orig_size).vector;
gsl_vector_convolve(&center, vkernel_flip, est_conv);
center = gsl_vector_subvector(est_conv, (kernel->n-1)/2, orig_size).vector;
gsl_vector_mul(&est, &center);
}
// free memory
gsl_vector_free(est_conv);
gsl_vector_free(rel_blur);
gsl_vector_free(vkernel_flip);
return hist;
}

20
ex-6/rl.h Normal file

@ -0,0 +1,20 @@
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_vector.h>
/* `rl_deconvolve(data, noise, rounds)`
* tries to deconvolve `noise` from
* `data` in `rounds` rounds.
* */
gsl_histogram* rl_deconvolve(
gsl_histogram *data,
gsl_histogram *noise,
size_t rounds);
/* `gsl_vector_convolve(a, b, res)` computes the linear
 * convolution of two vectors. The convolved vector `res`
 * has length `a->size + b->size - 1`.
*/
int gsl_vector_convolve(
gsl_vector *a,
gsl_vector *b,
gsl_vector *res);

433
ex-7/main.c Normal file

@ -0,0 +1,433 @@
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
/* Parameters for bivariate
* gaussian PDF
*/
struct par {
double x0; // x mean
double y0; // y mean
double sigma_x; // x standard dev
double sigma_y; // y standard dev
double rho; // correlation: cov(x,y)/σx⋅σy
};
/* A sample of N 2D points is an
* N×2 matrix, with vectors as rows.
*/
typedef struct {
struct par p;
gsl_matrix *data;
} sample_t;
/* Create a sample of `n` points */
sample_t* sample_t_alloc(size_t n, struct par p) {
sample_t *x = malloc(sizeof(sample_t));
x->p = p;
x->data = gsl_matrix_alloc(n, 2);
return x;
}
/* Delete a sample */
void sample_t_free(sample_t *x) {
gsl_matrix_free(x->data);
free(x);
}
/* `generate_normal(r, n, p)` will create
* a sample of `n` points, distributed
* according to a bivariate gaussian distribution
* of parameters `p`.
*/
sample_t* generate_normal(
gsl_rng *r, size_t n, struct par *p) {
sample_t *s = sample_t_alloc(n, *p);
for (size_t i = 0; i < n; i++) {
/* Generate a vector (x,y) with
* a standard (μ = 0) bivariate
* gaussian PDF.
*/
double *x = gsl_matrix_ptr(s->data, i, 0);
double *y = gsl_matrix_ptr(s->data, i, 1);
gsl_ran_bivariate_gaussian(
r,
p->sigma_x, p->sigma_y, p->rho,
x, y);
/* Shift the vector to (x₀,y₀) */
*x += p->x0;
*y += p->y0;
}
return s;
}
/* Builds the covariance matrix Σ
* from the standard parameters (σ, ρ)
* of a bivariate gaussian.
*/
gsl_matrix* normal_cov(struct par *p) {
double var_x = pow(p->sigma_x, 2);
double var_y = pow(p->sigma_y, 2);
double cov_xy = p->rho * p->sigma_x * p->sigma_y;
gsl_matrix *cov = gsl_matrix_alloc(2, 2);
gsl_matrix_set(cov, 0, 0, var_x);
gsl_matrix_set(cov, 1, 1, var_y);
gsl_matrix_set(cov, 0, 1, cov_xy);
gsl_matrix_set(cov, 1, 0, cov_xy);
return cov;
}
/* Builds the mean vector of
* a bivariate gaussian.
*/
gsl_vector* normal_mean(struct par *p) {
gsl_vector *mu = gsl_vector_alloc(2);
gsl_vector_set(mu, 0, p->x0);
gsl_vector_set(mu, 1, p->y0);
return mu;
}
/* `fisher_proj(c1, c2)` computes the optimal
* projection map, which maximises the separation
* between the two classes.
* The projection vector w is given by
*
*     w = Sw⁻¹ (μ₂ - μ₁)
*
* where Sw = Σ₁ + Σ₂ is the so-called within-class
* covariance matrix.
*/
gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) {
/* Construct the covariances of each class... */
gsl_matrix *cov1 = normal_cov(&c1->p);
gsl_matrix *cov2 = normal_cov(&c2->p);
/* and the mean values */
gsl_vector *mu1 = normal_mean(&c1->p);
gsl_vector *mu2 = normal_mean(&c2->p);
/* Compute the inverse of the within-class
* covariance, Sw⁻¹.
* Note: by definition Σ is symmetrical and
* positive-definite, so Cholesky is appropriate.
*/
gsl_matrix_add(cov1, cov2);
gsl_linalg_cholesky_decomp(cov1);
gsl_linalg_cholesky_invert(cov1);
/* Compute the difference of the means. */
gsl_vector *diff = gsl_vector_alloc(2);
gsl_vector_memcpy(diff, mu2);
gsl_vector_sub(diff, mu1);
/* Finally multiply diff by Sw⁻¹.
* This uses the rather low-level CBLAS
* function gsl_blas_dgemv, which computes
*
*     y := α op(A)x + βy
*
* here with op(A) = A (CblasNoTrans), α = 1 and β = 0.
*/
gsl_vector *w = gsl_vector_alloc(2);
gsl_blas_dgemv(
CblasNoTrans, // do nothing on A
1, // α = 1
cov1, // matrix A
diff, // vector x
0, // β = 0
w); // vector y
// free memory
gsl_matrix_free(cov1);
gsl_matrix_free(cov2);
gsl_vector_free(mu1);
gsl_vector_free(mu2);
gsl_vector_free(diff);
return w;
}
/* Computes the determinant from the
* Cholesky decomposition of matrix.
* In this case it's simply the product
* of the diagonal elements, squared.
*/
double gsl_linalg_cholesky_det(gsl_matrix *LL) {
gsl_vector diag = gsl_matrix_diagonal(LL).vector;
double det = 1;
for (size_t i = 0; i < LL->size1; i++)
det *= gsl_vector_get(&diag, i);
return det * det;
}
/* `fisher_cut(ratio, w, c1, c2)` computes
* the threshold (cut), on the line given by
* `w`, to discriminate between the classes `c1`, `c2`;
* with `ratio` being the ratio of their prior
* probabilities.
*
* The cut is fixed by the condition of
* conditional probability being the
* same for each class:
*
*   P(c₁|x)     p(x|c₁)p(c₁)
*   ------- = --------------- = 1;
*   P(c₂|x)     p(x|c₂)p(c₂)
*
* together with x = tw.
*
* If p(x|c) is a bivariate normal PDF the
* solution is found to be:
*
*   t = (b/a) + √((b/a)² - c/a);
*
* where
*
*   1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
*   2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
*   3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log(|Σ₁|/|Σ₂|) - 2 log(α)
*   4. α = p(c₁)/p(c₂)
*
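* (These follow from equating the logarithms of the two weighted
* densities p(x|cᵢ)p(cᵢ) at x = tw and collecting the powers of t
* into the quadratic a⋅t² - 2b⋅t + c = 0.)
*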
*/
double fisher_cut(
double ratio,
gsl_vector *w,
sample_t *c1, sample_t *c2) {
/* Construct the covariances of each class... */
gsl_matrix *cov1 = normal_cov(&c1->p);
gsl_matrix *cov2 = normal_cov(&c2->p);
/* and the mean values */
gsl_vector *mu1 = normal_mean(&c1->p);
gsl_vector *mu2 = normal_mean(&c2->p);
/* Temporary vector/matrix for
* intermediate results.
*/
gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2);
gsl_vector *vtemp = gsl_vector_alloc(w->size);
/* Invert Σ₁ and Σ₂ in-place:
* we only need the inverse matrices
* in the steps to follow.
*/
gsl_linalg_cholesky_decomp(cov1);
gsl_linalg_cholesky_decomp(cov2);
// store determinant for later
double det1 = gsl_linalg_cholesky_det(cov1);
double det2 = gsl_linalg_cholesky_det(cov2);
gsl_linalg_cholesky_invert(cov1);
gsl_linalg_cholesky_invert(cov2);
/* Compute the first term:
*
*     a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
*
*/
// mtemp = cov1 - cov2
gsl_matrix_memcpy(mtemp, cov1);
gsl_matrix_sub(mtemp, cov2);
// vtemp = mtemp ⋅ w
gsl_vector_memcpy(vtemp, w);
gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp);
// a = (w, vtemp)
double a; gsl_blas_ddot(w, vtemp, &a);
/* Compute the second term:
*
*     b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
*
*/
// vtemp = cov2 ⋅ mu2
// vtemp = cov1 ⋅ mu1 - vtemp
gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp);
gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp);
// b = (w, vtemp)
double b; gsl_blas_ddot(w, vtemp, &b);
/* Compute the last term:
*
*     c = log(|Σ₁|/|Σ₂|) - 2 log(α) + (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂)
*
*/
double c, temp;
c = log(det1 / det2) - 2*log(ratio);
gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, 0, vtemp);
gsl_blas_ddot(mu1, vtemp, &temp);
c += temp;
gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp);
gsl_blas_ddot(mu2, vtemp, &temp);
c -= temp;
/* To get the threshold value we have to
* multiply t by (w,w) = |w|² if w is not normalised
*/
double norm; gsl_blas_ddot(w, w, &norm);
// free memory
gsl_vector_free(mu1);
gsl_vector_free(mu2);
gsl_vector_free(vtemp);
gsl_matrix_free(cov1);
gsl_matrix_free(cov2);
gsl_matrix_free(mtemp);
return ((b/a) + sqrt(pow(b/a, 2) - c/a)) * norm;
}
/* `fisher_cut2(ratio, w, c1, c2)` computes
* the threshold (cut), on the line given by
* `w`, to discriminate between the classes `c1`, `c2`;
* with `ratio` being the ratio of their prior
* probabilities.
*
* The cut is fixed by the condition of
* conditional probability being the
* same for each class:
*
*   P(c₁|x)     p(x|c₁)p(c₁)
*   ------- = --------------- = 1;
*   P(c₂|x)     p(x|c₂)p(c₂)
*
* where p(x|c) is the probability for point x
* along the fisher projection line. If the classes
* are bivariate gaussian then p(x|c) is simply
* given by a normal distribution:
*
*   Φ(μ = (w,μᵢ), σ² = (w, Σᵢw))
*
* The solution is then
*
*   t = (b/a) + √((b/a)² - c/a);
*
* where
*
*   1. a = S₁² - S₂²
*   2. b = M₂S₁² - M₁S₂²
*   3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α)
*   4. α = p(c₁)/p(c₂)
*
*/
double fisher_cut2(
double ratio,
gsl_vector *w,
sample_t *c1, sample_t *c2) {
gsl_vector *vtemp = gsl_vector_alloc(w->size);
/* Construct the covariances of each class... */
gsl_matrix *cov1 = normal_cov(&c1->p);
gsl_matrix *cov2 = normal_cov(&c2->p);
/* and the mean values */
gsl_vector *mu1 = normal_mean(&c1->p);
gsl_vector *mu2 = normal_mean(&c2->p);
/* Project the distribution onto the
* w line to get a 1D gaussian
*/
// mean
double m1; gsl_blas_ddot(w, mu1, &m1);
double m2; gsl_blas_ddot(w, mu2, &m2);
// variances
gsl_blas_dgemv(CblasNoTrans, 1, cov1, w, 0, vtemp);
double var1; gsl_blas_ddot(w, vtemp, &var1);
gsl_blas_dgemv(CblasNoTrans, 1, cov2, w, 0, vtemp);
double var2; gsl_blas_ddot(w, vtemp, &var2);
double a = var1 - var2;
double b = m2*var1 + m1*var2;
double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio);
// free memory
gsl_vector_free(mu1);
gsl_vector_free(mu2);
gsl_vector_free(vtemp);
gsl_matrix_free(cov1);
gsl_matrix_free(cov2);
return (b/a) + sqrt(pow(b/a, 2) - c/a);
}
int main(int argc, char **args) {
// initialize RNG
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
/* Generate two classes of normally
* distributed 2D points with different
* parameters: signal and noise.
*/
struct par par_sig = { 0, 0, 0.3, 0.3, 0.5 };
struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 };
// sample sizes
size_t nsig = 800;
size_t nnoise = 1000;
sample_t *signal = generate_normal(r, nsig, &par_sig);
sample_t *noise = generate_normal(r, nnoise, &par_noise);
/* Fisher linear discriminant
*
* First calculate the direction w onto
* which project the data points. Then the
* cut which determines the class for each
* projected point.
*/
gsl_vector *w = fisher_proj(signal, noise);
double t_cut = fisher_cut2(nsig / (double)nnoise,
w, signal, noise);
fputs("# Linear Fisher discriminant\n\n", stderr);
fprintf(stderr, "* w: [%.3f, %.3f]\n",
gsl_vector_get(w, 0),
gsl_vector_get(w, 1));
fprintf(stderr, "* t_cut: %.3f\n", t_cut);
gsl_vector_fprintf(stdout, w, "%g");
printf("%f\n", t_cut);
/* Print data to stdout for plotting.
* Note: we print the sizes to be able
* to set apart the two matrices.
*/
printf("%ld %ld %d\n", nsig, nnoise, 2);
gsl_matrix_fprintf(stdout, signal->data, "%g");
gsl_matrix_fprintf(stdout, noise->data, "%g");
// free memory
gsl_rng_free(r);
sample_t_free(signal);
sample_t_free(noise);
return EXIT_SUCCESS;
}

26
ex-7/plot.py Executable file

@ -0,0 +1,26 @@
#!/usr/bin/env python
from pylab import *
import sys
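# stdin layout (as written by ex-7/main): two lines with the components
# of w, one line with the cut value, one line with "nsig nnoise dim",
# then the flattened coordinates of all the points, one number per line.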
w = loadtxt(sys.stdin, max_rows=2)
cut = float(input())
n, m, d = map(int, input().split())
data = loadtxt(sys.stdin).reshape(n + m, d)
signal, noise = data[:n].T, data[n:].T
figure()
scatter(*signal, c='xkcd:grey blue', label='signal')
scatter(*noise, c='xkcd:baby blue', label='noise')
scatter(*(cut * w/np.dot(w,w)), c='red')
arrow(*(-abs(w)), *4*abs(w))
legend()
figure()
sig_proj = np.dot(w, signal)
noise_proj = np.dot(w, noise)
hist(sig_proj, color='xkcd:grey blue')
hist(noise_proj, color='xkcd:baby blue')
axvline(cut, c='r')
show()

33
makefile Normal file

@ -0,0 +1,33 @@
GMP_FLAGS = -L$(GMP_PATH) -lgmp
GSL_FLAGS = $(shell pkg-config --libs gsl)
CFLAGS := -Wall --debug $(GMP_FLAGS) $(GSL_FLAGS)
CCOMPILE = \
mkdir -p $(@D); \
$(CC) $(CFLAGS) $^ -o $@
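# Pattern rules: e.g. `make ex-1/bin/main` compiles ex-1/main.c into ex-1/bin/.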
ex-1/bin/%: ex-1/%.c
$(CCOMPILE)
ex-2/bin/%: ex-2/%.c
$(CCOMPILE)
ex-3/bin/main: ex-3/common.c ex-3/likelihood.c ex-3/chisquared.c
$(CCOMPILE)
ex-4/bin/main: ex-4/main.c
$(CCOMPILE)
ex-5/bin/%: ex-5/%.c
$(CCOMPILE)
ex-6/bin/main: ex-6/rl.c ex-6/fft.c
$(CCOMPILE)
ex-7/main: ex-7/main.c
$(CCOMPILE)
misc/pdfs: misc/pdfs.c
clean:
rm -rf ex-*/bin

(14 binary image files added, 13–119 KiB each; contents not shown)
17
misc/lessons/makefile Normal file

@ -0,0 +1,17 @@
sections = $(shell find sections/ -name '*.md' | sort -V)
pandoc = \
@ pandoc $(1) \
--toc \
--standalone \
--pdf-engine=xelatex \
-F pandoc-crossref \
-o $(2).pdf
all: lessons.pdf
clean:
rm lessons.pdf
lessons.pdf: $(sections) images
$(call pandoc, $(sections), lessons)


@ -0,0 +1,23 @@
---
title: Statistical analysis
subtitle: Summary of the slides
numbersections: true
documentclass: article
fontsize: 12pt
geometry:
- width=150mm
- top=20mm
- bottom=30mm
header-includes: |
```{=latex}
% start new page with each section
\usepackage{etoolbox}
\usepackage{tikz}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{siunitx}
\pretocmd{\section}{\clearpage}{}{}
```
---


@ -0,0 +1,140 @@
# Numerical computing
## Compiling a program
To run a program, a computer does the following: it reads information from
the input and puts it into memory; from there it reads the instructions
sequentially and, when there is some computation to do, it hands it to the
ALU (arithmetic logic unit) and stores the result back in memory; finally
everything is passed to the output.
To be executable a program must be written in machine code, and this is what
programming languages are for: through the compiler they are translated into
machine code:
- source code,
- compiler,
- object code,
- linker (adds the libraries),
- executable code.
## Implementation errors
When going from the abstract model to its implementation, different types of
error arise:
- analytical errors: caused by the need for a discrete arithmetic, i.e. the
fact that a continuous object cannot be represented exactly and is therefore
sampled at points;
- inherent or algorithmic (round-off) errors: caused by the finite number of
significant digits. They depend on the algorithm used, because some methods
are better than others at avoiding this kind of problem.
## Representing a number
The goal is to keep round-off errors under control.
There are several ways to represent a number, among which the Von Neumann
one: $n$ significant digits are devoted to the mantissa and some more to the
characteristic, i.e. the order of magnitude (generally in base 2).
With $n$ significant digits in base 2 the largest representable number is
$2^n -1$, because with two digits (0 and 1) there are $2^n$ representable
numbers and, when sorted in increasing order, each differs from the previous
one by one unit, the smallest being 0 (all zeros), so the largest number that
can be represented is $2^n -1$:
- 0 is the 1st,
- 1 is the 2nd,
- ...$2^n -1$ is the $2^n$-th.
This number corresponds to a base-ten number with $m$ significant digits,
so that:
$$
m = \log(2^n -1)
$$
so a number with 24 bits for the mantissa carries about 7 significant digits
in base 10.
In single precision (float):
- one bit for the sign,
- 23 bits for the mantissa (hence 7 significant digits),
- 8 bits for the characteristic.
In double precision (double) the mantissa bits are doubled and the
significant digits become 17, but the time for multiplications triples and
more memory is used, so it is not always worth it.
There is also the fixed-point representation, with one bit for the sign and
the remaining ones for the number itself, the decimal point sitting at some
fixed position.
In the fixed-point case the relative error can vary a lot depending on the
number being represented, whereas for floats it is the same for every
number: the relative density is not constant.
When a set of representable numbers is not closed under an operation, the
result necessarily carries a round-off error and has to be approximated.
## Algorithmic errors
For numbers too large or too small to be represented one speaks of overflow
or underflow respectively. For this reason, sums and especially differences
of large numbers carry large rounding errors, which should be avoided when
possible. For instance, to evaluate a polynomial it is convenient to use the
Ruffini-Horner method, because it reduces the number of operations to
perform (see the sketch after this list):
- naive method:
$$
p(x) = a_0 + a_1x + a_2x^2 + ... +a_nx^n
$$
which amounts to $n$ additions and $n(n+1)/2$ multiplications, hence $\sim n^2$.
- R-H method:
$$
p(x) = (\dots((a_n x + a_{n-1})x + a_{n-2})x + \dots)x + a_0
$$
which amounts to $n$ additions and $n$ multiplications, hence $\sim n$.
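To make the difference concrete, here is a minimal C sketch (not part of the
exercises; the coefficients and the evaluation point are arbitrary) comparing
the naive evaluation with the Ruffini-Horner scheme:
```c
#include <stdio.h>
/* Naive evaluation: n additions, ~n²/2 multiplications. */
double poly_naive(const double *a, int n, double x) {
    double sum = 0;
    for (int i = 0; i <= n; i++) {
        double xi = 1;                  // compute x^i from scratch
        for (int k = 0; k < i; k++) xi *= x;
        sum += a[i] * xi;
    }
    return sum;
}
/* Ruffini-Horner evaluation: n additions, n multiplications. */
double poly_horner(const double *a, int n, double x) {
    double sum = a[n];
    for (int i = n - 1; i >= 0; i--)
        sum = sum * x + a[i];
    return sum;
}
int main(void) {
    /* p(x) = 1 + 2x + 3x² + 4x³ evaluated at x = 0.5 */
    double a[] = {1, 2, 3, 4};
    printf("naive:  %g\n", poly_naive(a, 3, 0.5));
    printf("horner: %g\n", poly_horner(a, 3, 0.5));
    return 0;
}
```
Both print 3.25; only the number of operations differs.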
The problem also shows up when subtracting two numbers that are very close
to each other, because the result is a very small number which may end up in
underflow (this is known as "catastrophic cancellation").
\textcolor{orange}{note:} The computational cost of solving a linear system
of $n$ equations is asymptotically equal to the cost of multiplying two
$n \times n$ matrices. There are algorithms that require no more than
$k \cdot n^{\alpha}$ operations, the smallest known value of $\alpha$ being
about 2.375.
Some algorithms are said to be "unstable": this happens when round-off
errors accumulate until they lead to completely wrong results.
There are problems, said to be "ill-conditioned", which give errors so large
with any algorithm that the result is meaningless. In these cases, small
variations of the initial data lead to large variations in the results. The
following ratio is called the "condition number of the problem":
$$
\frac{\Delta r}{\Delta \alpha} =
\frac{\text{\% error on the result}}{\text{\% error on the initial datum}}
$$
which can therefore be written as:
$$
\Delta \alpha = \frac{x + h - x}{x} = \frac{h}{x}
\hspace{100pt}
\Delta r = \frac{f(x + h)-f(x)}{f(x)}
$$
$$
\frac{\Delta r}{\Delta \alpha} =
\frac{f(x + h)-f(x)}{h} \cdot \frac{x}{f(x)} =
f'(x) \cdot \frac{x}{f(x)}
$$
If this number is $\ll 1$, then the problem is weakly sensitive to the
initial data.


@ -0,0 +1,257 @@
# Probability
## The concept of probability
Consider a set S of events, called the "sample space", and two random events
A and B. They obey the Kolmogorov axioms:
$\forall A \subseteq S, 0 \leqslant P(A) \leqslant 1$
$P(S) = 1$
$A \cap B = \varnothing \Longrightarrow P(A \cup B) = P(A) + P(B)$
The conditional probability of A given B is defined as:
$$
P(A|B) = \frac{P(A \cap B)}{P(B)}
$$
If two events are independent, then:
$$
P(A \cap B) = P(A) \cdot P(B) \Longrightarrow
P(A|B) = \frac{P(A) \cdot P(B)}{P(B)} = P(A)
$$
There are different approaches to defining probability:
- frequentist approach:
$$
P(A) = \lim_{n \rightarrow +\infty} \frac{\#A}{n}
$$
with the problem that an infinite number of trials is impossible.
- Bayesian approach:
use Bayes' theorem, for instance:
$$
P(\text{theory|data}) \propto P(\text{data|theory}) \cdot P(\text{theory})
$$
it is therefore a "subjective" approach, at least as far as the last term of
the equation above is concerned, because the probability that the theory is
valid can be assigned on the basis of reasoning and observations about the
phenomenon under study.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example of conditional probability
Some decays are observed and $N_a$ and $N_b$ of them are measured
independently; $N_{ab}$ were measured in common. We can estimate the total
number of events $N$ and the total efficiency $\epsilon$.
$$
P(a) = \frac{N_a}{N} \hspace{100pt} P(b) = \frac{N_b}{N}
$$
$$
P(ab) = \frac{N_{ab}}{N} = P(a) \cdot P(b) = \frac{N_a \cdot N_b}{N^2}
\Longrightarrow N = \frac{N_a \cdot N_b}{N_{ab}}
$$
$$
\epsilon = P(a \cup b) = P(a) + P(b) -P(a \cap b) =
\frac{N_a + N_b - N_{ab}}{N}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
## Bayes' theorem
$$
P(A|B) = \frac{P(A \cap B)}{P(B)}
\hspace{100pt}
P(B|A) = \frac{P(B \cap A)}{P(A)}
$$
$$
P(A|B) = \frac{P(B|A) \cdot P(A)}{P(B)}
$$
From Bayes' theorem one can derive the law of total probability: consider a
set $S$ divided into disjoint subsets $A_i$ whose union gives back the
original set:
$$
\cup_i A_i = S
$$
and consider a set $B$ also contained in $S$:
$$
B = B \cap S = B \cap (\cup_i A_i) = \cup_i (B \cap A_i)
$$
$$
\Longrightarrow P(B) = \sum_i P(B \cap A_i)
$$
$$
\Longrightarrow P(B) = \sum_i P(B|A_i)P(A_i)
$$
from which, for a specific set $A_j$, through Bayes' theorem:
$$
P(A_j|B) = \frac{P(B|A_j) \cdot P(A_j)}{\sum_i P(B|A_i)P(A_i)}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example of total probability: signal and background
A detector measures signal and background with efficiencies $P(R|S)$ and
$P(R|F)$ respectively. If the prior probabilities of signal and background
$P(S)$ and $P(F)$ are known, then one can obtain the probability that, given
a measurement, the signal was actually measured:
$$
P(S|R) = \frac{P(R|S) \cdot P(S)}{P(R|S) \cdot P(S) + P(R|F) \cdot P(F)}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example of total probability: AIDS
$$
P(\text{AIDS}) = 0.001 \hspace{50pt} P(\text{no AIDS}) = 0.999
$$
$$
P(\text{+|AIDS}) = 0.98 \hspace{50pt} P(\text{-|AIDS}) = 0.02
$$
$$
P(\text{+|no AIDS}) = 0.03 \hspace{50pt} P(\text{-|no AIDS}) = 0.97
$$
So, if the test is positive, the probability of actually having contracted AIDS is:
$$
P(\text{AIDS|+}) = \frac{P(\text{+|AIDS}) \cdot P(\text{AIDS})}
{P(\text{+|AIDS}) \cdot P(\text{AIDS}) + P(\text{+|no AIDS}) \cdot
P(\text{no AIDS})} = 0.032
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Bayesian approach with a theory
Let us start with an example: the mass of an electron was measured in an
experiment and the values {$m_i$} were found, whose mean value is
$m_e = \SI{520 \pm 10}{KeV}$. One then assumes that the true value lies
between \SI{510}{KeV} and \SI{530}{KeV} with a probability of 68%, given by
the confidence level. Hence, the probability that the true mass is exactly
\SI{520}{KeV} is given by:
$$
P(m_e|{m_i}) = \frac{}{}
$$
\textcolor{red}{**check this formula against the notes**}
When performing a measurement, $N$ values are measured, the mean value
$\bar{x}$ and the standard deviation $\sigma$ are computed, and the result is quoted as:
$$
\mu = \bar{x} \pm \frac{\sigma}{\sqrt{N}}
$$
which is usually interpreted by saying that:
$$
P \left( \bar{x} - \frac{\sigma}{\sqrt{N}} \leqslant \mu \leqslant
\bar{x} + \frac{\sigma}{\sqrt{N}} \right) = 68\%
$$
but actually all we know is that:
$$
P \left( \mu - \frac{\sigma_{true}}{\sqrt{N}} \leqslant \bar{x} \leqslant
\mu + \frac{\sigma_{true}}{\sqrt{N}} \right) = 68\%
$$
**\textcolor{red}{WHAT?!?}**


@ -0,0 +1,480 @@
# Statistics
## Probability distributions
A probability density function $f$ is defined so that the probability for a
variable $x$ to lie between $x$ and $x + dx$ is given by:
$$
P(x \subset [x, x + dx]) = f(x)dx
$$
hence:
$$
\int\limits_{- \infty}^{+ \infty} dx f(x) = 1
$$
The cumulative function is defined as:
$$
F(x) = \int\limits_{- \infty}^x dx' f(x')
$$
and the quantile of order $\alpha$ is the value of $x$ for which $F(x) = \alpha$.
In the multidimensional case, with two or more variables, one speaks of a
joint pdf:
$$
f(x, y) \hspace{30pt} \Longrightarrow \hspace{30pt}
\int\limits_{- \infty}^{+ \infty} \int\limits_{- \infty}^{+ \infty}
dx dy f(x, y) = 1
$$
and two marginal distributions are defined:
$$
f_x (x) = \int\limits_{- \infty}^{+ \infty} dy f(x, y)
\hspace{50pt}
f_y (y) = \int\limits_{- \infty}^{+ \infty} dx f(y, x)
$$
so two variables $x$ and $y$ are independent if $f(x, y) = f_x(x) \cdot
f_y(y)$. Now, if $A$ is the event with probability $f_x(x)dx$ while $B$ has
probability $f_y(y)dy$, then the conditional pdfs can be defined as
follows:
$$
P(B|A) = \frac{P (A \cap B)}{P(A)} = \frac{f(x, y)dxdy}{f_x(x)dx}
\hspace{20pt} \Longrightarrow \hspace{20pt} h(y|x) = \frac{f(x, y)}{f_x(x)}
$$
so that Bayes' theorem becomes:
$$
g(x|y) = \frac{h(y|x)f_x(x)}{f_y(y)}
$$
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Borel-Kolmogorov paradox
Consider points uniformly distributed on the surface of the Earth: one would
expect the points to be uniformly distributed also along any parallel or
meridian... but take a meridian: 25% of it lies north of the 45th parallel
and so, by the previous logic, so should 25% of the points lying on it.
However, it is not true that 25% of the Earth's surface lies above the 45th
parallel!
The paradox is resolved by noting that one cannot condition on a set of
measure zero such as the meridian (which is one-dimensional). This is seen
clearly by adopting the terminology just introduced:
if the distribution is uniform, the probability of finding a point in a
given surface is the ratio between the solid angle subtended by that surface
and the total solid angle:
$$
f(\theta, \phi) d\theta d\phi= \frac{d\phi d\theta \cos(\theta)}{4 \pi}
$$
from which the two marginal probabilities can be determined:
$$
f_{\phi}(\phi) = \int\limits_{-\pi/2}^{\pi/2} f(\theta, \phi) d\theta =
\int\limits_{-\pi/2}^{\pi/2} \frac{\cos(\theta)}{4 \pi} d\theta = \frac{1}{2 \pi}
$$
$$
f_{\theta}(\theta) = \int\limits_{0}^{2 \pi} f(\theta, \phi) d\phi =
\frac{\cos(\theta)}{2}
$$
both of which are constant with respect to their own variable. From this one
can conclude that, while the density along a parallel (at fixed $\theta$) is
indeed constant, the same cannot be said along a meridian.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
A function of a random variable is itself a random variable.
Consider the pdf $f(x)$ and a function $a(x)$ whose pdf $g(a)$ we want to
find. If the inverse of $a(x)$ is single-valued, calling $dS$ the region of
the $x$ for which $a \subset [a, a +da]$:
$$
g(a)da = \int\limits_{dS} dxf(x)
= \left| \int\limits_{x(a)}^{x(a +da)} f(x')dx' \right|
= \int\limits_{x(a) + \left| \frac{dx}{da} \right| da}^{x(a +da)} f(x')dx'
$$
That is:
$$
g(a) = f(x(a)) \left| \frac{dx}{da} \right|
$$
and if $x(a)$ is not single-valued, then all the intervals $dS$ of $dx$
corresponding to $da$ have to be considered.
For functions of $N$ variables, since:
$$
g(a')da' = \int \dots \int\limits_{dS} f(x_1 \dots x_N) dx_1 \dots dx_N
$$
with $dS$ the region of $x$ space enclosed between the isosurfaces:
$$
a(\vec{x}) = a' \hspace{10pt} \wedge \hspace{10pt} a(\vec{x}) = a' + da'
$$
In the case $z = x \cdot y$ one finds the Mellin convolution:
$$
g(z)dz = \int\limits_{dS} dxdy f(x, y)
= \int\limits_{-\infty}^{+\infty} dx
\int\limits_{\frac{z}{x}}^{\frac{z + dz}{x}} dy f(x, y)
$$
**\textcolor{red}{I did not understand this part...}**
## Error propagation
Consider a variable $x$ with pdf $f(x)$. The expectation value or mean
(often denoted by $\mu$) is defined as:
E[x] = \int dx f(x) x
$$
For a variable $y(x)$ with pdf $g(y)$, instead:
$$
E[y] = \int dy \cdot y \cdot g(y) = \int dx f(x) y(x)
$$
The variance (often denoted by $\sigma^2$, while $\sigma$ is the standard
deviation) is defined as:
$$
V[x] = E[(x - E[x])^2] = E[x^2] - \mu^2
$$
More generally one defines the 'algebraic moments' $E[x^n] =\mu'_n$, with
$\mu'_1 = \mu$, and the 'central moments' $E[(x -\mu)^n] = \mu_n$, with
$\mu_2 = \sigma^2$.
Two measures of correlation are also defined. The covariance:
$$
\text{cov} [x, y] = E[xy] - E[x]E[y] = E[xy] - \mu_x \mu_y
$$
which is equivalent to:
\begin{align*}
\text{cov} [x, y]
&= E[(x -\mu_x)(y -\mu_y)] \\
&= E[xy -x\mu_y -y\mu_x + \mu_x\mu_y] \\
&= E[xy] -\mu_y E[x] -\mu_x E[y] + \mu_x \mu_y \\
&= E[xy] -\mu_y \mu_x - \mu_x \mu_y + \mu_x \mu_y \\
&= E[xy] - \mu_x \mu_y
\end{align*}
Note that if $x$ and $y$ are independent, then $f(x, y) = f_x(x)f_y(y)$,
so:
$$
E[xy] = \int dx \int dy xy f(x, y) = \mu_x \mu_y
\hspace{20pt} \Longrightarrow \hspace{20pt} \text{cov} [x, y] = 0
$$
and the correlation coefficient:
$$
\rho_{xy} = \frac{\text{cov} [xy]}{\sigma_x \sigma_y}
$$
![Example of correlation between two
quantities.](images/correlazione.png){width=70%}
Even if $f(\vec{x})$ is not completely known, it is still possible to
estimate the mean value and the variance of a quantity $y(\vec{x})$ knowing
only the estimates of the mean and variance of the pdf. Let us expand in a
Taylor series:
$$
y(\vec{x}) = y(\vec{\mu}) + \sum_{i= 1}^N \left[
\frac{\partial y}{\partial x_i} \right]_{\vec{x}
= \vec{\mu}} (x_i - \mu_i)
$$
$$
\Longrightarrow \hspace{20pt} E[y] = y(\vec{\mu})
\Longleftarrow \hspace{20pt} E[x_i] = \mu_i
$$
For the variance, instead, $E[y^2]$ and $E[y]$ are needed. Again through a
Taylor expansion around the mean value:
\begin{align*}
E[y^2] &= y^2(\vec{\mu}) + 2y(\vec{\mu}) \sum_{i = 1}^{N}
\left[ \frac{\partial y} {\partial x_i} \right]_{\vec{x} = \vec{\mu}}
E[x_i - \mu_i] \\
&+ E \left[ \left( \sum_{i_1}^N
\left[ \frac{\partial y}{\partial x_i} \right]_{\vec{x} = \vec{\mu}}
(x_i - \mu_i) \right) \left( \sum_{j = 1}^N \left[ \frac{\partial y}
{\partial x_i} \right]_{\vec{x} = \vec{\mu}} (x_j - \mu_j) \right) \right]
\end{align*}
Since the second term always vanishes because $E[x_i] = \mu_i$, one is left
with:
$$
V[y] = E[y^2] - E[y]^2 = \sigma_y^2 = \sum_{i,j = 1}^N \left[
\frac{\partial y}{\partial x_i}
\frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij}
$$
where $V_{ij}$ is the covariance matrix, whose entries are:
$$
V_{ij} = E[(x_i - \mu_i)(x_j - \mu_j)] = \rho_{ij} \sigma_i \sigma_j
$$
so that, if the variables are uncorrelated, one obtains:
$$
V_{ij} = \sigma_i^2 \delta_{ij}
\hspace{20pt} \Longrightarrow \hspace{20pt}
\sigma_y^2 = \sum_{i = 1}^N \left[ \frac{\partial y}{\partial x_i}
\right]_{\vec{x} = \vec{\mu}}^2 \sigma_i^2
$$
That is, it tells how much $y$ changes as the 'initial data' $\vec{x}$ vary.
Hence, from what was seen above:
$$
\text{cov} [x_i, x_j] = E[(x_i - \mu_i)(x_j - \mu_j)] = V_{ij}
$$
More generally, given variables $\vec{y}$ depending on $\vec{x}$, it holds that:
$$
U = AVA^T \hspace{30pt} \text{con} \hspace{30pt} A_{ij} = \left[
\frac{\partial y_i}{\partial x_j} \right]_{\vec{x} = \vec{\mu}}
\hspace{30pt} \text{e con} \hspace{30pt} U_{kl} = \text{cov}[y_k, y_l]
$$
where $U$ is called the covariance matrix of the $y$.
Warning: what has been said so far, describing how the errors on $\vec{x}$
affect $y$, only holds if $y$ is linear in the $x$. So in cases like
$y(x) = 1/x$ this argument cannot be applied.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Example
Consider:
$$
y = x_1 - x_2
$$
$$
\text{con} \hspace{30pt} \mu_1 = \mu_2 = 10 \hspace{30pt} \wedge
\hspace{30pt} \sigma_1 = \sigma_2 = 1
$$
then we have $y = y(x_1, x_2)$, so:
$$
E[y] = y(\mu_1, \mu_2) = 10 - 10 = 0
$$
$$
V[y] = \sum_{i, j = 1}^2 \left[ \frac{\partial y}{\partial x_i}
\frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij} =
1 \cdot V_{11} + 1 \cdot V_{22} -1 \cdot 2 \cdot V_{12}
$$
If the correlations are zero, then $V_{12} = 0 \Longrightarrow V[y] = 2
\Longrightarrow \sigma_y = 1.4$; if instead $x_1$ and $x_2$ are correlated,
in the case of unit correlation coefficient one gets $V[y] = 0$. So the
correlation can change things considerably.
\begin{tikzpicture}
\draw [thick, pink] (0,0) -- (1,0);
\draw [red] (1.5,0) circle [radius=0.1];
\draw [thick, pink] (2,0) -- (3,0);
\draw [red] (3.5,0) circle [radius=0.1];
\draw [thick, pink] (4,0) -- (5,0);
\draw [red] (5.5,0) circle [radius=0.1];
\draw [thick, pink] (6,0) -- (7,0);
\draw [red] (7.5,0) circle [radius=0.1];
\draw [thick, pink] (8,0) -- (9,0);
\draw [red] (9.5,0) circle [radius=0.1];
\draw [thick, pink] (10,0) -- (11,0);
\draw [red] (11.5,0) circle [radius=0.1];
\draw [thick, pink] (12,0) -- (13,0);
\draw [red] (13.5,0) circle [radius=0.1];
\draw [thick, pink] (14,0) -- (15,0);
\end{tikzpicture}
### Systematic errors
Consider two quantities $x_1$ and $x_2$ with a common systematic error
$S$:
\begin{align*}
&x_1 = x_{1_0} + x_{1_s} \\
&x_2 = x_{2_0} + x_{2_s}
\end{align*}
the terms with subscript $0$ are independent of each other, while the other
two are correlated. Since errors add in quadrature, the covariance matrix is
therefore:
$$
\text{cov}[x_1, x_2] = S^2 \hspace{30pt} \Longrightarrow \hspace{30pt}
V = \begin{pmatrix}
\sigma_1^2 + S^2 & S^2\\
S^2 & \sigma_2^2 + S^2
\end{pmatrix}
$$
because:
\begin{align*}
\text{cov}[x_1, x_2] &= E[x_1 x_2] - E[x_1]E[x_2] = \\
&= E[(x_{1_0} + x_{1_s})(x_{2_0} + x_{2_s})] - E[x_{1_0} + x_{1_s}]
E[x_{2_0} + x_{2_s}] = \\
&= E[x_{1_0}x_{2_0}] + E[x_{1_0}x_{2_s}] + E[x_{1_s}x_{2_0}]
+ E[x_{1_s}x_{2_s}] + \\
&\hspace{13pt} - E[x_{1_0}]E[x_{2_0}] - E[x_{1_0}]E[x_{2_s}]
- E[x_{1_s}]E[x_{2_0}] - E[x_{1_s}]E[x_{2_s}] = \\
&= \mu_1 \mu_2 + \mu_1E[x_{2_s}] + E[x_{1_s}]\mu_2 +E[x_{1_s}x_{2_s}] + \\
&\hspace{13pt} - \mu_1 \mu_2 - \mu_1 E[x_{2_s}] - E[x_{1_s}] \mu_2
- E[x_{1_s}] E[x_{2_s}] = \\
&= E[x_{1_s} x_{2_s}] - E[x_{1_s}] E[x_{2_s}] = \text{cov}[x_{1_s}, x_{2_s}]
\end{align*}
### Orthogonal transformation
It can be useful to perform a change of variables that makes the covariance
matrix of the $y$ diagonal.
Consider the usual variables $y_i$, linearly related to the $x_j$:
$$
  y_i = \sum_j A_{ij} x_j
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  U_{ij} = \sum_{k,l} A_{ik} V_{kl} A^T_{lj}
$$
The task is therefore to diagonalize the matrix $U$: the solution is simple,
the rows of $A$ are the eigenvectors of $V$. This concept is useful when
choosing which coordinates to work with.
If we are working with the coordinates $\vec{x} = (x, y)$, the covariance
matrix is:
$$
  V = \begin{pmatrix}
    \sigma_1^2 & \rho \sigma_1 \sigma_2 \\
    \rho \sigma_1 \sigma_2 & \sigma_2^2
  \end{pmatrix}
$$
Let us diagonalize it. First of all, find the eigenvalues from
$\det(V - \lambda I) = 0$:
$$
  \begin{vmatrix}
    \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\
    \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda
  \end{vmatrix}
  = (\sigma_1^2 - \lambda) (\sigma_2^2 - \lambda) - \rho^2 \sigma_1^2
  \sigma_2^2 = 0
$$
\begin{align*}
  &\Longrightarrow \hspace{30pt}
  \lambda^2 -(\sigma_1^2 + \sigma_2^2) \lambda + \sigma_1^2 \sigma_2^2
  (1 - \rho^2) = 0 \\
  &\Longrightarrow \hspace{30pt}
  \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm
    \sqrt{\sigma_1^4 + \sigma_2^4 + 2 \sigma_1^2 \sigma_2^2
    - 4 \sigma_1^2 \sigma_2^2 + 4\rho^2 \sigma_1^2 \sigma_2^2}}{2} \\
  &\Longrightarrow \hspace{30pt}
  \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm
    \sqrt{(\sigma_1^2 - \sigma_2^2)^2 + 4 \rho^2 \sigma_1^2 \sigma_2^2}}{2}
\end{align*}
and now compute the eigenvectors:
$$
  (V - \lambda I)\vec{r} = 0
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  \begin{pmatrix}
    \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\
    \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda
  \end{pmatrix}
  \begin{pmatrix}
    r_1 \\
    r_2
  \end{pmatrix}
  =
  \begin{pmatrix}
    0 \\
    0
  \end{pmatrix}
$$
$$
  \Longrightarrow \hspace{30pt}
  \begin{cases}
    (\sigma_1^2 - \lambda)r_1 + \rho \sigma_1 \sigma_2 r_2 = 0 \\
    \rho \sigma_1 \sigma_2 r_1 + (\sigma_2^2 - \lambda)r_2 = 0
  \end{cases}
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  r_1 = \frac{\rho \sigma_1 \sigma_2}{\lambda - \sigma_1^2} \, r_2
$$
and so on for each of the two eigenvalues $\lambda_{1,2}$.
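As a concrete illustration, the diagonalization can also be carried out
numerically with GSL; the following is a minimal sketch in which the values of
$\sigma_1$, $\sigma_2$ and $\rho$ are arbitrary placeholders:
```c
#include <stdio.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_eigen.h>

int main(void) {
  // covariance matrix V with arbitrary example values
  double s1 = 1.0, s2 = 2.0, rho = 0.5;
  double data[] = { s1*s1,     rho*s1*s2,
                    rho*s1*s2, s2*s2 };
  gsl_matrix_view V = gsl_matrix_view_array(data, 2, 2);

  gsl_vector* eval = gsl_vector_alloc(2);     // eigenvalues
  gsl_matrix* evec = gsl_matrix_alloc(2, 2);  // eigenvectors (as columns)
  gsl_eigen_symmv_workspace* w = gsl_eigen_symmv_alloc(2);

  // note: gsl_eigen_symmv destroys part of its input matrix;
  // the columns of evec are the eigenvectors, so A is their transpose
  gsl_eigen_symmv(&V.matrix, eval, evec, w);

  for (size_t i = 0; i < 2; i++)
    printf("lambda_%zu = %g\n", i + 1, gsl_vector_get(eval, i));

  gsl_eigen_symmv_free(w);
  gsl_vector_free(eval);
  gsl_matrix_free(evec);
  return 0;
}
```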

View File

@ -0,0 +1,466 @@
# Probability distributions
## Binomial distribution
![Binomial distribution: p = 0.3, n = 10,
N = 1000.](images/binomiale.png)
Consider $N$ trials of an experiment whose only possible outcomes are success
or failure, and let the probability of each success be $p$. Call $n$ the
number of successes. The probability of obtaining $n$ successes out of $N$
total trials is then given by:
$$
  P(N, n, p) = \binom{N}{n} p^n (1 - p)^{N -n}
$$
because:
- the probability that a single success occurs is $p$;
- the probability that $n$ successes occur is given by the product of all
  their probabilities: $p^n$;
- the same argument holds for the failures: each one has probability $(1 - p)$
  and $N - n$ of them occur;
- the binomial term accounts for all the possible permutations: the idea is
  simple if one imagines placing successes and failures on a grid with $N$
  available slots, a success being a white marble and a failure a black one.
  In how many slots can the first success be placed? $N$. The second? $N - 1$.
  The third? And so on, until all the successes have been placed: they occupy
  $n$ slots, the last of which was chosen among $N - (n - 1)$ slots, hence:
$$
  N \cdot (N -1) \dots (N -n + 1) =
  \frac{N \cdot (N -1) \dots (N -n + 1) \cdots 2 \cdot 1}
  {(N -n) \cdot (N - n -1) \cdots 2 \cdot 1} = \frac{N!}{(N -n)!}
$$
  however, all the orderings of the successes among themselves are
  equivalent, so this must be further divided by $n!$ for the same reason.
  Hence:
$$
  \binom{N}{n} = \frac{N!}{n! (N -n)!}
$$
For the normalization it holds that:
$$
  \sum_{n = 0}^N P(N, n, p) = 1
$$
An expected value and a variance can be defined:
\begin{align*}
  &E[n] = \sum_{n = 0}^N n P(N, n, p) = Np \\
  &V[n] = E[n^2] - E[n]^2 = Np(1 -p)
\end{align*}
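For instance, with $N = 10$ trials and $p = 0.3$, the probability of exactly
$n = 3$ successes is:
$$
  P(10, 3, 0.3) = \binom{10}{3} (0.3)^3 (0.7)^7
  \approx 120 \cdot 0.027 \cdot 0.082 \approx 0.27
$$
with $E[n] = 3$ and $V[n] = 2.1$.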
## Multinomial distribution
This is the generalization of the previous pdf to the case of $m$ possible
outcomes, each occurring with its own probability $p_i$. For instance, it is
the case of a histogram when one asks for the probability of finding it with
exactly a given set of entries.
The probability is consequently given by:
$$
  P(N, \vec{n}, \vec{p}) = \frac{N!}{n_1! n_2! \dots n_m!}
  p_1^{n_1} p_2^{n_2} \dots p_m^{n_m}
$$
and the expected value and variance turn out to be:
\begin{align*}
  &E[n_i] = Np_i \\
  &V[n_i] = Np_i (1 -p_i)
\end{align*}
### Law of large numbers
The law of large numbers states that the sample mean of a variable $x$, as the
number of trials $N$ tends to infinity, gets arbitrarily close to the true
mean. This law can be used to estimate the probabilities $P_i$ of a
multinomial distribution through the frequencies with which the different
outcomes occur.
Consider the frequency $f_j$ with which the $j$-th outcome occurs in a set of
$N$ trials:
$$
  f_j = \frac{1}{N} \sum_{i = 1}^N x_i = \frac{x_j}{N}
$$
where $x_i$ is a variable equal to 1 if the $j$-th outcome occurred in the
$i$-th trial and 0 if another one did, and $x_j$ is therefore the number of
times the $j$-th outcome occurred.
Unlike $P_j$, $f_j$ is a random variable, since it depends on $x_j$, which is
a sum of random variables. Its expected value is:
$$
  E(f_j) = \frac{E(x_j)}{N} = P_j
$$
and its variance:
$$
  V \left[ f_j \right] = V \left[ \frac{x_j}{N} \right] =
  E \left[ \frac{x_j^2}{N^2} \right] - \left( E \left[ \frac{x_j}{N}
  \right] \right)^2 = \frac{1}{N^2} V \left[ x_j \right]
$$
Now, $x_j$ is exactly the $n_j$ of the multinomial distribution, therefore:
$$
  V[x_j] = NP_j (1 - P_j)
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  V[f_j] = \frac{1}{N} P_j (1 - P_j) \leqslant \frac{1}{N}
$$
## Poisson distribution
![Poisson distribution: $\nu$ = 1,
N = 1000.](images/poisson.png)
Taking the binomial distribution in the limit in which the number of trials
tends to infinity and the probability of success tends to zero (with the
constraint $N \cdot p = \text{const} = \nu$), one obtains the Poisson
distribution:
$$
  P(n, \nu) = \frac{\nu^n}{n!} e^{-\nu}
$$
with:
\begin{align*}
  &E[n] = \nu \\
  &V[n] = \nu
\end{align*}
where $\nu = Np$ is the mean number of successes.
When $N$ is so large that it is not even defined (as when observing the decays
of an atom: the trials are the observations, which are effectively
continuous), it is no longer possible to define a probability of success for
each trial (it would be zero, which is exactly why the Poisson distribution is
defined through these two limits), and $\nu$ must be defined in another way.
Indeed the Poisson distribution, being a limit in $N$ and $p$, no longer
depends explicitly on these two quantities.
When observing the decay of an atom, one usually proceeds as follows: the
observation time is divided into intervals (which amounts to splitting the
infinitely many trials into subsets of infinitely many trials) and the number
of successes occurring in each interval is counted. The experiment is thus
effectively split into smaller experiments, from which a frequentist mean
number of successes can be deduced. For example:
--------------------------------------------------------
# successes       0     1     2    3    4   5    6    7
------------- ------ ----- ----- ---- ---- --- ---- ----
# intervals     1042   860   307   78   15   3    0    0
Poisson         1064   823   318   82   16   2  0.3  0.3
--------------------------------------------------------
Table: Decay of an atom. The observation time was divided into intervals and
the number of successes observed in each interval was counted.
\newpage
The mean number of events is:
$$
  \frac{1042 \cdot 0 + 860 \cdot 1 + 307 \cdot 2 + 78 \cdot 3 + 15 \cdot 4
  + 3 \cdot 5 + 0 \cdot 6 + 0 \cdot 7}{1042 + 860 + 307 + 78 + 15 + 3
  + 0 + 0} = 0.77
$$
from which the Poisson values reported in the table above can be computed.
## Uniform distribution
![Uniform distribution: $a = 0$, $b = 100$.](images/uniform.png)
A pdf in which all values have the same probability of occurring is called
uniform:
$$
  P (x, a, b) = \begin{cases}
    \frac{1}{b - a} \hspace{30pt} a \leqslant x \leqslant b \\
    0 \hspace{42pt} \text{elsewhere}
  \end{cases}
$$
with:
\begin{align*}
  E[x] = \frac{1}{2} (a + b) \\
  V[x] = \frac{1}{12} (b - a)^2
\end{align*}
If a variable is distributed according to a pdf $f(x)$, its cumulative is
uniformly distributed. Intuitively this is easy to see: picture the graph of
the pdf; every time a number is drawn, it falls at a random point of the area
under the pdf, leaving a random area to its left (which is the value of the
cumulative).
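More formally, calling $F$ the cumulative of $f$ and assuming it is
invertible, for $u = F(x)$ and $a \in [0, 1]$:
$$
  P(u \leqslant a) = P(F(x) \leqslant a) = P(x \leqslant F^{-1}(a))
  = F(F^{-1}(a)) = a
$$
which is exactly the cumulative of a uniform distribution on $[0, 1]$.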
## Gaussian distribution and the CLT
![Gaussian distribution: $\mu = 30$,
$\sigma = 5$.](images/gaussian.png)
The Gaussian (or normal) distribution is defined as:
$$
  P (x, \mu, \sigma) = \frac{1}{\sqrt{2 \pi} \sigma}
  e^{-\frac{(x - \mu)^2}{2 \sigma^2}}
$$
with:
\begin{align*}
  E[x] = \mu \\
  V[x] = \sigma^2
\end{align*}
The error function, which is essentially the cumulative of this pdf, is very
useful in the laboratory and its values are tabulated.
The central limit theorem states that, given $n$ independent random variables
with variances $\sigma_i^2$, in the limit $n \rightarrow + \infty$ their sum
follows a Gaussian distribution whose mean is the sum of the means and whose
variance is the sum of the variances.
This can be exploited to generate random numbers that follow a normal
distribution.
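As a minimal sketch of this idea (not the method used for the plots in these
notes), one can sum twelve uniform variates, whose total has mean 6 and unit
variance:
```c
#include <stdio.h>
#include <gsl/gsl_rng.h>

int main(void) {
  gsl_rng* r = gsl_rng_alloc(gsl_rng_taus);
  for (int i = 0; i < 10; i++) {
    // sum of 12 U(0,1) variates: mean 6, variance 12*(1/12) = 1
    double z = 0;
    for (int j = 0; j < 12; j++)
      z += gsl_rng_uniform(r);
    printf("%f\n", z - 6);   // approximately standard normal
  }
  gsl_rng_free(r);
  return 0;
}
```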
For large values of $\nu$ (namely already a few units), the Poisson
distribution tends to the Gaussian with $\mu = \nu$ and $\sigma = \sqrt{\nu}$.
Similarly, for $N \rightarrow + \infty$ the binomial tends to the Gaussian
with $\mu = Np$ and $\sigma = \sqrt{Np (1 - p)}$.
## Multivariate Gaussian distribution
In the multidimensional case, the pdf for the vector $\vec{x} = (x_1, \dots,
x_n)$ is given by:
$$
  f(\vec{x}, \vec{\mu}, V) = \frac{1}{(2 \pi)^{n/2} \mid V \mid^{1/2}}
  \exp \left[ - \frac{1}{2} (\vec{x} - \vec{\mu})^T V^{-1} (\vec{x}
  - \vec{\mu}) \right]
$$
with $E[x_i] = \mu_i$ and $\text{cov}[x_i, x_j] = V_{ij}$.
## Weighted mean
Measurements with different errors must be combined through the weighted
mean:
$$
  E[x] = \frac{\sum_{i = 1}^N \frac{x_i}{\sigma_i^2}}{\sum_{i = 1}^N
  \frac{1}{\sigma_i^2}}
$$
$$
  V[x] = \frac{1}{\sum_{i = 1}^N \frac{1}{\sigma_i^2}}
$$
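For instance, combining the two measurements $10 \pm 1$ and $12 \pm 2$:
$$
  E[x] = \frac{10/1 + 12/4}{1/1 + 1/4} = \frac{13}{1.25} = 10.4
  \hspace{30pt} \text{and} \hspace{30pt}
  \sigma = \frac{1}{\sqrt{1.25}} \approx 0.9
$$
so the more precise measurement dominates the combination.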
But it makes no sense to average values that are not compatible!
## Breit-Wigner distribution
![Breit-Wigner distribution: $x_0 = 20$,
$\Gamma = 10$.](images/wigner.png)
Some distributions have ill-defined moments and for this reason are called
"pathological". An example is the Breit-Wigner distribution:
$$
  f (x, \Gamma, x_0) = \frac{1}{\pi} \cdot
  \frac{\Gamma/2}{\Gamma^2/4 + (x - x_0)^2}
$$
A special case is the one with $x_0 = 0$ and $\Gamma = 2$, in which it is
called the Cauchy distribution:
$$
  f(x, 2, 0) = f(x) = \frac{1}{\pi} \cdot \frac{1}{1 + x^2}
$$
The mean and the variance are not defined, because the integrals diverge. It
is better to use the mode and the full width at half maximum, which are $x_0$
and $\Gamma$ respectively.
In the *GSL* library the pdf is written in this form:
$$
  p(x) = \frac{1}{a \pi (1 + (x/a)^2)}
  \hspace{50pt} \Longrightarrow \hspace{50pt}
  a = \Gamma/2
$$
## Landau distribution
For a charged particle with $\beta = v/c$ traversing a thin layer of material
of thickness $d$, the energy loss $\Delta$ follows the Landau distribution:
![Landau distribution.](images/landau.png)
It has a very complicated form involving integrals, logarithms... In this
case too the algebraic moments cannot be defined, because the integral
diverges.
## Chi-squared distribution
![$\chi^2$ distribution: $n = 5$.](images/chi2.png)
Given $N$ quantities, each following its own standard Gaussian distribution,
the sum of their squares follows the $\chi^2$ distribution.
Formally it is defined as:
$$
  f(z, n) = \frac{1}{2^{n/2} \Gamma (n/2)} z^{n/2 - 1}e^{-z/2}
$$
\begin{align*}
  &E[z] = n \\
  &V[z] = 2n
\end{align*}
where $z$ is the variable and $n$ is the number of degrees of freedom.
When an experiment samples $y(x)$ and the data are then fitted with a
theoretical function $f(x)$, each value $y(x)$ is assumed to be distributed
as a Gaussian around its true value, which is assumed to be $f(x)$: the
residuals, namely the differences $R(x) = y(x) - f(x)$, are therefore still
Gaussian, but centered at zero. The chi-squared is defined as:
$$
  \sum_i \frac{[y(x_i) - f(x_i)]^2}{f(x_i)}
  \hspace{50pt} \text{or} \hspace{50pt}
  \sum_i \frac{[y(x_i) - f(x_i)]^2}{\sigma_i^2}
$$
It follows that this quantity indeed follows the chi-squared distribution.
In the *GSL* library the $\chi^2$ distribution corresponds to the gamma
distribution with $a = n/2$ and $b = 2$.
## Exponential distribution
![Exponential distribution: $\lambda = 3$.](images/exponential.png)
$$
  f(x, \lambda) = \lambda e^{-\lambda x}
$$
\begin{align*}
  &E[x] = \frac{1}{\lambda} \\
  &V[x] = \frac{1}{\lambda^2}
\end{align*}
## Student's t distribution
![Student's t distribution: $\nu = 3$.](images/student.png)
It is the distribution followed by the mean of a Gaussian population when it
is estimated from a small sample without knowing the standard deviation.
If $y_1$ follows a Gaussian and $y_2$ a $\chi^2$ with $\nu$ degrees of
freedom, then $x$ follows the Student's t distribution:
$$
  x = \frac{y_1}{\sqrt{\frac{y_2}{\nu}}}
$$
which is defined as:
$$
  f(x, \nu) = \frac{\Gamma \left( \frac{\nu + 1}{2} \right)}{\sqrt{\nu \pi}
  \Gamma \left( \frac{\nu}{2} \right)} \left( 1 + \frac{x^2}{\nu}
  \right)^{- \frac{\nu + 1}{2}}
$$
\begin{align*}
  &E[x] = 0 \\
  &V[x] =
  \begin{cases}
    \frac{\nu}{\nu - 2} \hspace{15pt} \nu > 2 \\
    \infty \hspace{30pt} \nu \leqslant 2
  \end{cases}
\end{align*}
## Fisher-Snedecor distribution
![Fisher distribution: $n = 3$, $m = 4$.](images/fischer.png)
Given two samples $\vec{x}$ and $\vec{y}$ of variables following their
respective Gaussians, the Fisher-Snedecor distribution can be used to compare
the two variances. If the first sample contains $n$ variables and the second
$m$, then the Fisher distribution with $n-1$ and $m-1$ degrees of freedom
gives the distribution of the ratio:
$$
  \frac{S^2_x / S^2_y}{\sigma^2_x / \sigma^2_y} =
  \frac{S^2_x / \sigma^2_x}{S^2_y / \sigma^2_y}
$$
with:
$$
  S^2_x = \frac{1}{n -1} \sum_{i = 1}^n (x_i - \bar{x})^2
  \hspace{50pt} \text{,} \hspace{50pt}
  S^2_y = \frac{1}{m -1} \sum_{i = 1}^m (y_i - \bar{y})^2
$$
which is therefore the ratio of two quantities distributed according to the
$\chi^2$.
The definition of the pdf is complicated...
## Characteristic function
The characteristic function of a variable $x$ distributed according to $f(x)$
is defined as the Fourier transform of the latter:
$$
  \hat{f}(k) = E[e^{ikx}] = \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx}
$$
As with any transform, all the information contained in the original function
is also contained in the characteristic function, since the former can be
recovered by computing the inverse transform:
$$
  f(x) = \frac{1}{2 \pi} \int\limits_{-\infty}^{+\infty} dk \hat{f}(k) e^{-ikx}
$$
The characteristic function is useful to simplify some computations. If
$x_1 \dots x_N$ are independent random variables:
\begin{align*}
  z = \sum_{i = 1}^N x_i
  \hspace{20pt} \Longrightarrow \hspace{20pt}
  \hat{f}_z(k) &= \int dx_1 \dots dx_N f_1(x_1)
    \dots f_N(x_N) e^{ik \sum_{i=1}^N x_i} = \\
  &= \int dx_1 f_1(x_1) e^{ikx_1} \dots \int dx_N f_N(x_N) e^{ikx_N} = \\
  &= \hat{f}_1(k) \dots \hat{f}_N(k)
\end{align*}
Moreover it also holds that:
$$
  \frac{d^m}{dk^m} \hat{f}(k) \big|_{k = 0} =
  \frac{d^m}{dk^m}
  \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} \big|_{k = 0} =
  i^m \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} x^m \big|_{k = 0} =
  i^m \mu_m = i^m E[x^m]
$$
which is the algebraic moment of order $m$.
For example, in the case of two independent Gaussian variables $x$ and $y$,
one can immediately see that their sum is a Gaussian with $\mu = \mu_x +
\mu_y$ and $\sigma^2 = \sigma_x^2 + \sigma_y^2$. The same holds for the
Poisson distribution.
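Indeed, the characteristic function of a Gaussian is $\hat{f}(k) =
e^{i \mu k - \sigma^2 k^2 / 2}$, so for the sum of the two variables:
$$
  \hat{f}_{x + y}(k) = \hat{f}_x(k) \, \hat{f}_y(k)
  = e^{i (\mu_x + \mu_y) k - (\sigma_x^2 + \sigma_y^2) k^2 / 2}
$$
which is again the characteristic function of a Gaussian with mean $\mu_x +
\mu_y$ and variance $\sigma_x^2 + \sigma_y^2$.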
It is also easy to see how the pdfs behave in the various limits discussed
above: sending $N \rightarrow \infty$ in the characteristic function of a
binomial while keeping the mean fixed yields the characteristic function of a
Poisson distribution. The central limit theorem can also be proved this way.

View File

@ -0,0 +1,281 @@
# BPH
## Descriptive statistics
Two types of data analysis can be distinguished: "model independent"
(descriptive statistics) and "model dependent", which relies on a theoretical
model. In this chapter we study the former.
Some typical topics of descriptive statistics are:
- tests to establish whether two datasets come from the same distribution
  $f(x)$;
- tests to establish the correlation between two datasets (hypothesis
  testing);
- methods to determine the moments of a distribution;
- methods for smoothing experimental data.
### Moments of a distribution
Defining the moments of a distribution makes sense when the events that make
it up tend to cluster around a central value.
If the data are discrete, the following definitions are used.
Sample mean: if $n_j$ is the frequency with which each value $x_j$ occurs:
$$
  \bar{x} = \frac{1}{N} \sum_{i = 1}^N x_i
  = \frac{1}{N} \sum_{j = 1}^{N'} n_j x_j
$$
Central moment of order $r$:
$$
  V_r = \frac{1}{N} \sum_{j=1}^{N'} n_j (x_j - \bar{x})^r
$$
There are also two other "central" values which, in the continuous case,
become:
median:
$$
  \int\limits_{-\infty}^{x_{\text{med}}} dx \, f(x) = \frac{1}{2}
$$
mode: the value at which $f(x)$ is maximum, namely the value that occurs with
the highest frequency.
If the pdf has very long tails, the integrals may not converge and these
values may be undefined. For this reason the median is a more robust estimator
of the central value than the mean.
The central moments describe how the data are spread around the central
value: how "dispersed" they are. The first of these is the variance (note the
Bessel correction, by which $N \rightarrow N -1$ in the denominator):
$$
  V = \frac{1}{N - 1} \sum_{i = 1}^N (x_i - \bar{x})^2
$$
The skewness (literally "asymmetry") describes how unevenly the values are
distributed around the mean:
$$
  \gamma = \frac{1}{\sigma^3} E[(x - \bar{x})^3]
$$
where $\sigma$ is the standard deviation.
![Skewness.](images/skewness.png){width=12cm}
How much a pdf is more or less peaked than a Gaussian is given by the
kurtosis ("curved", "arching"):
$$
  K = \frac{1}{\sigma^4} E[(x -\bar{x})^4] -3
$$
![Kurtosis.](images/kurtosis.png){width=8cm}
For roughly Gaussian distributions there is an estimate of the standard
deviations of these parameters:
\begin{align*}
  &V(\sigma^2) = \frac{2 \sigma^4}{N} \\
  &V(\gamma) \approx \frac{15}{N} \\
  &V(K) \approx \frac{96}{N}
\end{align*}
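In practice these estimators are already provided by GSL; a minimal sketch,
with a placeholder sample:
```c
#include <stdio.h>
#include <gsl/gsl_statistics.h>

int main(void) {
  // a small sample, just as a placeholder
  double x[] = { 2.1, 3.4, 2.9, 3.1, 2.5, 3.8, 2.7, 3.0 };
  size_t n = sizeof(x) / sizeof(x[0]);

  double mean = gsl_stats_mean(x, 1, n);
  // variance with the Bessel correction (divides by N-1)
  double var  = gsl_stats_variance(x, 1, n);
  double skew = gsl_stats_skew(x, 1, n);
  double kurt = gsl_stats_kurtosis(x, 1, n);  // excess kurtosis (K - 3)

  printf("mean = %g\nvariance = %g\nskewness = %g\nkurtosis = %g\n",
         mean, var, skew, kurt);
  return 0;
}
```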
### Data smoothing
Data smoothing becomes necessary when the data are corrupted by random noise.
Usually one averages over windows of contiguous data points. Averaging,
however, inevitably lowers the value at the peaks: most of the time the area
under a peak and its position are preserved, but not its height.
One of the most effective smoothing methods is the Savitzky-Golay filter.
The signal is analyzed in groups of points, each centered on $Y_i$, with $i$
running over the whole array. Call $Y_0$ the central point and $Y_N$ and
$Y_{-N}$ the endpoints. $Y_0$ is replaced by a value computed as explained
below. During this process the values $Y_i$ are not overwritten with the
smoothed values $f_i$: a parallel array is created, which will be the final
smoothed one.
The values $f_i$ are obtained by fitting the points in the window with a
polynomial of arbitrary degree $g$, $P_g(j)$; the polynomial is then
evaluated at zero and its value replaces $Y_0$.
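A minimal sketch of the idea, using the tabulated Savitzky-Golay convolution
coefficients for a 5-point window and a quadratic polynomial (the input data
here are placeholders, and the window edges are simply left untouched):
```c
#include <stdio.h>

/* Smooth `in` into `out` with a 5-point, degree-2 Savitzky-Golay filter.
 * The convolution coefficients (-3, 12, 17, 12, -3)/35 are the tabulated
 * ones for this window and degree. */
void savgol5(const double* in, double* out, size_t n) {
  const double c[] = { -3, 12, 17, 12, -3 };
  for (size_t i = 0; i < n; i++) {
    if (i < 2 || i + 2 >= n) { out[i] = in[i]; continue; }
    double s = 0;
    for (size_t j = 0; j < 5; j++)
      s += c[j] * in[i - 2 + j];
    out[i] = s / 35.0;
  }
}

int main(void) {
  double y[] = { 0, 1, 4, 9, 16, 25, 36, 49, 64, 81 };  // placeholder data
  double f[10];
  savgol5(y, f, 10);
  for (size_t i = 0; i < 10; i++)
    printf("%g -> %g\n", y[i], f[i]);
  return 0;
}
```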
### Hypothesis testing
Suppose we want to show that a certain random variable $x$ follows a pdf
$f(x)$: this is called the null hypothesis $H_0$. If $f(x)$ does not depend on
any parameter the hypothesis is said to be simple, otherwise it is called
composite. Besides the null hypothesis, one or more alternative hypotheses
$H_1$, $H_2$... may be considered.
Consider the simple case of a single alternative hypothesis $H_1$ which, in
turn, proposes a pdf. To quantify the agreement between the data and the null
hypothesis one builds a test statistic $t(x)$, namely a variable depending on
$\vec{x}$, introduced to decide whether the null hypothesis holds or not
(think of $t_{\text{cut}}$ or the discrepancy...), which in turn follows two
pdfs, one predicted by $H_0$ and one by $H_1$.
\begin{center}
\begin{tikzpicture}
\draw [thick, ->] (0,0) -- (12,0);
\draw [thick, ->] (0,0) -- (0,6);
\node [left] at (0,6) {g(t)};
\node [below] at (12,0) {t};
\draw [thick, dashed] (6,0) -- (6,6);
\node [below] at (6,0) {$t_{\text{cut}}$};
\draw [thick, blue] (0,0) to [out = 20, in = 180] (3,5)
to [out = 0, in = 180] (8,0);
\draw [thick, red] (4,0) to [out = 20, in = 180] (7,3)
to [out = 0, in = 180] (11,0);
\node [blue] at (2.5, 2) {$g(t \, | \, H_0)$};
\node [red] at (8, 1) {$g(t \, | \, H_1)$};
\end{tikzpicture}
\end{center}
The 'significance of the test' $\alpha$ is defined as (while $(1 -
\alpha)$ is the 'confidence level of the test', or 'efficiency'):
$$
  \alpha = \int\limits_{t_{\text{cut}}}^{+ \infty} dt \, g(t \, | \, H_0)
$$
while $\beta$ is the probability of an error of the second kind ($(1 -
\beta)$ is called the 'power', or 'purity', of the test):
$$
  \beta = \int\limits_{-\infty}^{t_{\text{cut}}} dt \, g(t \, | \, H_1)
$$
One defines:
- error of the first kind: rejecting $H_0$ when it is actually true (with
  probability $P_1$);
- error of the second kind: accepting $H_0$ when it is actually false (with
  probability $P_2$).
For $t < t_{\text{cut}}$, with $t_{\text{cut}}$ chosen arbitrarily, the null
hypothesis is accepted. It follows that $\alpha = P_1$ and $\beta = P_2$.
The best choice of $t_{\text{cut}}$ is the one giving the maximum purity for
a given efficiency. In the 1D case it is obtained automatically (see the
example below), otherwise it can be complicated.
Let us work out an example applying the Neyman-Pearson lemma.
Imagine having the values $\vec{x} = (x_1 ... x_N)$ drawn from a normal
distribution whose standard deviation $\sigma$ is known, and having to decide
between two mean values $\mu_0$ and $\mu_1$, that is:
$$
  H_0 = [\mu = \mu_0]
  \hspace{100pt}
  H_1 = [\mu = \mu_1]
$$
The pdfs predicted by $H_0$ and $H_1$ are then two Gaussians, each centered
on its own mean. According to the lemma above, we must compute the
likelihood, namely the product over all the measurements $x_i$ of the pdf
predicted by a hypothesis, evaluated at $x_i$:
$$
  L(\vec{x}, \mu, \sigma) = \prod_{i=1}^N N(x_i, \mu, \sigma) =
  \frac{1}{(\sigma \sqrt{2 \pi})^N} \prod_{i=1}^N
  e^{-\frac{(x_i - \mu)^2}{2 \sigma^2}}
$$
where $N(x_i, \mu, \sigma)$ denotes the normal pdf. This is, in other words,
the probability of having obtained those measurements under the hypothesis
considered. We would therefore like $L(H_0) \gg L(H_1)$. To this end one
looks at the ratio $r$ prescribed by the lemma:
$$
  r = \frac{L(\vec{x} \, | \, H_0)}{L(\vec{x} \, | \, H_1)}
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  \ln{r} = \ln{L(\vec{x}, \mu_0, \sigma)} - \ln{L(\vec{x}, \mu_1, \sigma)}
$$
which must in turn be very large. The region in which the null hypothesis is
accepted is in fact the one with $r > c$, where $c$ still has to be chosen.
$$
  \ln{r} = R(\vec{x}) > \ln{c}
  \hspace{30pt} \Longrightarrow \hspace{30pt}
  \bar{x} > (\text{or} <) \, g(c) = t_{\text{cut}}
$$
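Indeed, writing out the two Gaussian log-likelihoods, the quadratic terms in
the $x_i$ cancel and $\ln{r}$ depends on the data only through the sample
mean:
$$
  \ln{r} = \frac{\mu_0 - \mu_1}{\sigma^2} \sum_{i=1}^N x_i
  + \frac{N (\mu_1^2 - \mu_0^2)}{2 \sigma^2}
$$
so the condition $r > c$ is equivalent to a cut on $\bar{x}$: $\bar{x} >
t_{\text{cut}}$ if $\mu_0 > \mu_1$, or $\bar{x} < t_{\text{cut}}$ otherwise.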
To choose $c$, one imposes:
$$
  P_1 = \alpha = Pr(\bar{x} > (\text{or} <) \, t_{\text{cut}} \,
  | \, H_0)
$$
So what can actually be chosen arbitrarily, in the end, is $\alpha$, which is
usually set to $5 \%$.
### Fisher linear discriminant
How can $f(t \, | \, H_0)$ and $f(t \, | \, H_1)$ be defined? One can make
some *ansatz* about the form of $t$. Fisher's model uses a linear function:
$$
  t = \sum_{i = 1}^N a_i x_i = \vec{a} \cdot \vec{x}
$$
where the vector $\vec{a}$ is to be determined. Define the set of means and
"variances" of the measured variables as follows: $\mu_{k, i}$ is the mean of
the $i$-th variable under the $k$-th hypothesis:
$$
  \mu_{k,i} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N
  \, x_i f(\vec{x} \, | \, H_k)
$$
where $k$ can therefore be 0 or 1, while:
$$
  (V_k)_{i,j} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N
  \, (x_i - \mu_{k,i})(x_j - \mu_{k,j}) f(\vec{x} \, | \, H_k)
$$
It can be shown that, for Gaussian pdfs, the best test statistic (namely the
one that maximizes $1 - \beta$ for a given $\alpha$) is the one with:
$$
  \vec{a} = W^{-1} (\vec{\mu}_0 - \vec{\mu}_1)
  \hspace{40pt} \text{with} \hspace{40pt}
  W_{i,j} = (V_0 + V_1)_{i,j}
$$
In general an offset is also introduced:
$$
  t = a_0 + \sum_{i = 1}^N a_i x_i
$$
### Neural networks
It can be shown that, when the Fisher linear discriminant is used, given the
data $\vec{x}$ the probability that $H_0$ is correct is:
$$
  P(H_0 | \vec{x}) = \frac{1}{1 + e^{-t}}
$$
![Logistic function.](images/logistic.png){width=6cm}
which is the logistic function. If the two pdfs $f(\vec{x} | H_0)$ and
$f(\vec{x} | H_1)$ are not Gaussian, the Fisher linear discriminant is no
longer optimal and $t(\vec{x})$ can be generalized with a special case of an
Artificial Neural Network (ANN).
Suppose we take:
$$
  t(\vec{x}) = s \left( a_0 + \sum_{i = 1}^N a_i x_i \right)
$$
where $s$ is called the activation function and $a_0$ the threshold. Since
the sigmoid is monotonic, this ANN is equivalent to a linear test.

58
misc/pdfs.c Normal file
View File

@ -0,0 +1,58 @@
// This program generates a collection of random numbers which follow the
// distribution whose name is given in input by the user when calling the
// program and outputs an histogram of the result. Maximum and minimum of
// the range must also be given. The random number distribution functions
// are taken from the GSL library at:
// https://www.gnu.org/software/gsl/doc/html/randist.html
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_histogram.h>
int main (int argc, char** argv)
{
    // A histogram with 100 bins covering the range [low, upp)
    // given on the command line is set:
    size_t bins = 100;
    int low = atoi(argv[2]);
    int upp = atoi(argv[3]);
    gsl_histogram * histo = gsl_histogram_alloc(bins);
    gsl_histogram_set_ranges_uniform(histo, low, upp);
    // The random number generator is set:
    gsl_rng* gen = gsl_rng_alloc(gsl_rng_taus);
    // 100'000 random numbers are generated and put into the histogram:
    size_t N = 100000;
    for (size_t i = 0; i < N; i++)
    {
        if (strcmp(argv[1], "binomial") == 0)
            gsl_histogram_increment(histo, gsl_ran_binomial(gen, 0.3, 10));
        if (strcmp(argv[1], "poisson") == 0)
            gsl_histogram_increment(histo, gsl_ran_poisson(gen, 1));
        if (strcmp(argv[1], "uniform") == 0)
            gsl_histogram_increment(histo, gsl_rng_uniform(gen)*100);
        if (strcmp(argv[1], "gaussian") == 0)
            gsl_histogram_increment(histo, gsl_ran_gaussian(gen, 5)+30);
        if (strcmp(argv[1], "wigner") == 0)
            gsl_histogram_increment(histo, gsl_ran_cauchy(gen, 10)+20);
        if (strcmp(argv[1], "landau") == 0)
            gsl_histogram_increment(histo, gsl_ran_landau(gen));
        if (strcmp(argv[1], "chi2") == 0)
            gsl_histogram_increment(histo, gsl_ran_gamma(gen, 5, 2));
        if (strcmp(argv[1], "exponential") == 0)
            gsl_histogram_increment(histo, gsl_ran_exponential(gen, 5));
        if (strcmp(argv[1], "student") == 0)
            gsl_histogram_increment(histo, gsl_ran_tdist(gen, 3));
        if (strcmp(argv[1], "fischer") == 0)
            gsl_histogram_increment(histo, gsl_ran_fdist(gen, 3, 4));
    }
    // The histogram is given in output:
    gsl_histogram_fprintf (stdout, histo, "%g", "%g");
    // The memory is freed.
    gsl_rng_free(gen);
    gsl_histogram_free(histo);
    return 0;
}

15
misc/plot Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
import sys
lows, upps, values = np.loadtxt(sys.stdin, unpack=True,)
dim = upps[1] - lows[1]
values = values/np.sum((values)*dim)
plt.bar(lows, align='edge', width=dim, height=values,
color='#dbbf0d', edgecolor='#595856')
plt.annotate("N = 100000\nbins=100", (15, 0.16))
plt.ylabel("pdf(x)")
plt.xlabel("x")
plt.show()

BIN
notes/images/conti-ex6.jpg Normal file

BIN
notes/images/dip.png Normal file

BIN
notes/images/gamma-area.png Normal file

17
notes/makefile Normal file
View File

@ -0,0 +1,17 @@
sections = $(shell find sections/ -name '*.md' | sort -V)
pandoc = \
@ pandoc $(1) \
--toc \
--standalone \
--pdf-engine=xelatex \
-F pandoc-crossref \
-o $(2).pdf
all: exercises.pdf
clean:
	rm exercises.pdf
exercises.pdf: $(sections) images
	$(call pandoc, $(sections), exercises)

50
notes/sections/0.md Normal file
View File

@ -0,0 +1,50 @@
---
title: Statistical Analysis
subtitle: Exercises
numbersections: true
documentclass: article
fontsize: 12pt
geometry:
- width=150mm
- top=20mm
- bottom=30mm
mainlang: english
otherlang: russian
header-includes: |
```{=latex}
% start new page with each section
\usepackage{etoolbox}
\usepackage{tikz}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{siunitx}
\pretocmd{\section}{\clearpage}{}{}
% strict figure placement
\usepackage{float}
\floatplacement{figure}{H}
% custom macros
%% "with" in formulas
\DeclareMathOperator*{\with}{%
\hspace{30pt} \text{with} \hspace{30pt}
}
%% "thus" in formulas
\DeclareMathOperator*{\thus}{%
\hspace{30pt} \Longrightarrow \hspace{30pt}
}
%% "et" in formulas
\DeclareMathOperator*{\et}{%
\hspace{30pt} \wedge \hspace{30pt}
}
% configure captions
\usepackage{caption}
\captionsetup{font={footnotesize,it}}
\captionsetup{width=11cm}
\usepackage{stmaryrd}
```
---

226
notes/sections/1.md Normal file
View File

@ -0,0 +1,226 @@
# Exercise 1
## Random numbers following the Landau distribution
The Landau distribution is a probability density function which can be defined
as follows:
$$
f(x) = \frac{1}{\pi} \int \limits_{0}^{+ \infty} dt \, e^{-t \ln(t) -xt} \sin (\pi t)
$$
![Landau distribution.](images/landau-small.pdf){width=50%}
The GNU Scientific Library (GSL) provides a number of functions for generating
random variates following tens of probability distributions. Thus, the function
for generating numbers from the Landau distribution, namely `gsl_ran_landau()`,
was used.
For the purpose of visualizing the resulting sample, the data was put into
an histogram and plotted with matplotlib. The result is shown in @fig:landau.
![Example of N points generated with the `gsl_ran_landau()`
function and plotted in a 100-bins histogram ranging from -10 to
80.](images/landau.png){#fig:landau}
## Randomness testing of the generated sample
### Kolmogorov-Smirnov test
In order to compare the sample with the Landau distribution, the
Kolmogorov-Smirnov (KS) test was applied. This test statistically quantifies the
distance between the cumulative distribution function of the Landau distribution
and the one of the sample. The null hypothesis is that the sample was
drawn from the reference distribution.
The KS statistic for a given cumulative distribution function $F(x)$ is:
$$
D_N = \text{sup}_x |F_N(x) - F(x)|
$$
where:
- $x$ runs over the sample,
- $F(x)$ is the Landau cumulative distribution and function
- $F_N(x)$ is the empirical cumulative distribution function of the sample.
If $N$ numbers have been generated, for every point $x$,
$F_N(x)$ is simply given by the number of points preceding the point (itself
included) normalized by $N$, once the sample is sorted in ascending order.
$F(x)$ was computed numerically from the Landau distribution with a maximum
relative error of $10^{-6}$, using the function `gsl_integration_qagiu()`,
found in GSL.
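As an illustration, the statistic can be computed from a sorted sample as
sketched below, where `cdf()` is a placeholder standing for the reference
cumulative (a standard normal here, only to keep the sketch self-contained):
```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// comparison function for qsort, ascending order
static int cmp(const void* a, const void* b) {
  double x = *(const double*)a, y = *(const double*)b;
  return (x > y) - (x < y);
}

// placeholder model CDF: replace with the cumulative under test
static double cdf(double x) { return 0.5 * (1 + erf(x / sqrt(2))); }

// KS statistic: sup over x of |F_N(x) - F(x)|, checking the empirical
// CDF both just before and just after each jump
double ks_statistic(double* sample, size_t n) {
  qsort(sample, n, sizeof(double), cmp);
  double D = 0;
  for (size_t i = 0; i < n; i++) {
    double F = cdf(sample[i]);
    double lo = fabs(F - (double)i / n);
    double hi = fabs((double)(i + 1) / n - F);
    if (lo > D) D = lo;
    if (hi > D) D = hi;
  }
  return D;
}

int main(void) {
  double sample[] = { -0.3, 0.1, 0.5, 1.2, -1.0, 0.7 };  // placeholder data
  printf("D = %g\n", ks_statistic(sample, 6));
  return 0;
}
```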
Under the null hypothesis, the distribution of $D_N$ is expected to
asymptotically approach a Kolmogorov distribution:
$$
\sqrt{N}D_N \xrightarrow{N \rightarrow + \infty} K
$$
where $K$ is the Kolmogorov variable, with cumulative
distribution function given by:
$$
P(K \leqslant K_0) = 1 - p = \frac{\sqrt{2 \pi}}{K_0}
\sum_{j = 1}^{+ \infty} e^{-(2j - 1)^2 \pi^2 / 8 K_0^2}
$$
Plugging the observed value $\sqrt{N}D_N$ in $K_0$, the $p$-value can be
computed. At the 95% confidence level (that is, accepting a 5% probability of
rejecting the null hypothesis when it is in fact true), the compatibility with
the Landau distribution cannot be excluded if $p > \alpha = 0.05$.
To approximate the series, the convergence was accelerated using the Levin
$u$-transform with the `gsl_sum_levin_utrunc_accel()` function. The algorithm
terminates when the difference between two successive extrapolations reaches a
minimum.
For $N = 1000$, the following results were obtained:
- $D = 0.020$
- $p = 0.79$
Hence, the data was reasonably sampled from a Landau distribution.
**Note**:
Contrary to what one would expect, the $\chi^2$ test on a histogram is not very
useful in this case. For the test to be significant, the data has to be binned
such that at least several points fall in each bin. However, it can be seen
(@fig:landau) that many bins are empty both on the right and left sides of the
distribution, so it would be necessary to fit only the region where the points
cluster or use very large bins in the others, making the $\chi^2$ test
unpractical.
### Parameters comparison
When a sample of points is generated in a given range, different tests can be
applied in order to check whether they follow an even distribution or not. The
idea which lies beneath most of them is to measure how far the parameters of
the distribution are from the ones measured in the sample.
The same principle can be used to verify if the generated sample effectively
follows the Landau distribution. Since it turns out to be a very pathological
PDF, only two parameters can be easily checked: the mode and the
full-width-half-maximum (FWHM).
![Landau distribution with emphatized mode and
FWHM = ($x_+ - x_-$).](images/landau.pdf)
\begin{figure}
\hypertarget{fig:parameters}{%
\begin{tikzpicture}[overlay]
\begin{scope}[shift={(0,0.4)}]
% Mode
\draw [thick, dashed] (7.57,3.1) -- (7.57,8.55);
\draw [thick, dashed] (1.9,8.55) -- (7.57,8.55);
\node [above right] at (7.6,3.1) {$m_e$};
\node [below right] at (1.9,8.55) {$f(m_e)$};
% FWHM
\draw [thick, dashed] (1.9,5.95) -- (9.05,5.95);
\draw [thick, dashed] (6.85,5.83) -- (6.85,3.1);
\draw [thick, dashed] (8.95,5.83) -- (8.95,3.1);
\node [below right] at (1.9,5.95) {$\frac{f(m_e)}{2}$};
\node [above right] at (6.85,3.1) {$x_-$};
\node [above right] at (8.95,3.1) {$x_+$};
\end{scope}
\end{tikzpicture}
}
\end{figure}
The mode of a set of data values is defined as the value that appears most
often, namely: it is the maximum of the PDF. Since there is no way to find
an analytic form for the mode of the Landau PDF, it was numerically estimated
through a minimization method (found in GSL, called method 'golden section')
with an arbitrary error of $10^{-2}$, obtaining:
- expected mode $= m_e = \SI{-0.2227830 \pm 0.0000001}{}$
The minimization algorithm begins with a bounded region known to contain a
minimum. The region is described by a lower bound $x_\text{min}$ and an upper
bound $x_\text{max}$, with an estimate of the location of the minimum $x_e$.
The value of the function at $x_e$ must be less than the value of the function
at the ends of the interval, in order to guarantee that a minimum is contained
somewhere within the interval.
$$
f(x_\text{min}) > f(x_e) < f(x_\text{max})
$$
On each iteration the interval is divided according to the golden section
(using the ratio $(3 - \sqrt{5})/2 \approx 0.3819660$) and the value of the
function at this new point $x'$ is calculated. If the new point is a better
estimate of the minimum, namely if $f(x') < f(x_e)$, then the current
estimate of the minimum is
updated.
The new point allows the size of the bounded interval to be reduced, by
choosing the most compact set of points which satisfies the constraint $f(a) >
f(x') < f(b)$ among $f(x_\text{min})$, $f(x_\text{max})$ and $f(x_e)$. The
interval is reduced until it encloses the true minimum to a desired tolerance.
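A minimal sketch of how such a minimization can be set up with GSL (the
bracketing interval, the initial guess and the tolerance are illustrative
choices, not necessarily the ones used for the quoted result):
```c
#include <stdio.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_errno.h>

// the mode of the pdf is the minimum of its negation
double neg_landau_pdf(double x, void* params) {
  (void)params;
  return -gsl_ran_landau_pdf(x);
}

int main(void) {
  gsl_function f = { .function = &neg_landau_pdf, .params = NULL };

  // golden-section minimizer with a guess and a bracketing interval
  gsl_min_fminimizer* s =
    gsl_min_fminimizer_alloc(gsl_min_fminimizer_goldensection);
  gsl_min_fminimizer_set(s, &f, -0.3, -2, 2);

  int status = GSL_CONTINUE;
  for (int i = 0; status == GSL_CONTINUE && i < 100; i++) {
    gsl_min_fminimizer_iterate(s);
    double a = gsl_min_fminimizer_x_lower(s);
    double b = gsl_min_fminimizer_x_upper(s);
    status = gsl_min_test_interval(a, b, 1e-2, 0);
  }

  printf("mode = %f\n", gsl_min_fminimizer_x_minimum(s));
  gsl_min_fminimizer_free(s);
  return 0;
}
```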
In the sample, on the other hand, once the data were binned, the mode can be
estimated as the central value of the bin with maximum events and the error
is the half width of the bins. In this case, with 40 bins between -20 and 20,
the following result was obtained:
- observed mode $= m_o = \SI{0 \pm 1}{}$
In order to compare the values $m_e$ and $m_o$, the following compatibility
t-test was applied:
$$
p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with
t = \frac{|m_e - m_o|}{\sqrt{\sigma_e^2 + \sigma_o^2}}
$$
where $\sigma_e$ and $\sigma_o$ are the absolute errors of $m_e$ and $m_o$
respectively. At 95% confidence level, the values are compatible if $p > 0.05$.
In this case:
$$
p = 0.82
$$
Thus, the observed value is compatible with the expected one.
---
The same approach was taken as regards the FWHM. It is defined as the distance
between the two points at which the function is equal to half of its maximum
value. Even in this case, there is no analytic expression for it, thus it
was computed numerically as follows.
First, some definitions must be given:
$$
f_{\text{max}} := f(m_e) \et \text{FWHM} = x_{+} - x_{-} \with
f(x_{\pm}) = \frac{f_{\text{max}}}{2}
$$
then the function $f'(x)$ was minimized using the same minimization method
used for finding $m_e$, dividing the range into $[x_\text{min}, m_e]$ and
$[m_e, x_\text{max}]$ (where $x_\text{min}$ and $x_\text{max}$ are the limits
in which the points have been sampled) in order to be able to find both the
minima of the function:
$$
f'(x) = |f(x) - \frac{f_{\text{max}}}{2}|
$$
resulting in:
$$
\text{expected FWHM} = w_e = \SI{4.0186457 \pm 0.0000001}{}
$$
On the other hand, the observed FWHM was computed as the difference between
the center of the bins with the values closer to $\frac{f_{\text{max}}}{2}$
and the error was taken as twice the width of the bins, obtaining:
$$
\text{observed FWHM} = w_o = \SI{4 \pm 2}{}
$$
These two values turn out to be compatible too, with:
$$
p = 0.99
$$

311
notes/sections/2.md Normal file
View File

@ -0,0 +1,311 @@
# Exercise 2
## Euler-Mascheroni constant
The Euler-Mascheroni constant is defined as the limiting difference between the
partial sums of the harmonic series and the natural logarithm:
$$
\gamma = \lim_{n \rightarrow +\infty} \left( \sum_{k=1}^{n} \frac{1}{k}
- \ln(n) \right)
$$ {#eq:gamma}
and represents the limiting blue area in @fig:gamma. The first 30 digits of
$\gamma$ are:
$$
\gamma = 0.57721\ 56649\ 01532\ 86060\ 65120\ 90082 \dots
$$ {#eq:exact}
In complex analysis, this constant is related to many functions and can be
evaluated through a variety of identities. In this work, five methods were
implemented and their results discussed. In fact, evaluating $\gamma$ with a
limited precision due to floating points number representation entails limited
precision on the estimation of $\gamma$ itself due to roundoff errors. All the
known methods involve sums, subtractions or products of very big or small
numbers, packed in series, partial sums or infinite products. Thus, the
efficiency of the methods lies on how quickly they converge to their limit.
![The area of the blue region converges to the EulerMascheroni
constant.](images/gamma-area.png){#fig:gamma width=7cm}
## Computing the constant
### Definition
First, in order to have a quantitative idea of how hard it is to reach a good
estimation of $\gamma$, it was naively computed using the definition given in
@eq:gamma. The difference was computed for increasing value of $n$, with
$n_{i+1} = 10 \cdot n_i$ and $n_1 = 20$, till the approximation starts getting
worse, namely:
$$
| \gamma(n_{i+1}) - \gamma | > | \gamma(n_i) - \gamma|
$$
and $\gamma (n_i)$ was selected as the result (see @tbl:1_results).
-----------------------------------------------
n sum $|\gamma(n)-\gamma|$
----------- ------------- ---------------------
\SI{2e1}{} \SI{2.48e-02}{}
\SI{2e2}{} \SI{2.50e-03}{}
\SI{2e3}{} \SI{2.50e-04}{}
\SI{2e4}{} \SI{2.50e-05}{}
\SI{2e5}{} \SI{2.50e-06}{}
\SI{2e6}{} \SI{2.50e-07}{}
\SI{2e7}{} \SI{2.50e-08}{}
\SI{2e8}{} \SI{2.50e-09}{}
\SI{2e9}{} \SI{2.55e-10}{}
\SI{2e10}{} \SI{2.42e-11}{}
\SI{2e11}{} \SI{1.44e-08}{}
-----------------------------------------------
Table: Partial results using the definition of $\gamma$ with double
precision. {#tbl:1_results}
The convergence is logarithmic: to fix the first $d$ decimal places about
$10^d$ terms are needed. The double precision runs out at the
10\textsuperscript{th} place, $n=\SI{2e10}{}$.
Since all the numbers are stored in double precision, there can be at best 15
correct digits, but only 10 were correctly computed: this means that once the
terms of the series become too small to change the partial sum at this
precision, the contribution of all the remaining terms is lost, leaving a
residual error $\propto 10^{-11}$.
--------- -----------------------
true: 0.57721\ 56649\ 01533
approx: 0.57721\ 56648\ 77325
diff: 0.00000\ 00000\ 24207
--------- -----------------------
Table: First method results. {#tbl:first}
### Alternative formula
As a first alternative, the constant was computed through the identity which
relates $\gamma$ to the $\Gamma$ function as follow:
$$
\gamma = \lim_{M \rightarrow + \infty} \sum_{k = 1}^{M}
\binom{M}{k} \frac{(-1)^k}{k} \ln(\Gamma(k + 1))
$$
Varying $M$ from 1 to 100, the best result was obtained for $M = 41$ (see
@tbl:second). It turned out badly: the convergence is worse than using the
definition itself. Only two decimal places were correctly computed.
--------- -----------------------
true: 0.57721\ 56649\ 01533
approx: 0.57225\ 72410\ 34058
diff: 0.00495\ 84238\ 67473
--------- -----------------------
Table: Second method results. {#tbl:second}
Here, the problem lies in the binomial term: computing the factorial of a
number greater than 18 goes over 15 places and so cannot be correctly
represented. Furthermore, the convergence (even if this is not a series
and, consequently, it is not properly a "convergence") is slowed down by
the logarithmic factor.
### Reciprocal $\Gamma$ function
A better result was found using the well known reciprocal $\Gamma$ function
formula:
$$
  \frac{1}{\Gamma(z)} = z e^{\gamma z} \prod_{k = 1}^{+ \infty}
\left( 1 + \frac{z}{k} \right) e^{-z/k}
$$
which gives:
$$
\gamma = - \frac{1}{z} \ln \left( z \Gamma(z)
\prod_{k = 1}^{+ \infty} \left( 1 + \frac{z}{k} \right) e^{-z/k} \right)
$$
The execution stops when there is no difference between two consecutive terms
of the infinite product (it happens for $k = 456565794$, meaning that for this
value of $k$, the term of the product is equal to 1). Different values of $z$
were checked, with $z_{i+1} = z_i + 0.01$, ranging from 0 to 20 and the best
result was found for $z = 9$. As can be seen in @tbl:3_results, it's only by
chance, since all $|\gamma(z) - \gamma |$ are of the same order of magnitude.
The best one is compared with the exact value of $\gamma$ in @tbl:third.
---------------------------------------------------------------
z $|\gamma(z) - \gamma |$ z $|\gamma(z) - \gamma |$
----- ------------------------ ------ ------------------------
1 \SI{9.712e-9}{} 8.95 \SI{9.770e-9}{}
3 \SI{9.320e-9}{} 8.96 \SI{9.833e-9}{}
5 \SI{9.239e-9}{} 8.97 \SI{9.622e-9}{}
7 \SI{9.391e-9}{} 8.98 \SI{9.300e-9}{}
9 \SI{8.482e-9}{} 8.99 \SI{9.059e-9}{}
11 \SI{9.185e-9}{} 9.00 \SI{8.482e-9}{}
13 \SI{9.758e-9}{} 9.01 \SI{9.564e-9}{}
15 \SI{9.747e-9}{} 9.02 \SI{9.260e-9}{}
17 \SI{9.971e-9}{} 9.03 \SI{9.264e-9}{}
19 \SI{10.084e-9}{} 9.04 \SI{9.419e-9}{}
---------------------------------------------------------------
Table: Differences between the obtained values of $\gamma$ and the exact
one. {#tbl:3_results}
--------- -----------------------
true: 0.57721\ 56649\ 01533
approx: 0.57721\ 56564\ 18607
diff: 0.00000\ 00084\ 82925
--------- -----------------------
Table: Third method results for z = 9.00. {#tbl:third}
This time, the convergence of the infinite product is fast enough to ensure the
$8^{th}$ place.
### Fastest convergence formula
The fastest known convergence belongs to the following formula:
(source: http://www.numberworld.org/y-cruncher/internals/formulas.html):
$$
\gamma = \frac{A(N)}{B(N)} -\frac{C(N)}{B^2(N)} - \ln(N)
$$ {#eq:faster}
with:
\begin{align*}
&A(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \cdot H(k)
\with H(k) = \sum_{j=1}^{k} \frac{1}{j} \\
&B(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \\
&C(N) = \frac{1}{4N} \sum_{k=0}^{2N}
  \frac{((2k)!)^3}{(k!)^4 \cdot (16N)^{2k}} \\
\end{align*}
The series $A$ and $B$ are computed till there is no difference between two
consecutive terms.
The number of desired correct decimals is given in input and $N$ is
consequently computed through a formula given in the same article
above-mentioned. Results are shown in @tbl:fourth.
--------- ------------------------------
true: 0.57721\ 56649\ 01532\ 75452
approx: 0.57721\ 56649\ 01532\ 86554
diff: 0.00000\ 00000\ 00000\ 11102
--------- ------------------------------
Table: Fourth method results. {#tbl:fourth}
Due to roundoff errors, the best result is obtained for $N = 10$. Since up to
15 places were correctly computed, an approximation of $\gamma$ better than the
one reached with the definition in @eq:gamma was obtained.
### Arbitrary precision
To overcome the issues related to the double representation, one can resort to a
representation with arbitrary precision. In the GMP library (which stands for
GNU Multiple Precision arithmetic), for example, real numbers can be
approximated by a ratio of two integers (fraction) with arbitrary precision:
this means a check is performed on every operation and in case of an integer
overflow, additional memory is requested and used to represent the larger result.
Additionally, the library automatically switches to the optimal algorithm
to compute an operation based on the size of the operands.
The terms in @eq:faster can therefore be computed with arbitrarily large
precision. Thus, a program that computes the Euler-Mascheroni constant within
a user controllable precision has been implemented. Unlike the previously
mentioned programs, this one was more carefully optimized.
The $A$ and $B$ series are computed up to an arbitrary limit $k_{\text{max}}$.
Different values of $k_{\text{max}}$ were tested but, obviously, they all
eventually reach a point where the approximation cannot guarantee the
requested precision; the solution turned out to be to let $k_{\text{max}}$ depend on
$N$. Consequently, $k_{\text{max}}$ was chosen to be $5N$, after it has been
verified to produce the correct digits up to 500 decimal places.
The GMP library offers functions to perform some operations such as addition,
multiplication, division, etc. however, the logarithm function is not
implemented. Thus, most of the code carries out the $\ln(N)$ computation.
First, it should be noted that the logarithm of only some special numbers can
be computed with arbitrary precision, namely the ones of which a converging
series is known. This forces $N$ to be rewritten in the following way:
$$
N = N_0 \cdot b^e \thus \ln(N) = \ln(N_0) + e \cdot \ln(b)
$$
Since a fast converging series for $\ln(2)$ is known $b = 2$ was chosen. As
well as for the scientific notation, in order to get the mantissa $1 \leqslant
N_0 < 2$, the number of binary digits of $N$ must be computed (conveniently, a
dedicated function `mpz_sizeinbase()` can be found in GMP). If the digits are $n$:
$$
  e = n - 1 \thus N_0 = \frac{N}{2^{n - 1}}
$$
Then, by defining:
$$
N_0 = \frac{1 + y}{1 - y} \thus y = \frac{N_0 - 1}{N_0 + 1} < 1
$$
and the following series (which is convergent for $y < 1$) can therefore be
used:
$$
\ln \left( \frac{1 + y}{1 - y} \right) =
2 \sum_{k = 0}^{+ \infty} \frac{y^{2k + 1}}{2k + 1}
$$
But when to stop computing the series?
Given a partial sum $S_k$ of the series, it is possible to know when a digit is
definitely correct, meaning that no matter how large $k$ can be,
it will not affect that decimal place. The key lies in the following concept.
Letting $S$ the value of the series:
$$
S_k + \frac{a_{k+1}}{1 - \frac{a_{k+1}}{a_k}} < S < S_k + a_k \frac{L}{1 -L}
$$
where $L$ is the limiting ratio of the series terms, which must be $< 1$ in
order for it to converge (in this case, it is easy to prove that $L = y^2$).
The width $\Delta S$ of the interval containing $S$, for a given $S_k$, gives
the precision of the partial sum with respect to $S$. By imposing $\Delta S <
1/10^D$ where $D$ is the correct decimal place required, the desired precision
is obtained. This is achieved by trials, checking at every step whether $\Delta
S$ is less or greater than $1/10^D$.
The same holds for $\ln(2)$, which is given by the following series:
$$
\log(2) = \sum_{k=1}^{+ \infty} \frac{1}{k \cdot 2^k}
$$
In this case the ratio is $L = 1/2$.
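As a minimal illustration of the approach, the following sketch sums the
$\ln(2)$ series with GMP rationals until the current term drops below
$10^{-D}$ (a simplified stopping rule based on the bound above, with $L =
1/2$; the value of $D$ is an arbitrary example, not the one used in the
actual program):
```c
#include <stdio.h>
#include <gmp.h>

int main(void) {
  const unsigned long D = 30;     // requested number of correct decimals
  mpq_t sum, term, thr;
  mpq_inits(sum, term, thr, NULL);

  // threshold 10^-D: with L = 1/2 the remainder of the series is smaller
  // than the last added term, so stopping here bounds the error by 10^-D
  mpz_set_ui(mpq_numref(thr), 1);
  mpz_ui_pow_ui(mpq_denref(thr), 10, D);

  // ln(2) = sum over k >= 1 of 1/(k*2^k), with exact rational arithmetic
  for (unsigned long k = 1; ; k++) {
    mpq_set_ui(term, 1, k);        // 1/k
    mpq_div_2exp(term, term, k);   // 1/(k*2^k)
    mpq_add(sum, sum, term);
    if (mpq_cmp(term, thr) < 0) break;
  }

  // convert to a big float only to print the decimal expansion
  mpf_t out;
  mpf_init2(out, 4 * D);
  mpf_set_q(out, sum);
  gmp_printf("ln(2) = %.30Ff\n", out);

  mpf_clear(out);
  mpq_clears(sum, term, thr, NULL);
  return 0;
}
```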

423
notes/sections/3.md Normal file
View File

@ -0,0 +1,423 @@
# Exercise 3 {#sec:3}
## Meson decay events generation
A number of $N = 50'000$ points on the unit sphere, each representing a
particle detection event, must be generated according to the angular
probability distribution function $F_0$:
\begin{align*}
F_0 (\theta, \phi) = \frac{3}{4 \pi} \Bigg[
\frac{1}{2} (1 - \alpha_0) + \frac{1}{2} (3\alpha_0 - 1) \cos^2(\theta)& \\
- \beta_0 \sin^2(\theta) \cos(2 \phi)
- \gamma_0 \sqrt{2} \sin(2 \theta) \cos(\phi)& \Bigg]
\end{align*}
where $\theta$ and $\phi$ are, respectively, the polar and azimuthal angles, and
$$
\alpha_0 = 0.65 \et \beta_0 = 0.06 \et \gamma_0 = -0.18
$$
To generate the points a *try and catch* method was employed:
1. generate a point $(\theta , \phi)$ uniformly on a unit sphere
2. generate a third value $Y$ uniformly in $[0, 1]$
3. if @eq:requirement is satisfied save the point
4. repeat from 1.
$$
Y < F_0(\theta, \phi)
$$ {#eq:requirement}
To increase the efficiency of the procedure, the $Y$ values were actually
generated in $[0, 0.2]$, since $\max F_0 = 0.179$ for the given parameters
(another option would have been generating the numbers in the range $[0, 1]$,
but then every value $Y > 0.179$ would always have been rejected, making the
procedure needlessly less efficient).
While $\phi$ can simply be generated uniformly between 0 and $2 \pi$, for
$\theta$ one has to be more careful: by generating evenly distributed numbers
between 0 and $\pi$, the points turn out to cluster on the poles. The point
$(\theta,\phi)$ is *not* uniform on the sphere but is uniform on a cylindrical
projection of it. (The difference can be appreciated in @fig:compare).
The proper distribution of $\theta$ is obtained by applying a transform
to a variable $x$ with uniform distribution in $[0, 1)$,
easily generated by the GSL function `gsl_rng_uniform()`.
<div id="fig:compare">
![Uniformly distributed points with $\theta$ evenly distributed between
0 and $\pi$.](images/histo-i-u.pdf){width=45%}
![Points uniformly distributed on a spherical
surface.](images/histo-p-u.pdf){width=45%}
![Sample generated according to $F$ with $\theta$ evenly distributed between
0 and $\pi$.](images/histo-i-F.pdf){width=45%}
![Sample generated according to $F$ with $\theta$ properly
distributed.](images/histo-p-F.pdf){width=45%}
Examples of samples. On the left, points with $\theta$ evenly distributed
between 0 and $\pi$; on the right, points with $\theta$ properly distributed.
</div>
The transformation can be found by imposing the angular PDF to be a constant:
\begin{align*}
\frac{d^2 P}{d \Omega^2} = \text{const} = \frac{1}{4 \pi}
\hspace{20pt} &\Longrightarrow \hspace{20pt}
d^2P = \frac{1}{4 \pi} \, d \Omega^2 = \frac{1}{4 \pi} \,
d \phi \sin(\theta) d \theta
\\
&\Longrightarrow \hspace{20pt}
\frac{dP}{d \theta} = \int_0^{2 \pi} d \phi \, \frac{1}{4 \pi} \sin(\theta)=
\frac{1}{4 \pi} \sin(\theta) 2 \pi = \frac{1}{2} \sin(\theta)
\end{align*}
\begin{align*}
\theta = \theta (x)
\hspace{20pt} &\Longrightarrow \hspace{20pt}
\frac{dP}{d \theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d \theta} \right|
= \left. \frac{dP}{dx} \middle/ \, \left| \frac{d \theta}{dx} \right| \right.
\\
&\Longrightarrow \hspace{20pt}
\frac{1}{2} \sin(\theta) = \left. 1 \middle/ \, \left|
\frac{d \theta}{dx} \right| \right.
\end{align*}
Since $\theta \in [0, \pi]$, then the absolute value symbol can be omitted:
\begin{align*}
\frac{d \theta}{dx} = \frac{2}{\sin(\theta)}
\hspace{20pt} &\Longrightarrow \hspace{20pt}
d \theta \sin(\theta) = 2 dx
\\
&\Longrightarrow \hspace{20pt}
- \cos (\theta') |_{0}^{\theta} = 2[x(\theta) - x(0)] = 2(x - 0) = 2x
\\
&\Longrightarrow \hspace{20pt}
- \cos(\theta) + 1 = 2x
\\
&\Longrightarrow \hspace{20pt}
\theta = \text{acos} (1 -2x)
\end{align*}
Since 1 is not in $[0 , 1)$, $\theta = \pi$ can't be generated. Being it just
a single point, the effect of this omission is negligible.
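Putting the two angles together, the generation of a single direction reduces
to a few lines (a sketch; the generator type and the number of printed points
are arbitrary):
```c
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>

int main(void) {
  gsl_rng* r = gsl_rng_alloc(gsl_rng_taus);
  for (int i = 0; i < 5; i++) {
    double x = gsl_rng_uniform(r);               // uniform in [0, 1)
    double theta = acos(1 - 2*x);                // inverse-transform for theta
    double phi = 2 * M_PI * gsl_rng_uniform(r);  // uniform in [0, 2*pi)
    printf("%f %f\n", theta, phi);
  }
  gsl_rng_free(r);
  return 0;
}
```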
\vspace{30pt}
## Parameter estimation
The sample must now be used to estimate the parameters $\alpha_0$,
$\beta_0$ and $\gamma_0$ of the angular distribution $F_0$.
### Maximum Likelihood method
The Maximum Likelihood method (MLM) is based on the observation that the best
estimate {$\bar{\alpha}$, $\bar{\beta}$, $\bar{\gamma}$} of the parameters
{$\alpha$, $\beta$, $\gamma$} should maximize the Likelihood function $L$
defined in @eq:Like0, where the index $i$ runs over the sample and $F$ is the
function $F_0$ with free parameters $\alpha$, $\beta$ and $\gamma$:
$$
L = \prod_i F(\theta_i, \phi_i) = \prod_i F_i
$$ {#eq:Like0}
The reason is that the points were generated according to the probability
distribution function $F$ with parameters $\alpha_0$, $\beta_0$ and $\gamma_0$,
thus the probability $P$ of sampling this special sample is, by definition, the
product of their probabilities $F_i$ and it must be maximum for {$\alpha$,
$\beta$, $\gamma$} = {$\alpha_0$, $\beta_0$, $\gamma_0$}. Thus, by viewing $F_i$
as a function of the parameters, by maximizing $P = L$ with respect
to $\alpha$, $\beta$ and $\gamma$, a good estimate should be found.
Instead of actually maximising $L$, the function $-\ln(L)$
was minimised, since minimisation is what is usually described in the
literature (and implemented in libraries), and taking the logarithm
simplifies the math:
$$
L = \prod_i F_i \thus - \ln(L) = - \sum_{i} \ln(F_i)
$$ {#eq:Like}
The problem of multidimensional minimisation was tackled using the GSL library
`gsl_multimin.h`, which implements a variety of methods, including the
conjugate gradient Fletcher-Reeves algorithm, used in the solution, which requires
the implementation of $-\ln(L)$ and its gradient.
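For this particular $F$ the gradient can be written down analytically (shown
here as a cross-check; it follows directly from differentiating $F$ with
respect to the free parameters):
\begin{align*}
  \frac{\partial (-\ln L)}{\partial \alpha}
    &= - \sum_i \frac{1}{F_i} \, \frac{3}{8 \pi}
       \left( 3 \cos^2(\theta_i) - 1 \right) \\
  \frac{\partial (-\ln L)}{\partial \beta}
    &= \sum_i \frac{1}{F_i} \, \frac{3}{4 \pi}
       \sin^2(\theta_i) \cos(2 \phi_i) \\
  \frac{\partial (-\ln L)}{\partial \gamma}
    &= \sum_i \frac{1}{F_i} \, \frac{3 \sqrt{2}}{4 \pi}
       \sin(2 \theta_i) \cos(\phi_i)
\end{align*}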
To minimise a function $f$, given an initial guess point $x_0$, the gradient
$\nabla f$ is used to find the initial direction $v_0$ (the steepest descent) along
which a line minimisation is performed: the minimum occurs where the
directional derivative along $v_0$ is zero:
$$
\frac{\partial f}{\partial v_0} = \nabla f \cdot v_0 = 0
$$
or, equivalently, at the point where the gradient is orthogonal to $v_0$.
After this first step the following procedure is iterated:
1. Find the steepest descent $v_n$.
2. Compute the *conjugate* direction: $w_n = v_n + \beta_n w_{n-1}$
3. Perform a line minimisation along $w_n$
4. Update the position $x_{n+1} = x_n + \alpha_n w_n$
where $\alpha_n$ is the line minimum and $\beta_n$ is given by the Fletcher-Reeves
formula:
$$
\beta = \frac{\|\nabla f(x_n)\|^2}{\|\nabla f(x_{n-1})\|^2}
$$
The accuracy of each line minimisation is controlled by the parameter `tol`,
meaning:
$$
w \cdot \nabla f < \text{tol} \, \|w\| \, \|\nabla f\|
$$
The minimisation is quite unstable and this forced the starting point to be
taken very close to the solution, namely:
$$
\alpha_{sp} = 0.79 \et
\beta_{sp} = 0.02 \et
\gamma_{sp} = -0.17
$$
The overall minimisation ends when the gradient module is smaller than $10^{-4}$.
The program took 25 of the above iterations to reach the result shown in @eq:Like_res.
The Cramér-Rao bound states that the covariance matrix of parameters estimated
by MLM is bounded from below by the inverse of the Hessian matrix of $-\ln(L)$
at the minimum. Thus, the Hessian matrix was computed
analytically and inverted by a Cholesky decomposition, which states that
every positive definite symmetric square matrix $H$ can be written as the
product of a lower triangular matrix $L$ and its transpose $L^T$:
$$
H = L \cdot L^T
$$
The Hessian matrix is indeed both symmetric and positive definite when
computed at a minimum. Once decomposed, the inverse is given by
$$
H^{-1} = (L \cdot L^T)^{-1} = (L^{-1})^T \cdot L^{-1}
$$
The inverse of $L$ is easily computed since it's a triangular matrix.
The GSL library provides two functions `gsl_linalg_cholesky_decomp()` and
`gsl_linalg_cholesky_invert()` to decompose and invert in-place a matrix.
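Assuming the $3 \times 3$ Hessian has already been computed and stored in a
`gsl_matrix`, a minimal sketch of this step could be:

```c
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>

/* Invert the (symmetric, positive definite) Hessian in place:
 * after these two calls `hessian` holds the covariance matrix. */
void covariance_from_hessian(gsl_matrix *hessian) {
  gsl_linalg_cholesky_decomp(hessian);   // H = L·Lᵀ
  gsl_linalg_cholesky_invert(hessian);   // H ← H⁻¹ = (L⁻¹)ᵀ·L⁻¹
}
```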
The result is shown below:
$$
\bar{\alpha_L} = 0.6541 \et
\bar{\beta_L} = 0.06125 \et
\bar{\gamma_L} = -0.1763
$$ {#eq:Like_res}
$$
\text{cov}_L = \begin{pmatrix}
9.225 \cdot 10^{-6} & -1.785 \cdot 10^{-6} & 2.41 \cdot 10^{-7} \\
-1.785 \cdot 10^{-6} & 4.413 \cdot 10^{-6} & 1.58 \cdot 10^{-6} \\
2.41 \cdot 10^{-7} & 1.58 \cdot 10^{-6} & 2.602 \cdot 10^{-6}
\end{pmatrix}
$$ {#eq:Like_cov}
Hence:
\begin{align*}
\alpha_L &= 0.654 \pm 0.003 \\
\beta_L &= 0.0613 \pm 0.0021 \\
\gamma_L &= -0.1763 \pm 0.0016
\end{align*}
### Least squares estimation
Another method that can be used to estimate {$\alpha$, $\beta$, $\gamma$} are
the least squares (LSQ), which is a multidimensional minimisation
specialised to the case of sum of squared functions called residuals.
The residuals can be anything in general but are usually interpreted as the
difference between an expected (fit) and an observed value. The least squares
then correspond to the expected values that best fit the observations.
To apply the LSQ method, the data must be properly binned, meaning that each
bin should contain a significant number of events (say greater than four or
five): in this case, 30 bins for $\theta$ and 60 for $\phi$ turned out
to be satisfactory. The expected values were given by the product of $N$, $F$
computed at the geometric center of the bin, and the solid angle covered by
the bin itself, namely:
$$
E_{\theta, \phi} = N F(\theta, \phi) \, \Delta \theta \, \Delta \phi \,
\sin{\theta}
$$
Once the data are binned, the number of events in each bin plays the role of the
observed value, while the expected one is given by the probability of finding
an event in that bin multiplied by the total number of points, $N$. Then the
$\chi^2$, defined as follows, must be minimized with respect to the three
parameters to be estimated:
$$
\chi^2 = \sum_i f_i^2 = \|\vec f\|^2 \with f_i = \frac{O_i - E_i}{\sqrt{E_i}}
$$
where the sum runs over the bins, $O_i$ are the observed values and $E_i$ the
expected ones. Besides the more generic `gsl_multimin.h` library, GSL provides
a dedicated one for least squares minimisation, `gsl_multifit_nlinear.h`. A
trust region method was used to minimize the $\chi^2$.
In such methods, the $\chi^2$, its gradient and its Hessian matrix are computed
at a sequence of points in parameter space until the minimum is approached with
the desired accuracy, as measured by suitable thresholds on the gradient and on
the step.
If {$x_1 \dots x_p$} are the parameters to be estimated, $\chi^2$ is first
computed at the current point $\vec{x}_k$, then its value at a point
$\vec{x}_k + \vec{\delta}$ is modelled by a function $m_k (\vec{\delta})$, which
is the second order Taylor expansion around the point $\vec{x}_k$, namely:
$$
\chi^2 (\vec{x}_k + \vec{\delta}) \sim m_k (\vec{\delta}) =
\chi^2 (\vec{x}_k) + \nabla_k^T \vec{\delta} + \frac{1}{2}
\vec{\delta}^T H_k \vec{\delta}
$$
where $\nabla_k$ and $H_k$ are the gradient and the Hessian matrix of $\chi^2$
at the point $\vec{x}_k$ and the superscript $T$ stands for the transpose. The
initial problem is then reduced to the so-called trust-region subproblem (TRS),
which is the minimisation of $m_k(\vec{\delta})$ in a region where
$m_k(\vec{\delta})$ should be a good approximation of $\chi^2 (\vec{x}_k
+ \vec{\delta})$, given by the condition:
$$
\|D_k \vec\delta\| < \Delta_k
$$
If $D_k = I$ the region is a hypersphere of radius $\Delta_k$
centered at $\vec{x}_k$, but it can be deformed according to $D_k$. This is
necessary when one or more parameters have very different scales.
Given an initial point $x_0$, radius $\Delta_0$, scale matrix $D_0$
and step $\vec\delta_0$, the LSQ algorithm consists of the following iteration:
1. Construct the function $m_k(\vec\delta)$.
2. Solve the TRS for step $\vec\delta_k$.
3. Check whether the step actually decreases $\chi^2$:
    1. if it does, increase the trust region radius, $\Delta_{k+1} =
       \alpha\Delta_k$, and move to the new position, $\vec x_{k+1} = \vec x_k +
       \vec \delta_k$;
    2. if it does not, decrease the radius, $\Delta_{k+1} = \Delta_k/\beta$.
4. Repeat
This method is advantageous compared to a general minimisation because
the TRS usually amounts to solving a linear system. There are many algorithms
to solve the problem; in this case the Levenberg-Marquardt one was used. It is based
on a theorem that proves the existence of a number $\mu_k$ such that
\begin{align*}
\Delta_k \mu_k = \|D_k \vec\delta_k\| &&
(H_k + \mu_k D_k^TD_k) \vec\delta_k = -\nabla_k
\end{align*}
Using the approximation[^1] $H\approx J^TJ$, obtained by computing the Hessian
of $\chi^2$ with the residuals expanded to first order, $\vec\delta_k$ can
be found by solving, in the least squares sense, the system
$$
\begin{cases}
J_k \vec{\delta_k} + \vec{f_k} = 0 \\
\sqrt{\mu_k} D_k \vec{\delta_k} = 0
\end{cases}
$$
[^1]: Here $J_{ij} = \partial f_i/\partial x_j$ is the Jacobian matrix of the
vector-valued function $\vec f(\vec x)$.
The algorithm terminates when one of the following conditions is satisfied:
1. $|\delta_i| \leq \text{xtol} (|x_i| + \text{xtol})$ for every component
of $\vec \delta$.
2. $\max_i |\nabla_i \cdot \max(x_i, 1)| \leq \text{gtol}\cdot \max(\chi^2, 1)$
3. $\|\vec f(\vec x + \vec\delta) - \vec f(\vec x)\| \leq \text{ftol}
   \cdot \max(\|\vec f(\vec x)\|, 1)$
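As an illustration, a rough sketch of the corresponding `gsl_multifit_nlinear.h`
setup is shown below; `residual_f` and `residual_df` are hypothetical routines
filling $\vec f$ and its Jacobian from the binned data, and `n_bins` is the
total number of bins.

```c
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_multifit_nlinear.h>

/* Hypothetical residuals f_i = (O_i - E_i)/√E_i and their Jacobian. */
int residual_f(const gsl_vector *par, void *data, gsl_vector *f);
int residual_df(const gsl_vector *par, void *data, gsl_matrix *J);

int lsq_fit(void *binned_data, size_t n_bins, gsl_vector *par) {
  gsl_multifit_nlinear_fdf fdf = {
    .f      = residual_f,
    .df     = residual_df,   // NULL would select finite differences
    .fvv    = NULL,          // no geodesic acceleration
    .n      = n_bins,
    .p      = 3,
    .params = binned_data
  };

  /* trust region method with the Levenberg-Marquardt TRS (the default) */
  gsl_multifit_nlinear_parameters opts =
    gsl_multifit_nlinear_default_parameters();
  gsl_multifit_nlinear_workspace *w = gsl_multifit_nlinear_alloc(
    gsl_multifit_nlinear_trust, &opts, n_bins, 3);
  gsl_multifit_nlinear_init(par, &fdf, w);

  /* iterate until one of the xtol/gtol/ftol conditions is met */
  int info;
  int status = gsl_multifit_nlinear_driver(
    100,                // max iterations
    1e-8, 1e-8, 1e-8,   // xtol, gtol, ftol
    NULL, NULL,         // no per-iteration callback
    &info, w);

  gsl_vector_memcpy(par, gsl_multifit_nlinear_position(w));
  gsl_multifit_nlinear_free(w);
  return status;
}
```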
These tolerances have all been set to \SI{1e-8}{}. The program converged in 7
iterations giving the results below. The covariance of the parameters can again
be estimated through the Hessian matrix at the minimum. The following results
were obtained:
$$
\bar{\alpha_{\chi}} = 0.6537 \et
\bar{\beta_{\chi}} = 0.05972 \et
\bar{\gamma_{\chi}} = -0.1736
$$ {#eq:lsq-res}
$$
\text{cov}_{\chi} = \begin{pmatrix}
9.244 \cdot 10^{-6} & -1.66 \cdot 10^{-6} & 2.383 \cdot 10^{-7} \\
-1.66 \cdot 10^{-6} & 4.495 \cdot 10^{-6} & 1.505 \cdot 10^{-6} \\
2.383 \cdot 10^{-7} & 1.505 \cdot 10^{-6} & 2.681 \cdot 10^{-6}
\end{pmatrix}
$$
Hence:
\begin{align*}
&\alpha_{\chi} = 0.654 \pm 0.003 \\
&\beta_{\chi} = 0.0597 \pm 0.0021 \\
&\gamma_{\chi} = -0.1736 \pm 0.0016
\end{align*}
### Results compatibility
In order to compare the values $x_L$ and $x_{\chi}$ obtained from both methods
with the correct ones (namely {$\alpha_0$, $\beta_0$, $\gamma_0$}), the
following compatibility t-test was applied:
$$
p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with
t = \frac{x_i - x_0}{\sqrt{\sigma_{x_i}^2 + \sigma_{x_0}^2}}
= \frac{x_i - x_0}{\sigma_{x_i}}
$$
where $i$ stands either for the MLM or the LSQ parameters, $\sigma_{x_i}$ is
the uncertainty of the value $x_i$ and $\sigma_{x_0} = 0$, since the exact
values carry no uncertainty. At 95% confidence level, the values are compatible
if $p > 0.05$.
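In code, this test reduces to a few lines; a minimal sketch using the standard
C `erf` (the absolute value makes it symmetric in $x_i$ and $x_0$):

```c
#include <math.h>

/* Two-sided p-value for the compatibility of an estimate x
 * (with uncertainty sigma) with the exact value x0. */
double compatibility(double x, double sigma, double x0) {
  double t = fabs(x - x0) / sigma;
  return 1 - erf(t / sqrt(2));
}
```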
Likelihood results:
----------------------------
par $p$-value
------------ ---------------
$\alpha_L$ 0.18
$\beta_L$ 0.55
$\gamma_L$ 0.023
----------------------------
Table: Likelihood results compatibility.
$\chi^2$ results:
---------------------------------
par $p$-value
----------------- ---------------
$\alpha_{\chi}$ 0.22
$\beta_{\chi}$ 0.89
$\gamma_{\chi}$ 0.0001
---------------------------------
Table: $\chi^2$ results compatibility.
It can be concluded that only the third parameter, $\gamma$, is not compatible
with the expected one in both cases. An in-depth analysis of the algebraic
arrangement of $F$ would be required in order to justify this outcome.
\vspace{30pt}
## Isotropic hypothesis testing
What if the probability distribution function were isotropic? Could it be
compatible with the found results? If $F$ were isotropic, then $\alpha_I$,
$\beta_I$ and $\gamma_I$ would be $1/3$, 0, and 0 respectively, since this
gives $F_I = 1/(4 \pi)$. The t-test gives a $p$-value of approximately zero for
all three parameters, meaning that there is no compatibility at all with this
hypothesis.
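As a rough check of the order of magnitude, taking the MLM estimate of
$\alpha$ alone:
$$
t = \frac{0.654 - 1/3}{0.003} \approx 107
\thus p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right) \approx 0
$$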
269
notes/sections/4.md Normal file
View File
@ -0,0 +1,269 @@
# Exercise 4
**Kinematic dip pdf derivation**
Consider a great number of non-interacting particles, each of which has a
random momentum $\vec{p}$ with modulus between 0 and $p_{\text{max}}$ and a
random direction with respect to a coordinate system {$\hat{x}$, $\hat{y}$,
$\hat{z}$}.
Once the polar angle $\theta$ is defined, the vertical and horizontal momentum
components of a particle, which will be referred to as $p_v$ and $p_h$, are the
ones shown in @fig:components.
If $\theta$ is evenly distributed on the sphere and the same holds for the
modulus $p$, which distribution will the average of the absolute value of
$p_v$ follow as a function of $p_h$?
\begin{figure}
\hypertarget{fig:components}{%
\centering
\begin{tikzpicture}
% Axes
\draw [thick, ->] (5,2) -- (5,8);
\draw [thick, ->] (5,2) -- (2,1);
\draw [thick, ->] (5,2) -- (8,1);
\node at (1.5,0.9) {$x$};
\node at (8.5,0.9) {$y$};
\node at (5,8.4) {$z$};
% Momentum
\definecolor{cyclamen}{RGB}{146, 24, 43}
\draw [ultra thick, ->, cyclamen] (5,2) -- (3.8,6);
\draw [thick, dashed, cyclamen] (3.8,0.8) -- (3.8,6);
\draw [thick, dashed, cyclamen] (5,7.2) -- (3.8,6);
\draw [ultra thick, ->, pink] (5,2) -- (5,7.2);
\draw [ultra thick, ->, pink] (5,2) -- (3.8,0.8);
\node at (4.8,1.1) {$\vec{p_h}$};
\node at (5.5,6.6) {$\vec{p_v}$};
\node at (3.3,5.5) {$\vec{p}$};
% Angle
\draw [thick, cyclamen] (4.4,4) to [out=80,in=210] (5,5);
\node at (4.7,4.2) {$\theta$};
\end{tikzpicture}
\caption{Momentum components.}\label{fig:components}
}
\end{figure}
The aim is to compute $\langle |p_v| \rangle (p_h) dp_h$.
Consider all the points with $p_h \in [p_h ; p_h + dp_h]$: the values of
$p_v$ that these points can assume depend on $\theta$ and on the total momentum
modulus $p$.
$$
\begin{cases}
p_h = p \sin{\theta} \\ p_v = p \cos{\theta}
\end{cases}
\thus |p_v| = p |\cos{\theta}| = p_h \frac{|\cos{\theta}|}{\sin{\theta}}
= |p_v| (\theta)
$$
It looks like the dependence on $p$ has disappeared, but obviously it has
not. In fact, it lies beneath the limits that must be put on the possible
values of $\theta$. For the sake of clarity, take a look at @fig:sphere (the
system is rotation-invariant, hence it can be drawn at a fixed azimuthal angle).
\begin{figure}
\hypertarget{fig:sphere}{%
\centering
\begin{tikzpicture}
% p_h slice
\definecolor{cyclamen}{RGB}{146, 24, 43}
\filldraw [cyclamen!15!white] (1.5,-3.15) -- (1.5,3.15) -- (1.75,3.05)
-- (2,2.85) -- (2,-2.85) -- (1.75,-3.05)
-- (1.5,-3.15);
\draw [cyclamen] (1.5,-3.15) -- (1.5,3.15);
\draw [cyclamen] (2,-2.9) -- (2,2.9);
\node [cyclamen, left] at (1.5,-0.3) {$p_h$};
\node [cyclamen, right] at (2,-0.3) {$p_h + dp_h$};
% Axes
\draw [thick, ->] (0,-4) -- (0,4);
\draw [thick, ->] (0,0) -- (4,0);
\node at (0.3,3.8) {$z$};
\node at (4,0.3) {$hd$};
% p_max semicircle
\draw [thick, cyclamen] (0,-3.5) to [out=0, in=-90] (3.5,0);
\draw [thick, cyclamen] (0,3.5) to [out=0, in=90] (3.5,0);
\node [cyclamen, left] at (-0.2,3.5) {$p_{\text{max}}$};
\node [cyclamen, left] at (-0.2,-3.5) {$-p_{\text{max}}$};
% Angles
\draw [thick, cyclamen, ->] (0,1.5) to [out=0, in=140] (0.55,1.2);
\node [cyclamen] at (0.4,2) {$\theta$};
\draw [thick, cyclamen] (0,0) -- (1.5,3.15);
\node [cyclamen, above right] at (1.5,3.15) {$\theta_x$};
\draw [thick, cyclamen] (0,0) -- (1.5,-3.15);
\node [cyclamen, below right] at (1.5,-3.15) {$\theta_y$};
% Vectors
\draw [ultra thick, cyclamen, ->] (0,0) -- (1.7,2.2);
\draw [ultra thick, cyclamen, ->] (0,0) -- (1.9,0.6);
\draw [ultra thick, cyclamen, ->] (0,0) -- (1.6,-2);
\end{tikzpicture}
\caption{Momentum space at fixed azimuthal angle ("$hd$" stands for
"horizontal direction"). Some vectors with $p_h \in [p_h, p_h +dp_h]$
are evidenced.}\label{fig:sphere}
}
\end{figure}
As can be seen, $\theta_x$ and $\theta_y$ are the minimum and maximum tilt
angles of these vectors respectively, because if a point had $p_h \in [p_h; p_h
+ dp_h]$ and $\theta < \theta_x$ or $\theta > \theta_y$, it would have $p >
p_{\text{max}}$. Therefore their values are easily computed as follows:
$$
p_h = p_{\text{max}} \sin{\theta_x} = p_{\text{max}} \sin{\theta_y}
\thus \sin{\theta_x} = \sin{\theta_y} = \frac{p_h}{p_{\text{max}}}
$$
Since the average value of a quantity is computed by integrating it over all
the possible values of the variables it depends on, weighted by their
probability, one gets:
$$
\langle |p_v| \rangle (p_h) dp_h = \int_{\theta_x}^{\theta_y}
d\theta P(\theta) \cdot P(p) \, dp \cdot |p_v| (\theta)
$$
where $d\theta P(\theta)$ is the probability of generating a point with $\theta
\in [\theta; \theta + d\theta]$ and $P(p) \, dp$ is the probability of
generating a point with $\vec{p}$ in the pink region in @fig:sphere, given a
fixed $\theta$.
The easiest to deduce is $P(p) \, dp$: since $p$ is evenly distributed, it
follows that:
$$
P(p) \, dp = \frac{1}{p_{\text{max}}} dp
$$
with:
$$
dp = p(p_h + dp_h) - p(p_h)
= \frac{p_h + dp_h}{\sin{\theta}} - \frac{p_h}{\sin{\theta}}
= \frac{dp_h}{\sin{\theta}}
$$
hence
$$
P(p) \, dp = \frac{1}{p_{\text{max}}} \cdot \frac{1}{\sin{\theta}} \, dp_h
$$
For $d\theta P(\theta)$, instead, the same considerations made in @sec:3
apply, from which:
$$
P(\theta) d\theta = \frac{1}{2} \sin{\theta} d\theta
$$
Ultimately, having found all the pieces, the integral must be computed:
\begin{align*}
\langle |p_v| \rangle (p_h) dp_h &= \int_{\theta_x}^{\theta_y}
d\theta \frac{1}{2} \sin{\theta} \cdot
\frac{1}{p_{\text{max}}} \frac{1}{\sin{\theta}} \, dp_h \cdot
p_h \frac{|\cos{\theta}|}{\sin{\theta}}
\\
&= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \int_{\theta_x}^{\theta_y}
d\theta \frac{|\cos{\theta}|}{\sin{\theta}}
\\
&= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \cdot \mathscr{O}
\end{align*}
Then, with a bit of math:
\begin{align*}
\mathscr{O} &= \int_{\theta_x}^{\theta_y} d\theta
\frac{|\cos{\theta}|}{\sin{\theta}}
\\
&= \int_{\theta_x}^{\frac{\pi}{2}} d\theta \frac{\cos{\theta}}{\sin{\theta}}
- \int_{\frac{\pi}{2}}^{\theta_y} d\theta \frac{\cos{\theta}}{\sin{\theta}}
\\
&= \left[ \ln{(\sin{\theta})} \right]_{\theta_x}^{\frac{\pi}{2}}
- \left[ \ln{(\sin{\theta})} \right]_{\frac{\pi}{2}}^{\theta_y}
\\
&= \ln{(1)} -\ln{ \left( \frac{p_h}{p_{\text{max}}} \right) }
- \ln{ \left( \frac{p_h}{p_{\text{max}}} \right) } + \ln{(1)}
\\
&= 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) }
\end{align*}
\newpage
Hence, in conclusion:
\begin{align*}
\langle |p_v| \rangle (p_h) dp_h &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}}
\cdot 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) }
\\
&= \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) }
\frac{p_h}{p_{\text{max}}} dp_h
\end{align*}
Namely:
\begin{figure}
\hypertarget{fig:plot}{%
\centering
\begin{tikzpicture}
\definecolor{cyclamen}{RGB}{146, 24, 43}
% Axis
\draw [thick, <->] (0,5) -- (0,0) -- (11,0);
\node [below right] at (11,0) {$p_h$};
\node [above left] at (0,5) {$\langle |p_v| \rangle$};
% Plot
\draw [domain=0.001:10, smooth, variable=\x,
cyclamen, ultra thick] plot ({\x},{12*ln(10/\x)*\x/10});
\node [cyclamen, below] at (10,0) {$p_{\text{max}}$};
\end{tikzpicture}
\caption{Plot of the expected distribution.}\label{fig:plot}
}
\end{figure}
**The code**
The same distribution should be found by generating and binning points in a
proper way.
A number of $N = 50'000$ points were generated as couples of values ($p$,
$\theta$), with $p$ evenly distributed between 0 and $p_{\text{max}}$ and
$\theta$ given by the same procedure described in @sec:3, namely:
$$
\theta = \text{acos}(1 - 2x)
$$
with $x$ uniformly distributed between 0 and 1.
The data binning turned out to be a bit challenging. Once the couple ($p$,
$\theta$) was generated and $p_h$ computed, the bin the latter falls into must
be found. If $n$ is the number of bins the range [0, $p_{\text{max}}$] is to
be divided into, then the width $w$ of each bin is given by:
$$
w = \frac{p_{\text{max}}}{n}
$$
and the $j^{\text{th}}$ bin $p_h$ falls into is:
$$
j = \text{floor} \left( \frac{p_h}{w} \right)
$$
where 'floor' is the function which gives the greatest integer less than or
equal to its argument, and the bins are counted starting from zero.
Then, a vector in which the $j^{\text{th}}$ entry contains the sum $S_j$ of all
the $|p_v|$s relative to each $p_h$ fallen into the $j^{\text{th}}$ bin and the
number num$_j$ of entries in the $j^{\text{th}}$ bin was reiteratively updated.
At the end, the average value of $|p_v|_j$ was computed as $S_j /
\text{num}_j$.
For the sake of clarity, for each generated couple, it works like this:
- the couple $[p; \theta]$ is generated;
- $p_h$ and $p_v$ are computed;
- the $j^{\text{th}}$ bin in which $p_h$ goes in is computed;
- num$_j$ is increased by 1;
- $S_j$ (which is zero at the beginning of everything) is increased by a factor
$|p_v|$.
At the end, $\langle |p_v| \rangle_j$ = $\langle |p_v| \rangle (p_h)$ where:
$$
p_h = j \cdot w + \frac{w}{2} = w \left( j + \frac{1}{2} \right)
$$
The following result was obtained:
![Histogram of the obtained distribution.](images/dip.pdf)
113
notes/sections/5.md Normal file
View File
@ -0,0 +1,113 @@
# Exercise 5
**Numerically compute an integral value via Monte Carlo approaches**
The integral to be evaluated is the following:
$$
I = \int\limits_0^1 dx \, e^x
$$
whose exact value is 1.7182818285...
The three most popular MC methods were applied: plain Monte Carlo, Miser and
Vegas (besides their popularity, these three methods were chosen for being
the only ones implemented in the GSL library).
## Plain Monte Carlo
When the integral $I$ of a function $f$ over a volume $V$ must be evaluated in
an $n$-dimensional space, the simplest MC approach is to sample $N$
points $x_i$ evenly distributed in $V$ and approximate $I$ as:
$$
I \sim I_N = \frac{V}{N} \sum_{i=1}^N f(x_i) = V \cdot \langle f \rangle
$$
with $I_N \rightarrow I$ for $N \rightarrow + \infty$ by the law of large
numbers. Hence, the variance can be simply estimated as:
$$
\sigma^2 =
\frac{1}{N-1} \sum_{i = 1}^N \left( f(x_i) - \langle f \rangle \right)^2
$$
thus, the error on $I_N$ decreases as $1/\sqrt{N}$. Unlike in deterministic
methods, this estimate of the error is not a strict error bound: random
sampling may miss important features of the integrand, which can result in
an underestimate of the error.
Since the proximity of $I_N$ to $I$ depends on $N$, the accuracy of the
method is determined by the number of function calls (as shown in @tbl:MC).
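A minimal sketch of the plain MC call with `gsl_monte_plain.h` follows; the
wrapper `expx` is just $e^x$ with the signature required by
`gsl_monte_function`, and the call count matches the first column of @tbl:MC.

```c
#include <stdio.h>
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_monte_plain.h>

/* integrand with the signature required by gsl_monte_function */
double expx(double *x, size_t dim, void *params) {
  return exp(x[0]);
}

int main(void) {
  double lower[] = {0}, upper[] = {1};       // integration region [0, 1]
  gsl_monte_function f = { &expx, 1, NULL }; // integrand, dimension, params

  gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
  gsl_monte_plain_state *s = gsl_monte_plain_alloc(1);

  double res, err;
  gsl_monte_plain_integrate(&f, lower, upper,
                            1,        // dimension
                            500000,   // function calls
                            r, s, &res, &err);
  printf("I = %.10f ± %.10f\n", res, err);

  gsl_monte_plain_free(s);
  gsl_rng_free(r);
  return 0;
}
```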
-----------------------------------------------------------------
500'000 calls 5'000'000 calls 50'000'000 calls
--------- ----------------- ------------------ ------------------
result 1.7166435813 1.7181231109 1.7183387184
$\sigma$ 0.0006955691 0.0002200309 0.0000695809
-----------------------------------------------------------------
Table: MC results and errors with different numbers of function
calls. {#tbl:MC}
## Miser
The MISER algorithm is based on recursive stratified sampling.
On each recursion step the integral and the error are estimated using a plain
Monte Carlo algorithm. If the error estimate is larger than the required
accuracy, the integration volume is divided into sub-volumes and the procedure
is recursively applied to sub-volumes.
This technique aims to reduce the overall integration error by concentrating
integration points in the regions of highest variance.
The idea of stratified sampling begins with the observation that for two
disjoint regions $a$ and $b$ with Monte Carlo estimates of the integral
$E_a (f)$ and $E_b (f)$ and variances $\sigma_a^2 (f)$ and $\sigma_b^2 (f)$,
the variance $V (f)$ of the combined estimate $E (f)$:
$$
E (f)= \frac {1}{2} \left( E_a (f) + E_b (f) \right)
$$
is given by,
$$
V(f) = \frac{\sigma_a^2 (f)}{4 N_a} + \frac{\sigma_b^2 (f)}{4 N_b}
$$
It can be shown that this variance is minimized by distributing the points
such that:
$$
\frac{N_a}{N_a + N_b} = \frac{\sigma_a}{\sigma_a + \sigma_b}
$$
Hence the smallest error estimate is obtained by allocating sample points in proportion
to the standard deviation of the function in each sub-region.
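The GSL interface for MISER, provided by `gsl_monte_miser.h`, mirrors the plain
MC one: only the state type changes. A minimal sketch, reusing the same kind of
`gsl_monte_function` as in the plain MC example above:

```c
#include <gsl/gsl_rng.h>
#include <gsl/gsl_monte_miser.h>

/* Integrate `f` over [0, 1] with MISER, with the same conventions
 * as the plain MC sketch above. */
void miser_integrate(gsl_monte_function *f, size_t calls,
                     double *res, double *err) {
  double lower[] = {0}, upper[] = {1};
  gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
  gsl_monte_miser_state *s = gsl_monte_miser_alloc(1);
  gsl_monte_miser_integrate(f, lower, upper, 1, calls, r, s, res, err);
  gsl_monte_miser_free(s);
  gsl_rng_free(r);
}
```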
---------------------------------------------------------
calls plain MC Miser Vegas
------------ -------------- -------------- --------------
500'000 1.7166435813 1.7182850738 1.7182818354
5'000'000 1.7181231109 1.7182819143 1.7182818289
50'000'000 1.7183387184 1.7182818221 1.7182818285
---------------------------------------------------------
Table: Results of the three methods. {#tbl:results}
---------------------------------------------------------
calls plain MC Miser Vegas
------------ -------------- -------------- --------------
500'000 0.0006955691 0.0000021829 0.0000000137
5'000'000 0.0002200309 0.0000001024 0.0000000004
50'000'000 0.0000695809 0.0000000049 0.0000000000
---------------------------------------------------------
Table: $\sigma$s of the three methods. {#tbl:sigmas}
85
notes/sections/6.md Normal file
View File
@ -0,0 +1,85 @@
# Exercise 6
**Generating points according to Fraunhofer diffraction**
The diffraction of a plane wave through a round slit must be simulated by
generating $N =$ 50'000 points according to the intensity distribution
$I(\theta)$ on a screen at a great distance $L$ from the slit itself:
$$
I(\theta) = \frac{E^2}{2} \left( \frac{2 \pi a^2 \cos{\theta}}{L}
\frac{J_1(x)}{x} \right)^2 \with x = k a \sin{\theta}
$$
where:
- $E$ is the electric field amplitude, default set $E = \SI{1e4}{V/m}$;
- $a$ is the radius of the slit aperture, default set $a = \SI{0.01}{m}$;
- $\theta$ is the angle specified in @fig:fenditure;
- $J_1$ is the Bessel function of first order;
- $k$ is the wavenumber, default set $k = \SI{1e-4}{m^{-1}}$;
- $L$ default set $L = \SI{1}{m}$.
\begin{figure}
\hypertarget{fig:fenditure}{%
\centering
\begin{tikzpicture}
\definecolor{cyclamen}{RGB}{146, 24, 43}
% Walls
\draw [thick] (-1,3) -- (1,3) -- (1,0.3) -- (1.2,0.3) -- (1.2,3)
-- (9,3);
\draw [thick] (-1,-3) -- (1,-3) -- (1,-0.3) -- (1.2,-0.3) -- (1.2,-3)
-- (9,-3);
\draw [thick] (10,3) -- (9.8,3) -- (9.8,-3) -- (10,-3);
% Lines
\draw [thick, gray] (0.7,0.3) -- (0.5,0.3);
\draw [thick, gray] (0.7,-0.3) -- (0.5,-0.3);
\draw [thick, gray] (0.6,0.3) -- (0.6,-0.3);
\draw [thick, gray] (1.2,0) -- (9.8,0);
\draw [thick, gray] (1.2,-0.1) -- (1.2,0.1);
\draw [thick, gray] (9.8,-0.1) -- (9.8,0.1);
\draw [thick, cyclamen] (1.2,0) -- (9.8,-2);
\draw [thick, cyclamen] (7,0) to [out=-90, in=50] (6.6,-1.23);
% Nodes
\node at (0,0) {$2a$};
\node at (5.5,0.4) {$L$};
\node [cyclamen] at (5.5,-0.4) {$\theta$};
\node [rotate=-90] at (10.2,0) {screen};
\end{tikzpicture}
\caption{Fraunhofer diffraction.}\label{fig:fenditure}
}
\end{figure}
Once again, $\theta$, which must be evenly distributed on the half sphere, can
be generated only as a function of a variable $x$ uniformly distributed between
0 and 1. Therefore:
\begin{align*}
\frac{d^2 P}{d\omega^2} = const = \frac{1}{2 \pi}
&\thus d^2 P = \frac{1}{2 \pi} d\omega^2 =
\frac{1}{2 \pi} d\phi \sin{\theta} d\theta \\
&\thus \frac{dP}{d\theta} = \int_0^{2 \pi} d\phi \frac{1}{2 \pi} \sin{\theta}
= \frac{1}{2 \pi} \sin{\theta} \, 2 \pi = \sin{\theta}
\end{align*}
\begin{align*}
\theta = \theta (x) &\thus
\frac{dP}{d\theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d\theta} \right|
= \left. \frac{dP}{dx} \middle/ \, \left| \frac{d\theta}{dx} \right| \right.
\\
&\thus \sin{\theta} = \left. 1 \middle/ \, \left|
\frac{d\theta}{dx} \right| \right.
\end{align*}
Since $\theta \in [0, \pi/2]$, then the absolute value symbol can be omitted:
\begin{align*}
\frac{d\theta}{dx} = \frac{1}{\sin{\theta}}
&\thus d\theta \sin(\theta) = dx
\\
&\thus - \cos (\theta') |_{0}^{\theta} = x(\theta) - x(0) = x - 0 = x
\\
&\thus - \cos(\theta) + 1 =x
\\
&\thus \theta = \text{acos} (1 -x)
\end{align*}
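A minimal sketch of this inversion, together with the intensity $I(\theta)$ it
would feed into (evaluated with `gsl_sf_bessel_J1`), is shown below; how the
generated points are then distributed according to $I(\theta)$ is beyond this
sketch.

```c
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_sf_bessel.h>

/* θ evenly distributed on the half sphere, from x uniform in [0, 1) */
double sample_theta(gsl_rng *r) {
  return acos(1 - gsl_rng_uniform(r));
}

/* I(θ) with the default parameters listed above
 * (note: J₁(x)/x → 1/2 for x → 0) */
double intensity(double theta) {
  double E = 1e4, a = 0.01, k = 1e-4, L = 1;
  double x = k * a * sin(theta);
  return 0.5 * pow(E, 2) *
    pow(2 * M_PI * a*a * cos(theta) / L * gsl_sf_bessel_J1(x) / x, 2);
}
```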
10
shell.nix Normal file
View File
@ -0,0 +1,10 @@
with import <nixpkgs> { };
stdenv.mkDerivation {
name = "jeypsi";
GMP_PATH = "${gmp}/lib";
buildInputs = with pkgs; [ gdb gsl gmp pkgconfig ];
shellHook = "exec fish";
}