commit cf27610d537e0a198c973b254dff836aa317af53 Author: rnhmjoj Date: Fri Mar 6 02:24:32 2020 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cca1883 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +bin/ +*.pdf +.stignore +todo diff --git a/ex-1/main.c b/ex-1/main.c new file mode 100644 index 0000000..4341cf2 --- /dev/null +++ b/ex-1/main.c @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Function that compare doubles for sorting: + * x > y ⇒ 1 + * x == y ⇒ 0 + * x < y ⇒ -1 + */ +int cmp_double (const void *xp, const void *yp) { + double x = *(double*)xp, + y = *(double*)yp; + return x > y ? 1 : -1; +} + + +/* This is a wrapper needed by `landau_cdf` because + * the numerical integration expects a function + * with parameters. + */ +double landau_pdf(double x, void* params) { + return gsl_ran_landau_pdf(x); +} + + +/* The cumulative function of the Landau distribution + * calculated by numerical integration. + */ +double landau_cdf(double x, void* params) { + // create the integrand + gsl_function pdf; + pdf.function = &landau_pdf; + pdf.params = NULL; + + // set up the integration + double res, err; + size_t iter = 500; + gsl_integration_workspace* w = + gsl_integration_workspace_alloc(iter); + + // clip values too small + if (x < -200) x = -200; + + // We integrate the pdf in [x, +∞) instead + // of (-∞, x] to avoid a singularity and + // then return 1 - P. + gsl_integration_qagiu( + &pdf, // integrand + x, // lower bound + 1e-10, 1e-6, // abs and rel error + iter, // max iteration + w, // "workspace" + &res, &err); // result, abs error + + // free the memory + gsl_integration_workspace_free(w); + + return 1 - res; +} + + +/* Another wrapper: this time it is needed by + * `landau_qdf` to solve the equation: + * `landau_cdf(x) = p0` for x + */ +double landau_cdf_root(double x, void* params) { + double p0 = *(double*)params; + return p0 - landau_cdf(x, NULL); +} + + +/* The quantile function (inverse CDF) of the Landau + * distribution calculated by numerical root method. + */ +double landau_qdf(double p0) { + // create function + gsl_function cdf; + cdf.function = &landau_cdf_root; + cdf.params = &p0; + + // use the Brent method + gsl_root_fsolver* s = + gsl_root_fsolver_alloc(gsl_root_fsolver_brent); + + // search interval + double low = -1000, // lower value + upp = 100000; // upper value + gsl_root_fsolver_set(s, &cdf, low, upp); + + // iterative search + size_t iter = 1000; // max iteration + int stat = GSL_CONTINUE; + for (size_t i=0; stat==GSL_CONTINUE && isum_plain); + fprintf(stderr, "err: %f\n", abserr); + + gsl_sum_levin_utrunc_free(s); + free(terms); + + return sqrt(2*M_PI)/x * sum; +} + + +/* This is a wrapper needed by `numeric_mode` because + * the minimization expects a function to be minimized and not + * maximized. + */ +double neg_landau_pdf(double x, void* params) { + return (-1) * gsl_ran_landau_pdf(x); +} + + +/* Compute numerically the mode of the Landau distribution. 
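+ * The mode is found by minimising `neg_landau_pdf` with the GSL golden-section minimiser, bracketed in [min, max] and iterated (at most 100 times) until the interval shrinks below the requested precision.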
+ */ +double numeric_mode (double min, double max) { + // create funtion + gsl_function pdf; + pdf.function = &neg_landau_pdf; + pdf.params = NULL; + + // initialize minimization + double guess = 0; + int iter = 0; + int max_iter = 100; + double prec = 1e-7; + int status; + const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection; + gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T); + gsl_min_fminimizer_set(s, &pdf, guess, min, max); + + // minimization + do { + iter++; + status = gsl_min_fminimizer_iterate (s); + guess = gsl_min_fminimizer_x_minimum (s); + min = gsl_min_fminimizer_x_lower (s); + max = gsl_min_fminimizer_x_upper (s); + status = gsl_min_test_interval (min, max, prec, prec); + + } while (status == GSL_CONTINUE && iter < max_iter); + + // Free memory + gsl_min_fminimizer_free (s); + return guess; +} + + +/* This is the function to be minimized in `numeric_FWHM`. + */ +double abs_landau_pdf(double x, void* params_) { + double* params = ((double *) params_); + return fabs(gsl_ran_landau_pdf(x) - params[0]); +} + + +/* Compute numerically the FWHM of the Landau distribution. + */ +double numeric_fwhm (double min_lim, double mode, double max_lim) { + // create funtion + gsl_function pdf; + pdf.function = &abs_landau_pdf; + double params [1]; + params[0]= gsl_ran_landau_pdf(mode)/2; + pdf.params = params; + + // initialize minimization for x₋ + double guess = mode - 1; + double min, max; + int iter = 0; + int max_iter = 100; + double prec = 1e-7; + int status; + const gsl_min_fminimizer_type * T = gsl_min_fminimizer_goldensection; + gsl_min_fminimizer * s = gsl_min_fminimizer_alloc (T); + gsl_min_fminimizer_set(s, &pdf, guess, min_lim, mode); + + // minimization + do { + iter++; + status = gsl_min_fminimizer_iterate (s); + guess = gsl_min_fminimizer_x_minimum (s); + min = gsl_min_fminimizer_x_lower (s); + max = gsl_min_fminimizer_x_upper (s); + status = gsl_min_test_interval (min, max, prec, prec); + } while (status == GSL_CONTINUE && iter < max_iter); + double x_low = guess; + + // initialize minimization for x₊ + guess = mode + 1; + gsl_min_fminimizer_set(s, &pdf, guess, mode, max_lim); + + // minimization + do { + iter++; + status = gsl_min_fminimizer_iterate (s); + guess = gsl_min_fminimizer_x_minimum (s); + min = gsl_min_fminimizer_x_lower (s); + max = gsl_min_fminimizer_x_upper (s); + status = gsl_min_test_interval (min, max, prec, prec); + + } while (status == GSL_CONTINUE && iter < max_iter); + double x_upp = guess; + + // Free memory + gsl_min_fminimizer_free (s); + return x_upp - x_low; +} + + +/* Here we generate random numbers in a uniform + * range and by using the quantile we map them + * to a Landau distribution. Then we generate an + * histogram to check the correctness. 
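+ * The sample is then compared with the exact distribution through a Kolmogorov-Smirnov test, and the observed mode and FWHM of the histogram are checked against the values computed numerically from the pdf.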
+ */ +int main(int argc, char** argv) { + // initialize an RNG + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // prepare histogram + size_t samples = 100000; + double* sample = calloc(samples, sizeof(double)); + size_t bins = 40; + double min = -20; + double max = 20; + gsl_histogram* hist = gsl_histogram_alloc(bins); + gsl_histogram_set_ranges_uniform(hist, min, max); + + /* sample points from the Landau distribution + * and fill the histogram + */ + double x; + for(size_t i=0; i D) + D = d; + } + + // Kolmogorov-Smirnov test + double beta = kolmogorov_cdf(D, samples); + fprintf(stderr, "\n# K-S test results\n"); + fprintf(stderr, "D: %f\n", D); + fprintf(stderr, "α: %g\n", 1 - beta); + + // Mode comparison + // + // find the bin with the maximum number of events + double mode_o, maxbin = 0; + double f_mode_o = 0; + double m1, m2 = 0; + for(size_t i=0; ibin[i]; + if (m1 > m2){ + m2 = m1; + maxbin = (double)i; + f_mode_o = hist->bin[i]; + } + } + fprintf(stderr, "\nstep:%.2f\n ", (max - min)/bins); + f_mode_o = f_mode_o/samples; + mode_o = min + (maxbin + 0.5)*(max - min)/bins; + + // print the results + double mode_e = numeric_mode(min, max); + fprintf(stderr, "\n# Numeric mode\n"); + fprintf(stderr, "expected mode: %.7f\n", mode_e); + fprintf(stderr, "observed mode: %.3f\n", mode_o); + + // FWHM comparison + // + // find the bins x₋ and x₊ + double half = f_mode_o*samples/2; + m2 = samples; + double x_low = 0; + double x_upp = 0; + double diff; + for(size_t i=0; ibin[i]; + diff = fabs(m1 - half); + if (diff < m2){ + m2 = diff; + x_low = (double)i; + } + } + m2 = samples; + for(size_t i=maxbin; ibin[i]; + diff = fabs(m1 - half); + if (diff < m2){ + m2 = diff; + x_upp = (double)i; + } + } + x_low = min + (x_low + 0.5)*(max - min)/bins; + x_upp = min + (x_upp + 0.5)*(max - min)/bins; + double fwhm_o = x_upp - x_low; + + // print the results + fprintf(stderr, "\n# Numeric FWHM\n"); + double fwhm_e = numeric_fwhm(min, mode_e, max); + fprintf(stderr, "expected FWHM: %.7f\n", fwhm_e); + fprintf(stderr, "observed FWHM: %.3f\n", fwhm_o); + + + // print the counts + // gsl_histogram_fprintf(stdout, hist, "%g", "%g"); + + // clean up and exit + gsl_histogram_free(hist); + gsl_rng_free(r); + free(sample); + return EXIT_SUCCESS; +} diff --git a/ex-1/pdf-plot.py b/ex-1/pdf-plot.py new file mode 100755 index 0000000..db0bbc7 --- /dev/null +++ b/ex-1/pdf-plot.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +x, y = loadtxt(sys.stdin, unpack=True) +title('Landau distribution', loc='right') +plot(x, y, color='#92182b') +show() diff --git a/ex-1/pdf.c b/ex-1/pdf.c new file mode 100644 index 0000000..6baea2f --- /dev/null +++ b/ex-1/pdf.c @@ -0,0 +1,18 @@ +#include +#include + +int main(int argc, char** argv) { + size_t n = 1000; + double + left = -10, + right = +10, + dx = (right - left)/n; + + double x = left; + for (size_t i = 0; i < n; i++) { + x += dx; + printf("%f %f\n", x, gsl_ran_landau_pdf(x)); + } + + return EXIT_SUCCESS; +} diff --git a/ex-1/plot.py b/ex-1/plot.py new file mode 100755 index 0000000..4d554db --- /dev/null +++ b/ex-1/plot.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +a, b, f = loadtxt(sys.stdin, unpack=True) +title('Landau distribution sampling', loc='right') +hist(a, np.insert(b, 0, a[0]), weights=f/sum(f)) +annotate(f'N={sum(f)}\nbins={len(f)}', (50, 0.2)) +show() diff --git a/ex-2/fancier.c b/ex-2/fancier.c new file mode 100644 index 0000000..c3c4fd1 --- /dev/null +++ b/ex-2/fancier.c @@ 
-0,0 +1,511 @@ +#include +#include +#include +#include +#include + +/* The Euler-Mascheroni constant is here computed to + * arbitrary precision using GMP rational numbers + * with the formula: + * + * γ = A(N)/B(N) - C(N)/B(N)² - log(N) + * + * with: + * + * A(N) = Σ_(k = 1)^(k_max) (N^k/k!) * H(k) + * B(N) = Σ_(k = 0)^(k_max) (N^k/k!) + * C(N) = 1/4N Σ_(k = 0)^(2N) ((2k)!)^3/((k!)^4*(16N))^2k + * H(k) = Σ_(j = 1)^(k) (1/k), the k-th harmonic number + * + * where N is computed from D as written below and k_max is + * currently 5*N, chosen in a completely arbitrary way. + * + * source: http://www.numberworld.org/y-cruncher/internals/formulas.html + * + */ + +/* `mpq_dec_str(z, n)` + * returns the decimal representation of the + * rational z as a string, up to n digits. + */ +char* mpq_dec_str(mpq_t z, size_t mantissa_size) { + mpz_t num, den, quot, rem; + mpz_inits(num, den, quot, rem, NULL); + + // calculate integral part + mpq_get_num(num, z); + mpq_get_den(den, z); + mpz_tdiv_qr(quot, rem, num, den); + + // store the integral part + size_t integral_size = mpz_sizeinbase(quot, 10); + char* repr = calloc(integral_size + 1 + mantissa_size + 1, sizeof(char)); + mpz_get_str(repr, 10, quot); + + // add decimal point + repr[integral_size] = '.'; + + // calculate the mantissa + mpz_t cur; + mpz_init(cur); + size_t digit; + for (size_t i = 0; i < mantissa_size; i++) { + // calculate i-th digit as: + // digit = (remainder * 10) / denominator + // new_ramainder = (remainder * 10) % denominator + mpz_set(cur, rem); + mpz_mul_ui(cur, rem, 10); + mpz_tdiv_qr(quot, rem, cur, den); + + // store digit in mantissa + digit = mpz_get_ui(quot); + sprintf(repr + integral_size + 1 + i, "%ld", digit); + } + + // free memory + mpz_clears(num, den, quot, rem, cur, NULL); + + return repr; +} + + +void mpq_fprintf(FILE *fp, size_t digits, char* fmt, ...) { + va_list args; + va_start(args, fmt); + + char* str; + char fmtc[2] = "%x"; + + for (char* p = fmt; *p; p++) { + if (*p != '%') { + fprintf(fp, "%c", *p); + continue; + } + switch (*++p) { + case 's': + str = va_arg(args, char*); + fputs(str, fp); + break; + case 'l': + if (*++p == 'd') + fprintf(fp, "%ld", va_arg(args, size_t)); + break; + case 'q': + str = mpq_dec_str(*va_arg(args, mpq_t*), digits); + fputs(str, fp); + free(str); + break; + default: + fmtc[1] = *p; + gmp_fprintf(fp, fmtc, *va_arg(args, mpq_t*)); + } + } + va_end(args); +} + + +/* `mpq_log2(z, D)` + * calculate log(2) to D decimal places + * using the series + * + * log(2) = Σ_{k=1} 1/(k 2^k) + * + * The error of the n-th partial sum is + * estimated as + * + * Δn = 1/(n(n+2) 2^(n-1)). + * + * So to fix the first D decimal places + * we evaluate up to n terms such that + * + * Δn < 1/10^D + */ +void mpq_log2(mpq_t rop, size_t digits) { + mpq_t sum, term; + mpq_inits(sum, term, NULL); + + fprintf(stderr, "[mpq_log2] decimal digits: %ld\n", digits); + + /* calculate n, the number of required terms */ + size_t n; + mpz_t temp, prec, width; mpz_inits(temp, prec, width, NULL); + // prec = 10^digits + mpz_ui_pow_ui(prec, 10, digits + 2); + + for (n = 1;; n++) { + // width = n*(n + 2)*2^(n - 1) + mpz_set_ui(width, n); + mpz_set_ui(temp, n); + mpz_add_ui(temp, temp, 2); + mpz_mul(width, width, temp); + mpz_sub_ui(temp, temp, 3); + mpz_mul_2exp(width, width, mpz_get_ui(temp)); + + if (mpz_cmp(width, prec) > 0) break; + } + fprintf(stderr, "[mpq_log2] series terms: %ld\n", n); + + /* calculate series */ + for (size_t i=1; i 50 ? 
50 : digits, + "[mpq_log2] log(2)=%q...\n", &sum); + + mpq_set(rop, sum); + + // free memory + mpq_clears(sum, term, NULL); + mpz_clears(temp, prec, width, NULL); +} + + +/* `mpq_log_ui(z, x, D)` + * calculates the logarithm of x up + * to D decimal places using the formula + * + * log(x) = log(a(x) * 2^(n-1)) + * = log(a) + (n-1) log(2) + * + * where 1 0) break; + } + + fprintf(stderr, "[mpq_log_ui] series terms: %ld\n", n); + fprintf(stderr, "[mpq_log_ui] decimal digits: %ld\n", digits); + + + /* calculate the series of + * log(A) = log((1+y)/(1-y)) = 2 Σ_{k=0} y^(2k+1)/(2k+1) + */ + mpq_t sum, term; mpq_inits(sum, term, NULL); + + for (size_t i = 0; i <= n; i++) { + // numerator: y^(2i+1) + mpq_set(temp, y); + mpz_pow_ui(mpq_numref(temp), mpq_numref(temp), 2*i + 1); + mpz_pow_ui(mpq_denref(temp), mpq_denref(temp), 2*i + 1); + mpq_canonicalize(temp); + mpq_set(term, temp); + + // denominator: 2i+1 + mpq_set_ui(temp, 2*i + 1, 1); + mpq_div(term, term, temp); + + // sum the term + mpq_add(sum, sum, term); + } + + // multiply by 2 + mpz_mul_ui(mpq_numref(sum), mpq_numref(sum), 2); + mpq_canonicalize(sum); + + // calculate log(2)*(m-1) + mpq_t log2; mpq_init(log2); + mpq_log2(log2, digits); + + mpz_mul_ui(mpq_numref(log2), mpq_numref(log2), m-1); + mpq_canonicalize(log2); + + // series += log(2)*(m-1) + mpq_add(sum, sum, log2); + mpq_set(rop, sum); + + mpq_fprintf(stderr, digits > 50 ? 50 : digits, + "[mpq_log_ui] log(%ld)=%q...\n", x, sum); + + // free memory + mpq_clears(a, y, temp, sum, term, log2, + y4, y2, y2n, t1, t2, NULL); + mpz_clears(xz, power, NULL); +} + + +/* A series */ +void a_series(mpq_t rop, size_t N) { + mpq_t sum, term, factq, harm, recip; + mpz_t fact, power, stop; + + mpq_inits(sum, term, factq, harm, recip, NULL); + mpz_inits(fact, power, stop, NULL); + mpz_set_ui(stop, N); + + for (size_t k = 0; k < 5*N; k++) { + mpz_fac_ui(fact, k); + mpz_pow_ui(power, stop, k); + + mpq_set_z(term, power); + mpq_set_z(factq, fact); + mpq_div(term, term, factq); + mpq_mul(term, term, term); + + if (k > 0) + mpq_set_ui(recip, 1, k); + mpq_add(harm, harm, recip); + mpq_mul(term, term, harm); + mpq_add(sum, sum, term); + } + + mpq_set(rop, sum); + + // free memory + mpq_clears(sum, term, factq, harm, recip, NULL); + mpz_clears(fact, power, stop, NULL); +} + + +/* B series */ +void b_series(mpq_t rop, size_t N){ + mpq_t sum, term, factq; + mpz_t fact, power, stop; + + mpq_inits(sum, term, factq, NULL); + mpz_inits(fact, power, stop, NULL); + mpz_set_ui(stop, N); + + for (size_t k = 0; k < 5*N; k++) { + mpz_fac_ui(fact, k); + mpz_pow_ui(power, stop, k); + + mpq_set_z(term, power); + mpq_set_z(factq, fact); + mpq_div(term, term, factq); + + mpq_mul(term, term, term); + mpq_add(sum, sum, term); + } + + mpq_set(rop, sum); + + // free memory + mpq_clears(sum, term, factq, NULL); + mpz_clears(fact, power, stop, NULL); +} + + +/* C series */ +void c_series(mpq_t rop, size_t N) { + mpq_t sum, term, stop2, fact2q; + mpz_t fact1, fact2, power, stop1; + + mpq_inits(sum, term, stop2, fact2q, NULL); + mpz_inits(fact1, fact2, power, stop1, NULL); + mpq_set_ui(stop2, 4*N, 1); + + for (size_t k = 0; k <= 2*N; k++) { + mpz_fac_ui(fact1, 2*k); + mpz_pow_ui(fact1, fact1, 3); + + mpz_fac_ui(fact2, k); + mpz_pow_ui(fact2, fact2, 4); + + mpz_set_ui(stop1, 16*N); + mpz_pow_ui(stop1, stop1, 2*k); + mpz_mul(fact2, fact2, stop1); + + mpq_set_z(term, fact1); + mpq_set_z(fact2q, fact2); + + mpq_div(term, term, fact2q); + mpq_add(sum, sum, term); + } + mpq_div(sum, sum, stop2); + + mpq_set(rop, sum); + + // free 
memory + mpq_clears(sum, term, stop2, fact2q, NULL); + mpz_clears(fact1, fact2, power, stop1, NULL); +} + + +int main(int argc, char** argv) { + /* if no argument is given, show the usage */ + if (argc != 2) { + fprintf(stderr, "usage: %s D\n", argv[0]); + fprintf(stderr, "Computes γ up to D decimal places.\n"); + return EXIT_FAILURE; + } + + mpq_t exact, indicator; + mpq_inits(exact, indicator, NULL); + + // value of γ up to 500 digits + mpq_set_str( + exact, + "47342467929426395252720948664321583815681737620205" + "78046578043524355342995304421105234825234769529727" + "60710541774258570118818437511973945213031180449905" + "77067110892216494927227098092412641670668896104172" + "59380467495735805599918127376547804186055507379270" + "09659478275476110597673948659342592173551684412254" + "47940426607883070678658320829056222506449877887289" + "02099638646695565284675455912794915138438516540912" + "22061900908485016328626399040675802962645141413722" + "567840383761005003563012969560659130635723002086605/" + "82018681765164048732047980836753451023877954010252" + "60062364748361667340168652059998708337602423525120" + "45225158774173869894826877890589130978987229877889" + "33367849273189687823618289122425446493605087108634" + "04387981302669131224273324182166778131513056804533" + "58955006355665628938266331979307689540884269372365" + "76288367811322713649805442241450184023209087215891" + "55369788474437679223152173114447113970483314961392" + "48250188991402851129033493732164230227458717486395" + "514436574417275149404197774547389507462779807727616", + 10); + + // number with decimal expansion equal to + // 0123456789 periodic: useful to count digits + mpq_set_str( + indicator, + "137174210/1111111111", + 10); + + // number of decimal places + size_t D = atol(argv[1]); + // number of terms + size_t N = floor(1.0/4 * log(10) * (double)D); + + /* calculate series */ + mpq_t A, B, C; + mpq_inits(A, B, C, NULL); + a_series(A, N); + b_series(B, N); + c_series(C, N); + + /* calculate γ... */ + mpq_t gamma, corr, logn; + mpq_inits(gamma, corr, logn, NULL); + + mpq_log_ui(logn, N, D); + mpq_div(gamma, A, B); + mpq_div(corr, C, B); + mpq_div(corr, corr, B); + mpq_sub(gamma, gamma, corr); + mpq_sub(gamma, gamma, logn); + + /* calculate difference */ + mpq_t diff; + mpq_init(diff); + mpq_sub(diff, gamma, exact); + + /* print results */ + size_t d = D+1 > 50 ? 50 : D+1; + + fprintf(stderr, "[main] N: %ld\n", N); + mpq_fprintf(stderr, d, "[main] approx:\t%q...\n", gamma); + mpq_fprintf(stderr, d, "[main] exact:\t%q...\n", exact); + mpq_fprintf(stderr, d, "[main] diff:\t%q...\n", diff); + mpq_fprintf(stderr, d, "[main] digits:\t%q\n", indicator); + + mpq_fprintf(stdout, D+1, "%q\n", gamma); + + // free memory + mpq_clears(A, B, C, gamma, corr, logn, + exact, indicator, diff, NULL); + + return EXIT_SUCCESS; +} diff --git a/ex-2/fancy.c b/ex-2/fancy.c new file mode 100644 index 0000000..103d4c6 --- /dev/null +++ b/ex-2/fancy.c @@ -0,0 +1,89 @@ +#include +#include +#include + +// The Euler-Mascheroni constant is computed through the formula: +// +// γ = A(N)/B(N) - ln(N) +// +// with: +// +// A(N) = Σ_(k = 1)^(k_max) (N^k/k!) * H(k) +// B(N) = Σ_(k = 0)^(k_max) (N^k/k!) +// H(k) = Σ_(j = 1)^(k) (1/k) +// +// where N is computed from D as written below and k_max is the value +// at which there is no difference between two consecutive terms of +// the sum because of double precision. 
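+// In the code below N is set to ⌊2 + D⋅ln(10)/4⌋, where D is the requested number of decimal places, and the correction term C(N)/B(N)² (see fancier.c) is subtracted as well.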
+// +// source: http://www.numberworld.org/y-cruncher/internals/formulas.html + +// Partial harmonic sum h +double harmonic_sum(double n) { + double sum = 0; + for (double k = 1; k < n+1; k++) { + sum += 1/k; + } + return sum; +} + +// A series +double a_series(int N) { + double sum = 0; + double prev = -1; + for (double k = 1; sum != prev; k++) { + prev = sum; + sum += pow(((pow(N, k))/(tgamma(k+1))), 2) * harmonic_sum(k); + } + return sum; +} + +// B series +double b_series(int N){ + double sum = 0; + double prev = -1; + for (double k = 0; sum != prev; k++) { + prev = sum; + sum += pow(((pow(N, k))/(tgamma(k+1))), 2); + } + return sum; +} + +double c_series(int N) { + double sum = 0; + for (double k = 0; k < N; k++) { + sum += pow(tgamma(2*k + 1), 3)/(pow(tgamma(k + 1), 4) * pow(16.0*N, (int)2*k)); + } + return sum/(4.0*N); +} + +// Takes in input the number D of desired correct decimals +// Best result obtained with D = 15, N = 10 + +int main(int argc, char** argv) { + double exact = + 0.57721566490153286060651209008240243104215933593992; + + // If no argument is given is input, an error signal is displayed + // and the program quits + + if (argc != 2) { + fprintf(stderr, "usage: %s D\n", argv[0]); + fprintf(stderr, "Computes γ up to D decimal places.\n"); + return EXIT_FAILURE; + } + + int N = floor(2.0 + 1.0/4 * log(10) * (double)atoi(argv[1])); + double A = a_series(N); + double B = b_series(N); + double C = c_series(N); + double gamma = A/B - C/(B*B) - log(N); + + printf("N: %d\n", N); + printf("approx:\t%.30f\n", gamma); + printf("true:\t%.30f\n", exact); + printf("diff:\t%.30f\n", fabs(gamma - exact)); + printf("\t 123456789 123456789 123456789\n"); + + return EXIT_SUCCESS; +} diff --git a/ex-2/limit.c b/ex-2/limit.c new file mode 100644 index 0000000..61c7d2e --- /dev/null +++ b/ex-2/limit.c @@ -0,0 +1,43 @@ +#include +#include +#include + +/* Compute the Euler_Mascheroni constant through the formula: + * + * γ = lim_{M → +inf} sum_{k = 1}^{M} bin{M, k} (-1)^k / k * ln(Γ(k+1)) + * + * Best result is obtained for M = 41. + * Values from 1 to 100 were checked. + * + */ + +double exact = 0.577215664901532860; + +int main() { + double best = 0, num = 0; + double sum = 0, prev; + + for (double M = 1; M < 60; M++) { + prev = sum; + sum = 0; + for (double k = 1; k <= M; k++) { + sum += (tgamma(M + 1))/(tgamma(k + 1)*tgamma(M - k + 1)) * pow(-1, k)/k + * log(tgamma (k + 1)); + } + printf("M: \t%d\t", (int)M); + printf("diff:\t%.15f\n", fabs(sum - exact)); + if ((fabs(sum - exact) < fabs(prev - exact)) && + (fabs(sum - exact) < fabs(best - exact))){ + best = sum; + num = M; + } + } + + printf("M: %d\n", (int)num); + printf("approx:\t%.15f\n", best); + printf("true:\t%.15f\n", exact); + printf("diff:\t%.15f\n", fabs(best - exact)); + printf("\t 123456789_12345\n"); + + return EXIT_SUCCESS; +} diff --git a/ex-2/naive.c b/ex-2/naive.c new file mode 100644 index 0000000..cfa1e1b --- /dev/null +++ b/ex-2/naive.c @@ -0,0 +1,57 @@ +#include +#include + +/* Naive approach using the definition + * of γ as the difference between log(n) + * and the Rieman sum of its derivative (1/n). + * + * The result is: + * + * approx: 0.57721 56648 77325 + * true: 0.57721 56649 01533 + * diff: 0.00000 00000 24207 + * + * The convergence is logarithmic and so + * slow that the double precision runs out + * at the 10th decimal digit. 
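+ * The program below increases n by a factor of 10 at each step and stops as soon as the difference from the exact value starts to grow because of round-off.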
+ * + */ + +double exact = 0.577215664901532860; + +double partial_sum(size_t n) { + double sum = 0; + for (double k = 1; k <= n; k++) { + sum += 1/k; + } + return sum - log(n); +} + +int main () { + size_t prev_n, n = 2; + double prev_diff, diff=1; + double approx; + + /* Compute the approximation + * and the difference from the true value. + * Stop when the difference starts increasing + * due to roundoff errors. + * + */ + + do { + prev_n = n; + n *= 10; + prev_diff = diff; + approx = partial_sum(n); + diff = fabs(exact - approx); + printf("n:\t%.0e\t", (double)n); + printf("diff:\t%e\n", diff); + } while (diff < prev_diff); + + printf("n:\t%.0e\n", (double)prev_n); + printf("approx:\t%.15f\n", approx); + printf("true:\t%.15f\n", exact); + printf("diff:\t%.15f\n", prev_diff); + printf("\t 123456789_12345\n"); +} diff --git a/ex-2/recip.c b/ex-2/recip.c new file mode 100644 index 0000000..130d79e --- /dev/null +++ b/ex-2/recip.c @@ -0,0 +1,63 @@ +#include +#include +#include + +/* Compute the Euler-Mascheroni constant through the formula + * of the reciprocal Γ function: + * + * 1/Γ(z) = z*e^{γz} pro_{k = 1}^{+ inf} ((1 + z/k) e^{- z/k}) + * + * Thus: + * + * γ = - 1/z * ln(z*Γ(z)*pro{k = 1}^{+ inf} (1 + z/k) e^{- z/k}) + * + * Product stops when there is no difference between two consecutive + * terms of the productory due to limited precision. + * + * Range of z given in input as min, max and step. + * + */ + +double exact = 0.577215664901532860; + +int main(int argc, char** argv) { + + double min = (double)atof(argv[1]); + double max = (double)atof(argv[2]); + double step = (double)atof(argv[3]); + + double prev_gamma, gamma = 0; + double Z; + double best = 0; + + + for (double z = min; z <= max; z += step) { + + prev_gamma = gamma; + + double pro = 1; + double prev = -1; + for (double k = 1; pro != prev; k++) { + prev = pro; + pro *= (1 + z/k) * exp(- z/k); + if (z == 9 && pro == prev) printf("k:\t%.0f\n", k); + } + double gamma = - 1/z * log(z*tgamma(z)*pro); + printf("z:\t%.2f\t", z); + printf("diff:\t%.20f\n", fabs(gamma - exact)); + + if ((fabs(gamma - exact) < fabs(prev_gamma - exact)) && + (fabs(gamma - exact) < fabs(best - exact))){ + best = gamma; + Z = z; + } + } + + printf("z:\t%.2f\n", Z); + printf("approx:\t%.20f\n", best); + printf("true:\t%.20f\n", exact); + printf("diff:\t%.20f\n", best - exact); + printf("\t 123456789 123456789 123456789\n"); + + return EXIT_SUCCESS; +} diff --git a/ex-3/chisquared.c b/ex-3/chisquared.c new file mode 100644 index 0000000..efe3051 --- /dev/null +++ b/ex-3/chisquared.c @@ -0,0 +1,292 @@ +#include "common.h" +#include "chisquared.h" +#include +#include + +/* Minimisation function + * + * The χ² is defined as + * + * χ² = |f|² + * = Σi fi² + * + * where fi = (Oi - Ei)/√Ei are the residuals. + * + * This function takes the parameters (α,β,γ) as + * a gsl_vector `par`, the sample `data` and a + * gsl_vector `f`, that will store the results + * `fi`. + */ +int chisquared_f(const gsl_vector *par, + void *data_, gsl_vector *f) { + struct hist data = *((struct hist*) data_); + + struct event event; + double max, min; + double delta_th, delta_ph; + double expected, observed; + + /* Loop over the (i,j) bins of the 2D histogram. + * The index k of the k-th component of f is actually: + * + * k = i * φbins + j + * + * Note: this is called the C-order. + */ + + for (size_t i = 0; i < data.h->nx; i++) + for (size_t j = 0; j < data.h->ny; j++) { + /* Get the bin ranges and width for θ,φ. 
+ * The event is supposed to happen in the + * midpoint of the range. + */ + gsl_histogram2d_get_xrange(data.h, i, &max, &min); + event.th = (max + min)/2; + delta_th = (max - min); + gsl_histogram2d_get_yrange(data.h, j, &max, &min); + event.ph = (max + min)/2; + delta_ph = (max - min); + + /* O = observed number of events in the k-th bin + * E = expected number of events in k-th bin + * + * E is given by: + * + * /---> total number of events + * / + * E = N F(α,β,γ; θ,φ) Δθ Δφ sin(θ) + * \__________________________/ + * | + * `-> probability + */ + observed = gsl_histogram2d_get(data.h, i, j); + expected = data.n * distr(par, &event) + * delta_th * delta_ph * sin(event.th); + + /* Return an error (invalid domain) if + * the current bin is empty. That would + * be division by zero. + */ + if (expected < 0) { + //fprintf(stderr, "[warning] bin %ld:%ld p<0 (%.2g, %.2g, %.2g)\n", + // i, j, gsl_vector_get(par, 0), + // gsl_vector_get(par, 1), + // gsl_vector_get(par, 2)); + expected = 1e-6; + } + + gsl_vector_set( + f, i * data.h->ny + j, + (observed - expected)/sqrt(expected)); + } + + return GSL_SUCCESS; +} + + +/* Jacobian function + * + * The gradient of the χ² function is: + * + * ∇χ² = ∇ |f|² + * = 2 J^T⋅f + * + * where J is the jacobian of the (vector) + * function f. Its entries are given by + * + * 2 Jij = 2 ∂fi/∂xj + * = -(Oi + Ei)/√Ei 1/Fi (∇Fi)j + * + */ +int chisquared_df(const gsl_vector *par, + void *data_, gsl_matrix *jac) { + + struct hist data = *((struct hist*) data_); + + struct event event; + double max, min; + double delta_th, delta_ph, prob; + double expected, observed; + gsl_vector *grad = gsl_vector_alloc(3); + + for (size_t i = 0; i < data.h->nx; i++) + for (size_t j = 0; j < data.h->ny; j++) { + /* Get the bin ranges for θ,φ. + * The event is supposed to happen in the + * midpoint of the range. + */ + gsl_histogram2d_get_xrange(data.h, i, &max, &min); + event.th = (max + min)/2; + delta_th = (max - min); + gsl_histogram2d_get_yrange(data.h, j, &max, &min); + event.ph = (max + min)/2; + delta_ph = (max - min); + + prob = distr(par, &event); + observed = gsl_histogram2d_get(data.h, i, j); + expected = data.n * prob * delta_th * delta_ph * sin(event.th); + + if (expected < 0) + expected = 1e-6; + if (observed == 0) + observed = 1; + + /* Compute the gradient of F(α,β,γ; θi,φi), + * then rescale it and set it to the i-th row + * of the jacobian. + */ + grad_distr(par, &event, grad); + gsl_vector_scale( + grad, + -0.5*(observed + expected)/sqrt(expected) * 1/prob); + gsl_matrix_set_row(jac, i * data.h->ny + j, grad); + + } + + // free memory + gsl_vector_free(grad); + + return GSL_SUCCESS; +} + + +/* This is a callback function called during + * the minimisation to show the current progress. + * It prints: + * 1. the condition number cond(J) of the jacobian + * 2. the reduced χ² value + * 3. the current parameters + */ +void callback(const size_t iter, void *params, + const gsl_multifit_nlinear_workspace *w) { + gsl_vector *f = gsl_multifit_nlinear_residual(w); + gsl_vector *x = gsl_multifit_nlinear_position(w); + + //if (iter % 4 != 0) + // return; + + /* Compute the condition number of the + * jacobian and the reduced χ² (χ²/d). + */ + double rcond, chi2; + int d = w->fdf->n - w->fdf->p; + gsl_multifit_nlinear_rcond(&rcond, w); + gsl_blas_ddot(f, f, &chi2); + + fprintf(stderr, "%2ld\t", iter); + vector_fprint(stderr, x); + fprintf( + stderr, "\tcond(J)=%.4g, χ²/d=%.4g\n\n", + 1.0/rcond, + chi2/d); +} + + +/* Minimum χ² estimation of of the parameters + * α,β,γ. 
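+ * The residuals fi = (Oi - Ei)/√Ei are minimised with the GSL trust-region nonlinear least-squares driver; the covariance of the estimates is obtained from the jacobian at the minimum.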
+ */ +min_result minChiSquared(struct hist data) { + /* Initialise the function to be minimised */ + gsl_multifit_nlinear_fdf chisquared; + chisquared.f = chisquared_f; // function + chisquared.df = chisquared_df; // gradient + chisquared.fvv = NULL; // something for geodesic accel. + chisquared.n = data.h->nx * data.h->ny; // numeber of data points + chisquared.p = 3; // numeber of data points + chisquared.params = &data; // histogram data + + /* Initialise the minimisation workspace */ + gsl_multifit_nlinear_parameters options = + gsl_multifit_nlinear_default_parameters(); + + gsl_multifit_nlinear_workspace *w = + gsl_multifit_nlinear_alloc( + gsl_multifit_nlinear_trust, // minimisation method + &options, // minimisation options + data.h->nx * data.h->ny, // number of data points + 3); // number of (unknown) params + + /* Set the starting point of the + * minimisation. + */ + gsl_vector *par = gsl_vector_alloc(3); + gsl_vector_set(par, 0, 0.50); // α + gsl_vector_set(par, 1, 0.01); // β + gsl_vector_set(par, 2, -0.10); // γ + gsl_multifit_nlinear_init(par, &chisquared, w); + + /* Configure the solver and run the minimisation + * using the high-level driver. + */ + fputs("\n# least χ²\n\n", stderr); + int status, info; + status = gsl_multifit_nlinear_driver( + 100, // max number of iterations + 1e-8, // tolerance for step test: |δ| ≤ xtol(|x| + xtol) + 1e-8, // tolerance for gradient test: max |∇i⋅ xi| ≤ gtol⋅xi + 1e-8, // tolerance for norm test + callback, // function called on each iteration + NULL, // callback parameters + &info, // stores convergence information + w); // minimisation workspace + + fprintf(stderr, "status: %s\n", gsl_strerror(status)); + if (status != GSL_SUCCESS) + fprintf(stderr, "info: %s\n", gsl_strerror(info)); + fprintf(stderr, "iterations: %ld\n", gsl_multifit_nlinear_niter(w)); + + /* Store results in the min_result type. + * Note: We allocate a new vector/matrix for the + * parameters because `w->x` will be freed + * along with the workspace `w`. + */ + min_result res; + res.par = gsl_vector_alloc(3); + res.err = gsl_vector_alloc(3); + res.cov = gsl_matrix_alloc(3, 3); + gsl_vector_memcpy(res.par, gsl_multifit_nlinear_position(w)); + + /* Compute the covariance of the fit parameters. + * The covariance Σ is estimated by + * + * Σ = H⁻¹ + * + * where H is the hessian of the χ², which is + * itself approximated by the jacobian as + * + * H = J^T⋅J + * + */ + gsl_matrix *jac = gsl_multifit_nlinear_jac(w); + gsl_multifit_nlinear_covar(jac, 0.0, res.cov); + + /* Compute the standard errors + * from the covariance matrix. + */ + gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov); + gsl_vector_memcpy(res.err, &diagonal.vector); + vector_map(&sqrt, res.err); + + /* Compute the reduced χ² */ + double chi2; + gsl_vector *f = gsl_multifit_nlinear_residual(w); + gsl_blas_ddot(f, f, &chi2); + chi2 /= chisquared.n - chisquared.p; + + /* Print the results */ + fputs("\n## results\n", stderr); + + fprintf(stderr, "\n* χ²/d: %.3f\n", chi2); + + fputs("\n* parameters:\n ", stderr); + vector_fprint(stderr, res.par); + + fputs("\n* covariance:\n", stderr); + matrix_fprint(stderr, res.cov); + + // free memory + gsl_multifit_nlinear_free(w); + gsl_vector_free(par); + + return res; +} diff --git a/ex-3/chisquared.h b/ex-3/chisquared.h new file mode 100644 index 0000000..dc0dafb --- /dev/null +++ b/ex-3/chisquared.h @@ -0,0 +1,17 @@ +#include "common.h" +#include + +/* Structure containing the histogram + * for the χ² minimisation. 
+ * n: number of events + * h: 2D histogram + */ +struct hist { + size_t n; + gsl_histogram2d *h; +}; + +/* Minimum χ² estimation of of the parameters + * α,β,γ. + */ +min_result minChiSquared(struct hist data); diff --git a/ex-3/common.c b/ex-3/common.c new file mode 100644 index 0000000..4211124 --- /dev/null +++ b/ex-3/common.c @@ -0,0 +1,126 @@ +#include "common.h" + +/* `vector_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void vector_fprint(FILE *fp, const gsl_vector *x) { + fprintf(fp, "["); + for (size_t i = 0; i < x->size; i++) { + fprintf(fp, i < x->size -1 ? "%10.4g, " : "%10.4g", + gsl_vector_get(x, i)); + } + fprintf(fp, "]\n"); +} + + +/* `matrix_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void matrix_fprint(FILE *fp, const gsl_matrix *x) { + gsl_vector row; + for (size_t i = 0; i < x->size1; i++) { + row = gsl_matrix_const_row(x, i).vector; + fputs(" ", fp); + vector_fprint(fp, &row); + } +} + + +/* `vector_map(f, v)` applies the function + * `f` to each element of the vector `v`, inplace. + * The function return a GSL exit code to either + * report success or a failure. + */ +int vector_map(double (*f)(double x), gsl_vector *v) { + double y; + for (size_t i = 0; i < v->size; i++ ) { + y = (*f)(gsl_vector_get(v, i)); + gsl_vector_set(v, i, y); + } + return GSL_SUCCESS; +} + + +/* Angular distribution function F(θ,φ; α,β,γ) + * Takes three parameters α,β,γ and the angles θ,φ. + */ +double distr(const gsl_vector *par, + const struct event *event){ + double a = gsl_vector_get(par, 0), + b = gsl_vector_get(par, 1), + c = gsl_vector_get(par, 2); + + double th = event->th, + ph = event->ph; + + double A = 3 /(4 * M_PI); + double B = 0.5 * (1 - a); + double C = 0.5 * (3 * a - 1) * pow(cos(th), 2); + double D = b * pow(sin(th), 2) * cos(2 * ph); + double E = sqrt(2) * c * sin(2 * th) * cos(ph); + + return A * (B + C - D - E); +} + +/* `grad_distr(par, event, grad)` + * computes the gradient of F(α,β,γ; α,β) as: + * + * ⎡ 3/2⋅cos²(θ) - 1/2 ⎤ + * ⎢ ⎥ + * ⎢ sin²(θ)⋅cos(2φ) ⎥ 3/(4π) + * ⎢ ⎥ + * ⎣ √2⋅sin(2θ)⋅cos(φ) ⎦ + * + * where `par` is [α,β,γ], `event` is the angle + * structure. The results is stored in the + * gsl_vector `grad` + */ +void grad_distr(const gsl_vector *par, + const struct event *event, + gsl_vector *grad) { + double th = event->th, + ph = event->ph; + + gsl_vector_set(grad, 0, 1.5*pow(cos(th), 2) - 0.5); + gsl_vector_set(grad, 1, -pow(sin(th), 2) * cos(2*ph)); + gsl_vector_set(grad, 2, -sqrt(2)*sin(2*th) * cos(ph)); + gsl_vector_scale(grad, 3.0/(4.0 * M_PI)); +} + + +/* Test the compatibility between the original + * parameters `def` and the estimates `est.par` + * with standard errors `est.err`. + */ +void compatibility(gsl_vector *def, + min_result est) { + + char* names[] = { "α", "β", "γ" }; + double a, b; + double t, sigma, alpha; + + fprintf(stderr, "| par | σ | α-value |\n"); + fprintf(stderr, "|-----|----------|----------|\n"); + for (size_t i = 0; i < def->size; i++) { + // discrepancy + a = gsl_vector_get(def, i); + b = gsl_vector_get(est.par, i); + sigma = gsl_vector_get(est.err, i); + + // normal t-statistic + t = fabs(a - b)/sigma; + // α-value / significance + alpha = 1 - erf(t/sqrt(2)); + + fprintf(stderr, "| %s | %-8.2g | %-8.2g |\n", + names[i], sigma, alpha); + } +} + + +/* Free a min_result structure. 
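+ * (Only the parameter vector and the covariance matrix are released here.)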
+ */ +void min_result_free(min_result x) { + gsl_vector_free(x.par); + gsl_matrix_free(x.cov); +} diff --git a/ex-3/common.h b/ex-3/common.h new file mode 100644 index 0000000..1ec5e57 --- /dev/null +++ b/ex-3/common.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include + +/* Structure that holds the θ,φ angles + * of a single event of the sample. + */ +struct event { + double th; + double ph; +}; + +/* Structure that contains the sample events + * and its size. + */ +struct sample { + const size_t size; + struct event *events; +}; + +/* This structure contains the result of a + * minimisation method: estimated parameters, + * standard error and their covariance. + */ +typedef struct { + gsl_vector *par; + gsl_vector *err; + gsl_matrix *cov; +} min_result; + + +/* `vector_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void vector_fprint(FILE *fp, const gsl_vector *x); + + +/* `matrix_fprint(fp, x)` prints to file `fp` + * the components of a gsl_vector. + */ +void matrix_fprint(FILE *fp, const gsl_matrix *x); + + +/* `vector_map(f, v)` applies the function + * `f` to each element of the vector `v`, inplace. + * The function return a GSL exit code to either + * report success or a failure. + */ +int vector_map(double (*f)(double x), gsl_vector *v); + + +/* Angular distribution function F(θ,φ; α,β,γ) + * Takes three parameters α,β,γ and the angles θ,φ. + */ +double distr(const gsl_vector *par, + const struct event *event); + + +/* `grad_distr(par, event, grad)` + * computes the gradient of F(α,β,γ; α,β) as: + * + * ⎡ 1/2 - 3/2⋅cos²(θ) ⎤ + * ⎢ ⎥ + * ⎢ sin²(θ)⋅cos(2φ) ⎥ 3/(4π) + * ⎢ ⎥ + * ⎣ √2⋅sin(2θ)⋅cos(φ) ⎦ + * + * where `par` is [α,β,γ], `event` is the angle + * structure. The results is stored in the + * gsl_vector `grad` + */ +void grad_distr(const gsl_vector *par, + const struct event *event, + gsl_vector *grad); + + +/* Test the compatibility between the original + * parameters `def` and the estimates `est.par` + * with standard errors `est.err`. + */ +void compatibility(gsl_vector *def, min_result est); + + +/* Free a min_result structure. + */ +void min_result_free(min_result x); diff --git a/ex-3/likelihood.c b/ex-3/likelihood.c new file mode 100644 index 0000000..53c137b --- /dev/null +++ b/ex-3/likelihood.c @@ -0,0 +1,239 @@ +#include "likelihood.h" +#include +#include +#include + +/* `f_logL(par, &sample)` + * gives the negative log-likelihood for the sample `sample`. + */ +double f_logL(const gsl_vector *par, void *sample_) { + struct sample sample = *((struct sample*) sample_); + double sum = 0; + + for (size_t i = 0; i < sample.size; i++) + sum += log(fabs(distr(par, &sample.events[i]))); + + /* Rescale the function to avoid + * round-off errors during minimisation + */ + sum *= 1e-5; + + return -sum; +} + + +/* `fdf_logL(par, &sample, fun, grad)` + * simultaneously computes the gradient and the value of + * the negative log-likelihood for the sample `sample` + * and stores the results in `fun` and `grad` respectively. 
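+ * The gradient of -log(L) is accumulated as the sum over the events of -∇F/F; both the value and the gradient are rescaled by 1e-5, as in `f_logL`, to limit round-off errors.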
+ */ +void fdf_logL(const gsl_vector *par, void *sample_, + double *fun, gsl_vector *grad) { + struct sample sample = *((struct sample*) sample_); + + double prob; + struct event *event; + gsl_vector *term = gsl_vector_alloc(3); + + // Note: fun/grad are *not* guaranteed to be 0 + gsl_vector_set_zero(grad); + *fun = 0; + + for (size_t i = 0; i < sample.size; i++) { + event = &sample.events[i]; + prob = distr(par, event); + + /* The gradient of log(F(α,β,γ)) is: + * 1/F(α,β,γ) ∇F(α,β,γ) + */ + grad_distr(par, event, term); + gsl_vector_scale(term, -1.0/prob); + gsl_vector_add(grad, term); + + // compute function + *fun += log(fabs(prob)); + } + + /* Rescale the function to avoid + * round-off errors during minimisation + */ + *fun = -*fun*1e-5; + gsl_vector_scale(grad, 1e-5); + + // free memory + gsl_vector_free(term); +} + + +/* `df_logL(par, &sample, grad)` + * gives the gradient of the negative log-likelihood + * for the sample `sample`. The result is stored in + * the `grad` gsl_vector. + */ +void df_logL(const gsl_vector *par, void *sample, gsl_vector *grad) { + double fun; + fdf_logL(par, sample, &fun, grad); +} + + +/* `hf_logL(par, &sample, hess)` + * gives the hessian matrix of the negative log-likelihood + * for the sample `sample`. The result is stored in + * the `hess` gsl_matrix. + */ +gsl_matrix* hf_logL(const gsl_vector *par, void *sample_) { + struct sample sample = *((struct sample*) sample_); + + gsl_vector *grad = gsl_vector_calloc(3); + gsl_matrix *term = gsl_matrix_calloc(3, 3); + gsl_matrix *hess = gsl_matrix_calloc(3, 3); + + double prob; + struct event *event; + for (size_t n = 0; n < sample.size; n++) { + /* Compute gradient and value of F */ + event = &sample.events[n]; + prob = distr(par, event); + grad_distr(par, event, grad); + + /* Compute the hessian matrix of -log(F): + * H_ij = 1/F² ∇F_i ∇F_j + */ + for (size_t i = 0; i < 3; i++) + for (size_t j = 0; j < 3; j++) { + gsl_matrix_set( + term, i, j, + gsl_vector_get(grad, i) * gsl_vector_get(grad, j)); + } + gsl_matrix_scale(term, 1.0/pow(prob, 2)); + gsl_matrix_add(hess, term); + } + + // free memory + gsl_vector_free(grad); + gsl_matrix_free(term); + + return hess; +} + + +/* Maximum likelihood estimation of the parameters + * α,β,γ. + */ +min_result maxLikelihood(struct sample *sample) { + /* Starting point: α,β,γ = "something close + * to the solution", because the minimisation + * seems pretty unstable. + */ + gsl_vector *par = gsl_vector_alloc(3); + gsl_vector_set(par, 0, 0.79); + gsl_vector_set(par, 1, 0.02); + gsl_vector_set(par, 2, -0.17); + + /* Initialise the minimisation */ + gsl_multimin_function_fdf likelihood; + likelihood.n = 3; + likelihood.f = f_logL; + likelihood.df = df_logL; + likelihood.fdf = fdf_logL; + likelihood.params = sample; + + /* The minimisation technique used + * is the conjugate gradient method. */ + const gsl_multimin_fdfminimizer_type *type = + gsl_multimin_fdfminimizer_conjugate_fr; + + gsl_multimin_fdfminimizer *m = + gsl_multimin_fdfminimizer_alloc(type, 3); + + gsl_multimin_fdfminimizer_set( + m, // minimisation technique + &likelihood, // function to minimise + par, // initial parameters/starting point + 1e-5, // first step length + 0.1); // accuracy + + size_t iter; + int status = GSL_CONTINUE; + + /* Iterate the minimisation until the gradient + * is smaller that 10^-4 or fail. 
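+ * (The tolerance passed to gsl_multimin_test_gradient below is 1e-5, with at most 100 iterations.)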
+ */ + fputs("\n# maximum likelihood\n\n", stderr); + for (iter = 0; status == GSL_CONTINUE && iter < 100; iter++) { + status = gsl_multimin_fdfminimizer_iterate(m); + + if (status) + break; + + status = gsl_multimin_test_gradient(m->gradient, 1e-5); + + /* Print the iteration, current parameters + * and gradient of the log-likelihood. + */ + if (iter % 5 == 0 || status == GSL_SUCCESS) { + fprintf(stderr, "%2ld\t", iter); + vector_fprint(stderr, m->x); + fputs("\t", stderr); + vector_fprint(stderr, m->gradient); + putc('\n', stderr); + } + } + fprintf(stderr, "status: %s\n", gsl_strerror(status)); + fprintf(stderr, "iterations: %ld\n", iter); + + /* Store results in the min_result type. + * Note: We allocate a new vector for the + * parameters because `m->x` will be freed + * along with m. + */ + min_result res; + res.par = gsl_vector_alloc(3); + res.err = gsl_vector_alloc(3); + res.cov = gsl_matrix_alloc(3, 3); + gsl_vector_memcpy(res.par, m->x); + + /* Compute the standard errors of the estimates. + * The Cramér–Rao bound states that the covariance + * matrix of the estimates is + * + * Σ ≥ I⁻¹ = (-H)⁻¹ + * + * where: + * - I is called the Fisher information, + * - H is the hessian of log(L) at + * the maximum likelihood. + */ + res.cov = hf_logL(m->x, sample); + + /* Invert H by Cholesky decomposition: + * + * H = LL^T ⇒ H⁻¹ = L^T⁻¹L⁻¹ + * + * Note: H is positive defined (because -logL is + * minimised) and symmetric so this is valid. + * + */ + gsl_linalg_cholesky_decomp(res.cov); + gsl_linalg_cholesky_invert(res.cov); + + /* Compute the standard errors + * from the covariance matrix. + */ + gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov); + gsl_vector_memcpy(res.err, &diagonal.vector); + vector_map(&sqrt, res.err); + + fputs("\n## results\n", stderr); + + fputs("\n* parameters:\n ", stderr); + vector_fprint(stderr, res.par); + + fputs("\n* covariance (Cramér–Rao lower bound):\n", stderr); + matrix_fprint(stderr, res.cov); + + // free memory + gsl_multimin_fdfminimizer_free(m); + + return res; +} diff --git a/ex-3/likelihood.h b/ex-3/likelihood.h new file mode 100644 index 0000000..7330ace --- /dev/null +++ b/ex-3/likelihood.h @@ -0,0 +1,6 @@ +#include "common.h" + +/* Maximum likelihood estimation of the parameters + * α,β,γ. 
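+ * The negative log-likelihood is minimised with the Fletcher-Reeves conjugate gradient method and the errors are estimated from the inverse hessian (Cramér-Rao bound).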
+ */ +min_result maxLikelihood(struct sample *sample); diff --git a/ex-3/main.c b/ex-3/main.c new file mode 100644 index 0000000..3d9c981 --- /dev/null +++ b/ex-3/main.c @@ -0,0 +1,158 @@ +#include +#include +#include "common.h" +#include "likelihood.h" +#include "chisquared.h" +#include +#include + +/* Options for the program */ +struct options { + int show_hist; // whether to print the 2D histogram + int only_hist; // whether to skip the minimisations + size_t num_events; // number of generated events + size_t bins_theta; // number of bins for θ + size_t bins_phi; // number of bins for φ +}; + + +int main(int argc, char **argv) { + /* Set default options */ + struct options opts; + opts.num_events = 50000; + opts.show_hist = 0; + opts.only_hist = 0; + opts.bins_theta = 30; + opts.bins_phi = 60; + + /* Process CLI arguments */ + for (size_t i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-i")) opts.show_hist = 1; + if (!strcmp(argv[i], "-I")) opts.only_hist = 1; + else if (!strcmp(argv[i], "-n")) opts.num_events = atol(argv[++i]); + else if (!strcmp(argv[i], "-t")) opts.bins_theta = atol(argv[++i]); + else if (!strcmp(argv[i], "-p")) opts.bins_phi = atol(argv[++i]); + else { + fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]); + fprintf(stderr, "\t-h\tShow this message.\n"); + fprintf(stderr, "\t-i\tPrint the histogram to stdout.\n"); + fprintf(stderr, "\t-I\tPrint *only* the histogram.\n"); + fprintf(stderr, "\t-n N\tThe number of events to generate.\n"); + fprintf(stderr, "\t-t N\tThe number of θ bins.\n"); + fprintf(stderr, "\t-p N\tThe number of φ bins.\n"); + return EXIT_FAILURE; + } + } + + /* Initialize an RNG. */ + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + /* Initialise the sample data structure with + * the number of events and allocate an array + * for the events. + */ + struct sample s = { opts.num_events, NULL }; + s.events = calloc(s.size, sizeof(struct event)); + + /* Prepare a 2D histogram of the sample + * with 30x60 bins. + */ + gsl_histogram2d* hist = + gsl_histogram2d_alloc(opts.bins_theta, opts.bins_phi); + // set the ranges θ∈[0, π] and φ∈[0, 2π] + gsl_histogram2d_set_ranges_uniform(hist, 0,M_PI, 0,2*M_PI); + + /* F(θ.φ) parameters we will generate + * a sample with, and later try to recover. + * Note: for this choice max F(θ,φ) ≈ 0.179. + */ + gsl_vector *def_par = gsl_vector_alloc(3); + gsl_vector_set(def_par, 0, 0.65); // α + gsl_vector_set(def_par, 1, 0.06); // β + gsl_vector_set(def_par, 2, -0.18); // γ + + /* Generate the points (θ, φ) on the unit sphere + * using the inverse transform: + * θ = acos(1 - 2X) + * φ = 2πY + * where X,Y are random uniform variables in [0,1). + * This ensures that θ,φ are uniformly distributed, then we + * proceed to reject the events as follow: + * 1. generate a y ∈ [0, max F(θ,φ)] uniformly + * 2. reject the sample if y > F(θ,φ) else save it. 
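+ * Here y is drawn in [0, 0.2], 0.2 being a safe upper bound for max F(θ,φ) ≈ 0.179 (see the note above).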
+ */ + fputs("# event sampling\n\n", stderr); + fprintf(stderr, "generating %ld events...", opts.num_events); + struct event *e; + for (size_t i = 0; i < s.size; i++){ + e = &s.events[i]; + do { + e->th = acos(1 - 2*gsl_rng_uniform(r)); + e->ph = 2 * M_PI * gsl_rng_uniform(r); + } while(0.2 * gsl_rng_uniform(r) > distr(def_par, e)); + + // update the histogram + gsl_histogram2d_increment(hist, e->th, e->ph); + } + fputs("done\n", stderr); + + // print histogram to stdout + if (opts.show_hist || opts.only_hist) { + fputs("\n# histogram\n", stderr); + gsl_histogram2d_fprintf(stdout, hist, "%g", "%g"); + } + + if (!opts.only_hist) { + /* Estimate the parameters α,β,γ by + * the maximum likelihood method. + */ + min_result like = maxLikelihood(&s); + + /* Estimate the parameters α,β,γ by + * a minimum χ² method. + */ + struct hist data; + data.h = hist; + data.n = s.size; + min_result chi2 = minChiSquared(data); + + /* Compare the results with the original ones + * and check whether they are compatible. + */ + fputs("\n# vector decay compatibility\n", stderr); + + fputs("\n## likelihood results\n\n", stderr); + compatibility(def_par, like); + + fputs("\n## χ² results\n\n", stderr); + compatibility(def_par, chi2); + + /* Test the isotropic decay hypothesys + * with the χ² results + */ + fputs("\n# isotropic hypotesys\n\n", stderr); + + /* If the decay is isotropic then F = 1/4π. + * Solving for α,β,γ yields α=1/3,β=γ=0. + */ + gsl_vector *iso_par = gsl_vector_alloc(3); + gsl_vector_set(iso_par, 0, 0.33); // α + gsl_vector_set(iso_par, 1, 0.00); // β + gsl_vector_set(iso_par, 2, 0.00); // γ + + compatibility(iso_par, chi2); + + // free memory + min_result_free(like); + min_result_free(chi2); + }; + + // free memory + gsl_rng_free(r); + free(s.events); + gsl_histogram2d_free(hist); + gsl_vector_free(def_par); + + return EXIT_SUCCESS; +} diff --git a/ex-3/plot.py b/ex-3/plot.py new file mode 100755 index 0000000..9ae01c4 --- /dev/null +++ b/ex-3/plot.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +from pylab import * +from mpl_toolkits.mplot3d import Axes3D +from matplotlib import cm +import sys + +xedges, yedges, counts = loadtxt(sys.stdin, unpack=True, usecols=[0,2,4]) +bins = (30, 60) +counts = counts.reshape(bins) + +plt.rcParams['font.size'] = 15 +suptitle('Angular decay distribution') +# subplot2grid((1, 3), (0, 0), colspan=2, aspect='equal') +xlabel(r'$\phi$ (radians)') +ylabel(r'$\theta$ (radians)') +pcolor(yedges[:bins[1]], xedges[::bins[1]], counts) +norm = colorbar(fraction=0.023, pad=0.04).norm + +show() + +# Ax = subplot2grid((1, 3), (0, 2), projection='3d') +# Θ, φ = mgrid[0:pi:bins[0]*1j, 0:2*pi:bins[1]*1j] +# X = 5 * sin(θ) * cos(φ) +# Y = 5 * sin(θ) * sin(φ) +# Z = 5 * cos(θ) +# Ax.plot_surface( +# x, y, z, rstride=1, cstride=1, +# facecolors=cm.viridis(norm(counts))) +# Axis('off') +# show() diff --git a/ex-3/results.md b/ex-3/results.md new file mode 100644 index 0000000..0c76533 --- /dev/null +++ b/ex-3/results.md @@ -0,0 +1,85 @@ +# event sampling + +generating 50000 events...done + +# maximum likelihood + + 0 [ 0.79, 0.02, -0.17] + [ 0.18, -0.06812, 0.001955] + + 5 [ 0.7894, 0.02022, -0.17] + [ 0.1789, -0.06748, 0.002162] + +10 [ 0.7709, 0.02725, -0.1702] + [ 0.1465, -0.04898, 0.007551] + +15 [ 0.6525, 0.06317, -0.1725] + [ -0.002576, -0.002313, 0.0161] + +20 [ 0.654, 0.06135, -0.1763] + [-2.098e-06, 0.0001837, 0.0001225] + +24 [ 0.6541, 0.06125, -0.1763] + [ -3.24e-06, -7.905e-07, -1.432e-06] + +status: success +iterations: 25 + +## results + +* parameters: + [ 0, -0.5106, 
1.089] + +* covariance (Cramér–Rao lower bound): + [ 6.604e-06, -3.474e-11, -1.798e-11] + [-3.474e-11, 1.227e-05, -1.267e-10] + [-1.798e-11, -1.267e-10, 9.564e-05] + +# least χ² + + 0 [ 0.5, 0.01, -0.1] + cond(J)=inf, χ²/d=3.32 + + 4 [ 0.6537, 0.05976, -0.1736] + cond(J)=2.725, χ²/d=1.017 + +status: success +iterations: 7 + +## results + +* χ²/d: 1.017 + +* parameters: + [ 0.6537, 0.05972, -0.1736] + +* covariance: + [ 0.00304, -1.66e-06, 2.383e-07] + [ -1.66e-06, 0.00212, 1.505e-06] + [ 2.383e-07, 1.505e-06, 0.001637] + +# vector decay compatibility + +## likelihood results + +| par | σ | α-value | +|-----|----------|----------| +| α | 6.6e-06 | 0 | +| β | 1.2e-05 | 0 | +| γ | 9.6e-05 | 0 | + +## χ² results + +| par | σ | α-value | +|-----|----------|----------| +| α | 0.003 | 0.22 | +| β | 0.0021 | 0.89 | +| γ | 0.0016 | 0.0001 | + +# isotropic hypotesys + +| par | σ | α-value | +|-----|----------|----------| +| α | 0.003 | 0 | +| β | 0.0021 | 0 | +| γ | 0.0016 | 0 | diff --git a/ex-4/dip.c b/ex-4/dip.c new file mode 100644 index 0000000..778ea9e --- /dev/null +++ b/ex-4/dip.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include + +int main(int argc, char **argv) + { + + // Set default options. + // + size_t N = 50000; // number of events. + size_t n = 50; // number of bins. + double p_max = 10; // maximum value of momentum module. + + // Process CLI arguments. + // + for (size_t i = 1; i < argc; i++) + { + if (!strcmp(argv[i], "-N")) N = atol(argv[++i]); + else if (!strcmp(argv[i], "-n")) n = atol(argv[++i]); + else if (!strcmp(argv[i], "-p")) p_max = atof(argv[++i]); + else + { + fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]); + fprintf(stderr, "\t-h\tShow this message.\n"); + fprintf(stderr, "\t-N integer\tThe number of events to generate.\n"); + fprintf(stderr, "\t-n integer\tThe number of bins of the histogram.\n"); + fprintf(stderr, "\t-p float\tThe maximum value of momentum.\n"); + return EXIT_FAILURE; + } + } + + // Initialize an RNG. + // + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // Generate the angle θ uniformly distributed on a sphere using the + // inverse transform: + // + // θ = acos(1 - 2X) + // + // where X is a random uniform variable in [0,1), and the module p of + // the vector: + // + // p² = p_v² + p_h² + // + // uniformly distributed between 0 and p_max. The two components are + // then computed as: + // + // p_v = p⋅cos(θ) + // p_h = p⋅sin(θ) + // + // The histogram will be updated this way. + // The j-th bin where p_h goes in is given by: + // + // step = p_max / n + // j = floor(p_h / step) + // + // Thus an histogram was created and a structure containing the number of + // entries in each bin and the sum of |p_v| in each of them is created and + // filled while generating the events. + // + struct bin + { + size_t amo; // Amount of events in the bin. + double sum; // Sum of |p_v|s of all the events in the bin. + }; + struct bin *histo = calloc(n, sizeof(struct bin)); + + // Some useful variables. + // + double step = p_max / n; + struct bin *b; + double theta; + double p; + double p_v; + double p_h; + size_t j; + + for (size_t i = 0; i < N; i++) + { + // Generate the event. + // + theta = acos(1 - 2*gsl_rng_uniform(r)); + p = p_max * gsl_rng_uniform(r); + + // Compute the components. + // + p_v = p * cos(theta); + p_h = p * sin(theta); + + // Update the histogram. 
+ // + j = floor(p_h / step); + b = &histo[j]; + b->amo++; + b->sum += fabs(p_v); + } + + // Compute the mean value of each bin and print it to stodut + // together with other useful things to make the histogram. + // + printf("bins: \t%ld\n", n); + printf("step: \t%.5f\n", step); + for (size_t i = 0; i < n; i++) + { + histo[i].sum = histo[i].sum / histo[i].amo; + printf("\n%.5f", histo[i].sum); + }; + + // free memory + gsl_rng_free(r); + free(histo); + + return EXIT_SUCCESS; + } diff --git a/ex-4/plot.py b/ex-4/plot.py new file mode 100755 index 0000000..9f9ecea --- /dev/null +++ b/ex-4/plot.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +import matplotlib.pyplot as plt +import numpy as np +import sys + + +bins = input() +step = input() +bins = int(bins.split("\t")[1]) +step = float(step.split("\t")[1]) +counts = np.loadtxt(sys.stdin) +edges = np.linspace(0, bins*step, bins+1) + +plt.rcParams['font.size'] = 17 + +plt.hist(edges[:-1], edges, weights=counts, + color='#eacf00', edgecolor='#3d3d3c') +plt.title('Vertical component distribution', loc='right') +plt.xlabel(r'$p_h$') +plt.ylabel(r'$\left<|p_v|\right>$') + +plt.show() diff --git a/ex-5/casino.c b/ex-5/casino.c new file mode 100644 index 0000000..961c494 --- /dev/null +++ b/ex-5/casino.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include + +int main(int argc, char** argv){ + size_t size = atoi(argv[1]); // size of the generated sample + + // Initialize an RNG. + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // Random points (x, y) generation: x in [0,1), y in [0,3) + // get if y < exp(x) + // leave if y > exp(x) + size_t in = 0; + size_t total = 0; + do{ + double x = gsl_rng_uniform(r); + double y = 3 * gsl_rng_uniform(r); + if (y <= exp(x)){ + in++; + } + total++; + } while (total < size); + + + // The integral is esitmated thorugh the proportion: + // integral : 3 = in : total + double integral = 3.0*(double)in/(double)total; + printf("∫exp(x)dx in [0,1]:\t%.07f\n", integral); + + // Free memory + gsl_rng_free(r); + return EXIT_SUCCESS; +} + diff --git a/ex-5/manual.c b/ex-5/manual.c new file mode 100644 index 0000000..16ba2f9 --- /dev/null +++ b/ex-5/manual.c @@ -0,0 +1,31 @@ +#include +#include +#include + +/* This program evaluates the integral of exp(x) in [0;1] through the + * trapezoidal rule. + * The integration step is given in input as d in step = 1e-d. + * + */ + +int main(int argc, char** argv){ + double left = 0; // left limit + double point = left; // points at the current position + double right = 1; // right limit + + double step = pow(10, -atoi(argv[1])); // step of integration + + double sum = 0; + + // Integral evaluation: stops when point goes over the right edge of the + // range. + do { + sum += step * 0.5 * (exp(point) + exp(point + step)); + point += step; + } while (point < right); + + // Print out the result + printf("∫exp(x)dx in [0,1]:\t%.7f\n", sum); + + return EXIT_SUCCESS; +} diff --git a/ex-5/trifecta.c b/ex-5/trifecta.c new file mode 100644 index 0000000..5cf3822 --- /dev/null +++ b/ex-5/trifecta.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + + +// Exact value of the integral. +double exact = 1.71828182845905; + + +// Function which must be passed to the gsl_function. +double function (double * x, size_t dim, void * params){ + // Avoid warning when no parameters are passed to the function. + (void)(params); + return exp(x[0]); +} + + +// Function which prints out the results when called. 
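+// It prints the estimate, its error and the exact value ∫exp(x)dx = e - 1 ≈ 1.71828182845905.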
+void results (char *title, double result, double error) +{ + printf ("---------------------------\n%s\n", title); + printf ("result: % .10f\n", result); + printf ("sigma: % .10f\n", error); + printf ("exact: % .10f\n", exact); +} + + +int main(){ + // Some useful variables. + double integral, error; + size_t calls = 50000000; + size_t dims = 1; + double lower[1] = {0}; // Integration lower limit + double upper[1] = {1}; // Integration upper limit + + // Initialize an RNG. + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + // Define GSL function. + gsl_monte_function expo = {&function, dims, NULL}; + expo.f = &function; + expo.dim = 1; + expo.params = NULL; + + { + gsl_monte_vegas_state *s = gsl_monte_vegas_alloc (dims); + gsl_monte_vegas_integrate (&expo, lower, upper, dims, calls, + r, s, &integral, &error); + do { gsl_monte_vegas_integrate (&expo, lower, upper, dims, + calls/5, r, s, &integral, &error); + } while (fabs (gsl_monte_vegas_chisq (s) - 1.0) > 0.5); + + // Free memory. + gsl_monte_vegas_free(s); + + // Display results + results("Vegas", integral, error); + } + + { + gsl_monte_miser_state *s = gsl_monte_miser_alloc (dims); + gsl_monte_miser_integrate (&expo, lower, upper, dims, calls, + r, s, &integral, &error); + // Free memory. + gsl_monte_miser_free (s); + + // Display results + results ("Miser", integral, error); + } + + { + gsl_monte_plain_state *s = gsl_monte_plain_alloc (dims); + gsl_monte_plain_integrate (&expo, lower, upper, dims, calls, + r, s, &integral, &error); + // Display results + results ("Montecarlo", integral, error); + + // Free memory. + gsl_monte_plain_free (s); + } + + // Free memory. + gsl_rng_free(r); + + return EXIT_SUCCESS; +} diff --git a/ex-6/fft.c b/ex-6/fft.c new file mode 100644 index 0000000..8a5de4b --- /dev/null +++ b/ex-6/fft.c @@ -0,0 +1,196 @@ +#include "fft.h" +#include +#include + +/* These macros are used to extract the + * real and imaginary parts of a complex + * vector */ +#define REAL(a,stride,i) ((a)[2*(stride)*(i)]) +#define IMAG(a,stride,i) ((a)[2*(stride)*(i)+1]) + + +/* Computes the (forward) real DFT of a vector + * and returns the results as a complex vector + */ +gsl_vector_complex* vector_fft(gsl_vector *data) { + /* Create a copy of the input data to + * preserve it: GSL functions work in-place. + */ + gsl_vector *fdata = gsl_vector_alloc(data->size); + gsl_vector_memcpy(fdata, data); + + /* Compute the forward Fourier transform */ + gsl_fft_real_wavetable *rtable = + gsl_fft_real_wavetable_alloc(fdata->size); + gsl_fft_real_workspace *wspace = + gsl_fft_real_workspace_alloc(fdata->size); + + gsl_fft_real_transform( + fdata->data, // array of data to transform + fdata->stride, // stride of the array, i.e. # steps between elements + fdata->size, // number of elements + rtable, // wavetable (real) + wspace); // workspace + + /* The data in `data` is in the half-complex + * packed format, which reduces memory usage + * by using the fact that + * + * z_i = z̅_(n-i) + * + * We unpack it to the normal packing and use + * the array to define a gsl_vector_complex. + */ + gsl_vector_complex *cdata = gsl_vector_complex_calloc(fdata->size); + gsl_fft_halfcomplex_unpack( + fdata->data, + cdata->data, + cdata->stride, + fdata->size); + + // clear memory + gsl_fft_real_wavetable_free(rtable); + gsl_fft_real_workspace_free(wspace); + gsl_vector_free(fdata); + + return cdata; +} + + +/* Inverse function of gsl_fft_halfcomplex_unpack. 
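 * (GSL only ships the unpacking direction, so the packing routine
 * needed for the deconvolution below is implemented by hand here.)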
+ * `gsl_fft_halfcomplex_pack(c, hc, stride, n)` + * creates a halfcomplex packed version of a + * complex packaged array `hc` with stride `stride` + * and `n` elements. + */ +int gsl_fft_halfcomplex_pack( + const gsl_complex_packed_array c, + double hc[], + const size_t stride, const size_t n) { + + hc[0] = REAL(c, stride, 0); + + size_t i; + for (i = 1; i < n - i; i++) { + hc[(2 * i - 1) * stride] = REAL(c, stride, i); + hc[2 * i * stride] = IMAG(c, stride, i); + } + + if (i == n - i) + hc[(n - 1) * stride] = REAL(c, stride, i); + + return 0; +} + + +/* `fft_deconvolve(data, kernel)` tries to deconvolve + * `kernel` from `data` by factoring out the `kernel` + * in the Fourier space. + */ +gsl_histogram* fft_deconvolve( + gsl_histogram *data, + gsl_histogram *kernel) { + /* Size of the original data + * before being convoluted. + * + * Notation of sizes: + * - original: m + * - kernel: n + * - "full" convolution: m + n - 1 + */ + size_t orig_size = data->n - kernel->n + 1; + + /* Create vector views */ + gsl_vector vdata = + gsl_vector_view_array(data->bin, data->n).vector; + gsl_vector vkernel = + gsl_vector_view_array(kernel->bin, kernel->n).vector; + + /* 0-pad the kernel to make it the + * same size as the convoluted data, + * which is m+n-1: + * + * 1. create a new vector + * 2. create a subvector view to the center + * 3. copy the kernel to the view + */ + gsl_vector *vpadded = gsl_vector_calloc(data->n); + gsl_vector center = gsl_vector_subvector( + vpadded, // vector: padded kernel (n+m-1) + orig_size/2, // offset: half of original data size (m/2) + kernel->n).vector; // length: kernel size (n) + gsl_vector_memcpy(¢er, &vkernel); + + /* Compute the DFT of the data and + * divide it by the DFT of the kernel. + */ + gsl_vector_complex *fdata = vector_fft(&vdata); + gsl_vector_complex *fkernel = vector_fft(vpadded); + gsl_vector_complex_div(fdata, fkernel); + + /* Pack the result in the half-complex + * format before computing the inverse DFT + */ + double *res = calloc(fdata->size, sizeof(double)); + gsl_fft_halfcomplex_pack(fdata->data, res, + fdata->stride, fdata->size); + + /* Compute the inverse DFT of the result */ + gsl_fft_halfcomplex_wavetable *htable = + gsl_fft_halfcomplex_wavetable_alloc(data->n); + gsl_fft_real_workspace *wspace = + gsl_fft_real_workspace_alloc(data->n); + gsl_fft_halfcomplex_inverse( + res, // array of data to transform + fdata->stride, // stride of the array + fdata->size, // number of elements + htable, // wavetable (complex) + wspace); // workspace + + /* Restore the natural order of frequency + * in the result of the DFT (negative→positive). + * To do that roll over the array by a half: + * 1. create a temp array of half size + * 2. copy first half over to temp + * 3. copy second half to the first + * 4. copy back the first to the second half + */ + size_t half_size = data->n/2 * sizeof(double); + double *temp = malloc(half_size); + memcpy(temp, res, half_size); + memcpy(res, res + data->n/2, half_size); + memcpy(res + data->n/2, temp, half_size); + + /* Create a histogram with the same edges + * as `data`, but with the original size, + * to return the cleaned result + */ + gsl_histogram *hist = gsl_histogram_calloc(orig_size); + + /* Set the same bin edges as `data`*/ + double max = gsl_histogram_max(data); + double min = gsl_histogram_min(data); + gsl_histogram_set_ranges_uniform(hist, min, max); + + /* Remove the extra size of the convolution + * (where the overlap is partial) when copying + * over the result to the histogram. 
+ * This amounts to (n-1)/2 on each end of the + * vector. + */ + memcpy( + hist->bin, + res + (kernel->n - 1)/2, + orig_size * sizeof(double)); + + // free memory + gsl_vector_free(vpadded); + gsl_vector_complex_free(fdata); + gsl_vector_complex_free(fkernel); + gsl_fft_halfcomplex_wavetable_free(htable); + gsl_fft_real_workspace_free(wspace); + free(res); + free(temp); + + return hist; +} diff --git a/ex-6/fft.h b/ex-6/fft.h new file mode 100644 index 0000000..818ca66 --- /dev/null +++ b/ex-6/fft.h @@ -0,0 +1,15 @@ +#include +#include +#include + +/* Computes the (forward) real DFT of a vector + * and returns the results as a complex vector + */ +gsl_vector_complex* vector_fft(gsl_vector *data); + + +/* `fft_deconvolve(data, noise)` tries to deconvolve + * `noise` from `data` by factoring out the `noise` + * in the Fourier space. + */ +gsl_histogram* fft_deconvolve(gsl_histogram *data, gsl_histogram *noise); diff --git a/ex-6/main.c b/ex-6/main.c new file mode 100644 index 0000000..6005f52 --- /dev/null +++ b/ex-6/main.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include +#include "fft.h" +#include "rl.h" + +/* Program options */ +struct options { + int convolved; + int deconvolved; + int original; + size_t num_events; + size_t bins; + double sigma; + size_t rounds; + const char* mode; + double noise; +}; + + +/* Intensity function parameters */ +struct param { + double a; // aperture radius + double k; // wavenumber + double e; // wave amplitude + double l; // screen distance +}; + + +/* Intensity of the EM field at a large distance + * from a circular aperture diffraction of a plane + * wave. + * + * The intensity is given as a function of θ, + * the diffraction angle, all the other physical + * parameter are controlled by the structure `param`. + * + * See Fraunhöfer diffraction on "Hect - Optics" for + * a derivation of the formula. + */ +double intensity(double theta, struct param p) { + double x = p.k * p.a * sin(theta); + double R = p.l / cos(theta); + + /* The intensity function has an eliminable + * discontinuoty at the origin, which must + * be handled separately. + */ + if (fabs(x) < 1e-15) { + return 0.5 * pow(p.e/R * M_PI * pow(p.a, 2), 2); + } + + double y = 2*M_PI*pow(p.a, 2) * gsl_sf_bessel_J1(x) / x; + return 0.5 * pow(p.e * y/R, 2); +} + + +/* `gaussian_for(hist, sigma)` generates a histogram + * with the same bin width of `hist` but smaller, + * with a gaussian PDF with μ at the central bin + * and σ equal to `sigma`. + */ +gsl_histogram* gaussian_for(gsl_histogram *hist, double sigma) { + /* Set the size to 6% of the histogram. + * Always choose n even to correctly center the + * gaussian in the middle. + */ + size_t n = ((double) hist->n * 6) / 100; + n = n % 2 ? n+1 : n; + + /* Calculate the (single) bin width assuming + * the ranges are uniformely spaced + */ + double min, max; + gsl_histogram_get_range(hist, 0, &min, &max); + double dx = max - min; + gsl_histogram *res = gsl_histogram_alloc(n); + gsl_histogram_set_ranges_uniform(res, 0, dx * n); + + /* The histogram will be such that + * the zero falls in the central bin. + */ + long int offset = res->n/2; + + for (long int i = 0; i < (long int)res->n; i++) + res->bin[i] = gsl_ran_gaussian_pdf( + ((double)(i - offset) + 0.5) * dx, sigma * dx); + + return res; +} + + +/* Convolves two histograms while keeping the + * original bin edges. This is a simple wrapper + * function around `gsl_vector_convolve`. 
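 * The result has a->n + b->n - 1 bins (a "full" linear convolution)
 * squeezed into the same range as `a`, so its bins are narrower
 * than the original ones.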
+ */ +gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b) { + gsl_histogram *res = gsl_histogram_calloc(a->n + b->n - 1); + + /* Set the same edges as `a`*/ + double max = gsl_histogram_max(a); + double min = gsl_histogram_min(a); + gsl_histogram_set_ranges_uniform(res, min, max); + + /* Create vector views for everybody */ + gsl_vector_view va = gsl_vector_view_array(a->bin, a->n); + gsl_vector_view vb = gsl_vector_view_array(b->bin, b->n); + gsl_vector_view vres = gsl_vector_view_array(res->bin, res->n); + + gsl_vector_convolve(&va.vector, &vb.vector, &vres.vector); + + return res; +} + + +int main(int argc, char **argv) { + struct options opts; + /* Set default options */ + opts.convolved = 0; + opts.deconvolved = 0; + opts.original = 1; + opts.num_events = 50000; + opts.bins = 150; + opts.sigma = 0.8; + opts.rounds = 3; + opts.mode = "fft"; + opts.noise = 0; + + /* Process CLI arguments */ + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-c")) opts.convolved = 1; + else if (!strcmp(argv[i], "-d")) opts.deconvolved = 1; + else if (!strcmp(argv[i], "-o")) opts.original = 1; + else if (!strcmp(argv[i], "-e")) opts.num_events = atol(argv[++i]); + else if (!strcmp(argv[i], "-b")) opts.bins = atol(argv[++i]); + else if (!strcmp(argv[i], "-s")) opts.sigma = atof(argv[++i]); + else if (!strcmp(argv[i], "-r")) opts.rounds = atol(argv[++i]); + else if (!strcmp(argv[i], "-m")) opts.mode = argv[++i]; + else if (!strcmp(argv[i], "-n")) opts.noise = atof(argv[++i]); + else { + fprintf(stderr, "Usage: %s -[cdoebsh]\n", argv[0]); + fprintf(stderr, "\t-h\t\tShow this message.\n"); + fprintf(stderr, "\t-c\t\tPrint the convolved histogram to stdout.\n"); + fprintf(stderr, "\t-d\t\tAttempt and print the deconvolved histogram.\n"); + fprintf(stderr, "\t-o\t\tPrint the original histogram to stdout.\n"); + fprintf(stderr, "\t-e N\t\tThe number of events.\n"); + fprintf(stderr, "\t-b N\t\tThe number of θ bins.\n"); + fprintf(stderr, "\t-s SIGMA\tThe sigma of gaussian kernel.\n"); + fprintf(stderr, "\t-r N\t\tThe number of RL deconvolution rounds.\n"); + fprintf(stderr, "\t-n MU\t\tThe mean (μ) of Poisson noise to add to the convolution.\n"); + return EXIT_FAILURE; + } + } + + /* Initialize an RNG. */ + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + gsl_histogram* hist = gsl_histogram_alloc(opts.bins); + gsl_histogram_set_ranges_uniform(hist, 0, M_PI/2); + + double theta; + struct param p = { 0.01, 0.0001, 1e4, 1 }; + + /* Sample events following the intensity + * of a circular aperture diffraction I(θ) + * while producing a histogram. + */ + fputs("# event sampling\n", stderr); + fprintf(stderr, "max itensity: %.2f\n", intensity(0, p)); + fprintf(stderr, "1. generating %ld events...", opts.num_events); + for (size_t i = 0; i < opts.num_events; i++){ + do { + theta = acos(1 - gsl_rng_uniform(r)); + } while(intensity(0, p) * gsl_rng_uniform(r) > intensity(theta, p)); + gsl_histogram_increment(hist, theta); + } + fputs("done\n", stderr); + + /* Generate the gaussian kernel, convolve the + * sample histogram with it and try to deconvolve it. + */ + fputs("\n# convolution\n", stderr); + fputs("1. generating gaussian kernel...", stderr); + gsl_histogram *kernel = gaussian_for(hist, opts.sigma); + fputs("done\n", stderr); + + fputs("2. convolving...", stderr); + gsl_histogram *conv = histogram_convolve(hist, kernel); + fputs("done\n", stderr); + + /* Add Poisson noise with μ=opts.noise to + * the convolution. 
+ */ + if (opts.noise > 0) { + fputs("2.1 adding poisson noise...", stderr); + for (size_t i = 0; i < conv->n; i++) + conv->bin[i] += gsl_ran_poisson(r, opts.noise); + fputs("done\n", stderr); + } + + fprintf(stderr, "3. %s deconvolution...", opts.mode); + gsl_histogram *clean; + if (!strcmp(opts.mode, "fft")) + clean = fft_deconvolve(conv, kernel); + else if (!strcmp(opts.mode, "rl")) + clean = rl_deconvolve(conv, kernel, opts.rounds); + else { + fputs("\n\nerror: invalid mode. select either 'fft' or 'rl'\n", stderr); + return EXIT_FAILURE; + } + fputs("done\n", stderr); + + /* Print the selected histogram*/ + fputs("\n# histogram \n", stderr); + gsl_histogram *showing; + if (opts.convolved) showing = conv; + else if (opts.deconvolved) showing = clean; + else if (opts.original) showing = hist; + gsl_histogram_fprintf(stdout, showing, "%g", "%g"); + + // free memory + gsl_rng_free(r); + gsl_histogram_free(hist); + gsl_histogram_free(kernel); + gsl_histogram_free(conv); + gsl_histogram_free(clean); + + return EXIT_SUCCESS; +} diff --git a/ex-6/plot.py b/ex-6/plot.py new file mode 100755 index 0000000..eb57c84 --- /dev/null +++ b/ex-6/plot.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +a, b, f = loadtxt(sys.stdin, unpack=True) +suptitle('Fraunhofer diffraction') +title(sys.argv[1] if len(sys.argv) > 1 else "", loc='right') +hist(a, np.insert(b, 0, a[0]), weights=f/sum(f), + color='#dbbf0d', edgecolor='#595856', linewidth=0.5) +show() diff --git a/ex-6/rl.c b/ex-6/rl.c new file mode 100644 index 0000000..72cdb56 --- /dev/null +++ b/ex-6/rl.c @@ -0,0 +1,152 @@ +#include "rl.h" +#include +#include + + +/* `gsl_vector_convolv(a, b, res)` computes the linear + * convolution of two vectors. The resulting vector + * has size `a->n + b->n - 1` and is stored in `res`. + */ +int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res) { + size_t m = a->size; + size_t n = b->size; + + /* Vector views for the overlap dot product */ + gsl_vector_view va, vb; + + /* Reverse `b` before convolving */ + gsl_vector_reverse(b); + + double dot; + size_t start_a, start_b, many; + for (size_t i = 0; i < m + n - 1; i++) { + /* Calculate the starting indices + * of the two vectors a,b overlap. + */ + if (i < n) start_a = 0; + else start_a = i - (n - 1); + + if (i < n) start_b = (n - 1) - i; + else start_b = 0; + + /* Calculate how many elements overlap */ + if (i < n) many = i + 1; + else if (i >= m) many = n - (i - m) - 1; + else many = n; + + /* Take the dot product of the overlap + * and store it in the resulting histogram + */ + va = gsl_vector_subvector(a, start_a, many); + vb = gsl_vector_subvector(b, start_b, many); + gsl_blas_ddot(&va.vector, &vb.vector, &dot); + gsl_vector_set(res, i, dot); + } + + /* Put b back together */ + gsl_vector_reverse(b); + + return GSL_SUCCESS; +} + + +/* Performs the Richardson-Lucy deconvolution. + * In pseudo-python: + * + * def rl_deconvolve(data, kernel, rounds): + * est = np.full(data, 0.5) + * for _ in range(rounds): + * est_conv = convolve(est, kernel) + * est *= convolve(data / est_conv, reversed) + * + */ +gsl_histogram* rl_deconvolve( + gsl_histogram *data, + gsl_histogram *kernel, + size_t rounds) { + /* Size of the original data + * before being convoluted. 
+ * + * Notation of sizes: + * - original: m + * - kernel: n + * - "full" convolution: m + n - 1 + */ + size_t orig_size = data->n - kernel->n + 1; + + /* Create a histogram with the same edges + * as `data`, but with the original size, + * to return the cleaned result + */ + gsl_histogram *hist = gsl_histogram_calloc(orig_size); + + /* Set the same bin edges as `data`*/ + double max = gsl_histogram_max(data); + double min = gsl_histogram_min(data); + gsl_histogram_set_ranges_uniform(hist, min, max); + + /* Vector views of the result, kernel + * and data. These are used to perform + * vectorised operations on histograms. + */ + gsl_vector est = + gsl_vector_view_array(hist->bin, hist->n).vector; + gsl_vector vkernel = + gsl_vector_view_array(kernel->bin, kernel->n).vector; + gsl_vector vdata = + gsl_vector_view_array(data->bin, data->n).vector; + gsl_vector center; + + /* Create a flipped copy of the kernel */ + gsl_vector *vkernel_flip = gsl_vector_alloc(kernel->n); + gsl_vector_memcpy(vkernel_flip, &vkernel); + gsl_vector_reverse(vkernel_flip); + + /* More vectors to store partial + * results + */ + gsl_vector* est_conv = gsl_vector_alloc(data->n); + gsl_vector* rel_blur = gsl_vector_alloc(data->n); + + /* The zero-order estimate is simply + * all elements at 0.5 */ + gsl_vector_set_all(&est, 0.5); + + for (size_t iter = 0; iter < rounds; iter++) { + /* The current estimated convolution is the + * current estimate of the data with + * the kernel */ + gsl_vector_convolve(&est, &vkernel, est_conv); + + /* Divide the data by the estimated + * convolution to calculate the "relative blur". + */ + gsl_vector_memcpy(rel_blur, &vdata); + gsl_vector_div(rel_blur, est_conv); + + /* Set NaNs to zero */ + for (size_t i = 0; i < rel_blur->size; i++) + if (isnan(gsl_vector_get(rel_blur, i))) { + fprintf(stderr, "gotcha: %ld!!\n", i); + double y = (i > 0)? gsl_vector_get(rel_blur, i - 1) : 1; + gsl_vector_set(rel_blur, i, y); + } + + /* Convolve the blur by the kernel + * and multiply the current estimate + * of the data by it. + */ + center = gsl_vector_subvector(rel_blur, (kernel->n-1)/2, orig_size).vector; + gsl_vector_convolve(¢er, vkernel_flip, est_conv); + center = gsl_vector_subvector(est_conv, (kernel->n-1)/2, orig_size).vector; + gsl_vector_mul(&est, ¢er); + } + + // free memory + gsl_vector_free(est_conv); + gsl_vector_free(rel_blur); + gsl_vector_free(vkernel_flip); + + return hist; + +} diff --git a/ex-6/rl.h b/ex-6/rl.h new file mode 100644 index 0000000..1f63ae4 --- /dev/null +++ b/ex-6/rl.h @@ -0,0 +1,20 @@ +#include +#include + +/* `rl_deconvolve(data, noise, rounds)` + * tries to deconvolve `noise` from + * `data` in `rounds` rounds. + * */ +gsl_histogram* rl_deconvolve( + gsl_histogram *data, + gsl_histogram *noise, + size_t rounds); + +/* `convolve(a, b)` computes the linear convolution + * of two histograms. The convolted histogram + * has length `a->n + b->n - 1`. + */ +int gsl_vector_convolve( + gsl_vector *a, + gsl_vector *b, + gsl_vector *res); diff --git a/ex-7/main.c b/ex-7/main.c new file mode 100644 index 0000000..34a6232 --- /dev/null +++ b/ex-7/main.c @@ -0,0 +1,433 @@ +#include +#include +#include +#include +#include +#include + +/* Parameters for bivariate + * gaussian PDF + */ +struct par { + double x0; // x mean + double y0; // y mean + double sigma_x; // x standard dev + double sigma_y; // y standard dev + double rho; // correlation: cov(x,y)/σx⋅σy +}; + + +/* A sample of N 2D points is an + * N×2 matrix, with vectors as rows. 
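 * The parameters used to generate the sample are stored alongside
 * the points, so later functions can rebuild the covariance Σ and
 * the mean μ from them.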
+ */ +typedef struct { + struct par p; + gsl_matrix *data; +} sample_t; + + +/* Create a sample of `n` points */ +sample_t* sample_t_alloc(size_t n, struct par p) { + sample_t *x = malloc(sizeof(sample_t)); + x->p = p; + x->data = gsl_matrix_alloc(n, 2); + return x; +} + +/* Delete a sample */ +void sample_t_free(sample_t *x) { + gsl_matrix_free(x->data); + free(x); +} + + +/* `generate_normal(r, n, p)` will create + * a sample of `n` points, distributed + * according to a bivariate gaussian distribution + * of parameters `p`. + */ +sample_t* generate_normal( + gsl_rng *r, size_t n, struct par *p) { + sample_t *s = sample_t_alloc(n, *p); + + for (size_t i = 0; i < n; i++) { + /* Generate a vector (x,y) with + * a standard (μ = 0) bivariate + * gaussian PDF. + */ + double *x = gsl_matrix_ptr(s->data, i, 0); + double *y = gsl_matrix_ptr(s->data, i, 1); + gsl_ran_bivariate_gaussian( + r, + p->sigma_x, p->sigma_y, p->rho, + x, y); + + /* Shift the vector to (x₀,y₀) */ + *x += p->x0; + *y += p->y0; + } + + return s; +} + + +/* Builds the covariance matrix Σ + * from the standard parameters (σ, ρ) + * of a bivariate gaussian. + */ +gsl_matrix* normal_cov(struct par *p) { + double var_x = pow(p->sigma_x, 2); + double var_y = pow(p->sigma_y, 2); + double cov_xy = p->rho * p->sigma_x * p->sigma_y; + + gsl_matrix *cov = gsl_matrix_alloc(2, 2); + gsl_matrix_set(cov, 0, 0, var_x); + gsl_matrix_set(cov, 1, 1, var_y); + gsl_matrix_set(cov, 0, 1, cov_xy); + gsl_matrix_set(cov, 1, 0, cov_xy); + + return cov; +} + + +/* Builds the mean vector of + * a bivariate gaussian. + */ +gsl_vector* normal_mean(struct par *p) { + gsl_vector *mu = gsl_vector_alloc(2); + gsl_vector_set(mu, 0, p->x0); + gsl_vector_set(mu, 1, p->y0); + + return mu; +} + + +/* `fisher_proj(c1, c2)` computes the optimal + * projection map, which maximises the separation + * between the two classes. + * The projection vector w is given by + * + * w = Sw⁻¹ (μ₂ - μ₁) + * + * where Sw = Σ₁ + Σ₂ is the so-called within-class + * covariance matrix. + */ +gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) { + /* Construct the covariances of each class... */ + gsl_matrix *cov1 = normal_cov(&c1->p); + gsl_matrix *cov2 = normal_cov(&c2->p); + + /* and the mean values */ + gsl_vector *mu1 = normal_mean(&c1->p); + gsl_vector *mu2 = normal_mean(&c2->p); + + /* Compute the inverse of the within-class + * covariance Sw⁻¹. + * Note: by definition Σ is symmetrical and + * positive-definite, so Cholesky is appropriate. + */ + gsl_matrix_add(cov1, cov2); + gsl_linalg_cholesky_decomp(cov1); + gsl_linalg_cholesky_invert(cov1); + + /* Compute the difference of the means. */ + gsl_vector *diff = gsl_vector_alloc(2); + gsl_vector_memcpy(diff, mu2); + gsl_vector_sub(diff, mu1); + + /* Finally multiply diff by Sw. + * This uses the rather low-level CBLAS + * functions gsl_blas_dgemv: + * + * ___ double ___ 1 ___ nothing + * / / / + * dgemv computes y := α op(A)x + βy + * \ \__matrix-vector \____ 0 + * \__ A is symmetric + */ + gsl_vector *w = gsl_vector_alloc(2); + gsl_blas_dgemv( + CblasNoTrans, // do nothing on A + 1, // α = 1 + cov1, // matrix A + diff, // vector x + 0, // β = 0 + w); // vector y + + // free memory + gsl_matrix_free(cov1); + gsl_matrix_free(cov2); + gsl_vector_free(mu1); + gsl_vector_free(mu2); + gsl_vector_free(diff); + + return w; +} + + +/* Computes the determinant from the + * Cholesky decomposition of matrix. + * In this case it's simply the product + * of the diagonal elements, squared. 
+ */ +double gsl_linalg_cholesky_det(gsl_matrix *LL) { + gsl_vector diag = gsl_matrix_diagonal(LL).vector; + double det = 1; + for (size_t i = 0; i < LL->size1; i++) + det *= gsl_vector_get(&diag, i); + return det * det; +} + + +/* `fisher_cut(ratio, w, c1, c2)` computes + * the threshold (cut), on the line given by + * `w`, to discriminates the classes `c1`, `c2`; + * with `ratio` being the ratio of their prior + * probabilities. + * + * The cut is fixed by the condition of + * conditional probability being the + * same for each class: + * + * P(c₁|x) p(x|c₁)⋅p(c₁) + * ------- = --------------- = 1; + * P(c₂|x) p(x|c₁)⋅p(c₂) + * + * together with x = t⋅w. + * + * If p(x|c) is a bivariate normal PDF the + * solution is found to be: + * + * t = (b/a) + √((b/a)² - c/a); + * + * where + * + * 1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w) + * 2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂) + * 3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log|Σ₂|/log|Σ₁| - 2 log(α) + * 4. α = p(c₁)/p(c₂) + * + */ +double fisher_cut( + double ratio, + gsl_vector *w, + sample_t *c1, sample_t *c2) { + + /* Construct the covariances of each class... */ + gsl_matrix *cov1 = normal_cov(&c1->p); + gsl_matrix *cov2 = normal_cov(&c2->p); + + /* and the mean values */ + gsl_vector *mu1 = normal_mean(&c1->p); + gsl_vector *mu2 = normal_mean(&c2->p); + + /* Temporary vector/matrix for + * intermediate results. + */ + gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2); + gsl_vector *vtemp = gsl_vector_alloc(w->size); + + /* Invert Σ₁ and Σ₂ in-place: + * we only need the inverse matrices + * in the steps to follow. + */ + gsl_linalg_cholesky_decomp(cov1); + gsl_linalg_cholesky_decomp(cov2); + // store determinant for later + double det1 = gsl_linalg_cholesky_det(cov1); + double det2 = gsl_linalg_cholesky_det(cov2); + gsl_linalg_cholesky_invert(cov1); + gsl_linalg_cholesky_invert(cov2); + + /* Compute the first term: + * + * a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w) + * + */ + // mtemp = cov1 - cov2 + gsl_matrix_memcpy(mtemp, cov1); + gsl_matrix_sub(mtemp, cov2); + + // vtemp = mtemp ⋅ vtemp + gsl_vector_memcpy(vtemp, w); + gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp); + + // a = (w, vtemp) + double a; gsl_blas_ddot(w, vtemp, &a); + + /* Compute the second term: + * + * b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂) + * + */ + // vtemp = cov1 ⋅ mu1 + // vtemp = cov2 ⋅ mu2 - vtemp + gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp); + gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp); + + // b = (w, vtemp) + double b; gsl_blas_ddot(w, vtemp, &b); + + /* Compute the last term: + * + * c = log|Σ₂|/|Σ₁| + (μ₂, Σ₂⁻¹μ₂) - (μ₁, Σ₁⁻¹μ₁) + * + */ + double c, temp; + c = log(det1 / det2) - 2*log(ratio); + + gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, 0, vtemp); + gsl_blas_ddot(mu1, vtemp, &temp); + c += temp; + + gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp); + gsl_blas_ddot(mu2, vtemp, &temp); + c -= temp; + + /* To get the thresold value we have to + * multiply t by |w| if not normalised + */ + double norm; gsl_blas_ddot(w, w, &norm); + + // free memory + gsl_vector_free(mu1); + gsl_vector_free(mu2); + gsl_vector_free(vtemp); + gsl_matrix_free(cov1); + gsl_matrix_free(cov2); + gsl_matrix_free(mtemp); + + return ((b/a) + sqrt(pow(b/a, 2) - c/a)) * norm; +} + + +/* `fisher_cut2(ratio, w, c1, c2)` computes + * the threshold (cut), on the line given by + * `w`, to discriminates the classes `c1`, `c2`; + * with `ratio` being the ratio of their prior + * probabilities. 
+ * + * The cut is fixed by the condition of + * conditional probability being the + * same for each class: + * + * P(c₁|x) p(x|c₁)⋅p(c₁) + * ------- = --------------- = 1; + * P(c₂|x) p(x|c₁)⋅p(c₂) + * + * where p(x|c) is the probability for point x + * along the fisher projection line. If the classes + * are bivariate gaussian then p(x|c) is simply + * given by a normal distribution: + * + * Φ(μ=(w,μ), σ=(w,Σw)) + * + * The solution is then + * + * t = (b/a) + √((b/a)² - c/a); + * + * where + * + * 1. a = S₁² - S₂² + * 2. b = M₂S₁² - M₁S₂² + * 3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α) + * 4. α = p(c₁)/p(c₂) + * + */ +double fisher_cut2( + double ratio, + gsl_vector *w, + sample_t *c1, sample_t *c2) { + + gsl_vector *vtemp = gsl_vector_alloc(w->size); + + /* Construct the covariances of each class... */ + gsl_matrix *cov1 = normal_cov(&c1->p); + gsl_matrix *cov2 = normal_cov(&c2->p); + + /* and the mean values */ + gsl_vector *mu1 = normal_mean(&c1->p); + gsl_vector *mu2 = normal_mean(&c2->p); + + /* Project the distribution onto the + * w line to get a 1D gaussian + */ + // mean + double m1; gsl_blas_ddot(w, mu1, &m1); + double m2; gsl_blas_ddot(w, mu2, &m2); + + // variances + gsl_blas_dgemv(CblasNoTrans, 1, cov1, w, 0, vtemp); + double var1; gsl_blas_ddot(w, vtemp, &var1); + gsl_blas_dgemv(CblasNoTrans, 1, cov2, w, 0, vtemp); + double var2; gsl_blas_ddot(w, vtemp, &var2); + + double a = var1 - var2; + double b = m2*var1 + m1*var2; + double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio); + + // free memory + gsl_vector_free(mu1); + gsl_vector_free(mu2); + gsl_vector_free(vtemp); + gsl_matrix_free(cov1); + gsl_matrix_free(cov2); + + return (b/a) + sqrt(pow(b/a, 2) - c/a); +} + + +int main(int argc, char **args) { + // initialize RNG + gsl_rng_env_setup(); + gsl_rng *r = gsl_rng_alloc(gsl_rng_default); + + /* Generate two classes of normally + * distributed 2D points with different + * paramters: signal and noise. + */ + struct par par_sig = { 0, 0, 0.3, 0.3, 0.5 }; + struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 }; + + // sample sizes + size_t nsig = 800; + size_t nnoise = 1000; + + sample_t *signal = generate_normal(r, nsig, &par_sig); + sample_t *noise = generate_normal(r, nnoise, &par_noise); + + /* Fisher linear discriminant + * + * First calculate the direction w onto + * which project the data points. Then the + * cut which determines the class for each + * projected point. + */ + gsl_vector *w = fisher_proj(signal, noise); + double t_cut = fisher_cut2(nsig / (double)nnoise, + w, signal, noise); + + fputs("# Linear Fisher discriminant\n\n", stderr); + fprintf(stderr, "* w: [%.3f, %.3f]\n", + gsl_vector_get(w, 0), + gsl_vector_get(w, 1)); + fprintf(stderr, "* t_cut: %.3f\n", t_cut); + + gsl_vector_fprintf(stdout, w, "%g"); + printf("%f\n", t_cut); + + /* Print data to stdout for plotting. + * Note: we print the sizes to be able + * to set apart the two matrices. 
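 * (ex-7/plot.py reads these counts back and uses them to split the
 * flattened stream into the signal and noise samples.)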
+ */ + printf("%ld %ld %d\n", nsig, nnoise, 2); + gsl_matrix_fprintf(stdout, signal->data, "%g"); + gsl_matrix_fprintf(stdout, noise->data, "%g"); + + // free memory + gsl_rng_free(r); + sample_t_free(signal); + sample_t_free(noise); + + return EXIT_SUCCESS; +} diff --git a/ex-7/plot.py b/ex-7/plot.py new file mode 100755 index 0000000..9d4515f --- /dev/null +++ b/ex-7/plot.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from pylab import * +import sys + +w = loadtxt(sys.stdin, max_rows=2) +cut = float(input()) + +n, m, d = map(int, input().split()) +data = loadtxt(sys.stdin).reshape(n + m, d) +signal, noise = data[:n].T, data[n:].T + +figure() +scatter(*signal, c='xkcd:grey blue', label='signal') +scatter(*noise, c='xkcd:baby blue', label='noise') +scatter(*(cut * w/np.dot(w,w)), c='red') +arrow(*(-abs(w)), *4*abs(w)) +legend() + +figure() +sig_proj = np.dot(w, signal) +noise_proj = np.dot(w, noise) +hist(sig_proj, color='xkcd:grey blue') +hist(noise_proj, color='xkcd:baby blue') +axvline(cut, c='r') +show() diff --git a/makefile b/makefile new file mode 100644 index 0000000..3a93937 --- /dev/null +++ b/makefile @@ -0,0 +1,33 @@ +GMP_FLAGS = -L$(GMP_PATH) -lgmp +GSL_FLAGS = $(shell pkg-config --libs gsl) +CFLAGS := -Wall --debug $(GMP_FLAGS) $(GSL_FLAGS) + +CCOMPILE = \ + mkdir -p $(@D); \ + $(CC) $(CFLAGS) $^ -o $@ + +ex-1/bin/%: ex-1/%.c + $(CCOMPILE) + +ex-2/bin/%: ex-2/%.c + $(CCOMPILE) + +ex-3/bin/main: ex-3/common.c ex-3/likelihood.c ex-3/chisquared.c + $(CCOMPILE) + +ex-4/bin/main: ex-4/main.c + $(CCOMPILE) + +ex-5/bin/%: ex-5/%.c + $(CCOMPILE) + +ex-6/bin/main: ex-6/rl.c ex-6/fft.c + $(CCOMPILE) + +ex-7/main: ex-7/main.c + $(CCOMPILE) + +misc/pdfs: misc/pdfs.c + +clean: + rm -rf ex-*/bin diff --git a/misc/lessons/images/binomiale.png b/misc/lessons/images/binomiale.png new file mode 100644 index 0000000..c975953 Binary files /dev/null and b/misc/lessons/images/binomiale.png differ diff --git a/misc/lessons/images/chi2.png b/misc/lessons/images/chi2.png new file mode 100644 index 0000000..1d7190d Binary files /dev/null and b/misc/lessons/images/chi2.png differ diff --git a/misc/lessons/images/correlazione.png b/misc/lessons/images/correlazione.png new file mode 100644 index 0000000..d21310b Binary files /dev/null and b/misc/lessons/images/correlazione.png differ diff --git a/misc/lessons/images/exponential.png b/misc/lessons/images/exponential.png new file mode 100644 index 0000000..656b546 Binary files /dev/null and b/misc/lessons/images/exponential.png differ diff --git a/misc/lessons/images/fischer.png b/misc/lessons/images/fischer.png new file mode 100644 index 0000000..717b2f0 Binary files /dev/null and b/misc/lessons/images/fischer.png differ diff --git a/misc/lessons/images/gaussian.png b/misc/lessons/images/gaussian.png new file mode 100644 index 0000000..d543ba8 Binary files /dev/null and b/misc/lessons/images/gaussian.png differ diff --git a/misc/lessons/images/kurtosis.png b/misc/lessons/images/kurtosis.png new file mode 100644 index 0000000..018070f Binary files /dev/null and b/misc/lessons/images/kurtosis.png differ diff --git a/misc/lessons/images/landau.png b/misc/lessons/images/landau.png new file mode 100644 index 0000000..ea19913 Binary files /dev/null and b/misc/lessons/images/landau.png differ diff --git a/misc/lessons/images/logistic.png b/misc/lessons/images/logistic.png new file mode 100644 index 0000000..17e6d81 Binary files /dev/null and b/misc/lessons/images/logistic.png differ diff --git a/misc/lessons/images/poisson.png b/misc/lessons/images/poisson.png 
new file mode 100644 index 0000000..a7e84e7 Binary files /dev/null and b/misc/lessons/images/poisson.png differ diff --git a/misc/lessons/images/skewness.png b/misc/lessons/images/skewness.png new file mode 100644 index 0000000..43efb6d Binary files /dev/null and b/misc/lessons/images/skewness.png differ diff --git a/misc/lessons/images/student.png b/misc/lessons/images/student.png new file mode 100644 index 0000000..72a76af Binary files /dev/null and b/misc/lessons/images/student.png differ diff --git a/misc/lessons/images/uniform.png b/misc/lessons/images/uniform.png new file mode 100644 index 0000000..f573dac Binary files /dev/null and b/misc/lessons/images/uniform.png differ diff --git a/misc/lessons/images/wigner.png b/misc/lessons/images/wigner.png new file mode 100644 index 0000000..eecef49 Binary files /dev/null and b/misc/lessons/images/wigner.png differ diff --git a/misc/lessons/makefile b/misc/lessons/makefile new file mode 100644 index 0000000..86a1b30 --- /dev/null +++ b/misc/lessons/makefile @@ -0,0 +1,17 @@ +sections = $(shell find sections/ -name '*.md' | sort -V) + +pandoc = \ + @ pandoc $(1) \ + --toc \ + --standalone \ + --pdf-engine=xelatex \ + -F pandoc-crossref \ + -o $(2).pdf + +all: lessons.pdf + +clean: + rm lessons.pdf + +lessons.pdf: $(sections) images + $(call pandoc, $(sections), lessons) diff --git a/misc/lessons/sections/0.md b/misc/lessons/sections/0.md new file mode 100644 index 0000000..b5aa1d4 --- /dev/null +++ b/misc/lessons/sections/0.md @@ -0,0 +1,23 @@ +--- +title: Analisi statistica +subtitle: Riassunto delle slides +numbersections: true +documentclass: article +fontsize: 12pt + +geometry: + - width=150mm + - top=20mm + - bottom=30mm + +header-includes: | + ```{=latex} + % start new page with each section + \usepackage{etoolbox} + \usepackage{tikz} + \usepackage{amsmath} + \usepackage{amssymb} + \usepackage{siunitx} + \pretocmd{\section}{\clearpage}{}{} + ``` +--- diff --git a/misc/lessons/sections/modulo-1.md b/misc/lessons/sections/modulo-1.md new file mode 100644 index 0000000..92e409b --- /dev/null +++ b/misc/lessons/sections/modulo-1.md @@ -0,0 +1,140 @@ +# Calcolo numerico + +## Compilare un programma + +Per eseguire un programma, un computer fa questa cosa: legge le informazioni +dall'input e le mette nella memoria; poi da qui legge le istruzioni +sequenzialmente e se c'è qualche calcolo da fare, lo fa fare alla ALU +(aritmetic logic unit) e memorizza il risultato nella memoria; alla fine passa +tutto in output. +Per essere eseguibile, un programma deve essere scritto in codice macchina +e per questo si usano i linguaggi di programmazione che, attraverso il +compilatore, vengono tradotti in codice macchina: + +- codice sorgente, +- compilatore, +- codice oggetto, +- linker (aggiunge le librerie), +- codice eseguibile. + +## Errori di implementazione + +Quando si passa dal modello astratto a quello implementato, si distinguono +tre tipi di errori: + +- errori analitici: causati dalla necessità di una aritmetica discreta, + cioè il fatto che non si possa rappresentare una cosa continua e quindi + la si fa a punti; +- errori inerenti o algoritmici (o di round-off): causati dal numero finito di + cifre significative. Dipendono dall'algoritmo utilizzato, perché ci sono + metodi migliori di altri per evitare questo tipo di problemi. + +## Rappresentazione di un numero + +L'obiettivo è quello di tenere sotto controllo gli errori di round-off. 
+Per rappresentare un numero, esistono diverse rappresentazioni, tra cui quella +di Von Neumann: si dedicano $n$ cifre significative alla mantissa e poi un tot +anche alla caratteristica, cioè all'ordine di grandezza (che generalmente è in +base 2). +Con $n$ cifre significative in base 2, il numero più alto rappresentabile è +$2^n -1$ perché con due cifre (0 e 1), ci sono $2^n$ possibili numeri +rappresentabili e, se posti in ordine crescente, ognuno dista dal precedente +un'unità, con il minimo che vale 0 (tutti 0), per cui il numero più alto che si +può rappresentare è $2^n -1$: + +- 0 è l'1, +- 1 e il 2, +- ...$2^n -1$ è il $2^n$-esimo. + +questo numero corrisponde a un numero in base dieci di $m$ cifre significative, +per cui: + +$$ + m = \log(2^n -1) +$$ + +quindi per un numero con 24 bit per la mantissa, si hanno circa 7 cifre +significative in base 10. +In precisione semplice (floating point): + +- un bit per il segno, +- 23 bit per la mantissa (quindi 7 cifre significative), +- 8 bits per la caratteristica. + +In precisione doppia (duble) si raddoppiano i bite alla mantissa e le cifre +significative diventano 17, però il tempo per le moltiplicazioni è triplicato +e si occupa più memoria, quindi non è che convenga moltissimo. + +Esiste anche la rappresentazione fixed-point, in cui c'è un bit per il segno +e i restanti per il numero in sé, in cui da qualche parte c'è la virgola. +Nel caso del fixed pointd, l'errore relativo può variare moltissimo a seconda +del numero rappresentato, mentre per i float è lo stesso per ogni numero: la +densità relativa non è costante. +Quando un insieme di rappresentazione non è chiuso rispetto ad un'operazione, +il risultato presenta sicuramente errore di round-off e deve essere +approssimato. + + +## Errori algoritmici + +Per numeri troppo grandi o troppo piccoli che non si riesce a +rappresentare, si parla rispettivamente di overflow o underflow. Per questo +motivo, somme e soprattutto differenze di numeri grandi portano grandi errori +dovuti all'arrotondamento: bisogna cercare di evitarlo se possibile. Per +esempio, per calcolare un polinomio, conviene usare il metodi di +Ruffini-Horner perché riduce il numero di operazioni da effettuare: + +- metodo semplice: + + $$ + p(x) = a_0 + a_1x + a_2x^2 + ... +a_nx^n + $$ + + che sono n addizioni e n(n+1)/2 moltiplicazioni, quindi $\sim n^2$. +- metodo di R-H: + + $$ + p (x) = (((((a_n)x +a_{n-1})x +a_{n-2}x +... + $$ + + che sono n addizioni e n moltiplicazioni, quindi $\sim n$. + +Il problema si verifica anche se si sottraggono due numeri molto simili tra +loro, perché il risultato è un numero molto piccolo che potrebbe finire in +underflow (si parla di "cancellazione catastrofica"). + +\textcolor{orange}{oss:} Il costo computazionale della risoluzione di un +sistema lineare in $n$ equazioni è asintoticamente uguale al costo del +prodotto di due matrici $nxn$. Esistono algoritmi che non richiedono più di +$k \cdot n^{\alpha}$ operazioni, col il più piccolo valore noto di $\alpha$ +pari a 0.2375. + +Alcuni algoritmi sono definiti "instabili": accade quando gli errori di +round-off si accumulano fino a portare a risultati completamente errati. +Esistono problemi detti "mal condizionati" che con qualsiasi algoritmo danno +errori talmente elevati da rendere il risultato privo di significato. In +questi casi, piccole variazioni dei dati iniziali portano a grandi variazioni +nei risultati. 
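As a quick, self-contained illustration of catastrophic cancellation (a minimal sketch, independent of the exercises in this repository), the following C program evaluates $1 - \cos(x)$ for a small $x$ in two mathematically equivalent ways: the direct difference of two nearly equal numbers loses essentially all significant digits, while the rewritten form $2\sin^2(x/2)$ preserves the correct order of magnitude.

```c
#include <stdio.h>
#include <math.h>

int main(void) {
    double x = 1e-8;

    // Direct form: cos(x) agrees with 1 in ~16 digits, so the
    // subtraction cancels essentially every significant figure.
    double direct = 1.0 - cos(x);

    // Equivalent form with no subtraction of nearly equal numbers.
    double stable = 2.0 * pow(sin(x / 2.0), 2);

    printf("direct: %.17g\n", direct);   // typically prints 0
    printf("stable: %.17g\n", stable);   // ≈ 5e-17, the correct value
    return 0;
}
```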
Si chiama "numero di condizionamento del problema" il seguente +rapporto: + +$$ + \frac{\Delta r}{\Delta \alpha} = + \frac{\text{\% errore risultato}}{\text{\% errore dato iniziale}} +$$ + +che può quindi essere espresso in questo modo: + +$$ + \Delta \alpha = \frac{x + h - x}{x} = \frac{h}{x} + \hspace{100pt} + \Delta r = \frac{f(x + h)-f(x)}{f(x)} +$$ +$$ + \frac{\Delta r}{\Delta \alpha} = + \frac{f(x + h)-f(x)}{h} \cdot \frac{x}{f(x)} = + f'(x) \cdot \frac{x}{f(x)} +$$ + +Se questo numero è $<< 1$, allora il problema è poco sensibile ai dati +iniziali. + diff --git a/misc/lessons/sections/modulo-2-1.md b/misc/lessons/sections/modulo-2-1.md new file mode 100644 index 0000000..1b02c79 --- /dev/null +++ b/misc/lessons/sections/modulo-2-1.md @@ -0,0 +1,257 @@ +# Probabilità + +## Concetto di probabilità + +Consideriamo un insieme S di eventi detto "spazio campionario" e due eventi +casuali A e B. Essi sono soggetti agli assiomi di Kolmogorov: + +$\forall A \subseteq S, 0 \leqslant P(A) \leqslant 1$ +$P(S) = 1$ +$A \cap B = \varnothing \Longrightarrow P(A \cup B) = P(A) + P(B)$ + +Si definisce probabilità condizionata di A dato B: + +$$ + P(A|B) = \frac{P(A \cap B)}{P(B)} +$$ + +Se due eventi sono indipendenti, allora vale che: + +$$ + P(A \cap B) = P(A) \cdot P(B) \Longrightarrow + P(A|B) = \frac{P(A) \cdot P(B)}{P(B)} = P(A) +$$ + +Esistono diversi approcci per definire la probabilità: + +- approccio frequentista: + + $$ + P(A) = \lim_{n \rightarrow +\infty} \frac{\#A}{n} + $$ + + ma c'è il problema che è impossibile fare un numero infinito di tentativi. +- approccio bayesiano: + utilizzare il teorema di Bayes, per esempio: + + $$ + P(\text{teoria|dati}) \propto P(\text{dati|teoria}) \cdot P(\text{teoria}) + $$ + si tratta quindi di un approccio "soggettivo", almeno per l'ultimo termine + dell'equazione qua sopra, perché la probabilità della validità della teoria + può essere data sulla base di ragionamenti e osservazioni riguardanti il + fenomeno di cui si sta parlando. + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio di probabilità condizionata + +Si osservano dei decadimenti e se ne misurano indipendentemente $N_a$ e $N_b$; +in comune ne sono stati misurati $N_{ab}$. Possiamo stimare il numero totale +di eventi $N$ e l'efficienza totale $\epsilon$. 
+ +$$ + P(a) = \frac{N_a}{N} \hspace{100pt} P(b) = \frac{N_b}{N} +$$ +$$ + P(ab) = \frac{N_{ab}}{N} = P(a) \cdot P(b) = \frac{N_a \cdot N_b}{N^2} + \Longrightarrow N = \frac{N_a \cdot N_b}{N_{ab}} +$$ +$$ + \epsilon = P(a \cup b) = P(a) + P(b) -P(a \cap b) = + \frac{N_a + N_b - N_{ab}}{N} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +## Teorema di Bayes + +$$ + P(A|B) = \frac{P(A \cap B)}{P(B)} + \hspace{100pt} + P(B|A) = \frac{P(B \cap A)}{P(A)} +$$ +$$ + P(A|B) = \frac{P(B|A) \cdot P(A)}{P(B)} +$$ + +Dal teorema di Bayes è possibile dedurre il teorema della probabilità totale: +consideriamo un insieme $S$ diviso in sottoinsiemi disgiunti $A_i$ la cui +unione dà l'insieme di partenza: + +$$ + \cup_i A_i = S +$$ + +e consideriamo un insieme $B$ anch'esso interno ad $S$: + +$$ + B = B \cap S = B \cap (\cup_i A_i) = \cup_i (B \cap A_i) +$$ +$$ + \Longrightarrow P(B) = \sum_i P(B \cap A_i) +$$ +$$ + \Longrightarrow P(B) = \sum_i P(B|A_i)P(A_i) +$$ + +da cui, per uno specifico insieme $A_j$, attraverso il teorema di Bayes: + +$$ + P(A_j|B) = \frac{P(B|A_j) \cdot P(A_j)}{\sum_i P(B|A_i)P(A_i)} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio di probabilità totale: segnale e fondo + +Un rilevatore misura segnale e fondo con relative efficienze $P(R|S)$ e +$P(R|F)$. 
Se sono note a priori la probabilità di segnale e di fondo +$P(S)$ e $P(F)$, allora si può risalire alla probabilità, data una +misurazione, di aver misurato il segnale: + +$$ + P(S|R) = \frac{P(R|S) \cdot P(S)}{P(R|S) \cdot P(S) + P(R|F) \cdot P(F)} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio di probabilità totale: AIDS + +$$ + P(\text{AIDS}) = 0.001 \hspace{50pt} P(\text{no AIDS}) = 0.999 +$$ +$$ + P(\text{+|AIDS}) = 0.98 \hspace{50pt} P(\text{-|AIDS}) = 0.02 +$$ +$$ + P(\text{+|no AIDS}) = 0.03 \hspace{50pt} P(\text{-|no AIDS}) = 0.97 +$$ + +Quindi se il test è positivo, la probabilità di avere davvero preso l'AIDS è: + +$$ + P(\text{AIDS|+}) = \frac{P(\text{+|AIDS}) \cdot P(\text{AIDS})} + {P(\text{+|AIDS}) \cdot P(\text{AIDS}) + P(\text{+|no AIDS}) \cdot + P(\text{no AIDS})} = 0.032 +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Approccio bayesiano con una teoria + +Cominciamo com un esempio: con un esperimento è stata misurata la massa di +un elettrone e sono stati trovati i valori {$m_i$}, di cui il valore medio +è $m_e = \SI{520 \pm 10}{KeV}$. Si assume quindi che il valore vero sia +compreso tra \SI{510}{KeV} e \SI{530}{KeV} con una probabilità del 68% data +dal confidence level. 
Per cui, la probabilità che la massa vera sia proprio +\SI{520}{KeV} è data da: + +$$ + P(m_e|{m_i}) = \frac{}{} +$$ + +\textcolor{red}{**rivedere negli appunti questa formula**} + +Quando si fa una misurazione, si misurano $N$ valori e si calcolano il valore +medio $\bar{x}$ e la deviazione standard $\sigma$ e si dice che il risultato è: + +$$ + \mu = \bar{x} \pm \frac{\sigma}{\sqrt{N}} +$$ + +e di solito lo si interpreta dicendo che: + +$$ + P \left( \bar{x} - \frac{\sigma}{\sqrt{N}} \leqslant \mu \leqslant + \bar{x} + \frac{\sigma}{\sqrt{N}} \right) = 68\% +$$ + +ma in realtà quello che sappiamo è solo che: + +$$ + P \left( \mu - \frac{\sigma_{true}}{\sqrt{N}} \leqslant \bar{x} \leqslant + \mu + \frac{\sigma_{true}}{\sqrt{N}} \right) = 68\% +$$ + +**\textcolor{red}{WHAT?!?}** diff --git a/misc/lessons/sections/modulo-2-2.md b/misc/lessons/sections/modulo-2-2.md new file mode 100644 index 0000000..06bacc9 --- /dev/null +++ b/misc/lessons/sections/modulo-2-2.md @@ -0,0 +1,480 @@ +# Statistica + +## Distribuzioni di probabilità + +Una funzione di densità di probabilità $f$ è definita in modo che la probabilità +che una variabile $x$ sia compresa tra $x$ e $x + dx$ sia data da: + +$$ + P(x \subset [x, x + dx]) = f(x)dx +$$ + +dunque vale che: + +$$ + \int\limits_{- \infty}^{+ \infty} dx f(x) = 1 +$$ + +Si definisce funzione cumulante: + +$$ + F(x) = \int\limits_{- \infty}^x dx' f(x') +$$ + +e quantile di ordine $\alpha$ il valore di $x$ per cui $F(x) = \alpha$. +Nel caso multidimensionale in cui si abbiano due o più variabili, si parla di +joint pdf: + +$$ + f(x, y) \hspace{30pt} \Longrightarrow \hspace{30pt} + \int\limits_{- \infty}^{+ \infty} \int\limits_{- \infty}^{+ \infty} + dx dy f(x, y) = 1 +$$ + +e si definiscono due distribuzioni marginali: + +$$ + f_x (x) = \int\limits_{- \infty}^{+ \infty} dy f(x, y) + \hspace{50pt} + f_y (y) = \int\limits_{- \infty}^{+ \infty} dx f(y, x) +$$ + +dunque due variabili $x$ e $y$ sono indipendenti se $f(x) = f_x(x) \cdot +f_y(y)$. Ora, se $A$ è l'evento di probabilità $f_x(x)dx$, mentre $B$ ha +probabilità $f_y(y)dy$, allora si possono definire le pdf condizionali come +segue: + +$$ + P(B|A) = \frac{P (A \cap B)}{P(A)} = \frac{f(x, y)dxdy}{f_x(x)dx} + \hspace{20pt} \Longrightarrow \hspace{20pt} h(y|x) = \frac{f(x, y)}{f_x(x)} +$$ + +per cui il teorema di Bayes diventa: + +$$ + g(x|y) = \frac{h(y|x)f_x(x)}{f_y(y)} +$$ + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Paradosso di Borel-Kolmogorov + +Si considerino dei punti distribuiti uniformemente sulla superficie del pianeta +Terra: ci si aspetterebbe che i punti siano uniformemente distribuiti anche +lungo un parallelo o un meridiano... ma consideriamo un meridiano: esso giace +per il 25% a nord del 45'esimo parallelo e quindi, secondo la logica di prima, +anche il 25% dei punti che si trovano su di esso. Però non è vero che il 45% +della superficie terrestre è al di sopra del 45'esimo parallelo! 
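A quick check of the numbers: on a sphere of radius $R$ the area above latitude $\theta_0$ is that of a spherical cap, $2\pi R^2(1 - \sin\theta_0)$, so the fraction of the surface above the 45th parallel is

$$
  \frac{2\pi R^2 (1 - \sin 45^\circ)}{4\pi R^2}
  = \frac{1 - \sqrt{2}/2}{2} \approx 0.15
$$

roughly 15%, and not the 25% that the naive "uniform along every meridian" argument would suggest.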
+Il paradosso è risolto perché non ci si può basare su un insieme di misura +nulla quale il meridiano (perché è unidimensionale). Lo si vede chiaramente +adottando la terminologia poc'anzi introdotta: +Se la distribuzione è uniforme, la probabilità di trovare un punto in una +certa superficie è dato dal rapporto tra l'angolo solido descritto da tale +superficie e l'angolo solido totale: + +$$ + f(\theta, \phi) d\theta d\phi= \frac{d\phi d\theta \cos(\theta)}{4 \pi} +$$ + +da cui è possibile determinare la due probabilità marginali: + +$$ + f_{\phi}(\phi) = \int\limits_{0}^{\pi} f(\theta, \phi) d\theta = + \int\limits_{0}^{\pi} \frac{\cos(\theta)}{4 \pi} = \frac{\cos(\theta)}{2} +$$ +$$ + f_{\theta}(\theta) = \int\limits_{0}^{2 \pi} f(\theta, \phi) d\phi = + \frac{1}{2 \pi} +$$ + +per cui si tratta di due costanti rispetto alle rispettive variabili. Da ciò +si può dunque dedurre che, mentre la densità lungo un parallelo è +effettivamente costante, lo stesso non si può dire riguardo a un meridiano. + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +Una funzione di una variabile casuale è essa stessa una variabile casuale. +Consideriamo la pdf $f(x)$ e una funzione $a(x)$ di cui si vuole trovare la pdf +$g(a)$. Nel caso in cui l'inversa di $a(x)$ sia univoca, definita $dS$ la +regione delle $x$ per cui $a \subset [a, a +da]$: + +$$ + g(a)da = \int\limits_{dS} dxf(x) + = \left| \int\limits_{x(a)}^{x(a +da)} f(x')dx' \right| + = \int\limits_{x(a) + \left| \frac{dx}{da} \right| da}^{x(a +da)} f(x')dx' +$$ + +Ovvero: + +$$ + g(a) = f(x(a)) \left| \frac{dx}{da} \right| +$$ + +e se $x(a)$ non è univoca, allora bisogna considerare tutti gli intervalli $dS$ +di $dx$ che corrispondono a $da$. +Nel caso di funzioni di $N$ variabili, siccome vale che: + +$$ + g(a')da' = \int \dots \int\limits_{dS} f(x_1 \dots x_N) dx_1 \dots dx_N +$$ + +con $dS$ regione dello spazio delle $x$ compreso tra le isosuperfici: + +$$ + a(\vec{x}) = a' \hspace{10pt} \wedge \hspace{10pt} a(\vec{x}) = a' + da' +$$ + +Nel caso in cui $z = x \cdot y$, si trova la convoluzione di Mellin: + +$$ + g(z)dz = \int\limits_{dS} dxdy f(x, y) + = \int\limits_{-\infty}^{+\infty} dx + \int\limits_{\frac{z}{x}}^{\frac{z + dz}{x}} dy f(x, y) +$$ + +**\textcolor{red}{Non ho capito questa parte...}** + +## Propagazione degli errori + +Consideriamo una variabile $x$ con pdf $f(x)$. Si definisce valore di +aspettazione o media (e lo si indica spesso con $\mu$): + +$$ + E[x] = \int dx f(x) x +$$ + +Nel caso di una variabile $y(x)$ con pdf $g(x)$, invece: + +$$ + E[y] = \int dy \cdot y \cdot g(y) = \int dx f(x) g(x) +$$ + +Mentre si definisce varianza (e la si indica spesso con $\sigma^2$, mentre +con deviazione standard si intende $\sigma$): + +$$ + V[x] = E[x - E[x]^2] = E[x^2] - \mu^2 +$$ + +Più in generale si definiscono 'momenti algebrici' $E[x^n] =\mu'_n$ con +$\mu'_1 = \mu$ e 'momenti centrali' $E[(x -\mu)^n] = \mu_n$ con $\mu_2 = +\sigma^2$. 
+Si definiscono inoltre due grandezze di correlazione. La covarianza: + +$$ + \text{cov} [x, y] = E[xy] - E[x]E[y] = E[xy] - \mu_x \mu_y +$$ + +che equivale a: + +\begin{align*} + \text{cov} [x, y] + &= E[(x -\mu_x)(y -\mu_y)] \\ + &= E[xy -x\mu_y -y\mu_x + \mu_x\mu_Y] \\ + &= E[xy] -\mu_y E[x] -\mu_x E[y] + \mu_x \mu_y \\ + &= E[xy] -\mu_y mu_x - \mu_x \mu_y + \mu_x \mu_y \\ + &= E[xy] - \mu_x \mu_y +\end{align*} + +Notare che se $x$ e $y$ sono indipendenti, allora $f(x, y) = f_x(x)f_y(y)$, +perciò: + +$$ + E[xy] = \int dx \int dy xy f(x, y) = \mu_x \mu_y + \hspace{20pt} \Longrightarrow \hspace{20pt} \text{cov} [x, y] = 0 +$$ + +e il coefficiente di correlazione: + +$$ + \rho_{xy} = \frac{\text{cov} [xy]}{\sigma_x \sigma_y} +$$ + +![Esempio di correlazione tra due +grandezze.](images/correlazione.png){width=70%} + +Anche se la $f(\vec{x})$ non è completamente nota, è comunque possibile stimare +il valore medio e la varianza di una grandezza $y(\vec{x})$ conoscendo solo le +stime di media e varianza della pdf. Espandiamo attraverso la serie di +Taylor: + +$$ + y(\vec{x}) = y(\vec{\mu}) + \sum_{i= 1}^N \left[ + \frac{\partial y}{\partial x_i} \right]_{\vec{x} + = \vec{\mu}} (x_i - \mu_i) +$$ +$$ + \Longrightarrow \hspace{20pt} E[y] = y(\vec{\mu}) + \Longleftarrow \hspace{20pt} E[x_i] = \mu_i +$$ + +Mentre per la varianza servono $E[y^2]$ ed $E[y]$. Sempre passando +attraverso uno sviluppo di Taylor attorno al valore medio: + +\begin{align*} + E[y^2] &= y^2(\vec{\mu}) + 2y(\vec{\mu}) \sum_{i = 1}^{N} + \left[ \frac{\partial y} {\partial x_i} \right]_{\vec{x} = \vec{\mu}} + E[x_i - \mu_i] \\ + &+ E \left[ \left( \sum_{i_1}^N + \left[ \frac{\partial y}{\partial x_i} \right]_{\vec{x} = \vec{\mu}} + (x_i - \mu_i) \right) \left( \sum_{j = 1}^N \left[ \frac{\partial y} + {\partial x_i} \right]_{\vec{x} = \vec{\mu}} (x_j - \mu_j) \right) \right] +\end{align*} + +Siccome il secondo termine si annulla sempre perché $E[x_i] = \mu_i$, allora +rimane che: + +$$ + V[y] = E[y^2] - E[y]^2 = \sigma_y^2 = \sum_{i,j = 1}^N \left[ + \frac{\partial y}{\partial x_i} + \frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij} +$$ + +Con $V_{ij}$ che è la matrice di covarianza, che ha come entrate: + +$$ + V_{ij} = E[(x_i - \mu_i)(x_j - \mu_j)] = \rho_{ij} \sigma_i \sigma_j +$$ + +e quindi, nel caso in cui le variabili siano scorrelate, si ottiene che: + +$$ + V_{ij} = \sigma_i^2 \delta_{ij} + \hspace{20pt} \Longrightarrow \hspace{20pt} + \sigma_y^2 = \sum_{i = 1}^N \left[ \frac{\partial y}{\partial x_i} + \right]_{\vec{x} = \vec{\mu}}^2 \sigma_i^2 +$$ + +Cioè dice quanto cambia la $y$ al variare del 'dato iniziale' $\vec{x}$. +Ma quindi, per quanto visto prima: + +$$ + \text{cov} [x_i, x_j] = E[(x_i - \mu_i)(x_j - \mu_j)] = V_{ij} +$$ + +Più in generale, date $\vec{y}$ variabili dipendenti da $\vec{x}$, vale che: + +$$ + U = AVA^T \hspace{30pt} \text{con} \hspace{30pt} A_{ij} = \left[ + \frac{\partial y_i}{\partial x_j} \right]_{\vec{x} = \vec{\mu}} + \hspace{30pt} \text{e con} \hspace{30pt} U_{kl} = \text{cov}[y_k, y_l] +$$ + +dove $U$ è detta matrice di covarianza delle $y$. +Attenzione: quanto detto fin'ora, che descrive in che modo gli errori di +$\vec{x}$ influenzano $y$, vale solo nel caso in cui $y$ sia lineare nelle $x$. +Quindi, in casi come $y(x) = 1/x$, non si può fare questo discorso. 
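More precisely, first-order propagation is still a useful approximation for smooth non-linear $y(\vec{x})$ as long as the spread of the inputs is small compared to the scale over which the derivatives vary. For $y = 1/x$, for instance, it gives

$$
  \sigma_y \simeq \left| \frac{dy}{dx} \right| \sigma_x = \frac{\sigma_x}{x^2}
$$

which is reliable only when $\sigma_x \ll |x|$ and loses all meaning as $x$ approaches zero.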
+ +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Esempio + +Consideriamo: + +$$ + y = x_1 - x_2 +$$ +$$ + \text{con} \hspace{30pt} \mu_1 = \mu_2 = 10 \hspace{30pt} \wedge + \hspace{30pt} \sigma_1 = \sigma_2 = 1 +$$ + +allora abbiamo che $y = y(x_1, x_2)$, quindi: + +$$ + E[y] = y(\mu_1, \mu_2) = 10 - 10 = 0 +$$ +$$ + V[y] = \sum_{i, j = 1}^2 \left[ \frac{\partial y}{\partial x_i} + \frac{\partial y}{\partial x_j}\right]_{\vec{x} = \vec{\mu}} V_{ij} = + 1 \cdot V_{11} + 1 \cdot V_{22} -1 \cdot 2 \cdot V_{12} +$$ + +Se le correlazioni sono nulle, allora $V_{12} = 0 \Longrightarrow V[y] = 2 +\Longrightarrow \sigma_y = 1.4$, se invece $x_1$ e $x_2$ sono correlate, nel +caso in cui il coefficiente di correlazione sia unitario si ha che $V[y] = +0$. Quindi la correlazione può cambiare di molto le cose. + +\begin{tikzpicture} + \draw [thick, pink] (0,0) -- (1,0); + \draw [red] (1.5,0) circle [radius=0.1]; + \draw [thick, pink] (2,0) -- (3,0); + \draw [red] (3.5,0) circle [radius=0.1]; + \draw [thick, pink] (4,0) -- (5,0); + \draw [red] (5.5,0) circle [radius=0.1]; + \draw [thick, pink] (6,0) -- (7,0); + \draw [red] (7.5,0) circle [radius=0.1]; + \draw [thick, pink] (8,0) -- (9,0); + \draw [red] (9.5,0) circle [radius=0.1]; + \draw [thick, pink] (10,0) -- (11,0); + \draw [red] (11.5,0) circle [radius=0.1]; + \draw [thick, pink] (12,0) -- (13,0); + \draw [red] (13.5,0) circle [radius=0.1]; + \draw [thick, pink] (14,0) -- (15,0); +\end{tikzpicture} + +### Errori sistematici + +Consideriamo due grandezze $x_1$ e $x_2$ con un errore sistematico in comune +$S$: + +\begin{align*} + &x_1 = x_{1_0} + x_{1_s} \\ + &x_2 = x_{2_0} + x_{2_s} +\end{align*} + +si avrà che i termini con pedice $0$ sono indipendenti tra loro, mentre gli +altri due saranno correlati. Dato che gli errori si sommano in quadratura, la +matrice di covarianza sarà quindi: + +$$ +\text{cov}[x_1, x_2] = S^2 \hspace{30pt} \Longrightarrow \hspace{30pt} +V = \begin{pmatrix} +\sigma_1^2 + S^2 & S^2\\ +S^2 & \sigma_2^2 + S^2 +\end{pmatrix} +$$ + +perché: + +\begin{align*} + \text{cov}[x_1, x_2] &= E[x_1 x_2] - E[x_1]E[x_2] = \\ + &= E[(x_{1_0} + x_{1_s})(x_{2_0} + x_{2_s})] - E[x_{1_0} + x_{1_s}] + E[x_{2_0} + x_{2_s}] = \\ + &= E[x_{1_0}x_{2_0}] + E[x_{1_0}x_{2_s}] + E[x_{1_s}x_{2_0}] + + E[x_{1_s}x_{2_s}] + \\ + &\hspace{13pt} - E[x_{1_0}]E[x_{2_0}] - E[x_{1_0}]E[x_{2_2}] + - E[x_{1_s}]E[x_{2_0}] - E[x_{1_s}]E[x_{2_s}] = \\ + &= \mu_1 \mu_2 + \mu_1E[x_{2_s}] + E[x_{1_s}]\mu_2 +E[x_{1_s}x_{2_s}] + \\ + &\hspace{13pt} - \mu_1 \mu_2 - \mu_1 E[x_{2_s}] - E[x_{1_s}] \mu_2 + - E[x_{1_s}] E[x_{2_s}] = \\ + &= E[x_{1_s} x_{2_s}] - E[x_{1_s}] E[x_{2_s}] = \text{cov}[x_{1_s}, x_{2_s}] +\end{align*} + +### Trasformazione ortogonale + +Può tornare utile fare un cambio di variabile che permetta di ottenere una +matrice di covarianza delle $y$ diagonale. 
+Consideriamo le solite variabili $Y_i$ legate linearmente alle $x_j$: + +$$ + y_i = \sum_j = A^i_j x_j + \hspace{30pt} \Longrightarrow \hspace{30pt} + U_{ij} = \sum_{k,l} A_{ik} V_{kl} A^T_{lj} +$$ + +Si tratta quindi di diagonalizzare la matrice $U$: la soluzione è semplice, +la matrice $A$ è quella formata dagli autovalori di $V$. Questo concetto è +utile nel caso della scelta delle coordinate da utilizzare. +Se immaginiamo di star utilizzando le coordinate polari $\vec{x} = (x, y)$, la +matrice di covarianza sarà: + +$$ +V = \begin{pmatrix} +\sigma_1^2 & \rho \sigma_1 \sigma_2 \\ +\rho \sigma_1 \sigma_2 & \sigma_2^2 +\end{pmatrix} +$$ + +Diagonaliziamola: prima di tutto troviamo gli autovalori della matrice $V - +\lambda I$: + +$$ + \begin{vmatrix} + \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\ + \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda + \end{vmatrix} + = (\sigma_1^2 - \lambda) (\sigma_2^2 - \lambda) - \rho^2 \sigma_1^2 + \sigma_2^2 = 0 +$$ +\begin{align*} + &\Longrightarrow \hspace{30pt} + \lambda^2 -(\sigma_1^2 + \sigma_2^2) \lambda + \sigma_1^2 \sigma_2^2 + (1 - \rho^2) = 0 \\ + &\Longrightarrow \hspace{30pt} + \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm + \sqrt{\sigma_1^4 + \sigma_2^4 +2 \sigma_1^2 \sigma_2^2 + - 4 \sigma_1^2 \sigma_2^2 + 4\rho^2 \sigma_1^2 \sigma_2^2}}{2} = \\ + &\Longrightarrow \hspace{30pt} + \lambda_{1,2} = \frac{\sigma_1^2 + \sigma_2^2 \pm + \sqrt{(\sigma_1^2 - \sigma_2^2)^2 +4 \rho^2 \sigma_1^2 \sigma_2^2}}{2} +\end{align*} + +e ora calcoliamo gli autovettori: + +$$ + (V - \lambda I)\vec{r} = 0 + \hspace{30pt} \Longrightarrow \hspace{30pt} + \begin{pmatrix} + \sigma_1^2 - \lambda & \rho \sigma_1 \sigma_2 \\ + \rho \sigma_1 \sigma_2 & \sigma_2^2 - \lambda + \end{pmatrix} + \begin{pmatrix} + r_1 \\ + r_2 + \end{pmatrix} + = + \begin{pmatrix} + r_1 \\ + r_2 + \end{pmatrix} +$$ +$$ + \Longrightarrow \hspace{30pt} + \begin{cases} + (\sigma_1^2 - \lambda)r_1 + \rho \sigma_1 \sigma_2 r_2 = r_1 \\ + \rho \sigma_1 \sigma_2 r_1 + (\sigma_2^2 - \lambda)r_2 = r_2 + \end{cases} + \hspace{30pt} \Longrightarrow \hspace{30pt} + r_1 = \frac{}{} r_2 +$$ + +eccetera eccetera... diff --git a/misc/lessons/sections/modulo-2-3.md b/misc/lessons/sections/modulo-2-3.md new file mode 100644 index 0000000..43272d2 --- /dev/null +++ b/misc/lessons/sections/modulo-2-3.md @@ -0,0 +1,466 @@ + Distribuzioni di probabilità + +## Distribuzione binomiale + +![Distribuzione binomiale: p = 0.3, n = 10, +N = 1000.](images/binomiale.png) + +Si considerino $N$ tentativi di un esperimento che può avere come esiti +soltanto successo o fallimento e che la probabilità di ogni successo sia $p$. +Definiamo $n$ il numero dei successi. Dunque la probabilità di ottenere $n$ +successi su $N$ tentativi totali è data da: + +$$ + P(N, n, p) = \binom{N}{n} p^n (1 - p)^{N -n} +$$ + +perché: + +- la probabilità che un successo si verifichi è $p$; +- la probabilità che $k$ successi si verifichino è data dal prodotto di tutte + le probabilità: $p^n$; +- lo stesso discorso vale per gli insuccessi: ognuno ha probabilità $(1_p)$ e + se ne verificano $N -n$; +- il termine binomiale rappresenta tutte le possibili permutazioni: il + concetto è semplice se si immagina di posizionare successi e fallimenti + all'interno di una griglia con $N$ possibili posizioni: un successo è un + pallino bianco e un fallimento è un pallino nero. In quanti posti posso + mettere il primo successo? $N$. E il secondo? $N -1$. E il terzo? 
E così + via, finché ho messo tutti i successi, che occupano $n$ posizioni, l'ultima + delle quali è stata scelta tra $N - (n -1)$ posizioni, per cui: + + $$ + N \cdot (N -1) \dots (N -n + 1) = + \frac{N \cdot (N -1) \dots (N -n + 1) \cdot 2 \cdot 1} + {(N -n) \cdot (N - n -1) \dots 2 \cdot 1} = \frac{N!}{(N -n)!} + $$ + + ma non bisogna considerare che poi tutte le posizioni delle palline nere + sono uguali, quindi va ulteriormente diviso per $n!$ per le stesse ragioni. + Da cui: + + $$ + \binom{N}{n} = \frac{N!}{n! (N -n)!} + $$ + +Per la normalizzazione, vale che: + +$$ + \sum_{n = 0}^N P(N, n, p) = 1 +$$ + +Possiamo definire un valore di aspettazione e una varianza: + +\begin{align*} + &E[n] = \sum_{n = 0}^N n P(N, n, p) = Np \\ + &V[n] = E[n^2] - E[n]^2 = Np(1 -p) +\end{align*} + +## Distribuzione multinomiale + +È la generalizzazione della pdf precedente nel caso in cui ci siano $m$ +possibili risultati, ciascuno con una probabilità $P_m$ di verificarsi. Per +esempio, è il caso di un istogramma riguardo al quale ci si domanda quale sia la +probabilità di trovarlo esattamente con quelle specifiche entrate. +La probabilità è conseguentemente data da: + +$$ + P(N, \vec{n}, \vec{p}) = \frac{N!}{n_1! n_2! \dots n_m!} + p_1^{n_1} p_2^{n_2} \dots p_m^{n_m} +$$ + +E come valore di aspettazione e deviazione standard si ottiene che: + +\begin{align*} + &E[n_i] = Np_i \\ + &V[n_i] = Np_i (1 -p_i) +\end{align*} + +### Legge dei grandi numeri + +La legge dei grandi numeri afferma che la media sperimentale di una variabile +$x$, per un numero di tentativi $N$ che tende all'infinito, si avvicina molto +alla media vera. Questa legge può essere utilizzata per stimare le probabilità +$P_i$ di una distribuzione muiltinomiale tramite le frequenze con cui i diversi +eventi si verificano. +Si consideri la frequenza $f_j$ con cui l'evento j-esimo si verifica, dato un +set di $N$ tentativi: + +$$ + f_j = \frac{1}{N} \sum_{i = 1}^N x_i = \frac{x_j}{N} +$$ + +dove $x_i$ è una variabile che vale 1 se l'evento j-esimo si è verificato e +vale 0 quando se ne è verificato un altro e $x_j$ è quindi il numero di volte +che l'evento j-esimo si è verificato. +A differenza di $P_j$, $f_j$ è una variabile casuale perché dipende da $x_j$ +che è la somma di variabili casuali. Definiamo valore medio: + +$$ + E(f_j) = \frac{E(x_j)}{N} = P_j +$$ + +e calcoliamo la varianza: + +$$ + V \left[ f_j \right] = V \left[ \frac{x_j}{N} \right] = + E \left[ \frac{x_j^2}{N^2} \right] - \left( E \left[ \frac{x_j}{N} + \right] \right)^2 = \frac{1}{N^2} V \left[ x_j \right] +$$ + +ora, $x_j$ è esattamente $n_j$ della multinomiale, perciò: + +$$ + V[x_j] = NP_j (1 - P_j) + \hspace{30pt} \Longrightarrow \hspace{30pt} + V[f_j] = \frac{1}{N} P_j (1 - P_j) \leqslant \frac{1}{N} +$$ + +## Distribuzione di Poisson + +![Distribuzione Poissoniana: $\nu$ = 1, +N = 1000.](images/poisson.png) + +Se si considera la distribuzione binomiale e ci si pone nel limite in cui il +numero di tentativi ripetuti tenda all'infinito e che la probabilità di +successo tenda a zero (con il vincolo che $N \cdot p = cost = \nu$), si ottiene +la distribuzione di Poisson: + +$$ + P(N, n, \nu) = \frac{\nu^n}{n!} e^{-\nu} +$$ + +con: + +\begin{align*} + &E[n] = \nu \\ + &V[n] = \nu +\end{align*} + +dove $\nu = NP = \frac{}{}$ numero medio di successi. 
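+Schematicamente, il limite si ricava ponendo $p = \nu/N$ nella binomiale:
+
+$$
+ P(N, n, p) = \frac{N!}{n! (N -n)!} \frac{\nu^n}{N^n}
+ \left( 1 - \frac{\nu}{N} \right)^{N -n}
+ \xrightarrow{N \rightarrow + \infty} \frac{\nu^n}{n!} e^{-\nu}
+$$
+
+dato che $N!/[(N -n)! \, N^n] \rightarrow 1$ e
+$(1 - \nu/N)^{N -n} \rightarrow e^{-\nu}$.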
+Quando $N$ è talmente grande da non essere definito (come nel caso in cui si
+osservino i decadimenti di un atomo e dunque i tentativi sono le osservazioni,
+che sono continue), non è più possibile definire una probabilità di
+successo per ogni evento (perché sarebbe nulla, da cui il motivo per cui
+la Poissoniana è definita con questi due limiti), e quindi $\nu$ va definita in
+un altro modo. Infatti la distribuzione di Poisson, trattandosi di un limite in
+$N$ e $p$, non dipende più esplicitamente da queste due grandezze.
+Nel caso in cui si osservi il decadimento di un atomo, si è soliti procedere in
+questo modo: si suddivide il tempo di osservazione in intervalli (il che
+significa aver suddiviso gli infiniti tentativi in sottoinsiemi di infiniti
+tentativi) e si misura quante volte in ognuno di questi intervalli si verifica
+un successo. L'esperimento è ora praticamente suddiviso in più esperimenti
+minori da cui è possibile dedurre un numero medio frequentistico di successi.
+Per esempio:
+
+----------------------------------------------------------
+# successi        0     1     2     3    4    5    6    7
+------------- ------ ----- ----- ---- ---- --- ----- -----
+# intervalli   1042   860   307    78   15    3    0    0
+
+Poisson        1064   823   318    82   16    2  0.3  0.3
+----------------------------------------------------------
+
+Table: Decadimento di un atomo. Il tempo di osservazione è stato suddiviso in
+intervalli e per ogni intervallo è stato contato il numero di successi
+osservati.
+
+\newpage
+
+Il numero medio di eventi è:
+
+$$
+ \frac{1042 \cdot 0 + 860 \cdot 1 + 307 \cdot 2 + 78 \cdot 3 + 15 \cdot 4
+ + 3 \cdot 5 + 0 \cdot 6 + 0 \cdot 7}{1042 + 860 + 307 + 78 + 15 + 3
+ + 0 + 0} = 0.77
+$$
+
+Da cui è possibile calcolare i valori attesi riportati nella riga "Poisson"
+della tabella precedente.
+
+## Distribuzione uniforme
+
+![Distribuzione uniforme: $a = 0$, $b = 100$.](images/uniform.png)
+
+Una pdf di numeri che hanno tutti uguale probabilità di verificarsi è detta
+uniforme:
+
+$$
+ P (x, a, b) = \begin{cases}
+ \frac{1}{b - a} \hspace{30pt} a \leqslant x \leqslant b \\
+ 0 \hspace{42pt} \text{altrove}
+ \end{cases}
+$$
+
+con:
+
+\begin{align*}
+ E[x] = \frac{1}{2} (a + b) \\
+ V[x] = \frac{1}{12} (b - a)^2
+\end{align*}
+
+Se una variabile è distribuita secondo una pdf $f(x)$, la sua cumulativa è
+uniformemente distribuita. Intuitivamente è semplice perché basta vederla in
+questo modo: si immagini il grafico della pdf; ogni volta che si estrae un
+numero, questo cadrà in un punto casuale nell'area al di sotto della pdf,
+lasciando uno spazio casuale alla sua sinistra (che è il valore della
+cumulativa).
+
+## Distribuzione Gaussiana e CLT
+
+![Distribuzione Gaussiana: $\mu = 30$,
+$\sigma = 5$.](images/gaussian.png)
+
+La distribuzione Gaussiana (o normale) è definita come:
+
+$$
+ P (x, \mu, \sigma) = \frac{1}{\sqrt{2 \pi} \sigma}
+ e^{-\frac{(x - \mu)^2}{2 \sigma^2}}
+$$
+
+con:
+
+\begin{align*}
+ E[x] = \mu \\
+ V[x] = \sigma^2
+\end{align*}
+
+La error function, legata alla cumulativa di questa pdf, è molto utile in
+laboratorio e i suoi valori sono tabulati.
+Il teorema centrale del limite afferma che, date $n$ variabili casuali
+indipendenti, ciascuna con una propria pdf di varianza finita $\sigma_i^2$, nel
+limite in cui $n \rightarrow + \infty$ la somma di queste variabili segue un
+andamento gaussiano con valore medio la somma dei valori medi e varianza la
+somma delle varianze.
+Ciò può essere sfruttato per generare numeri casuali distribuiti secondo una
+distribuzione normale.
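+Per esempio, uno schizzo minimale (puramente indicativo) che sfrutta questa
+idea sommando 12 numeri uniformi in $[0, 1)$ (la somma ha media 6 e varianza
+$12 \cdot 1/12 = 1$, quindi sottraendo 6 si ottiene circa una normale
+standard):
+
+```c
+// Schizzo: generazione approssimata di numeri gaussiani tramite il CLT.
+#include <stdio.h>
+#include <gsl/gsl_rng.h>
+
+int main(void) {
+    gsl_rng *gen = gsl_rng_alloc(gsl_rng_taus);
+    for (int n = 0; n < 10; n++) {
+        double sum = 0;
+        for (int i = 0; i < 12; i++)
+            sum += gsl_rng_uniform(gen);    // uniforme in [0, 1)
+        printf("%f\n", sum - 6);            // ~ N(0, 1)
+    }
+    gsl_rng_free(gen);
+    return 0;
+}
+```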
+ + +Per grandi valori di $\mu$ (vale a dire qualche unità), la distribuzione di +Poisson tende a quella Gaussiana con $\mu = \nu$ e $\sigma = \sqrt{\nu}$. +Analogamente per $N \rightarrow + \infty$ la binomiale tende alla Gaussiana +con $\mu = Np$ e $\sigma = \sqrt{Np (1 - p)}$. + +## Distribuzione Gaussiana multivariata + +Nel caso multidimensionale, la pdf per il vettore $\vec{x} = {x_1 ... n_n}$ è +data da: + +$$ + f(\vec{x}, \vec{\mu}, V) = \frac{1}{(2 \pi)^{N/2} \mid V \mid^{1/2}} + \exp \left[ - \frac{1}{2} (\vec{x} - \vec{\mu})^t V^{-1} (\vec{x} + - \vec{\mu}) \right] +$$ + +con $E[x_i] = \mu_i$ e $\text{cov}[x_i, x_j] = V_{ij}$ + +## Media pesata + +Quando si hanno misure con diversi errori, vanno combinate attraverso il +concetto di media pesata: + +$$ + E[x] = \frac{\sum_{i = 1}^N \frac{x_i}{\sigma_i^2}}{\sum_{i = 1}^N + \frac{1}{\sigma_i^2}} +$$ +$$ + V[x] = \frac{1}{\sum_{i = 1}^N \frac{1}{\sigma_i^2}} +$$ + +Ma non ha senso mediare valori che non sono compatibili! + +## Distribuzione di Breit-Wigner + +![Distribuzione di Breit-Wigner: $x_0 = 20$, +$\Gamma = 10$.](images/wigner.png) + +Esistono alcune distribuzioni che hanno momenti non ben definiti e che per +questo si dicono "patologiche". Un esempio è la distribuzione di Breit-Wigner: + +$$ + f (x, \Gamma, x_o) = \frac{1}{\pi} \cdot + \frac{\Gamma/2}{\Gamma^2/4 + (x - x_0)^2} +$$ + +Un caso particolare è quello in cui $x_0 = 0$ e $\Gamma = 2$, caso in cui è +detta distribuzione di Cauchy: + +$$ + f(x, 2, 0) = f(x) = \frac{1}{\pi} \cdot \frac{1}{1 + x^2} +$$ + +Il valore medio e la varianza non sono definiti perché l'integrale è +divergente. Conviene usare la moda e l'ampiezza a mezza altezza, che sono +rispettivamente $x_0$ e $\Gamma$. +Nella libreria *GSL*, la pdf è scritta in questo modo: + +$$ + p(x) = \frac{1}{a \pi (1 + (x/a))^2} + \hspace{50pt} \Longrightarrow \hspace{50pt} + a = \Gamma/2 +$$ + +## Distribuzione di Landau + +Per una particella carica con $\beta = v/c$ che attraversa un materiale sottile +di spessore $d$, la perdita di energia $\Delta$ segue la distribuzione di +Landau: + +![Distribuzione di Landau.](images/landau.png) + +Ha una forma complicatissima che racchiude integrali, logaritmi... Anche in +questo caso non si possono definire i momenti algebrici perché l'integrale +diverge. + +## Distribuzione del chi-quadro + +![Distribuzione del $\chi^2$: $n = 5$.](images/chi2.png) + +Date $N$ grandezze distribuite ciascuna con una propria distribuzione +Gaussiana, la somma dei loro quadrati segue la distribuzione $\chi^2$. +Formalmente è definita così: + +$$ + f(z, n) \frac{1}{2^{n/2} \Gamma (n/2)} z^{n/2 - 1}e^{-z/2} +$$ + +\begin{align*} + &E[z] = n \\ + &V[z] = 2n +\end{align*} + +dove $z$ è la variabile e $n$ è il numero di fradi di libertà. +Quando si fa un esperimento e si campiona $y(x)$ e poi si fittano i dati +trovati con una funzione teorica $f(x)$, ciascun valore $y(x)$ si assume +distribuito come una gaussiana attorno al suo valore vero, che assumiamo +essere $f(x)$: dunque i residui, che sono la differenza $R(x)= y(x) - f(x)$, +sono ancora una gaussiana, ma centrata in zero. Il chi quadro è definito come: + +$$ + \sum_i \frac{[y(x_i) - f(x_i)]^2}{f(x_i)} + \hspace{50pt} \text{oppure} \hspace{50pt} + \sum_i \frac{[y(x_i) - f(x_i)]^2}{\sigma_i^2} +$$ + +Ne consegue che il chi quadro segua appunto la distribuzione del chi quadro. + +Nella libreria *GSL* la distribuzione $\chi^2$ corrisponde alla distribuzione +gamma con $a = n/2$ e $b = 2$. 
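+Per esempio, per campionare una variabile $\chi^2$ con $n$ gradi di libertà si
+può quindi usare `gsl_ran_gamma()`; quella che segue è solo una piccola
+funzione di comodo (il nome è di fantasia):
+
+```c
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+// chi quadro con n gradi di libertà = Gamma(a = n/2, b = 2)
+double sample_chi2(const gsl_rng *gen, int n) {
+    return gsl_ran_gamma(gen, n / 2.0, 2.0);
+}
+```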
+ +## Distribuzione esponenziale + +![Distribuzione del esponenziale: $\lambda = 3$.](images/exponential.png) + +$$ + f(x, \lambda) = \lambda e^{-\lambda x} +$$ + +\begin{align*} + &E[z] = \frac{1}{\lambda} \\ + &V[z] = \frac{1}{\lambda^2} +\end{align*} + +## Distribuzione t di Student + +![Distribuzione t di Student: $\nu = 3$.](images/student.png) + +È la distribuzione seguita dalla media di una popolazione gaussiana quando +la si stima con un piccolo campione e senza conoscere la deviazione standard. +Se $y_1$ è distribuita come una Gaussiana e $y_2$ come un $\chi^2$, se $\nu$ +sono i gradi di libertà, allora $x$ segue la t di Student: + +$$ + x = \frac{y_1}{\sqrt{\frac{y_2}{\nu}}} +$$ + +che è così definita: + +$$ + f(x, \nu) = \frac{\Gamma \left( \frac{\nu + 1}{2} \right)}{\sqrt{\nu \pi} + \Gamma \left( \frac{\nu}{2} \right)} \left( 1 + \frac{x^2}{\nu} + \right)^{- \frac{\nu + 1}{2}} +$$ + +\begin{align*} + &E[z] = 0 \\ + &V[z] = + \begin{cases} + \frac{\nu}{\nu - 2} \hspace{15pt} \nu \greater 2 \\ + \infty \hspace{30pt} \nu \leqslant 2 + \end{cases} +\end{align*} + + +## Distribuzione di Fischer-Snedecor + +![Distribuzione di Fischer: $n = 3$, $m = 4$.](images/fischer.png) + +Se si hanno due campioni $\vec{x}$ e $\vec{y}$ di variabili che seguono le +rispettive Gaussiane, si può usare la distribuzione di Fisher-Snedecor per +comparare le due varianze. Se nel primo caso le variabili sono $n$ e nel +secondo sono $m$, allora la distribuzione di Fisher con gradi di libertà +$n-1$ e $m-1$ dà la distribuzione del rapporto: + +$$ + \frac{S^2_x / S^2_y}{\sigma^2_x / \sigma^2_y} = + \frac{S^2_x / \sigma^2_x}{S^2_y / \sigma^2_y} +$$ + +con: + +$$ + S^2_x = \frac{1}{n -1} \sum_{i = 1}^n (x_i - \mu_i)^2 + \hspace{50pt} \text{,} \hspace{50pt} + S^2_y = \frac{1}{m -1} \sum_{i = 1}^m (y_i - \mu_i)^2 +$$ + +che quindi è il rapporto di due grandezze distribuite secondo il $chi^2$. +La definizione della pdf è complicata... + +## Funzione caratteristica + +Si definisce funzione caratteristica di una variabile $x$ distribuita secondo +una $f(x)$, la trasformata di Fourier di quest'ultima: + +$$ + \hat{f}(k) = E[e^{ikx}] = \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} +$$ + +come per ogni trasformata, tutte le informazioni contenute nella funzione +originaria sono contenute anche nella funzione caratteristica, perché per +tornare alla prima è sufficiente calcolare la trasformata inversa: + +$$ + f(x) = \frac{1}{2 \pi} \int\limits_{-\infty}^{+\infty} dx \hat{f}(k) e^{-ikx} +$$ + +la funzione caratteristica è utile per semplificare alcuni conti. Se +$x_1 \dots x_N$ sono variabili casuali indipendenti: + +\begin{align*} + z = \sum_{i = 1}^N x_i + \hspace{20pt} \Longrightarrow \hspace{20pt} + \hat{f}_z(k) &= \int dx_1 \dots dx_N f_1(x_1) + \dots f_N(x_N) e^{ik \sum_{i=1}^N x_i} = \\ + &= \int dx_1 f_1(x_1) e^{ikx_1} \dots \int dx_N f_N(x_N) e^{ikx_N} = \\ + &= \hat{f}_1(k) \dots \hat{f}_N(k) +\end{align*} + +Inoltre vale anche che: + +$$ + \frac{d^m}{dk^m} \hat{f}(k) \big|_{k = 0} = + \frac{d^m}{dk^m} + \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} \big|_{k = 0} = + i^m \int\limits_{-\infty}^{+\infty} dx f(x) e^{ikx} x^m \big|_{k = 0} = + i^m \mu_m = i^m E[x^m] +$$ + +che è il momento algebrico di ordine $m$. +Per esempio, nel caso di due variabili indipendenti $x$ e $y$ gaussiane, si +può notare subito che la loro somma è una gaussiana con $\mu = \mu_x + \mu_y$ e +$\sigma^2 = \sigma_x^2 + \sigma_y^2$. Analogamente per la Poissoniana. 
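+Infatti, per una gaussiana la funzione caratteristica è
+
+$$
+ \hat{f}(k) = e^{i \mu k - \sigma^2 k^2 / 2}
+ \hspace{20pt} \Longrightarrow \hspace{20pt}
+ \hat{f}_x(k) \, \hat{f}_y(k)
+ = e^{i (\mu_x + \mu_y) k - (\sigma_x^2 + \sigma_y^2) k^2 / 2}
+$$
+
+che è ancora della stessa forma, con $\mu = \mu_x + \mu_y$ e
+$\sigma^2 = \sigma_x^2 + \sigma_y^2$.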
+Inoltre è facile osservare quale sia il comportamento delle pdf nei vari limiti +che abbiamo visto in precedenza: se si manda $N \rightarrow \infty$ mantenendo +il valore medio costante nella funzione caratteristica di una binomiale, si +ottiene la funzione caratteristica di una Poissoniana. Anche il teorema +centrale del limite si può dimostrare in questo modo. diff --git a/misc/lessons/sections/modulo-3.md b/misc/lessons/sections/modulo-3.md new file mode 100644 index 0000000..4b8d693 --- /dev/null +++ b/misc/lessons/sections/modulo-3.md @@ -0,0 +1,281 @@ +# BPH + +## Statistica descrittiva + +Si possono distinguere due tipi di analisi dei dati: "model independent" +(statistica descrittiva) e "model dependent", che si basano su un modello +teorico. In questo capitolo studiamo quelli del primo tipo. +Alcuni argomenti tipici di statistica descrittiva sono: + +- test per stabilire se due datasets provengono dalla stessa distribuzione + $f(x)$; +- test per stabilire la correlazione tra due datasets (test di ipotesi); +- metodi per determinare i momenti di una distribuzione; +- metodi per lo smoothing dei dati sperimentali. + +### Momenti di una distribuzione + +Definire i momenti di una distribuzione ha senso quando gli eventi che la +costituiscono hanno la tendenza ad agglomerarsi attorno ad un valore centrale. +Se i dati sono discreti, si usano le seguenti definizioni: + +Media campionaria: se $n_i$ è la frequenza con cui si presenta ciascun valore +$x_j$: + +$$ + \bar{x} = \frac{1}{N} \sum_{i = 1}^N x_i + = \frac{1}{N} \sum_{j = 1}^{N'} n_j x_j +$$ + +Momento centrale di ordine $r$: + +$$ + V_r = \frac{1}{N} \sum_{j=1}^{N'} n_j^r (x_j - \bar{x})^r +$$ + +Esistono anche altri due valori "centrali", che nel caso continuo diventano: +mediana: + +$$ + \int\limits_{-\infty}^{x_{\text{med}}} dx \, f(x) = \frac{1}{2} +$$ + +moda: valore per cui $f(x)$ è massima, ovvero valore che si ripete con +maggiore frequenza. +Se la pdf ha code molto estese, è possibile che gli integrali non convergano +e questi valori non siano definiti. Per questo motivo la mediana è uno +stimatore del valore centrale più robusto della media. + +I momenti centrali definiscono il modo in cui i dati si distribuiscono attorno +al valore centrale: quanto sono "diffusi". Il primo è la varianza (si noti la +correzione di Bessel per cui $N \rightarrow N -1$ al denominatore): + +$$ + V = \frac{1}{N - 1} \sum_{i = 1}^N (x_i - \bar{x})^2 +$$ + +La skewness (letteralmente "asimmetria") descrive quanto i valori siano +distribuiti in modo disuniforme attorno al valore medio: + +$$ + \gamma = \frac{1}{\sigma^3} E[(x - \bar{x})^3] +$$ + +dove $\sigma$ è la deviazione standard. + +![Skewness.](images/skewness.png){width=12cm} + +Quanto una pdf è più o meno piccata rispetto ad una gaussuana è dato dalla +kurtosis ("curved", "arching"): + +$$ + K = \frac{1}{\sigma^4} E[(x -\bar{x})^4] -3 +$$ + +![Kurtosis.](images/kurtosis.png){width=8cm} + +Esiste una stima per le deviazioni standard di questi parametri nel caso di +distribizioni circa gaussiane: + +\begin{align*} + &V(\sigma^2) = \frac{2 \sigma^4}{N} \\ + &V(\gamma) \approx \frac{15}{N} \\ + &V(K) \approx \frac{96}{N} +\end{align*} + +### Smoothing dei dati + +Lo smoothing dei dati si rende necessario quando i dati sono corrotti da un +rumore casuale. Solitamente si attua una media su finestre che inglobano dati +contigui. 
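+Uno schizzo elementare (solo indicativo) di media mobile su una finestra di
+$2k + 1$ punti potrebbe essere:
+
+```c
+#include <stddef.h>
+
+// Media mobile: out[i] = media di y su una finestra di 2k+1 punti
+// centrata in i; ai bordi la finestra viene semplicemente ristretta.
+void moving_average(const double *y, double *out, size_t n, size_t k) {
+    for (size_t i = 0; i < n; i++) {
+        size_t lo = (i > k) ? i - k : 0;
+        size_t hi = (i + k < n) ? i + k : n - 1;
+        double sum = 0;
+        for (size_t j = lo; j <= hi; j++)
+            sum += y[j];
+        out[i] = sum / (hi - lo + 1);
+    }
+}
+```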
Fare una media, però, significa abbassare inevitabilmente il valore +nei picchi, perché la maggior parte delle volte conservano l'area al di sotto +del picco e la posizione, ma non l'altezza. +Uno dei più efficienti metodi di smoothing è il filtro di Savitsky-Golay. + +Il segnale viene analizzato a gruppi di punti incentrati ciascuno in $Y_i$, con +$i$ che scorre su tutto l'array. Chiamiamo $y_0$ il punto centrale e $Y_N$ e +$Y_{-N}$ gli estremi. $Y_0$ viene sostituito con un valore calcolato in un modo +spiegato di seguito. Durante questo processo, i valori di $Y_i$ non vengono +sostituiti con $f_i$, bensì si crea un array parallelo che sarà poi quello +definitivo smoothato. +I valori di $Y_i$ si ottengono tramite un fit sui punti della finestra con +un polinomio di grado arbitrario $g$: $P_g(j)$. Il polinomio viene poi +valutato in zero e sostituito al valore di $y_0$. + +### Test di ipotesi + +Supponiamo di voler dimostrare che una certa variabile casuale $x$ segua una +pdf $f(x)$: questa è detta ipotesi nulla $H_0$. Se $f(x)$ non dipende da alcun +parametro, si parla di ipotesi semplice, altrimenti di dice composta. Oltre +alla ipotesi nulla si possono avere una o più ipotesi alternative $H_1$, +$H_2$... +Consideriamo il semplice caso in cui abbiamo una sola ipotesi alternativa +$H_1$ che proponga a sua volta una pdf. Per valutare l'accordo tra i dati e +un'ipotesi nulla si costruisce una statistica di test $t(x)$, che è una +variabile che dipende da $\vec{x}$ che definisco per determinare se l'ipotesi +nulla sia vera oppure no (vedi $t_{\text{cut}}$ oppure la discrepanza...) e che +segue a sua volta due pdf, una prevista da $H_0$ e una da $H_1$. + +\begin{center} +\begin{tikzpicture} + + \draw [thick, ->] (0,0) -- (12,0); + \draw [thick, ->] (0,0) -- (0,6); + \node [left] at (0,6) {g(t)}; + \node [below] at (12,0) {t}; + \draw [thick, dashed] (6,0) -- (6,6); + \node [below] at (6,0) {$t_{\text{cut}}$}; + \draw [thick, blue] (0,0) to [out = 20, in = 180] (3,5) + to [out = 0, in = 180] (8,0); + \draw [thick, red] (4,0) to [out = 20, in = 180] (7,3) + to [out = 0, in = 180] (11,0); + \node [blue] at (2.5, 2) {$g(t \, | \, H_0)$}; + \node [red] at (8, 1) {$g(t \, | \, H_1)$}; + +\end{tikzpicture} +\end{center} + +Si definisce 'significanza del criterio di test' $\alpha$ (mentre $(1 - +\alpha)$ è il 'livello di confidenza del criterio di test', o 'efficienza'): + +$$ + \alpha = \int\limits_{t_{\text{cut}}}^{+ \infty} dt \, g(t \, | \, H_0) +$$ + +mentre $\beta$ è chiamato 'potenza del test' (mentre $(1 - \beta)$ è +detto 'purezza'): + +$$ + \beta = \int\limits_{-\infty}^{t_{\text{cut}}} dt \, g(t \, | \, H_1) +$$ + +Si chiamano: + +- errore di prima specie: rigezione di $H_0$ qualora questa sia vera (con + relativa probabilità $P_1$); +- errore di seconda specie: accettazion di $H_0$ qualora questa sia falsa + (con relativa probabilità $P_2$); + +Per $t < t_{\text{cut}}$ deciso arbitrariamente, imponiamo che l'ipotesi +nulla sia verificata. Ne consgue che $\alpha = P_1$ e $\beta = P_2$. +La scelta migliore di $y_{\text{cut}}$ è quella che dà la massima purezza data +una certa efficienza. Nel caso 1D lo si ottiene automaticamente (vedi esempio), +altrimenti può essere complicato. + +Facciamo un esempio in cui applichiamo il lemma di Neyman-Pearson. +Immaginiamo di avere i valori $\vec{x} = (x_1 ... 
x_N)$ che appartengono ad +una distribuzione normale la cui varianza $\sigma$ è nota e si deve distinguere +tra due valori medi $\mu_0$ e $\mu_1$, cioé: + +$$ + H_0 = [\mu = \mu_0] + \hspace{100pt} + H_1 = [\mu = \mu_1] +$$ + +A questo punto le pdf previste da $H_0$ e $H_1$ sono due gaussiane centrate +ciascuna nel proprio valore medio. Secondo il lemma di cui sopra, dobbiamo +calcolare la Likelihood, che è la produttoria su tutte le misure effettuate +$x_i$ della pdf prevista di un'ipotesi calcolata in $x_i$: + +$$ + L(\vec{x}, \mu, \sigma) = \frac{1}{(\sigma \sqrt{2 \pi})^N} \Pi_{i=1}^N + N(x_i, \nu, \sigma) +$$ + +dove con $N$ si indica la distribuzione normale. Si tratta, cioè, della +probabilità di avere ottenuto quelle misure secondo l'ipotesi considerata. +Vorremo, quindi, che $L(H_0) >> L(H_1)$. A questo scopo si guarda $r$, +parametro previsto dal lemma, che vale: + +$$ + r = \frac{(L(\vec{x}) \, | \, H_0)}{(L(\vec{x}) \, | \, H_1)} + \hspace{30pt} \Longrightarrow \hspace{30pt} + \ln{r} = \ln{L(\vec{x}, \mu_0, \sigma)} - \ln{L(\vec{x}, \mu_0, \sigma)} +$$ + +Che deve essere a sua volta molto grande. La regione in cui si deve accettare +l'ipotesi nulla è infatti quella con $r > c$, dove $c$ deve ancora essere +valutato. + +$$ + \ln{r} = R(\vec{x}) > \ln{c} + \hspace{30pt} \Longrightarrow \hspace{30pt} + \vec{x} > (\text{oppure} <) \, g(c) = t_{\text{cut}} +$$ + +Per scegliere $k$, si impone che: + +$$ + P_1 = \alpha = Pr(\vec{x} > (\text{oppure} <) \, t_{\text{cut}} \, + | \, H_0) +$$ + +Quindi ciò che può essere scelto arbitrariamente, alla fine dei conti, è +$\alpha$, che solitamente si impone $= 5 \%$. + +### Discriminante lineare di Fisher + +In che modo si possono definire $f(t \, | \, H_0)$ e $f(t \, | \, H_1)$? Si +possono fare degli *ansatz* riguardo alla forma di $t$. Il modello di Fischer +utilizza una funzione lineare: + +$$ + t = \sum_{i = 1}^N a_i x_i = \vec{a} \cdot \vec{x} +$$ + +dove il vettore $\vec{a}$ è da determinare. Definiamo l'insieme dei valori medi +e delle "varianze" delle variabili misurate come segue: $\mu_{k, i}$ è il valore +medio della variabile $i$-esima secondo l'ipotesi $k$-esima: + +$$ + \mu_{k,i} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N + \, x_i f(\vec{x} \, | \, H_k) +$$ + +dove $k$ può quindi essere 0 o 1; mentre: + +$$ + (V_k)_{i,j} = \int\limits_{-\infty}^{+\infty} dx_1 \dots dx_N + \, (x_i - \mu_{k,i})(x_j - \mu_{k,j}) f(\vec{x} \, | \, H_k) +$$ + +Si può dimostrare che, per funzioni +gaussiane, la migliore statistica di test (ovvero che massimizza $1 - \beta$ +per un dato $a$) è quella per cui: + +$$ + \vec{a} = \frac{1}{w} (\vec{\nu}_0 - \vec{\nu}_1) + \hspace{40pt} \text{con} \hspace{40pt} + W_{i,j} = (V_0 + V_i)_{i,j} +$$ + +In genere si introduce anche un offset: + +$$ + t = a_0 + \sum_{i = 1}^N a_i x_i +$$ + +### Reti neuronali + +Si può dimostrare che se si usa il discriminante lineare di Fisher, allora dati +i dati $\vec{x}$, la probabilità che sia giusta $H_0$ è: + +$$ + P(H_0 | \vec{x}) = frac{1}{1 + e^{-t}} +$$ + +![Logistic function.](images/logistic.png){width=6cm} + +che è la funzione logistica. Se le due pdf $f(\vec{x} | H_0)$ e $f(\vec{x} | +H_1)$ non sono gaussiane, allora il discriminante lineare di Fisher non è più +ottimale e si può generalizzare $t(\vec{x})$ con un caso speciale di Artificial +Neural Network (ANN). +Supponiamo di prendere + +$$ + t(\vec{x}) = s_0 \left( a_0 \sum_{i = 1}^N a_i x_i \right) +$$ + +con $s$ detta funzione di attivazione e $a_0$ detta soglia. 
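+Assumendo la forma con offset $t(\vec{x}) = s(a_0 + \sum_i a_i x_i)$ e la
+funzione logistica come $s$, un singolo "neurone" di questo tipo si può
+abbozzare così (schema puramente indicativo):
+
+```c
+#include <math.h>
+#include <stddef.h>
+
+// t(x) = s(a0 + somma_i a_i x_i), con s funzione logistica.
+double neuron(const double *a, const double *x, size_t n, double a0) {
+    double lin = a0;
+    for (size_t i = 0; i < n; i++)
+        lin += a[i] * x[i];
+    return 1.0 / (1.0 + exp(-lin));
+}
+```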
Siccome la sigmoide +è monotona, questa ANN è equivalente ad un test lineare. diff --git a/misc/pdfs.c b/misc/pdfs.c new file mode 100644 index 0000000..9bbbe60 --- /dev/null +++ b/misc/pdfs.c @@ -0,0 +1,58 @@ +// This program generates a collection of random numbers which follow the +// distribution whose name is given in input by the user when calling the +// program and outputs an histogram of the result. Maximum and minimum of +// the range must also be given. The random number distribution functions +// are taken from the GSL library at: +// https://www.gnu.org/software/gsl/doc/html/randist.html + +#include +#include +#include +#include +#include + +int main (int argc, char** argv) + { + // An histogram with 100 bins which range from 0 to 100 is set: + size_t bins = 100; + int low = atoi(argv[2]); + int upp = atoi(argv[3]); + gsl_histogram * histo = gsl_histogram_alloc(bins); + gsl_histogram_set_ranges_uniform(histo, low, upp); + + // The random number generator is set: + gsl_rng* gen = gsl_rng_alloc(gsl_rng_taus); + // 100'000 random numbers are generated and put into the histogram: + size_t N = 100000; + for (size_t i = 0; i < N; i++) + { + if (strcmp(argv[1], "binomial") == 0) + gsl_histogram_increment(histo, gsl_ran_binomial(gen, 0.3, 10)); + if (strcmp(argv[1], "poisson") == 0) + gsl_histogram_increment(histo, gsl_ran_poisson(gen, 1)); + if (strcmp(argv[1], "uniform") == 0) + gsl_histogram_increment(histo, gsl_rng_get(gen)*100); + if (strcmp(argv[1], "gaussian") == 0) + gsl_histogram_increment(histo, gsl_ran_gaussian(gen, 5)+30); + if (strcmp(argv[1], "wigner") == 0) + gsl_histogram_increment(histo, gsl_ran_cauchy(gen, 10)+20); + if (strcmp(argv[1], "landau") == 0) + gsl_histogram_increment(histo, gsl_ran_landau(gen)); + if (strcmp(argv[1], "chi2") == 0) + gsl_histogram_increment(histo, gsl_ran_gamma(gen, 5, 2)); + if (strcmp(argv[1], "exponential") == 0) + gsl_histogram_increment(histo, gsl_ran_exponential(gen, 5)); + if (strcmp(argv[1], "student") == 0) + gsl_histogram_increment(histo, gsl_ran_tdist(gen, 3)); + if (strcmp(argv[1], "fischer") == 0) + gsl_histogram_increment(histo, gsl_ran_fdist(gen, 3, 4)); + } + // The histogram is given in output: + gsl_histogram_fprintf (stdout, histo, "%g", "%g"); + + // The memory is freed. 
+ gsl_rng_free(gen); + gsl_histogram_free(histo); + + return 0; + } diff --git a/misc/plot b/misc/plot new file mode 100755 index 0000000..bb70e3f --- /dev/null +++ b/misc/plot @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +import matplotlib.pyplot as plt +import numpy as np +import sys + +lows, upps, values = np.loadtxt(sys.stdin, unpack=True,) +dim = upps[1] - lows[1] +values = values/np.sum((values)*dim) +plt.bar(lows, align='edge', width=dim, height=values, + color='#dbbf0d', edgecolor='#595856') +plt.annotate("N = 100000\nbins=100", (15, 0.16)) +plt.ylabel("pdf(x)") +plt.xlabel("x") +plt.show() diff --git a/notes/images/conti-ex6.jpg b/notes/images/conti-ex6.jpg new file mode 100644 index 0000000..8088b6a Binary files /dev/null and b/notes/images/conti-ex6.jpg differ diff --git a/notes/images/dip.png b/notes/images/dip.png new file mode 100644 index 0000000..8b64727 Binary files /dev/null and b/notes/images/dip.png differ diff --git a/notes/images/gamma-area.png b/notes/images/gamma-area.png new file mode 100644 index 0000000..7f01fa5 Binary files /dev/null and b/notes/images/gamma-area.png differ diff --git a/notes/images/landau-hist.png b/notes/images/landau-hist.png new file mode 100644 index 0000000..9c4b3b7 Binary files /dev/null and b/notes/images/landau-hist.png differ diff --git a/notes/makefile b/notes/makefile new file mode 100644 index 0000000..cb0f269 --- /dev/null +++ b/notes/makefile @@ -0,0 +1,17 @@ +sections = $(shell find sections/ -name '*.md' | sort -V) + +pandoc = \ + @ pandoc $(1) \ + --toc \ + --standalone \ + --pdf-engine=xelatex \ + -F pandoc-crossref \ + -o $(2).pdf + +all: exercises.pdf + +clean: + rm exercises.pdf + +exercises.pdf: $(sections) images + $(call pandoc, $(sections), exercises) diff --git a/notes/sections/0.md b/notes/sections/0.md new file mode 100644 index 0000000..f42dc91 --- /dev/null +++ b/notes/sections/0.md @@ -0,0 +1,50 @@ +--- +title: Statistical Analysis +subtitle: Exercises +numbersections: true +documentclass: article +fontsize: 12pt + +geometry: + - width=150mm + - top=20mm + - bottom=30mm + +mainlang: english +otherlang: russian + +header-includes: | + ```{=latex} + % start new page with each section + \usepackage{etoolbox} + \usepackage{tikz} + \usepackage{amsmath} + \usepackage{amssymb} + \usepackage{siunitx} + \pretocmd{\section}{\clearpage}{}{} + + % strict figure placement + \usepackage{float} + \floatplacement{figure}{H} + + % customer macros + %% "with" in formulas + \DeclareMathOperator*{\with}{% + \hspace{30pt} \text{with} \hspace{30pt} + } + %% "thus" in formulas + \DeclareMathOperator*{\thus}{% + \hspace{30pt} \Longrightarrow \hspace{30pt} + } + %% "et" in formulas + \DeclareMathOperator*{\et}{% + \hspace{30pt} \wedge \hspace{30pt} + } + + % configure captions + \usepackage{caption} + \captionsetup{font={footnotesize,it}} + \captionsetup{width=11cm} + \usepackage{stmaryrd} + ``` +--- diff --git a/notes/sections/1.md b/notes/sections/1.md new file mode 100644 index 0000000..8667d41 --- /dev/null +++ b/notes/sections/1.md @@ -0,0 +1,226 @@ +# Exercise 1 + +## Random numbers following the Landau distribution + +The Landau distribution is a probability density function which can be defined +as follows: + +$$ + f(x) = \int \limits_{0}^{+ \infty} dt \, e^{-t log(t) -xt} \sin (\pi t) +$$ + +![Landau distribution.](images/landau-small.pdf){width=50%} + +The GNU Scientific Library (GSL) provides a number of functions for generating +random variates following tens of probability distributions. 
Thus, the function +for generating numbers from the Landau distribution, namely `gsl_ran_landau()`, +was used. +For the purpose of visualizing the resulting sample, the data was put into +an histogram and plotted with matplotlib. The result is shown in @fig:landau. + +![Example of N points generated with the `gsl_ran_landau()` +function and plotted in a 100-bins histogram ranging from -10 to +80.](images/landau.png){#fig:landau} + + +## Randomness testing of the generated sample + +### Kolmogorov-Smirnov test + +In order to compare the sample with the Landau distribution, the +Kolmogorov-Smirnov (KS) test was applied. This test statistically quantifies the +distance between the cumulative distribution function of the Landau distribution +and the one of the sample. The null hypothesis is that the sample was +drawn from the reference distribution. +The KS statistic for a given cumulative distribution function $F(x)$ is: + +$$ + D_N = \text{sup}_x |F_N(x) - F(x)| +$$ + +where: + + - $x$ runs over the sample, + - $F(x)$ is the Landau cumulative distribution and function + - $F_N(x)$ is the empirical cumulative distribution function of the sample. + +If $N$ numbers have been generated, for every point $x$, +$F_N(x)$ is simply given by the number of points preceding the point (itself +included) normalized by $N$, once the sample is sorted in ascending order. +$F(x)$ was computed numerically from the Landau distribution with a maximum +relative error of $10^{-6}$, using the function `gsl_integration_qagiu()`, +found in GSL. + +Under the null hypothesis, the distribution of $D_N$ is expected to +asymptotically approach a Kolmogorov distribution: + +$$ + \sqrt{N}D_N \xrightarrow{N \rightarrow + \infty} K +$$ + +where $K$ is the Kolmogorov variable, with cumulative +distribution function given by: + +$$ + P(K \leqslant K_0) = 1 - p = \frac{\sqrt{2 \pi}}{K_0} + \sum_{j = 1}^{+ \infty} e^{-(2j - 1)^2 \pi^2 / 8 K_0^2} +$$ + +Plugging the observed value $\sqrt{N}D_N$ in $K_0$, the $p$-value can be +computed. At 95% confidence level (which is the probability of confirming the +null hypothesis when correct) the compatibility with the Landau distribution +cannot be disproved if $p > α = 0.05$. +To approximate the series, the convergence was accelerated using the Levin +$u$-transform with the `gsl_sum_levin_utrunc_accel()` function. The algorithm +terminates when the difference between two successive extrapolations reaches a +minimum. + +For $N = 1000$, the following results were obtained: + + - $D = 0.020$ + - $p = 0.79$ + +Hence, the data was reasonably sampled from a Landau distribution. + +**Note**: +Contrary to what one would expect, the $\chi^2$ test on a histogram is not very +useful in this case. For the test to be significant, the data has to be binned +such that at least several points fall in each bin. However, it can be seen +(@fig:Landau) that many bins are empty both in the right and left side of the +distribution, so it would be necessary to fit only the region where the points +cluster or use very large bins in the others, making the $\chi^2$ test +unpractical. + + +### Parameters comparison + +When a sample of points is generated in a given range, different tests can be +applied in order to check whether they follow an even distribution or not. The +idea which lies beneath most of them is to measure how far the parameters of +the distribution are from the ones measured in the sample. 
+The same principle can be used to verify if the generated sample effectively +follows the Landau distribution. Since it turns out to be a very pathological +PDF, only two parameters can be easily checked: the mode and the +full-width-half-maximum (FWHM). + +![Landau distribution with emphatized mode and + FWHM = ($x_+ - x_-$).](images/landau.pdf) + +\begin{figure} +\hypertarget{fig:parameters}{% +\begin{tikzpicture}[overlay] + \begin{scope}[shift={(0,0.4)}] + % Mode + \draw [thick, dashed] (7.57,3.1) -- (7.57,8.55); + \draw [thick, dashed] (1.9,8.55) -- (7.57,8.55); + \node [above right] at (7.6,3.1) {$m_e$}; + \node [below right] at (1.9,8.55) {$f(m_e)$}; + % FWHM + \draw [thick, dashed] (1.9,5.95) -- (9.05,5.95); + \draw [thick, dashed] (6.85,5.83) -- (6.85,3.1); + \draw [thick, dashed] (8.95,5.83) -- (8.95,3.1); + \node [below right] at (1.9,5.95) {$\frac{f(m_e)}{2}$}; + \node [above right] at (6.85,3.1) {$x_-$}; + \node [above right] at (8.95,3.1) {$x_+$}; + \end{scope} +\end{tikzpicture} +} +\end{figure} + +The mode of a set of data values is defined as the value that appears most +often, namely: it is the maximum of the PDF. Since there is no way to find +an analytic form for the mode of the Landau PDF, it was numerically estimated +through a minimization method (found in GSL, called method 'golden section') +with an arbitrary error of $10^{-2}$, obtaining: + + - expected mode $= m_e = \SI{-0.2227830 \pm 0.0000001}{}$ + +The minimization algorithm begins with a bounded region known to contain a +minimum. The region is described by a lower bound $x_\text{min}$ and an upper +bound $x_\text{max}$, with an estimate of the location of the minimum $x_e$. +The value of the function at $x_e$ must be less than the value of the function +at the ends of the interval, in order to guarantee that a minimum is contained +somewhere within the interval. + +$$ + f(x_\text{min}) > f(x_e) < f(x_\text{max}) +$$ + +On each iteration the interval is divided in a golden section (using the ratio +($3 - \sqrt{5}/2 \approx 0.3819660$ and the value of the function at this new +point $x'$ is calculated. If the new point is a better estimate of the minimum, +namely is $f(x') < f(x_e)$, then the current estimate of the minimum is +updated. +The new point allows the size of the bounded interval to be reduced, by choosing +the most compact set of points which satisfies the constraint $f(a) > f(x') < +f(b)$ between $f(x_\text{min})$, $f(x_\text{min})$ and $f(x_e)$. The interval is +reduced until it encloses the true minimum to a desired tolerance. + +In the sample, on the other hand, once the data were binned, the mode can be +estimated as the central value of the bin with maximum events and the error +is the half width of the bins. In this case, with 40 bins between -20 and 20, +the following result was obtained: + + - observed mode $= m_o = \SI{0 \pm 1}{}$ + +In order to compare the values $m_e$ and $x_0$, the following compatibility +t-test was applied: + +$$ + p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with + t = \frac{|m_e - m_o|}{\sqrt{\sigma_e^2 + \sigma_o^2}} +$$ + +where $\sigma_e$ and $\sigma_o$ are the absolute errors of $m_e$ and $m_o$ +respectively. At 95% confidence level, the values are compatible if $p > 0.05$. +In this case: + +$$ + p = 0.82 +$$ + +Thus, the observed value is compatible with the expected one. + +--- + +The same approach was taken as regards the FWHM. It is defined as the distance +between the two points at which the function assumes half times the maximum +value. 
Even in this case, there is no analytic expression for it, so it
+was computed numerically as follows.
+First, some definitions must be given:
+
+$$
+ f_{\text{max}} := f(m_e) \et \text{FWHM} = x_{+} - x_{-} \with
+ f(x_{\pm}) = \frac{f_{\text{max}}}{2}
+$$
+
+then the function $f'(x)$ was minimized using the same minimization method
+used for finding $m_e$, dividing the range into $[x_\text{min}, m_e]$ and
+$[m_e, x_\text{max}]$ (where $x_\text{min}$ and $x_\text{max}$ are the limits
+in which the points have been sampled) in order to find both minima of the
+function:
+
+$$
+ f'(x) = \left| f(x) - \frac{f_{\text{max}}}{2} \right|
+$$
+
+resulting in:
+
+$$
+ \text{expected FWHM} = w_e = \SI{4.0186457 \pm 0.0000001}{}
+$$
+
+On the other hand, the observed FWHM was computed as the difference between
+the centers of the bins with the values closest to $\frac{f_{\text{max}}}{2}$
+and the error was taken as twice the width of the bins, obtaining:
+
+$$
+ \text{observed FWHM} = w_o = \SI{4 \pm 2}{}
+$$
+
+These two values turn out to be compatible too, with:
+
+$$
+ p = 0.99
+$$
+
diff --git a/notes/sections/2.md b/notes/sections/2.md
new file mode 100644
index 0000000..5ecbe83
--- /dev/null
+++ b/notes/sections/2.md
@@ -0,0 +1,311 @@
+# Exercise 2
+
+## Euler-Mascheroni constant
+
+The Euler-Mascheroni constant is defined as the limiting difference between the
+partial sums of the harmonic series and the natural logarithm:
+
+$$
+ \gamma = \lim_{n \rightarrow +\infty} \left( \sum_{k=1}^{n} \frac{1}{k}
+ - \ln(n) \right)
+$$ {#eq:gamma}
+
+and represents the limiting blue area in @fig:gamma. The first 30 digits of
+$\gamma$ are:
+
+$$
+ \gamma = 0.57721\ 56649\ 01532\ 86060\ 65120\ 90082 \dots
+$$ {#eq:exact}
+
+In complex analysis, this constant is related to many functions and can be
+evaluated through a variety of identities. In this work, five methods were
+implemented and their results discussed. In fact, evaluating $\gamma$ with the
+limited precision of floating-point number representation entails roundoff
+errors, which limit the precision of the estimate of $\gamma$ itself. All the
+known methods involve sums, subtractions or products of very big or small
+numbers, packed in series, partial sums or infinite products. Thus, the
+efficiency of the methods lies in how quickly they converge to their limit.
+
+![The area of the blue region converges to the Euler–Mascheroni
+constant.](images/gamma-area.png){#fig:gamma width=7cm}
+
+## Computing the constant
+
+### Definition
+
+First, in order to have a quantitative idea of how hard it is to reach a good
+estimation of $\gamma$, it was naively computed using the definition given in
+@eq:gamma. The difference was computed for increasing values of $n$, with
+$n_{i+1} = 10 \cdot n_i$ and $n_1 = 20$, until the approximation starts getting
+worse, namely:
+
+$$
+ | \gamma(n_{i+1}) - \gamma | > | \gamma(n_i) - \gamma|
+$$
+
+and $\gamma (n_i)$ was selected as the result (see @tbl:1_results).
+ +----------------------------------------------- +n sum $|\gamma(n)-\gamma|$ +----------- ------------- --------------------- +\SI{2e1}{} \SI{2.48e-02}{} + +\SI{2e2}{} \SI{2.50e-03}{} + +\SI{2e3}{} \SI{2.50e-04}{} + +\SI{2e4}{} \SI{2.50e-05}{} + +\SI{2e5}{} \SI{2.50e-06}{} + +\SI{2e6}{} \SI{2.50e-07}{} + +\SI{2e7}{} \SI{2.50e-08}{} + +\SI{2e8}{} \SI{2.50e-09}{} + +\SI{2e9}{} \SI{2.55e-10}{} + +\SI{2e10}{} \SI{2.42e-11}{} + +\SI{2e11}{} \SI{1.44e-08}{} +----------------------------------------------- + +Table: Partial results using the definition of $\gamma$ with double +precision. {#tbl:1_results} + +The convergence is logarithmic: to fix the first $d$ decimal places about +$10^d$ terms are needed. The double precision runs out at the +10\textsuperscript{th} place, $n=\SI{2e10}{}$. +Since all the number are given with double precision, there can be at best 15 +correct digits but only 10 were correctly computed: this means that when the +terms of the series start being smaller than the smallest representable double, +the sum of all the remaining terms give a number $\propto 10^{-11}$. + + +--------- ----------------------- +true: 0.57721\ 56649\ 01533 + +approx: 0.57721\ 56648\ 77325 + +diff: 0.00000\ 00000\ 24207 +--------- ----------------------- + +Table: First method results. {#tbl:first} + +### Alternative formula + +As a first alternative, the constant was computed through the identity which +relates $\gamma$ to the $\Gamma$ function as follow: + +$$ + \gamma = \lim_{M \rightarrow + \infty} \sum_{k = 1}^{M} + \binom{M}{k} \frac{(-1)^k}{k} \ln(\Gamma(k + 1)) +$$ + +Varying $M$ from 1 to 100, the best result was obtained for $M = 41$ (see +@tbl:second). It went sour: the convergence is worse than using the definition +itself. Only two places were correctly computed. + +--------- ----------------------- +true: 0.57721\ 56649\ 01533 + +approx: 0.57225\ 72410\ 34058 + +diff: 0.00495\ 84238\ 67473 +--------- ----------------------- + +Table: Second method results. {#tbl:second} + +Here, the problem lies in the binomial term: computing the factorial of a +number greater than 18 goes over 15 places and so cannot be correctly +represented. Furthermore, the convergence (even if this is not a series +and, consequently, it is not properly a "convergence") is slowed down by +the logarithmic factor. + +### Reciprocal $\Gamma$ function + +A better result was found using the well known reciprocal $\Gamma$ function +formula: + +$$ + \frac{1}{\Gamma(z)} = z e^{yz} \prod_{k = 1}^{+ \infty} + \left( 1 + \frac{z}{k} \right) e^{-z/k} +$$ + +which gives: + +$$ + \gamma = - \frac{1}{z} \ln \left( z \Gamma(z) + \prod_{k = 1}^{+ \infty} \left( 1 + \frac{z}{k} \right) e^{-z/k} \right) +$$ + +The execution stops when there is no difference between two consecutive therms +of the infinite product (it happens for $k = 456565794$, meaning that for this +value of $k$, the term of the product is equal to 1). Different values of $z$ +were checked, with $z_{i+1} = z_i + 0.01$, ranging from 0 to 20 and the best +result was found for $z = 9$. As can be seen in @tbl:3_results, it's only by +chance, since all $|\gamma(z) - \gamma |$ are of the same order of magnitude. +The best one is compared with the exact value of $\gamma$ in @tbl:third. 
+ +--------------------------------------------------------------- + z $|\gamma(z) - \gamma |$ z $|\gamma(z) - \gamma |$ +----- ------------------------ ------ ------------------------ + 1 \SI{9.712e-9}{} 8.95 \SI{9.770e-9}{} + + 3 \SI{9.320e-9}{} 8.96 \SI{9.833e-9}{} + + 5 \SI{9.239e-9}{} 8.97 \SI{9.622e-9}{} + + 7 \SI{9.391e-9}{} 8.98 \SI{9.300e-9}{} + + 9 \SI{8.482e-9}{} 8.99 \SI{9.059e-9}{} + + 11 \SI{9.185e-9}{} 9.00 \SI{8.482e-9}{} + + 13 \SI{9.758e-9}{} 9.01 \SI{9.564e-9}{} + + 15 \SI{9.747e-9}{} 9.02 \SI{9.260e-9}{} + + 17 \SI{9.971e-9}{} 9.03 \SI{9.264e-9}{} + + 19 \SI{10.084e-9}{} 9.04 \SI{9.419e-9}{} +--------------------------------------------------------------- + +Table: Differences between the obtained values of $\gamma$ and the exact +one. {#tbl:3_results} + +--------- ----------------------- +true: 0.57721\ 56649\ 01533 + +approx: 0.57721\ 56564\ 18607 + +diff: 0.00000\ 00084\ 82925 +--------- ----------------------- + +Table: Third method results for z = 9.00. {#tbl:third} + +This time, the convergence of the infinite product is fast enough to ensure the +$8^{th}$ place. + +### Fastest convergence formula + +The fastest known convergence belongs to the following formula: +(source: http://www.numberworld.org/y-cruncher/internals/formulas.html): + +$$ + \gamma = \frac{A(N)}{B(N)} -\frac{C(N)}{B^2(N)} - \ln(N) +$$ {#eq:faster} + +with: + +\begin{align*} + &A(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \cdot H(k) + \with H(k) = \sum_{j=1}^{k} \frac{1}{j} \\ + &B(N) = \sum_{k=1}^{+ \infty} \frac{N^k}{k!} \\ + &C(N) = \frac{1}{4N} \sum_{k=0}^{2N} + \frac{((2k)!)^3}{(k!)^4 \cdot (16k)^2k} \\ +\end{align*} + +The series $A$ and $B$ are computed till there is no difference between two +consecutive terms. +The number of desired correct decimals is given in input and $N$ is +consequently computed through a formula given in the same article +above-mentioned. Results are shown in @tbl:fourth. + +--------- ------------------------------ +true: 0.57721\ 56649\ 01532\ 75452 + +approx: 0.57721\ 56649\ 01532\ 86554 + +diff: 0.00000\ 00000\ 00000\ 11102 +--------- ------------------------------ + +Table: Fourth method results. {#tbl:fourth} + +Due to roundoff errors, the best results is obtained for $N = 10$. Since up to +15 places were correctly computed, an approximation of $\gamma$ better than the +one reached with the definition in @eq:gamma was obtained. + +### Arbitrary precision + +To overcome the issues related to the double representation, one can resort to a +representation with arbitrary precision. In the GMP library (which stands for +GNU Multiple Precision arithmetic), for example, real numbers can be +approximated by a ratio of two integers (fraction) with arbitrary precision: +this means a check is performed on every operation and in case of an integer +overflow, additional memory is requested and used to represent the larger result. +Additionally, the library automatically switches to the optimal algorithm +to compute an operation based on the size of the operands. + +The terms in @eq:faster can therefore be computed with arbitrarily large +precision. Thus, a program that computes the Euler-Mascheroni constant within +a user controllable precision has been implemented. Unlike the previously +mentioned programs, this one was more carefully optimized. + +The $A$ and $B$ series are computed up to an arbitrary limit $k_{\text{max}}$. 
+Different values of $k_{\text{max}}$ were tested but, obviously, they all +eventually reach a point where the approximation cannot guarantee the +requested precision; the solution turned out to be to let $k_{\text{max}}$ depends on +$N$. Consequently, $k_{\text{max}}$ was chosen to be $5N$, after it has been +verified to produce the correct digits up to 500 decimal places. + +The GMP library offers functions to perform some operations such as addition, +multiplication, division, etc. however, the logarithm function is not +implemented. Thus, most of the code carries out the $\ln(N)$ computation. +First, it should be noted that the logarithm of only some special numbers can +be computed with arbitrary precision, namely the ones of which a converging +series is known. This forces $N$ to be rewritten in the following way: + +$$ + N = N_0 \cdot b^e \thus \ln(N) = \ln(N_0) + e \cdot \ln(b) +$$ + +Since a fast converging series for $\ln(2)$ is known $b = 2$ was chosen. As +well as for the scientific notation, in order to get the mantissa $1 \leqslant +N_0 < 2$, the number of binary digits of $N$ must be computed (conveniently, a +dedicated function `mpz_sizeinbase()` can be found in GMP). If the digits are $n$: + +$$ + e = b - 1 \thus N_0 = \frac{N}{2^{n - 1}} +$$ + +Then, by defining: + +$$ + N_0 = \frac{1 + y}{1 - y} \thus y = \frac{N_0 - 1}{N_0 + 1} < 1 +$$ + +and the following series (which is convergent for $y < 1$) can therefore be +used: + +$$ + \ln \left( \frac{1 + y}{1 - y} \right) = + 2 \sum_{k = 0}^{+ \infty} \frac{y^{2k + 1}}{2k + 1} +$$ + +But when to stop computing the series? +Given a partial sum $S_k$ of the series, it is possible to know when a digit is +definitely correct, meaning that no matter how large $k$ can be, +it will not affect that decimal place. The key lies in the following concept. +Letting $S$ the value of the series: + +$$ + S_k + \frac{a_{k+1}}{1 - \frac{a_{k+1}}{a_k}} < S < S_k + a_k \frac{L}{1 -L} +$$ + +where $L$ is the limiting ratio of the series terms, which must be $< 1$ in +order for it to converge (in this case, it is easy to prove that $L = y^2$). +The width $\Delta S$ of the interval containing $S$, for a given $S_k$, gives +the precision of the partial sum with respect to $S$. By imposing $\Delta S < +1/10^D$ where $D$ is the correct decimal place required, the desired precision +is obtained. This is achieved by trials, checking at every step whether $\Delta +S$ is less or greater than $1/10^D$. + +The same holds for $\ln(2)$, which is given by the following series: + +$$ + \log(2) = \sum_{k=1}^{+ \infty} \frac{1}{k \cdot 2^k} +$$ + +In this case the ratio is $L = 1/2$. diff --git a/notes/sections/3.md b/notes/sections/3.md new file mode 100644 index 0000000..a9ddb55 --- /dev/null +++ b/notes/sections/3.md @@ -0,0 +1,423 @@ +# Exercise 3 {#sec:3} + +## Meson decay events generation + +A number of $N = 50'000$ points on the unit sphere, each representing a +particle detection event, must be generated according to then angular +probability distribution function $F_0$: + +\begin{align*} + F_0 (\theta, \phi) = \frac{3}{4 \pi} \Bigg[ + \frac{1}{2} (1 - \alpha_0) + \frac{1}{2} (3\alpha_0 - 1) \cos^2(\theta)& \\ + - \beta_0 \sin^2(\theta) \cos(2 \phi) + - \gamma_0 \sqrt{2} \sin(2 \theta) \cos(\phi)& \Bigg] +\end{align*} + +where $\theta$ and $\phi$ are, respectively, the polar and azimuthal angles, and + +$$ + \alpha_0 = 0.65 \et \beta_0 = 0.06 \et \gamma_0 = -0.18 +$$ + +To generate the points a *try and catch* method was employed: + +1. 
generate a point $(\theta, \phi)$ uniformly on the unit sphere;
+2. generate a third value $Y$ uniformly in $[0, 1]$;
+3. if @eq:requirement is satisfied, save the point;
+4. repeat from 1.
+
+$$
+  Y < F_0(\theta, \phi)
+$$ {#eq:requirement}
+
+To increase the efficiency of the procedure, the $Y$ values were actually
+generated in $[0, 0.2]$, since $\max F_0 = 0.179$ for the given parameters.
+Another option would have been to generate the numbers in the range $[0, 1]$,
+but every point with $Y > 0.179$ would have been rejected with probability 1,
+wasting function calls without changing the resulting distribution.
+
+While $\phi$ can simply be generated uniformly between 0 and $2 \pi$, for
+$\theta$ one has to be more careful: by generating evenly distributed numbers
+between 0 and $\pi$, the points turn out to cluster at the poles. Such a point
+$(\theta, \phi)$ is *not* uniform on the sphere but is uniform on a cylindrical
+projection of it (the difference can be appreciated in @fig:compare).
+
+The proper distribution of $\theta$ is obtained by applying a transform to a
+variable $x$ with uniform distribution in $[0, 1)$, easily generated by the
+GSL function `gsl_rng_uniform()`; a sketch of the whole sampling procedure is
+given after the figure.
+
+<div id="fig:compare">
+
+![Uniformly distributed points with $\theta$ evenly distributed between
+0 and $\pi$.](images/histo-i-u.pdf){width=45%}
+![Points uniformly distributed on a spherical
+surface.](images/histo-p-u.pdf){width=45%}
+
+![Sample generated according to $F$ with $\theta$ evenly distributed between
+0 and $\pi$.](images/histo-i-F.pdf){width=45%}
+![Sample generated according to $F$ with $\theta$ properly
+distributed.](images/histo-p-F.pdf){width=45%}
+
+Examples of samples. On the left, points with $\theta$ evenly distributed
+between 0 and $\pi$; on the right, points with $\theta$ properly distributed.
+</div>
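+
+In practice, the generation loop looks like the following minimal sketch (the
+transform $\theta = \text{acos}(1 - 2x)$ used for the polar angle is derived
+right below; the helper `F0` and the way accepted points are stored are
+illustrative choices, not the actual implementation):
+
+```c
+#include <math.h>
+#include <gsl/gsl_rng.h>
+
+/* Angular distribution F_0 with the given parameters. */
+double F0(double theta, double phi) {
+    double a = 0.65, b = 0.06, g = -0.18;
+    return 3.0/(4.0*M_PI) * (0.5*(1 - a)
+         + 0.5*(3*a - 1) * pow(cos(theta), 2)
+         - b * pow(sin(theta), 2) * cos(2*phi)
+         - g * sqrt(2) * sin(2*theta) * cos(phi));
+}
+
+int main(void) {
+    gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937);
+    size_t n = 0;
+    while (n < 50000) {
+        double phi   = 2*M_PI * gsl_rng_uniform(r);     // uniform in [0, 2pi)
+        double theta = acos(1 - 2*gsl_rng_uniform(r));  // uniform on the sphere
+        double Y     = 0.2 * gsl_rng_uniform(r);        // uniform in [0, 0.2]
+        if (Y < F0(theta, phi)) {
+            /* accept: store (theta, phi) */
+            n++;
+        }
+    }
+    gsl_rng_free(r);
+    return 0;
+}
+```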
+ +The transformation can be found by imposing the angular PDF to be a constant: + +\begin{align*} + \frac{d^2 P}{d \Omega^2} = \text{const} = \frac{1}{4 \pi} + \hspace{20pt} &\Longrightarrow \hspace{20pt} + d^2P = \frac{1}{4 \pi} \, d \Omega^2 = \frac{1}{4 \pi} \, + d \phi \sin(\theta) d \theta + \\ + &\Longrightarrow \hspace{20pt} + \frac{dP}{d \theta} = \int_0^{2 \pi} d \phi \, \frac{1}{4 \pi} \sin(\theta)= + \frac{1}{4 \pi} \sin(\theta) 2 \pi = \frac{1}{2} \sin(\theta) +\end{align*} +\begin{align*} + \theta = \theta (x) + \hspace{20pt} &\Longrightarrow \hspace{20pt} + \frac{dP}{d \theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d \theta} \right| + = \left. \frac{dP}{dx} \middle/ \, \left| \frac{d \theta}{dx} \right| \right. + \\ + &\Longrightarrow \hspace{20pt} + \frac{1}{2} \sin(\theta) = \left. 1 \middle/ \, \left| + \frac{d \theta}{dx} \right| \right. +\end{align*} + +Since $\theta \in [0, \pi]$, then the absolute value symbol can be omitted: + +\begin{align*} + \frac{d \theta}{dx} = \frac{2}{\sin(\theta)} + \hspace{20pt} &\Longrightarrow \hspace{20pt} + d \theta \sin(\theta) = 2 dx + \\ + &\Longrightarrow \hspace{20pt} + - \cos (\theta') |_{0}^{\theta} = 2[x(\theta) - x(0)] = 2(x - 0) = 2x + \\ + &\Longrightarrow \hspace{20pt} + - \cos(\theta) + 1 = 2x + \\ + &\Longrightarrow \hspace{20pt} + \theta = \text{acos} (1 -2x) +\end{align*} + +Since 1 is not in $[0 , 1)$, $\theta = \pi$ can't be generated. Being it just +a single point, the effect of this omission is negligible. + +\vspace{30pt} + + +## Parameter estimation + +The sample must now be used to estimate the parameters $\alpha_0$, +$\beta_0$ and $\gamma_0$ of the angular distribution $F_0$. + + +### Maximum Likelihood method + +The Maximum Likelihood method (MLM) is based on the observation that the best +estimate {$\bar{\alpha}$, $\bar{\beta}$, $\bar{\gamma}$} of the parameters +{$\alpha$, $\beta$, $\gamma$} should maximize the Likelihood function $L$ +defined in @eq:Like0, where the index $i$ runs over the sample and $F$ is the +function $F_0$ with free parameters $\alpha$, $\beta$ and $\gamma$: + +$$ + L = \prod_i F(\theta_i, \phi_i) = \prod_i F_i +$$ {#eq:Like0} + +The reason is that the points were generated according to the probability +distribution function $F$ with parameters $\alpha_0$, $\beta_0$ and $\gamma_0$, +thus the probability $P$ of sampling this special sample is, by definition, the +product of their probabilities $F_i$ and it must be maximum for {$\alpha$, +$\beta$, $\gamma$} = {$\alpha_0$, $\beta_0$, $\gamma_0$}. Thus, by viewing $F_i$ +as a function of the parameters, by maximizing $P = L$ with respect +to $\alpha$, $\beta$ and $\gamma$, a good estimate should be found. +Instead of actually maximising $L$, the function $-\ln(L)$ +was minimised, as minimisation methods as usually described in literature +and $\ln$ since it simplifies the math: + +$$ + L = \prod_i F_i \thus - \ln(L) = - \sum_{i} \ln(F_i) +$$ {#eq:Like} + + +The problem of multidimensional minimisation was tackled using the GSL library +`gsl_multimin.h`, which implements a variety of methods, including the +conjugate gradient Fletcher-Reeves algorithm, used in the solution, which requires +the implementation of $-\ln(L)$ and its gradient. 
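+
+A minimal sketch of these two ingredients is reported below; the sample arrays
+`theta` and `phi`, the array size and the function names are illustrative
+assumptions (not the actual implementation), and the derivatives are simply
+those of $F$ with respect to each parameter:
+
+```c
+#include <math.h>
+#include <gsl/gsl_vector.h>
+#include <gsl/gsl_multimin.h>
+
+#define N 50000
+double theta[N], phi[N];    /* the sample, filled elsewhere */
+
+/* F with free parameters p = (alpha, beta, gamma). */
+double F(double th, double ph, const gsl_vector *p) {
+    double a = gsl_vector_get(p, 0),
+           b = gsl_vector_get(p, 1),
+           g = gsl_vector_get(p, 2);
+    return 3.0/(4.0*M_PI) * (0.5*(1 - a) + 0.5*(3*a - 1)*pow(cos(th), 2)
+         - b*pow(sin(th), 2)*cos(2*ph) - g*sqrt(2)*sin(2*th)*cos(ph));
+}
+
+/* -ln(L) */
+double nll(const gsl_vector *p, void *data) {
+    double sum = 0;
+    for (size_t i = 0; i < N; i++)
+        sum -= log(F(theta[i], phi[i], p));
+    return sum;
+}
+
+/* Gradient of -ln(L): each component is -sum_i (dF_i/dx) / F_i. */
+void nll_df(const gsl_vector *p, void *data, gsl_vector *grad) {
+    double da = 0, db = 0, dg = 0;
+    for (size_t i = 0; i < N; i++) {
+        double th = theta[i], ph = phi[i], f = F(th, ph, p);
+        da += -3.0/(8.0*M_PI) * (3*pow(cos(th), 2) - 1)   / f;
+        db +=  3.0/(4.0*M_PI) * pow(sin(th), 2)*cos(2*ph) / f;
+        dg +=  3.0/(4.0*M_PI) * sqrt(2)*sin(2*th)*cos(ph) / f;
+    }
+    gsl_vector_set(grad, 0, da);
+    gsl_vector_set(grad, 1, db);
+    gsl_vector_set(grad, 2, dg);
+}
+
+/* Combined callback required by gsl_multimin_function_fdf. */
+void nll_fdf(const gsl_vector *p, void *data, double *f, gsl_vector *grad) {
+    *f = nll(p, data);
+    nll_df(p, data, grad);
+}
+```
+
+These three callbacks are what `gsl_multimin_fdfminimizer_set()` expects,
+wrapped in a `gsl_multimin_function_fdf` with `n = 3`; the Fletcher-Reeves
+minimiser is selected with `gsl_multimin_fdfminimizer_conjugate_fr`.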
+ +To minimise a function $f$, given an initial guess point $x_0$, the gradient +$\nabla f$ is used to find the initial direction $v_0$ (the steepest descent) along +which a line minimisation is performed: the minimum occurs where the +directional derivative along $v_0$ is zero: +$$ + \frac{\partial f}{\partial v_0} = \nabla f \cdot v_0 = 0 +$$ +or, equivalently, at the point where the gradient is orthogonal to $v_0$. +After this first step the following procedure is iterated: + + 1. Find the steepest descent $v_n$. + 2. Compute the *conjugate* direction: $w_n = v_n + \beta_n w_{n-1}$ + 3. Perform a line minimisation along $w_n$ + 4. Update the position $x_{n+1} = x_n + \alpha_n w_n$ + +where $\alpha_n$ is line minimum and $\beta_n$ is given by the Fletcher-Reeves +formula: +$$ + \beta = \frac{\|\nabla f(x_n)\|^2}{\|\nabla f(x_{n-1})\|^2} +$$ + +The accuracy of each line minimisation is controlled the parameter `tol`, +meaning: + +$$ + w \cdot \nabla f < \text{tol} \, \|w\| \, \|\nabla f\| +$$ + +The minimisation is quite unstable and this forced the starting point to be +taken very close to the solution, namely: + +$$ + \alpha_{sp} = 0.79 \et + \beta_{sp} = 0.02 \et + \gamma_{sp} = -0.17 +$$ + +The overall minimisation ends when the gradient module is smaller than $10^{-4}$. +The program took 25 of the above iterations to reach the result shown in @eq:Like_res. + +The Cramér-Rao bound states that the covariance matrix of parameters estimated +by MLM is greater than the of the inverse of the Hessian matrix of $-\log(L)$ +at the minimum. Thus, the Hessian matrix was computed +analytically and inverted by a Cholesky decomposition, which states that +every positive definite symmetric square matrix $H$ can be written as the +product of a lower triangular matrix $L$ and its transpose $L^T$: + +$$ + H = L \cdot L^T +$$ + +The Hessian matrix is, indeed, both symmetric and positive definite, when +computed in a minimum. Once decomposed the inverse is given by + +$$ + H^{-1} = (L \cdot L^T)^{-1} = (L^{-1})^T \cdot L^{-1} +$$ +The inverse of $L$ is easily computed since it's a triangular matrix. + +The GSL library provides two functions `gsl_linalg_cholesky_decomp()` and +`gsl_linalg_cholesky_invert()` to decompose and invert in-place a matrix. +The result is shown below: + +$$ + \bar{\alpha_L} = 0.6541 \et + \bar{\beta_L} = 0.06125 \et + \bar{\gamma_L} = -0.1763 +$$ {#eq:Like_res} + +$$ +\text{cov}_L = \begin{pmatrix} + 9.225 \cdot 10^{-6} & -1.785 \cdot 10^{-6} & 2.41 \cdot 10^{-7} \\ +-1.785 \cdot 10^{-6} & 4.413 \cdot 10^{-6} & 1.58 \cdot 10^{-6} \\ + 2.41 \cdot 10^{-7} & 1.58 \cdot 10^{-6} & 2.602 \cdot 10^{-6} +\end{pmatrix} +$$ {#eq:Like_cov} + +Hence: +\begin{align*} + \alpha_L &= 0.654 \pm 0.003 \\ + \beta_L &= 0.0613 \pm 0.0021 \\ + \gamma_L &= -0.1763 \pm 0.0016 +\end{align*} + + +### Least squares estimation + +Another method that can be used to estimate {$\alpha$, $\beta$, $\gamma$} are +the least squares (LSQ), which is a multidimensional minimisation +specialised to the case of sum of squared functions called residuals. +The residuals can be anything in general but are usually interpreted as the +difference between an expected (fit) and an observed value. The least squares +then correspond to the expected values that best fit the observations. 
+ +To apply the LSQ method, the data must be properly binned, meaning that each +bin should contain a significant number of events (say greater than four or +five): in this case, 30 bins for $\theta$ and 60 for $\phi$ turned out +to be satisfactory. The expected values were given as the product of $N$, $F$ +computed in the geometric center of the bin and the solid angle enclosed by the +bin itself, namely: + +$$ + E_{\theta, \phi} = N F(\theta, \phi) \, \Delta \theta \, \Delta \phi \, + \sin{\theta} +$$ + +Once the data are binned, the number of events in each bin plays the role of the +observed value, while the expected one is given by the probability of finding +an event in that bin multiplied by the total number of points, $N$. Then, the +$\chi^2$, defined as follow, must be minimized with respect to the three +parameters to be estimated: + +$$ + \chi^2 = \sum_i f_i^2 = \|\vec f\|^2 \with f_i = \frac{O_i - E_i}{\sqrt{E_i}} +$$ + +where the sum runs over the bins, $O_i$ are the observed values and $E_i$ the +expected ones. Besides the more generic `gsl_multimin.h` library, GSL provides a +special one for least square minimisation, called `gsl_multifit_nlinear.h`. A +trust region method was used to minimize the $\chi^2$. +In such methods, the $\chi^2$, its gradient and its Hessian matrix are computed +in a series of points in the parameters space till the gradient and the matrix +reach given proper values, meaning that the minimum has been well approached, +where "well" is related to that values. +If {$x_1 \dots x_p$} are the parameters which must be estimated, first $\chi^2$ +is computed in the starting point $\vec{x_k}$, then its value in a point +$\vec{x}_k + \vec{\delta}$ is modelled by a function $m_k (\vec{\delta})$ which +is the second order Taylor expansion around the point $\vec{x}_k$, namely: + +$$ + \chi^2 (\vec{x}_k + \vec{\delta}) \sim m_k (\vec{\delta}) = + \chi^2 (\vec{x}_k) + \nabla_k^T \vec{\delta} + \frac{1}{2} + \vec{\delta}^T H_k \vec{\delta} +$$ + +where $\nabla_k$ and $H_k$ are the gradient and the Hessian matrix of $\chi^2$ +in the point $\vec{x}_k$ and the superscript $T$ stands for the transpose. The +initial problem is reduced the so called trust-region subproblem (TRS), which +is the minimisation of $m_k(\vec{\delta})$ in a region where +$m_k(\vec{\delta})$ should be a good approximation of $\chi^2 (\vec{x}_k ++ \vec{\delta})$, given by the condition: +$$ + \|D_k \vec\delta\| < \Delta_k +$$ +If $D_k = I$ the region is a hypersphere of radius $\Delta_k$ +centered at $\vec{x}_k$ but can be deformed according to $D_k$. This is +necessary in the case one or more parameters have scale very different scales. + +Given an initial point $x_0$, radius $\Delta_0$, scale matrix $D_0$ +and step $\vec\delta_0$ the LSQ algorithm consist in the following iteration: + + 1. Construct the function $m_k(\vec\delta)$. + 2. Solve the TRS for step $\vec\delta_k$. + 3. Check whether the solution actually decreases $\chi^2$ + 1. If positive increase the trust region radius $\Delta_{k+1} = + \alpha\Delta_k$ and shift the position $\vec x_{k+1} = \vec x_k + + \vec \delta_k$. + 2. If negative decrease the radius $\Delta_{k+1} = \Delta_k/\beta$. + 4. Repeat + +This method is advantageous compared to a general minimisation because +the TRS usually amounts to solving a linear system. There are many algorithms +to solve the problem, in this case the Levenberg-Marquardt was used. 
It is based +on a theorem that proves the existence of a number $\mu_k$ such that +\begin{align*} + \Delta_k \mu_k = \|D_k \vec\delta_k\| && + (H_k + \mu_k D_k^TD_k) \vec\delta_k = -\nabla_k +\end{align*} +Using the approximation[^1] $H\approx J^TJ$, obtained by computing the Hessian +of the first-order Taylor expansion of $\chi^2$, $\vec\delta_k$ can +be found by solving the system + +$$ +\begin{cases} + J_k \vec{\delta_k} + \vec{f_k} = 0 \\ + \sqrt{\mu_k} D_k \vec{\delta_k} = 0 +\end{cases} +$$ + +[^1]: Here $J_{ij} = \partial f_i/\partial x_j$ is the Jacobian matrix of the +vector-valued function $\vec f(\vec x)$. + +The algorithm terminates if on of the following condition are satisfied: + + 1. $|\delta_i| \leq \text{xtol} (|x_i| + \text{xtol})$ for every component + of $\vec \delta$. + 2. $\max_i |\nabla_i \cdot \max(x_i, 1)| \leq \text{gtol}\cdot \max(\chi^2, 1)$ + 3. $\|\vec f(\vec x+ \vec\delta) - \vec f(\vec x)|| \leq \text{ftol} + \cdot \max(\|\vec f(\vec x)\|, 1)$ + +These tolerance have all been set to \SI{1e-8}{}. The program converged in 7 +iterations giving the results below. The covariance of the parameters can again +been estimated through the Hessian matrix at the minimum. The following results +were obtained: + +$$ + \bar{\alpha_{\chi}} = 0.6537 \et + \bar{\beta_{\chi}} = 0.05972 \et + \bar{\gamma_{\chi}} = -0.1736 +$$ {#eq:lsq-res} + +$$ +\text{cov}_{\chi} = \begin{pmatrix} + 9.244 \cdot 10^{-6} & -1.66 \cdot 10^{-6} & 2.383 \cdot 10^{-7} \\ +-1.66 \cdot 10^{-6} & 4.495 \cdot 10^{-6} & 1.505 \cdot 10^{-6} \\ + 2.383 \cdot 10^{-7} & 1.505 \cdot 10^{-6} & 2.681 \cdot 10^{-6} +\end{pmatrix} +$$ + +Hence: +\begin{align*} + &\alpha_{\chi} = 0.654 \pm 0.003 \\ + &\beta_{\chi} = 0.0597 \pm 0.0021 \\ + &\gamma_{\chi} = -0.1736 \pm 0.0016 +\end{align*} + + +### Results compatibility + +In order to compare the values $x_L$ and $x_{\chi}$ obtained from both methods +with the correct ones (namely {$\alpha_0$, $\beta_0$, $\gamma_0$}), the +following compatibility t-test was applied: + +$$ + p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with + t = \frac{x_i - x_0}{\sqrt{\sigma_{x_i}^2 + \sigma_{x_0}^2}} + = \frac{x_i - x_0}{\sigma_{x_i}} +$$ + +where $i$ stands either for the MLM or LSQ parameters and $\sigma_{x_i}$ is the +uncertainty of the value $x_i$. At 95% confidence level, the values are +compatible if $p > 0.05$. + +Likelihood results: + +---------------------------- + par $p$-value +------------ --------------- + $\alpha_L$ 0.18 + + $\beta_L$ 0.55 + + $\gamma_L$ 0.023 +---------------------------- + +Table: Likelihood results compatibility. + +$\chi^2$ results: + +--------------------------------- + par $p$-value +----------------- --------------- + $\alpha_{\chi}$ 0.22 + + $\beta_{\chi}$ 0.89 + + $\gamma_{\chi}$ 0.0001 +--------------------------------- + +Table: $\chi^2$ results compatibility. + +It can be concluded that only the third parameter, $\gamma$ is not compatible +with the expected one in both cases. An in-depth analysis of the algebraic +arrangement of $F$ would be required in order to justify this outcome. + +\vspace{30pt} + +## Isotropic hypothesis testing + +What if the probability distribution function was isotropic? Could it be +compatible with the found results? If $F$ was isotropic, then $\alpha_I$, +$\beta_I$ and $\gamma_I$ would be $1/3$, 0, and 0 respectively, since this +gives $F_I = 1/{4 \pi}$. The t-test gives a $p$-value approximately zero for all +the three parameters, meaning that there is no compatibility at all with this +hypothesis. 
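+
+Both the table entries above and this last check come down to the same few
+lines of code; a minimal sketch (assuming the reference values $x_0$ are taken
+as exact, so that $\sigma_{x_0} = 0$ as in the formula above):
+
+```c
+#include <math.h>
+
+/* Two-sided p-value of the compatibility test:
+ * x is the estimate, sigma its uncertainty, x0 the reference value. */
+double p_value(double x, double sigma, double x0) {
+    double t = fabs(x - x0) / sigma;
+    return 1 - erf(t / sqrt(2));
+}
+```
+
+For instance, `p_value(-0.1763, 0.0016, -0.18)` gives a value of the order of
+the 0.02 quoted for $\gamma_L$.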
diff --git a/notes/sections/4.md b/notes/sections/4.md new file mode 100644 index 0000000..29b162c --- /dev/null +++ b/notes/sections/4.md @@ -0,0 +1,269 @@ +# Exercise 4 + +**Kinematic dip pdf derivation** + +Consider a great number of non-interacting particles, each of which with a +random momentum $\vec{p}$ with module between 0 and $p_{\text{max}}$ randomly +angled with respect to a coordinate system {$\hat{x}$, $\hat{y}$, $\hat{z}$}. +Once the polar angle $\theta$ is defined, the momentum vertical and horizontal +components of a particle, which will be referred as $p_v$ and $p_h$, are the +ones shown in @fig:components. +If $\theta$ is evenly distributed on the sphere and the same holds for the +module $p$, which distribution will the average value of the absolute value of +$p_v$ follow as a function of $p_h$? + +\begin{figure} +\hypertarget{fig:components}{% +\centering +\begin{tikzpicture} + % Axes + \draw [thick, ->] (5,2) -- (5,8); + \draw [thick, ->] (5,2) -- (2,1); + \draw [thick, ->] (5,2) -- (8,1); + \node at (1.5,0.9) {$x$}; + \node at (8.5,0.9) {$y$}; + \node at (5,8.4) {$z$}; + % Momentum + \definecolor{cyclamen}{RGB}{146, 24, 43} + \draw [ultra thick, ->, cyclamen] (5,2) -- (3.8,6); + \draw [thick, dashed, cyclamen] (3.8,0.8) -- (3.8,6); + \draw [thick, dashed, cyclamen] (5,7.2) -- (3.8,6); + \draw [ultra thick, ->, pink] (5,2) -- (5,7.2); + \draw [ultra thick, ->, pink] (5,2) -- (3.8,0.8); + \node at (4.8,1.1) {$\vec{p_h}$}; + \node at (5.5,6.6) {$\vec{p_v}$}; + \node at (3.3,5.5) {$\vec{p}$}; + % Angle + \draw [thick, cyclamen] (4.4,4) to [out=80,in=210] (5,5); + \node at (4.7,4.2) {$\theta$}; +\end{tikzpicture} +\caption{Momentum components.}\label{fig:components} +} +\end{figure} + +The aim is to compute $\langle |p_v| \rangle (p_h) dp_h$. + +Consider all the points with $p_h \in [p_h ; p_h - dp_h]$: the values of +$p_v$ that these points can assume depend on $\theta$ and the total momentum +length $p$. + +$$ + \begin{cases} + p_h = p \sin{\theta} \\ p_v = p \cos{\theta} + \end{cases} + \thus |p_v| = p |\cos{\theta}| = p_h \frac{|\cos{\theta}|}{\sin{\theta}} + = |p_v| (\theta) +$$ + +It looks like the dependence on $p$ has disappeared, but obviously it has +not. In fact, it lies beneath the limits that one has to put on the possible +values of $\theta$. For the sake of clarity, take a look at @fig:sphere (the +system is rotation-invariant, hence it can be drown at a fixed azimuthal angle). 
+ +\begin{figure} +\hypertarget{fig:sphere}{% +\centering +\begin{tikzpicture} + % p_h slice + \definecolor{cyclamen}{RGB}{146, 24, 43} + \filldraw [cyclamen!15!white] (1.5,-3.15) -- (1.5,3.15) -- (1.75,3.05) + -- (2,2.85) -- (2,-2.85) -- (1.75,-3.05) + -- (1.5,-3.15); + \draw [cyclamen] (1.5,-3.15) -- (1.5,3.15); + \draw [cyclamen] (2,-2.9) -- (2,2.9); + \node [cyclamen, left] at (1.5,-0.3) {$p_h$}; + \node [cyclamen, right] at (2,-0.3) {$p_h + dp_h$}; + % Axes + \draw [thick, ->] (0,-4) -- (0,4); + \draw [thick, ->] (0,0) -- (4,0); + \node at (0.3,3.8) {$z$}; + \node at (4,0.3) {$hd$}; + % p_max semicircle + \draw [thick, cyclamen] (0,-3.5) to [out=0, in=-90] (3.5,0); + \draw [thick, cyclamen] (0,3.5) to [out=0, in=90] (3.5,0); + \node [cyclamen, left] at (-0.2,3.5) {$p_{\text{max}}$}; + \node [cyclamen, left] at (-0.2,-3.5) {$-p_{\text{max}}$}; + % Angles + \draw [thick, cyclamen, ->] (0,1.5) to [out=0, in=140] (0.55,1.2); + \node [cyclamen] at (0.4,2) {$\theta$}; + \draw [thick, cyclamen] (0,0) -- (1.5,3.15); + \node [cyclamen, above right] at (1.5,3.15) {$\theta_x$}; + \draw [thick, cyclamen] (0,0) -- (1.5,-3.15); + \node [cyclamen, below right] at (1.5,-3.15) {$\theta_y$}; + % Vectors + \draw [ultra thick, cyclamen, ->] (0,0) -- (1.7,2.2); + \draw [ultra thick, cyclamen, ->] (0,0) -- (1.9,0.6); + \draw [ultra thick, cyclamen, ->] (0,0) -- (1.6,-2); +\end{tikzpicture} +\caption{Momentum space at fixed azimuthal angle ("$hd$" stands for +"horizontal direction"). Some vectors with $p_h \in [p_h, p_h +dp_h]$ +are evidenced.}\label{fig:sphere} +} +\end{figure} + +As can be seen, $\theta_x$ and $\theta_y$ are the minimum and maximum tilts +angles of these vectors respectively, because if a point had $p_h \in [p_h; p_h ++ dp_h]$ and $\theta < \theta_x$ or $\theta > \theta_y$, it would have $p > +p_{\text{max}}$. Therefore their values are easily computed as follow: + +$$ + p_h = p_{\text{max}} \sin{\theta_x} = p_{\text{max}} \sin{\theta_y} + \thus \sin{\theta_x} = \sin{\theta_y} = \frac{p_h}{p_{\text{max}}} +$$ + +Since the average value of a quantity is computed by integrating it over all the +possible quantities it depends on weighted on their probability, one gets: + +$$ + \langle |p_v| \rangle (p_h) dp_h = \int_{\theta_x}^{\theta_y} + d\theta P(\theta) \cdot P(p) \, dp \cdot |p_v| (\theta) +$$ + +where $d\theta P(\theta)$ is the probability of generating a point with $\theta +\in [\theta; \theta + d\theta]$ and $P(p) \, dp$ is the probability of +generating a point with $\vec{p}$ in the pink region in @fig:sphere, given a +fixed $\theta$. 
+The easiest to deduce is $P(p) \, dp$: since $p$ is evenly distributed, it +follows that: + +$$ + P(p) \, dp = \frac{1}{p_{\text{max}}} dp +$$ + +with: + +$$ + dp = p(p_h + dp_h) - p(p_h) + = \frac{p_h + dp_h}{\sin{\theta}} - \frac{p_h}{\sin{\theta}} + = \frac{dp_h}{\sin{\theta}} +$$ + +hence + +$$ + P(p) \, dp = \frac{1}{p_{\text{max}}} \cdot \frac{1}{\sin{\theta}} \, dp_h +$$ + +For $d\theta P(\theta)$, instead, one has to do the same considerations done +in @sec:3, from which: + +$$ + P(\theta) d\theta = \frac{1}{2} \sin{\theta} d\theta +$$ + +Ultimately, having found all the pieces, the integral must be computed: + +\begin{align*} + \langle |p_v| \rangle (p_h) dp_h &= \int_{\theta_x}^{\theta_y} + d\theta \frac{1}{2} \sin{\theta} \cdot + \frac{1}{p_{\text{max}}} \frac{1}{\sin{\theta}} \, dp_h \cdot + p_h \frac{|\cos{\theta}|}{\sin{\theta}} + \\ + &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \int_{\theta_x}^{\theta_y} + d\theta \frac{|\cos{\theta}|}{\sin{\theta}} + \\ + &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} \cdot \mathscr{O} +\end{align*} + +Then, with a bit of math: + +\begin{align*} + \mathscr{O} &= \int_{\theta_x}^{\theta_y} d\theta + \frac{|\cos{\theta}|}{\sin{\theta}} + \\ + &= \int_{\theta_x}^{\frac{\pi}{2}} d\theta \frac{\cos{\theta}}{\sin{\theta}} + - \int_{\frac{\pi}{2}}^{\theta_y} d\theta \frac{\cos{\theta}}{\sin{\theta}} + \\ + &= \left[ \ln{(\sin{\theta})} \right]_{\theta_x}^{\frac{\pi}{2}} + - \left[ \ln{(\sin{\theta})} \right]_{\frac{\pi}{2}}^{\theta_y} + \\ + &= \ln{(1)} -\ln{ \left( \frac{p_h}{p_{\text{max}}} \right) } + - \ln{ \left( \frac{p_h}{p_{\text{max}}} \right) } + \ln{(1)} + \\ + &= 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) } +\end{align*} + +\newpage +Hence, in conclusion: + +\begin{align*} + \langle |p_v| \rangle (p_h) dp_h &= \frac{1}{2} \frac{p_h dp_h}{p_{\text{max}}} + \cdot 2 \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) } + \\ + &= \ln{ \left( \frac{p_{\text{max}}}{p_h} \right) } + \frac{p_h}{p_{\text{max}}} dp_h +\end{align*} + +Namely: + +\begin{figure} +\hypertarget{fig:plot}{% +\centering +\begin{tikzpicture} + \definecolor{cyclamen}{RGB}{146, 24, 43} + % Axis + \draw [thick, <->] (0,5) -- (0,0) -- (11,0); + \node [below right] at (11,0) {$p_h$}; + \node [above left] at (0,5) {$\langle |p_v| \rangle$}; + % Plot + \draw [domain=0.001:10, smooth, variable=\x, + cyclamen, ultra thick] plot ({\x},{12*ln(10/\x)*\x/10}); + \node [cyclamen, below] at (10,0) {$p_{\text{max}}$}; +\end{tikzpicture} +\caption{Plot of the expected distribution.}\label{fig:plot} +} +\end{figure} + +**The code** + +The same distribution should be found by generating and binning points in a +proper way. +A number of $N = 50'000$ points were generated as a couple of values ($p$, +$\theta$), with $p$ evenly distrinuted between 0 and $p_{\text{max}}$ and +$\theta$ given by the same procedure described in @sec:3, namely: + +$$ + \theta = \text{acos}(1 - 2x) +$$ + +with $x$ uniformely distributed between 0 and 1. +The data binning turned out to be a bit challenging. Once $p$ was sorted and +$p_h$ was computed, the bin in which the latter goes in must be found. 
If $n$ is the number of bins into which the range $[0, p_{\text{max}}]$ is
+divided, then the width $w$ of each bin is given by:
+
+$$
+  w = \frac{p_{\text{max}}}{n}
+$$
+
+and the $j^{\text{th}}$ bin in which $p_h$ falls is:
+
+$$
+  j = \text{floor} \left( \frac{p_h}{w} \right)
+$$
+
+where 'floor' is the function which gives the greatest integer less than or
+equal to its argument, and the bins are counted starting from zero.
+Then, a vector was iteratively updated in which the $j^{\text{th}}$ entry
+contains the sum $S_j$ of all the $|p_v|$s whose $p_h$ fell into the
+$j^{\text{th}}$ bin, together with the number num$_j$ of entries in the
+$j^{\text{th}}$ bin.
+At the end, the average value of $|p_v|_j$ was computed as $S_j /
+\text{num}_j$.
+For the sake of clarity, for each generated couple, it works like this:
+
+ - the couple $[p; \theta]$ is generated;
+ - $p_h$ and $p_v$ are computed;
+ - the $j^{\text{th}}$ bin in which $p_h$ falls is found;
+ - num$_j$ is increased by 1;
+ - $S_j$ (which is zero at the beginning) is increased by $|p_v|$.
+
+At the end, $\langle |p_v| \rangle_j = \langle |p_v| \rangle (p_h)$ where:
+
+$$
+  p_h = j \cdot w + \frac{w}{2} = w \left( j + \frac{1}{2} \right)
+$$
+
+The following result was obtained:
+
+![Histogram of the obtained distribution.](images/dip.pdf)
diff --git a/notes/sections/5.md b/notes/sections/5.md
new file mode 100644
index 0000000..bfa3971
--- /dev/null
+++ b/notes/sections/5.md
@@ -0,0 +1,113 @@
+# Exercise 5
+
+**Numerically compute an integral value via Monte Carlo approaches**
+
+The integral to be evaluated is the following:
+
+$$
+  I = \int\limits_0^1 dx \, e^x
+$$
+
+whose exact value is 1.7182818285...
+The three most popular MC methods were applied: plain Monte Carlo, Miser and
+Vegas (besides their popularity, these three methods were chosen for being
+the only ones implemented in the GSL library).
+
+## Plain Monte Carlo
+
+When an integral $I$ of a function $f$ over a volume $V$ of an
+$n$-dimensional space must be evaluated, the simplest MC approach is to sample
+$N$ points $x_i$ evenly distributed in $V$ and approximate $I$ as:
+
+$$
+  I \sim I_N = \frac{V}{N} \sum_{i=1}^N f(x_i) = V \cdot \langle f \rangle
+$$
+
+with $I_N \rightarrow I$ for $N \rightarrow + \infty$ by the law of large
+numbers. Hence, the variance can be estimated as:
+
+$$
+  \sigma^2 =
+  \frac{1}{N-1} \sum_{i = 1}^N \left( f(x_i) - \langle f \rangle \right)^2
+$$
+
+thus, the error on $I_N$ decreases as $1/\sqrt{N}$. Unlike in deterministic
+methods, the estimate of the error is not a strict error bound: random sampling
+may not uncover all the important features of the integrand, which can result
+in an underestimate of the error.
+Since the proximity of $I_N$ to $I$ is related to $N$, the accuracy of the
+method is determined by the number of function calls (as can be seen in
+@tbl:MC).
+
+-----------------------------------------------------------------
+            500'000 calls     5'000'000 calls    50'000'000 calls
+--------- ----------------- ------------------ ------------------
+result      1.7166435813       1.7181231109       1.7183387184
+
+$\sigma$    0.0006955691       0.0002200309       0.0000695809
+-----------------------------------------------------------------
+
+Table: MC results and errors with different numbers of function
+calls. {#tbl:MC}
+
+## Miser
+
+The MISER algorithm is based on recursive stratified sampling.
+On each recursion step the integral and the error are estimated using a plain
+Monte Carlo algorithm.
If the error estimate is larger than the required
+accuracy, the integration volume is divided into sub-volumes and the procedure
+is recursively applied to each sub-volume.
+This technique aims to reduce the overall integration error by concentrating
+integration points in the regions of highest variance.
+The idea of stratified sampling begins with the observation that for two
+disjoint regions $a$ and $b$ with Monte Carlo estimates of the integral
+$E_a (f)$ and $E_b (f)$ and variances $\sigma_a^2 (f)$ and $\sigma_b^2 (f)$,
+the variance $V (f)$ of the combined estimate $E (f)$:
+
+$$
+  E (f)= \frac {1}{2} \left( E_a (f) + E_b (f) \right)
+$$
+
+is given by:
+
+$$
+  V(f) = \frac{\sigma_a^2 (f)}{4 N_a} + \frac{\sigma_b^2 (f)}{4 N_b}
+$$
+
+It can be shown that this variance is minimized by distributing the points
+such that:
+
+$$
+  \frac{N_a}{N_a + N_b} = \frac{\sigma_a}{\sigma_a + \sigma_b}
+$$
+
+Hence, the smallest error estimate is obtained by allocating sample points in
+proportion to the standard deviation of the function in each sub-region.
+
+---
+
+---------------------------------------------------------
+   calls       plain MC        Miser          Vegas
+------------ -------------- -------------- --------------
+   500'000    1.7166435813   1.7182850738   1.7182818354
+
+ 5'000'000    1.7181231109   1.7182819143   1.7182818289
+
+50'000'000    1.7183387184   1.7182818221   1.7182818285
+---------------------------------------------------------
+
+Table: Results of the three methods. {#tbl:results}
+
+---------------------------------------------------------
+   calls       plain MC        Miser          Vegas
+------------ -------------- -------------- --------------
+   500'000    0.0006955691   0.0000021829   0.0000000137
+
+ 5'000'000    0.0002200309   0.0000001024   0.0000000004
+
+50'000'000    0.0000695809   0.0000000049   0.0000000000
+---------------------------------------------------------
+
+Table: $\sigma$s of the three methods. {#tbl:sigmas}
+
diff --git a/notes/sections/6.md b/notes/sections/6.md
new file mode 100644
index 0000000..91a585b
--- /dev/null
+++ b/notes/sections/6.md
@@ -0,0 +1,85 @@
+# Exercise 6
+
+**Generating points according to Fraunhofer diffraction**
+
+The diffraction of a plane wave through a round slit must be simulated by
+generating $N = 50'000$ points according to the intensity distribution
+$I(\theta)$ on a screen at a great distance $L$ from the slit itself:
+
+$$
+  I(\theta) = \frac{E^2}{2} \left( \frac{2 \pi a^2 \cos{\theta}}{L}
+  \frac{J_1(x)}{x} \right)^2 \with x = k a \sin{\theta}
+$$
+
+where:
+
+- $E$ is the electric field amplitude, set by default to $E = \SI{1e4}{V/m}$;
+- $a$ is the radius of the slit aperture, set by default to $a = \SI{0.01}{m}$;
+- $\theta$ is the angle specified in @fig:fenditure;
+- $J_1$ is the Bessel function of the first kind of order one;
+- $k$ is the wavenumber, set by default to $k = \SI{1e-4}{m^{-1}}$;
+- $L$ is set by default to $L = \SI{1}{m}$.
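+
+Before dealing with the generation itself, a minimal sketch of $I(\theta)$
+with the default parameters is reported below; the function name `intensity`
+is an illustrative choice, and the $x \to 0$ limit $J_1(x)/x \to 1/2$ is
+handled explicitly to avoid a division by zero at $\theta = 0$:
+
+```c
+#include <math.h>
+#include <gsl/gsl_sf_bessel.h>
+
+/* Fraunhofer intensity I(theta) with the default parameters. */
+double intensity(double theta) {
+    const double E = 1e4, a = 0.01, k = 1e-4, L = 1;
+    double x = k * a * sin(theta);
+    double j = (x != 0) ? gsl_sf_bessel_J1(x) / x : 0.5;  // limit for x -> 0
+    double A = 2*M_PI * a*a * cos(theta) / L * j;
+    return 0.5 * E*E * A*A;
+}
+```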
+ +\begin{figure} +\hypertarget{fig:fenditure}{% +\centering +\begin{tikzpicture} + \definecolor{cyclamen}{RGB}{146, 24, 43} + % Walls + \draw [thick] (-1,3) -- (1,3) -- (1,0.3) -- (1.2,0.3) -- (1.2,3) + -- (9,3); + \draw [thick] (-1,-3) -- (1,-3) -- (1,-0.3) -- (1.2,-0.3) -- (1.2,-3) + -- (9,-3); + \draw [thick] (10,3) -- (9.8,3) -- (9.8,-3) -- (10,-3); + % Lines + \draw [thick, gray] (0.7,0.3) -- (0.5,0.3); + \draw [thick, gray] (0.7,-0.3) -- (0.5,-0.3); + \draw [thick, gray] (0.6,0.3) -- (0.6,-0.3); + \draw [thick, gray] (1.2,0) -- (9.8,0); + \draw [thick, gray] (1.2,-0.1) -- (1.2,0.1); + \draw [thick, gray] (9.8,-0.1) -- (9.8,0.1); + \draw [thick, cyclamen] (1.2,0) -- (9.8,-2); + \draw [thick, cyclamen] (7,0) to [out=-90, in=50] (6.6,-1.23); + % Nodes + \node at (0,0) {$2a$}; + \node at (5.5,0.4) {$L$}; + \node [cyclamen] at (5.5,-0.4) {$\theta$}; + \node [rotate=-90] at (10.2,0) {screen}; +\end{tikzpicture} +\caption{Fraunhofer diffraction.}\label{fig:fenditure} +} +\end{figure} + +Once again, $\theta$, which must be evenly distributed on half sphere, can be +generated only as a function of a variable $x$ uniformely distributed between +0 and 1. Therefore: + +\begin{align*} + \frac{d^2 P}{d\omega^2} = const = \frac{1}{2 \pi} + &\thus d^2 P = \frac{1}{2 \pi} d\omega^2 = + \frac{1}{2 \pi} d\phi \sin{\theta} d\theta \\ + &\thus \frac{dP}{d\theta} = \int_0^{2 \pi} d\phi \frac{1}{2 \pi} \sin{\theta} + = \frac{1}{2 \pi} \sin{\theta} \, 2 \pi = \sin{\theta} +\end{align*} + +\begin{align*} + \theta = \theta (x) &\thus + \frac{dP}{d\theta} = \frac{dP}{dx} \cdot \left| \frac{dx}{d\theta} \right| + = \left. \frac{dP}{dx} \middle/ \, \left| \frac{d\theta}{dx} \right| \right. + \\ + &\thus \sin{\theta} = \left. 1 \middle/ \, \left| + \frac{d\theta}{dx} \right| \right. +\end{align*} + +Since $\theta \in [0, \pi/2]$, then the absolute value symbol can be omitted: + +\begin{align*} + \frac{d\theta}{dx} = \frac{1}{\sin{\theta}} + &\thus d\theta \sin(\theta) = dx + \\ + &\thus - \cos (\theta') |_{0}^{\theta} = x(\theta) - x(0) = x - 0 = x + \\ + &\thus - \cos(\theta) + 1 =x + \\ + &\thus \theta = \text{acos} (1 -x) +\end{align*} diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..c3feca5 --- /dev/null +++ b/shell.nix @@ -0,0 +1,10 @@ +with import { }; + +stdenv.mkDerivation { + name = "jeypsi"; + + GMP_PATH = "${gmp}/lib"; + + buildInputs = with pkgs; [ gdb gsl gmp pkgconfig ]; + shellHook = "exec fish"; +}