analistica/ex-3/likelihood.c
2020-03-06 02:24:32 +01:00

240 lines
6.2 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "likelihood.h"
#include <gsl/gsl_errno.h>
#include <gsl/gsl_multimin.h>
#include <gsl/gsl_linalg.h>
/* `f_logL(par, &sample)`
* gives the negative log-likelihood for the sample `sample`.
*/
double f_logL(const gsl_vector *par, void *sample_) {
struct sample sample = *((struct sample*) sample_);
double sum = 0;
for (size_t i = 0; i < sample.size; i++)
sum += log(fabs(distr(par, &sample.events[i])));
/* Rescale the function to avoid
* round-off errors during minimisation
*/
sum *= 1e-5;
return -sum;
}
/* `fdf_logL(par, &sample, fun, grad)`
* simultaneously computes the gradient and the value of
* the negative log-likelihood for the sample `sample`
* and stores the results in `fun` and `grad` respectively.
*/
void fdf_logL(const gsl_vector *par, void *sample_,
double *fun, gsl_vector *grad) {
struct sample sample = *((struct sample*) sample_);
double prob;
struct event *event;
gsl_vector *term = gsl_vector_alloc(3);
// Note: fun/grad are *not* guaranteed to be 0
gsl_vector_set_zero(grad);
*fun = 0;
for (size_t i = 0; i < sample.size; i++) {
event = &sample.events[i];
prob = distr(par, event);
/* The gradient of log(F(α,β,γ)) is:
* 1/F(α,β,γ) ∇F(α,β,γ)
*/
grad_distr(par, event, term);
gsl_vector_scale(term, -1.0/prob);
gsl_vector_add(grad, term);
// compute function
*fun += log(fabs(prob));
}
/* Rescale the function to avoid
* round-off errors during minimisation
*/
*fun = -*fun*1e-5;
gsl_vector_scale(grad, 1e-5);
// free memory
gsl_vector_free(term);
}
/* `df_logL(par, &sample, grad)`
* gives the gradient of the negative log-likelihood
* for the sample `sample`. The result is stored in
* the `grad` gsl_vector.
*/
void df_logL(const gsl_vector *par, void *sample, gsl_vector *grad) {
double fun;
fdf_logL(par, sample, &fun, grad);
}
/* `hf_logL(par, &sample, hess)`
* gives the hessian matrix of the negative log-likelihood
* for the sample `sample`. The result is stored in
* the `hess` gsl_matrix.
*/
gsl_matrix* hf_logL(const gsl_vector *par, void *sample_) {
struct sample sample = *((struct sample*) sample_);
gsl_vector *grad = gsl_vector_calloc(3);
gsl_matrix *term = gsl_matrix_calloc(3, 3);
gsl_matrix *hess = gsl_matrix_calloc(3, 3);
double prob;
struct event *event;
for (size_t n = 0; n < sample.size; n++) {
/* Compute gradient and value of F */
event = &sample.events[n];
prob = distr(par, event);
grad_distr(par, event, grad);
/* Compute the hessian matrix of -log(F):
* H_ij = 1/F² ∇F_i ∇F_j
*/
for (size_t i = 0; i < 3; i++)
for (size_t j = 0; j < 3; j++) {
gsl_matrix_set(
term, i, j,
gsl_vector_get(grad, i) * gsl_vector_get(grad, j));
}
gsl_matrix_scale(term, 1.0/pow(prob, 2));
gsl_matrix_add(hess, term);
}
// free memory
gsl_vector_free(grad);
gsl_matrix_free(term);
return hess;
}
/* Maximum likelihood estimation of the parameters
* α,β,γ.
*/
min_result maxLikelihood(struct sample *sample) {
/* Starting point: α,β,γ = "something close
* to the solution", because the minimisation
* seems pretty unstable.
*/
gsl_vector *par = gsl_vector_alloc(3);
gsl_vector_set(par, 0, 0.79);
gsl_vector_set(par, 1, 0.02);
gsl_vector_set(par, 2, -0.17);
/* Initialise the minimisation */
gsl_multimin_function_fdf likelihood;
likelihood.n = 3;
likelihood.f = f_logL;
likelihood.df = df_logL;
likelihood.fdf = fdf_logL;
likelihood.params = sample;
/* The minimisation technique used
* is the conjugate gradient method. */
const gsl_multimin_fdfminimizer_type *type =
gsl_multimin_fdfminimizer_conjugate_fr;
gsl_multimin_fdfminimizer *m =
gsl_multimin_fdfminimizer_alloc(type, 3);
gsl_multimin_fdfminimizer_set(
m, // minimisation technique
&likelihood, // function to minimise
par, // initial parameters/starting point
1e-5, // first step length
0.1); // accuracy
size_t iter;
int status = GSL_CONTINUE;
/* Iterate the minimisation until the gradient
* is smaller that 10^-4 or fail.
*/
fputs("\n# maximum likelihood\n\n", stderr);
for (iter = 0; status == GSL_CONTINUE && iter < 100; iter++) {
status = gsl_multimin_fdfminimizer_iterate(m);
if (status)
break;
status = gsl_multimin_test_gradient(m->gradient, 1e-5);
/* Print the iteration, current parameters
* and gradient of the log-likelihood.
*/
if (iter % 5 == 0 || status == GSL_SUCCESS) {
fprintf(stderr, "%2ld\t", iter);
vector_fprint(stderr, m->x);
fputs("\t", stderr);
vector_fprint(stderr, m->gradient);
putc('\n', stderr);
}
}
fprintf(stderr, "status: %s\n", gsl_strerror(status));
fprintf(stderr, "iterations: %ld\n", iter);
/* Store results in the min_result type.
* Note: We allocate a new vector for the
* parameters because `m->x` will be freed
* along with m.
*/
min_result res;
res.par = gsl_vector_alloc(3);
res.err = gsl_vector_alloc(3);
res.cov = gsl_matrix_alloc(3, 3);
gsl_vector_memcpy(res.par, m->x);
/* Compute the standard errors of the estimates.
* The CramérRao bound states that the covariance
* matrix of the estimates is
*
* Σ ≥ I⁻¹ = (-H)⁻¹
*
* where:
* - I is called the Fisher information,
* - H is the hessian of log(L) at
* the maximum likelihood.
*/
res.cov = hf_logL(m->x, sample);
/* Invert H by Cholesky decomposition:
*
* H = LL^T ⇒ H⁻¹ = L^T⁻¹L⁻¹
*
* Note: H is positive defined (because -logL is
* minimised) and symmetric so this is valid.
*
*/
gsl_linalg_cholesky_decomp(res.cov);
gsl_linalg_cholesky_invert(res.cov);
/* Compute the standard errors
* from the covariance matrix.
*/
gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov);
gsl_vector_memcpy(res.err, &diagonal.vector);
vector_map(&sqrt, res.err);
fputs("\n## results\n", stderr);
fputs("\n* parameters:\n ", stderr);
vector_fprint(stderr, res.par);
fputs("\n* covariance (CramérRao lower bound):\n", stderr);
matrix_fprint(stderr, res.cov);
// free memory
gsl_multimin_fdfminimizer_free(m);
return res;
}