/* analistica/ex-3/likelihood.c */
#include "likelihood.h"
#include <gsl/gsl_errno.h>
#include <gsl/gsl_multimin.h>
#include <gsl/gsl_linalg.h>
/* `f_logL(par, &sample)`
 * gives the negative log-likelihood for the sample `sample`.
 */
double f_logL(const gsl_vector *par, void *sample_) {
  struct sample sample = *((struct sample*) sample_);

  double sum = 0;
  for (size_t i = 0; i < sample.size; i++)
    sum += log(fabs(distr(par, &sample.events[i])));

  /* Rescale the function to avoid
   * round-off errors during minimisation.
   */
  sum *= 1e-5;

  return -sum;
}
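
/* Explicitly, if F(x; α,β,γ) is the model pdf and x_i are the
 * observed events, f_logL computes
 *
 *   f(α,β,γ) = -10⁻⁵ Σ_i log|F(x_i; α,β,γ)|
 *
 * Rescaling by a positive constant does not move the minimum,
 * so the maximum-likelihood estimate is unchanged.
 */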
/* `fdf_logL(par, &sample, fun, grad)`
 * simultaneously computes the gradient and the value of
 * the negative log-likelihood for the sample `sample`
 * and stores the results in `fun` and `grad`, respectively.
 */
void fdf_logL(const gsl_vector *par, void *sample_,
              double *fun, gsl_vector *grad) {
  struct sample sample = *((struct sample*) sample_);

  double prob;
  struct event *event;
  gsl_vector *term = gsl_vector_alloc(3);

  // Note: fun/grad are *not* guaranteed to be 0
  gsl_vector_set_zero(grad);
  *fun = 0;

  for (size_t i = 0; i < sample.size; i++) {
    event = &sample.events[i];
    prob = distr(par, event);

    /* The gradient of -log(F(α,β,γ)) is:
     *
     *   -1/F(α,β,γ) ∇F(α,β,γ)
     */
    grad_distr(par, event, term);
    gsl_vector_scale(term, -1.0/prob);
    gsl_vector_add(grad, term);

    // compute the function value
    *fun += log(fabs(prob));
  }

  /* Rescale the function and gradient to avoid
   * round-off errors during minimisation.
   */
  *fun = -*fun * 1e-5;
  gsl_vector_scale(grad, 1e-5);

  // free memory
  gsl_vector_free(term);
}
/* `df_logL(par, &sample, grad)`
 * gives the gradient of the negative log-likelihood
 * for the sample `sample`. The result is stored in
 * the `grad` gsl_vector.
 */
void df_logL(const gsl_vector *par, void *sample, gsl_vector *grad) {
  double fun;
  fdf_logL(par, sample, &fun, grad);
}
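
/* A minimal debugging sketch (hypothetical helper, not called from
 * the original flow): it cross-checks the analytic gradient from
 * `df_logL` against central finite differences of `f_logL`, which
 * is handy when modifying `grad_distr`.
 */
static void check_gradient(const gsl_vector *par, struct sample *sample) {
  const double h = 1e-6;
  gsl_vector *grad = gsl_vector_alloc(3);
  gsl_vector *p = gsl_vector_alloc(3);

  // analytic gradient at `par`
  df_logL(par, sample, grad);

  // central difference along each of the 3 parameters
  for (size_t i = 0; i < 3; i++) {
    gsl_vector_memcpy(p, par);
    gsl_vector_set(p, i, gsl_vector_get(par, i) + h);
    double fp = f_logL(p, sample);
    gsl_vector_set(p, i, gsl_vector_get(par, i) - h);
    double fm = f_logL(p, sample);
    fprintf(stderr, "grad[%zu]: analytic=%g numeric=%g\n",
            i, gsl_vector_get(grad, i), (fp - fm)/(2*h));
  }

  gsl_vector_free(grad);
  gsl_vector_free(p);
}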
/* `hf_logL(par, &sample)`
 * gives the hessian matrix of the negative log-likelihood
 * for the sample `sample`. The result is returned as a
 * newly allocated gsl_matrix, owned by the caller.
 */
gsl_matrix* hf_logL(const gsl_vector *par, void *sample_) {
  struct sample sample = *((struct sample*) sample_);

  gsl_vector *grad = gsl_vector_calloc(3);
  gsl_matrix *term = gsl_matrix_calloc(3, 3);
  gsl_matrix *hess = gsl_matrix_calloc(3, 3);

  double prob;
  struct event *event;
  for (size_t n = 0; n < sample.size; n++) {
    /* Compute gradient and value of F */
    event = &sample.events[n];
    prob = distr(par, event);
    grad_distr(par, event, grad);

    /* Compute the hessian matrix of -log(F):
     *
     *   H_ij = 1/F² ∂_i F ∂_j F
     */
    for (size_t i = 0; i < 3; i++)
      for (size_t j = 0; j < 3; j++) {
        gsl_matrix_set(
          term, i, j,
          gsl_vector_get(grad, i) * gsl_vector_get(grad, j));
      }
    gsl_matrix_scale(term, 1.0/pow(prob, 2));
    gsl_matrix_add(hess, term);
  }

  // free memory
  gsl_vector_free(grad);
  gsl_matrix_free(term);

  return hess;
}
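
/* Note on the formula above: the exact hessian of -log(F) is
 *
 *   ∇F∇Fᵀ/F² - ∇²F/F
 *
 * and `hf_logL` keeps only the first (outer-product) term. For a
 * normalised pdf the expectation of ∇²F/F vanishes, so near the
 * maximum-likelihood point this is a standard estimate of the
 * Fisher information.
 */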
/* Maximum likelihood estimation of the
 * parameters α,β,γ.
 */
min_result maxLikelihood(struct sample *sample) {
  /* Starting point: α,β,γ set to "something close
   * to the solution", because the minimisation
   * seems pretty unstable.
   */
  gsl_vector *par = gsl_vector_alloc(3);
  gsl_vector_set(par, 0, 0.79);
  gsl_vector_set(par, 1, 0.02);
  gsl_vector_set(par, 2, -0.17);
  /* Initialise the minimisation */
  gsl_multimin_function_fdf likelihood;
  likelihood.n = 3;
  likelihood.f = f_logL;
  likelihood.df = df_logL;
  likelihood.fdf = fdf_logL;
  likelihood.params = sample;

  /* The minimisation technique used is the
   * Fletcher-Reeves conjugate gradient method.
   */
  const gsl_multimin_fdfminimizer_type *type =
    gsl_multimin_fdfminimizer_conjugate_fr;
  gsl_multimin_fdfminimizer *m =
    gsl_multimin_fdfminimizer_alloc(type, 3);
  gsl_multimin_fdfminimizer_set(
    m,            // minimisation technique
    &likelihood,  // function to minimise
    par,          // initial parameters/starting point
    1e-5,         // first step length
    0.1);         // accuracy
  size_t iter;
  int status = GSL_CONTINUE;

  /* Iterate the minimisation until the norm of
   * the gradient drops below 10⁻⁵, or fail.
   */
  fputs("\n# maximum likelihood\n\n", stderr);
  for (iter = 0; status == GSL_CONTINUE && iter < 100; iter++) {
    status = gsl_multimin_fdfminimizer_iterate(m);
    if (status)
      break;
    status = gsl_multimin_test_gradient(m->gradient, 1e-5);

    /* Print the iteration, current parameters
     * and gradient of the log-likelihood.
     */
    if (iter % 5 == 0 || status == GSL_SUCCESS) {
      fprintf(stderr, "%2zu\t", iter);
      vector_fprint(stderr, m->x);
      fputs("\t", stderr);
      vector_fprint(stderr, m->gradient);
      putc('\n', stderr);
    }
  }
  fprintf(stderr, "status: %s\n", gsl_strerror(status));
  fprintf(stderr, "iterations: %zu\n", iter);
  /* Store the results in the min_result type.
   * Note: we allocate a new vector for the
   * parameters because `m->x` will be freed
   * along with `m`.
   */
  min_result res;
  res.par = gsl_vector_alloc(3);
  res.err = gsl_vector_alloc(3);
  gsl_vector_memcpy(res.par, m->x);

  /* Compute the standard errors of the estimates.
   * The Cramér-Rao bound states that the covariance
   * matrix of the estimates satisfies
   *
   *   Σ ≥ I⁻¹ = (-H)⁻¹
   *
   * where:
   *   - I is the Fisher information,
   *   - H is the hessian of log(L) at
   *     the maximum likelihood.
   */
  res.cov = hf_logL(m->x, sample);
  /* Invert H by Cholesky decomposition:
   *
   *   H = LLᵀ  ⇒  H⁻¹ = (Lᵀ)⁻¹L⁻¹
   *
   * Note: H is positive definite (because -log(L) is
   * minimised) and symmetric, so this is valid.
   */
  gsl_linalg_cholesky_decomp(res.cov);
  gsl_linalg_cholesky_invert(res.cov);

  /* Compute the standard errors
   * from the covariance matrix.
   */
  gsl_vector_const_view diagonal = gsl_matrix_const_diagonal(res.cov);
  gsl_vector_memcpy(res.err, &diagonal.vector);
  vector_map(&sqrt, res.err);

  fputs("\n## results\n", stderr);
  fputs("\n* parameters:\n  ", stderr);
  vector_fprint(stderr, res.par);
  fputs("\n* covariance (Cramér-Rao lower bound):\n", stderr);
  matrix_fprint(stderr, res.cov);

  // free memory
  gsl_vector_free(par);
  gsl_multimin_fdfminimizer_free(m);

  return res;
}
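
/* Hypothetical usage sketch (not part of this file's flow): the
 * caller is assumed to fill a `struct sample` from likelihood.h
 * and owns the vectors/matrix inside the returned min_result.
 *
 *   struct sample s = { .size = n, .events = events };
 *   min_result est = maxLikelihood(&s);
 *   // est.par: α,β,γ estimates
 *   // est.err: standard errors (square roots of the covariance diagonal)
 *   // est.cov: covariance matrix, I⁻¹
 *   gsl_vector_free(est.par);
 *   gsl_vector_free(est.err);
 *   gsl_matrix_free(est.cov);
 */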