/* analistica/ex-7/main.c
 * 2020-03-06 02:24:32 +01:00
 *
 * Repository listing: 434 lines, 11 KiB, C.
 *
 * Note: the comments in this file use non-ASCII Unicode
 * characters for mathematical notation (σ, ρ, μ, Σ, ⋅, ₁, ₂, ...).
 * Some of them can be confused with similar-looking ASCII
 * characters.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <gsl/gsl_blas.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
/* Parameters of a bivariate
* gaussian PDF: the mean (x₀, y₀),
* the standard deviation of each
* coordinate and their correlation.
*
* Note: the order of the fields matters
* to code that fills this struct with a
* positional initialiser.
*/
struct par {
double x0; // mean of x
double y0; // mean of y
double sigma_x; // standard deviation of x (σx)
double sigma_y; // standard deviation of y (σy)
double rho; // correlation coefficient: cov(x,y)/(σx⋅σy)
};
/* A sample of N 2D points: an N×2
* matrix with one point (x, y) per row,
* stored together with the parameters
* of the bivariate gaussian it belongs to.
*/
typedef struct {
struct par p; // parameters of the distribution
gsl_matrix *data; // N×2 matrix of points, released by sample_t_free
} sample_t;
/* Create a sample of `n` points.
 *
 * The parameters `p` are copied into the
 * sample and an uninitialised n×2 matrix
 * is allocated for the points.
 *
 * Returns NULL if either allocation fails
 * (nothing is leaked in that case). The
 * result must be released with sample_t_free.
 */
sample_t* sample_t_alloc(size_t n, struct par p) {
  sample_t *x = malloc(sizeof *x);
  if (x == NULL)
    return NULL;

  x->p = p;
  x->data = gsl_matrix_alloc(n, 2);
  /* gsl_matrix_alloc only returns NULL when the
   * GSL error handler has been replaced by a
   * non-aborting one: don't leak the struct then.
   */
  if (x->data == NULL) {
    free(x);
    return NULL;
  }
  return x;
}
/* Delete a sample: releases the matrix
 * of points and the sample itself.
 * Like free(), passing NULL is a no-op.
 */
void sample_t_free(sample_t *x) {
  if (x == NULL)
    return;
  gsl_matrix_free(x->data);
  free(x);
}
/* `generate_normal(r, n, p)` draws `n` points
 * from the bivariate gaussian distribution
 * described by `p`, using the generator `r`,
 * and returns them as a newly allocated sample
 * (to be released with sample_t_free).
 */
sample_t* generate_normal(
  gsl_rng *r, size_t n, struct par *p) {
  sample_t *points = sample_t_alloc(n, *p);

  for (size_t row = 0; row < n; row++) {
    /* Draw a displacement (dx,dy) from the
     * zero-mean bivariate gaussian with the
     * requested σx, σy and ρ...
     */
    double dx, dy;
    gsl_ran_bivariate_gaussian(
      r, p->sigma_x, p->sigma_y, p->rho, &dx, &dy);

    /* ...and store it translated to (x₀,y₀) */
    gsl_matrix_set(points->data, row, 0, dx + p->x0);
    gsl_matrix_set(points->data, row, 1, dy + p->y0);
  }

  return points;
}
/* Builds the 2×2 covariance matrix
 *
 *       | σx²     ρσxσy |
 *   Σ = |               |
 *       | ρσxσy   σy²   |
 *
 * of a bivariate gaussian from its standard
 * parameters (σ, ρ). The caller owns the
 * returned matrix (gsl_matrix_free).
 */
gsl_matrix* normal_cov(struct par *p) {
  gsl_matrix *sigma = gsl_matrix_alloc(2, 2);

  // off-diagonal term, shared by both entries
  const double offdiag = p->rho * p->sigma_x * p->sigma_y;

  gsl_matrix_set(sigma, 0, 0, pow(p->sigma_x, 2));
  gsl_matrix_set(sigma, 0, 1, offdiag);
  gsl_matrix_set(sigma, 1, 0, offdiag);
  gsl_matrix_set(sigma, 1, 1, pow(p->sigma_y, 2));

  return sigma;
}
/* Builds the mean vector μ = (x₀, y₀)
 * of a bivariate gaussian. The caller owns
 * the returned vector (gsl_vector_free).
 */
gsl_vector* normal_mean(struct par *p) {
  const double centre[2] = { p->x0, p->y0 };

  gsl_vector *mean = gsl_vector_alloc(2);
  for (size_t k = 0; k < 2; k++)
    gsl_vector_set(mean, k, centre[k]);

  return mean;
}
/* `fisher_proj(c1, c2)` computes the Fisher
 * projection direction, i.e. the one which
 * maximises the separation between the two
 * classes. The projection vector w is
 *
 *   w = Sw⁻¹ (μ₂ - μ₁)
 *
 * where Sw = Σ₁ + Σ₂ is the so-called within-class
 * covariance matrix. Both Σ and μ are built from
 * the parameters stored in the samples, not
 * estimated from the data points.
 *
 * The returned vector is not normalised and is
 * owned by the caller (gsl_vector_free).
 */
gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) {
  /* Within-class covariance: accumulate
   * Sw = Σ₁ + Σ₂ directly into Σ₁.
   */
  gsl_matrix *sw = normal_cov(&c1->p);
  gsl_matrix *sigma2 = normal_cov(&c2->p);
  gsl_matrix_add(sw, sigma2);
  gsl_matrix_free(sigma2);

  /* Replace Sw by its inverse, in place.
   * Note: Sw is symmetric, and positive-definite
   * whenever Σ₁ and Σ₂ are, so the Cholesky
   * decomposition is appropriate.
   */
  gsl_linalg_cholesky_decomp(sw);
  gsl_linalg_cholesky_invert(sw);

  /* Difference of the means, δ = μ₂ - μ₁,
   * computed in place in the μ₂ vector.
   */
  gsl_vector *mu1 = normal_mean(&c1->p);
  gsl_vector *delta = normal_mean(&c2->p);
  gsl_vector_sub(delta, mu1);
  gsl_vector_free(mu1);

  /* Finally w = Sw⁻¹ δ.
   * This uses the rather low-level CBLAS
   * function gsl_blas_dgemv (d = double,
   * ge = general matrix, mv = matrix-vector),
   * which computes
   *
   *   y := α op(A)x + βy
   *
   * here with α = 1, op = identity and β = 0.
   */
  gsl_vector *w = gsl_vector_alloc(2);
  gsl_blas_dgemv(CblasNoTrans, 1, sw, delta, 0, w);

  // free memory
  gsl_matrix_free(sw);
  gsl_vector_free(delta);

  return w;
}
/* Computes the determinant of a matrix
 * A = LLᵀ from its Cholesky decomposition:
 *
 *   |A| = |L|² = (∏ Lᵢᵢ)²
 *
 * `LL` must hold the decomposition, only
 * its diagonal is read.
 */
double gsl_linalg_cholesky_det(gsl_matrix *LL) {
  double prod = 1;

  // |L| is the product of the diagonal of L
  for (size_t k = 0; k < LL->size1; k++)
    prod *= gsl_matrix_get(LL, k, k);

  return prod * prod;
}
/* `fisher_cut(ratio, w, c1, c2)` computes
 * the threshold (cut), on the line given by
 * `w`, to discriminate the classes `c1`, `c2`;
 * with `ratio` being the ratio of their prior
 * probabilities.
 *
 * The cut is fixed by the condition of the
 * posterior probability being the same
 * for each class:
 *
 *   P(c₁|x)   p(x|c₁)⋅p(c₁)
 *   ------- = ------------- = 1;
 *   P(c₂|x)   p(x|c₂)⋅p(c₂)
 *
 * together with x = t⋅w.
 *
 * If p(x|c) is a bivariate normal PDF, taking
 * the logarithm gives the quadratic equation
 *
 *   a t² - 2b t + c = 0
 *
 * where
 *
 *   1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
 *   2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
 *   3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log(|Σ₁|/|Σ₂|) - 2 log(α)
 *   4. α = p(c₁)/p(c₂)
 *
 * of which the root
 *
 *   t = (b/a) + √((b/a)² - c/a)
 *
 * is taken. When a = 0 (which happens exactly
 * when Σ₁ = Σ₂) the equation is linear and
 * t = c/(2b) is used instead.
 *
 * The value returned is t⋅(w, w), that is the
 * projection (w, x) of the point x = t⋅w: the
 * same coordinate used by fisher_cut2.
 * The result is NaN if the equation has no real
 * solution.
 */
double fisher_cut(
  double ratio,
  gsl_vector *w,
  sample_t *c1, sample_t *c2) {
  /* Construct the covariances of each class... */
  gsl_matrix *cov1 = normal_cov(&c1->p);
  gsl_matrix *cov2 = normal_cov(&c2->p);

  /* and the mean values */
  gsl_vector *mu1 = normal_mean(&c1->p);
  gsl_vector *mu2 = normal_mean(&c2->p);

  /* Temporary vector/matrix for
   * intermediate results.
   */
  gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2);
  gsl_vector *vtemp = gsl_vector_alloc(w->size);

  /* Invert Σ₁ and Σ₂ in-place:
   * we only need the inverse matrices
   * in the steps to follow.
   */
  gsl_linalg_cholesky_decomp(cov1);
  gsl_linalg_cholesky_decomp(cov2);

  // the determinants must be read before inverting
  double det1 = gsl_linalg_cholesky_det(cov1);
  double det2 = gsl_linalg_cholesky_det(cov2);

  gsl_linalg_cholesky_invert(cov1);
  gsl_linalg_cholesky_invert(cov2);

  /* Compute the first term:
   *
   *   a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
   *
   */
  // mtemp = Σ₁⁻¹ - Σ₂⁻¹
  gsl_matrix_memcpy(mtemp, cov1);
  gsl_matrix_sub(mtemp, cov2);

  // vtemp = mtemp ⋅ w  (β = 0: vtemp is only written)
  gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp);

  // a = (w, vtemp)
  double a; gsl_blas_ddot(w, vtemp, &a);

  /* Compute the second term:
   *
   *   b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
   *
   */
  // vtemp = Σ₂⁻¹μ₂
  // vtemp = Σ₁⁻¹μ₁ - vtemp
  gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp);
  gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp);

  // b = (w, vtemp)
  double b; gsl_blas_ddot(w, vtemp, &b);

  /* Compute the last term:
   *
   *   c = log(|Σ₁|/|Σ₂|) - 2 log(α) + (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂)
   *
   */
  double c, temp;
  c = log(det1 / det2) - 2*log(ratio);

  gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, 0, vtemp);
  gsl_blas_ddot(mu1, vtemp, &temp);
  c += temp;

  gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp);
  gsl_blas_ddot(mu2, vtemp, &temp);
  c -= temp;

  /* The threshold is expressed as the projection
   * (w, x) of the point x = t⋅w, so t has to be
   * multiplied by (w, w) = |w|².
   */
  double wnorm2; gsl_blas_ddot(w, w, &wnorm2);

  // free memory
  gsl_vector_free(mu1);
  gsl_vector_free(mu2);
  gsl_vector_free(vtemp);
  gsl_matrix_free(cov1);
  gsl_matrix_free(cov2);
  gsl_matrix_free(mtemp);

  /* Equal covariances: the quadratic term vanishes
   * and dividing by `a` would give NaN, solve the
   * linear equation -2b t + c = 0 instead.
   */
  double t;
  if (a == 0)
    t = c / (2*b);
  else
    t = (b/a) + sqrt(pow(b/a, 2) - c/a);

  return t * wnorm2;
}
/* `fisher_cut2(ratio, w, c1, c2)` computes
 * the threshold (cut), on the line given by
 * `w`, to discriminate the classes `c1`, `c2`;
 * with `ratio` being the ratio of their prior
 * probabilities.
 *
 * The cut is fixed by the condition of the
 * posterior probability being the same
 * for each class:
 *
 *   P(c₁|y)   p(y|c₁)⋅p(c₁)
 *   ------- = ------------- = 1;
 *   P(c₂|y)   p(y|c₂)⋅p(c₂)
 *
 * where p(y|c) is the probability of the
 * projection y = (w, x) of a point x on the
 * Fisher line. If the classes are bivariate
 * gaussian then p(y|c) is simply a normal
 * distribution with
 *
 *   mean M = (w, μ),  variance S² = (w, Σw)
 *
 * Taking the logarithm and multiplying by
 * 2S₁²S₂² gives the quadratic equation
 *
 *   a y² - 2b y + c = 0
 *
 * where
 *
 *   1. a = S₁² - S₂²
 *   2. b = M₂S₁² - M₁S₂²
 *   3. c = M₂²S₁² - M₁²S₂² + S₁²S₂² (2 log(α) - log(S₁²/S₂²))
 *   4. α = p(c₁)/p(c₂)
 *
 * (the log(S₁²/S₂²) term comes from the 1/S
 * normalisation of the two PDFs). The root
 *
 *   y = (b/a) + √((b/a)² - c/a)
 *
 * is returned. When a = 0 (equal projected
 * variances) the equation is linear and
 * y = c/(2b) is returned instead.
 * The result is NaN if the equation has no real
 * solution.
 *
 * NOTE(review): the root above is always the
 * larger of the two; confirm it is the one lying
 * between the projected means for the inputs
 * this is meant to be used with.
 */
double fisher_cut2(
  double ratio,
  gsl_vector *w,
  sample_t *c1, sample_t *c2) {
  gsl_vector *vtemp = gsl_vector_alloc(w->size);

  /* Construct the covariances of each class... */
  gsl_matrix *cov1 = normal_cov(&c1->p);
  gsl_matrix *cov2 = normal_cov(&c2->p);

  /* and the mean values */
  gsl_vector *mu1 = normal_mean(&c1->p);
  gsl_vector *mu2 = normal_mean(&c2->p);

  /* Project the distributions onto the
   * w line to get two 1D gaussians
   */
  // means: M = (w, μ)
  double m1; gsl_blas_ddot(w, mu1, &m1);
  double m2; gsl_blas_ddot(w, mu2, &m2);

  // variances: S² = (w, Σw)
  gsl_blas_dgemv(CblasNoTrans, 1, cov1, w, 0, vtemp);
  double var1; gsl_blas_ddot(w, vtemp, &var1);
  gsl_blas_dgemv(CblasNoTrans, 1, cov2, w, 0, vtemp);
  double var2; gsl_blas_ddot(w, vtemp, &var2);

  // coefficients of a y² - 2b y + c = 0
  double a = var1 - var2;
  double b = m2*var1 - m1*var2;
  double c = m2*m2*var1 - m1*m1*var2
           + var1*var2 * (2*log(ratio) - log(var1/var2));

  // free memory
  gsl_vector_free(mu1);
  gsl_vector_free(mu2);
  gsl_vector_free(vtemp);
  gsl_matrix_free(cov1);
  gsl_matrix_free(cov2);

  /* Equal variances: the quadratic term vanishes
   * and dividing by `a` would give NaN, solve the
   * linear equation -2b y + c = 0 instead.
   */
  if (a == 0)
    return c / (2*b);

  return (b/a) + sqrt(pow(b/a, 2) - c/a);
}
/* Generates a signal and a noise sample,
 * computes their Fisher linear discriminant
 * and prints the result.
 *
 * A human-readable summary goes to stderr;
 * stdout receives, in order: the components
 * of w, the cut, a line with the sizes of the
 * samples and finally the points of the two
 * samples, for plotting.
 */
int main(int argc, char **args) {
  // the command line arguments are not used
  (void)argc;
  (void)args;

  // initialize RNG
  gsl_rng_env_setup();
  gsl_rng *r = gsl_rng_alloc(gsl_rng_default);

  /* Generate two classes of normally
   * distributed 2D points with different
   * parameters: signal and noise.
   */
  struct par par_sig = { 0, 0, 0.3, 0.3, 0.5 };
  struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 };

  // sample sizes
  size_t nsig = 800;
  size_t nnoise = 1000;

  sample_t *signal = generate_normal(r, nsig, &par_sig);
  sample_t *noise = generate_normal(r, nnoise, &par_noise);

  /* Fisher linear discriminant
   *
   * First calculate the direction w onto
   * which project the data points. Then the
   * cut which determines the class for each
   * projected point.
   */
  gsl_vector *w = fisher_proj(signal, noise);
  double t_cut = fisher_cut2(nsig / (double)nnoise,
                             w, signal, noise);

  fputs("# Linear Fisher discriminant\n\n", stderr);
  fprintf(stderr, "* w: [%.3f, %.3f]\n",
          gsl_vector_get(w, 0),
          gsl_vector_get(w, 1));
  fprintf(stderr, "* t_cut: %.3f\n", t_cut);

  gsl_vector_fprintf(stdout, w, "%g");
  printf("%f\n", t_cut);

  /* Print data to stdout for plotting.
   * Note: we print the sizes to be able
   * to set apart the two matrices.
   */
  // %zu is the conversion matching size_t
  printf("%zu %zu %d\n", nsig, nnoise, 2);
  gsl_matrix_fprintf(stdout, signal->data, "%g");
  gsl_matrix_fprintf(stdout, noise->data, "%g");

  // free memory
  gsl_vector_free(w);
  gsl_rng_free(r);
  sample_t_free(signal);
  sample_t_free(noise);

  return EXIT_SUCCESS;
}