analistica/ex-7/main.c

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <gsl/gsl_blas.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>

/* Parameters of a bivariate
 * gaussian PDF.
 *
 * Note: the order of the fields matters,
 * `main` fills this struct with positional
 * initialisers.
 */
struct par {
  double x0;       // x mean
  double y0;       // y mean
  double sigma_x;  // x standard deviation
  double sigma_y;  // y standard deviation
  double rho;      // correlation: cov(x,y)/(σx⋅σy)
};


/* A sample of N 2D points is stored as an
 * N×2 matrix, with one point (x, y) per row,
 * together with the parameters of the
 * distribution the points are drawn from.
 */
typedef struct {
  struct par p;      // parameters of the generating PDF
  gsl_matrix *data;  // N×2 matrix of points, one per row
} sample_t;


/* Create a sample of `n` points, tagged with
 * the distribution parameters `p` (copied by value).
 *
 * The N×2 point matrix is allocated but its
 * contents are left uninitialised.
 *
 * Returns NULL if an allocation fails, without
 * leaking the partially built sample. (With the
 * default GSL error handler a failed matrix
 * allocation aborts the program before returning.)
 * The result must be released with `sample_t_free`.
 */
sample_t* sample_t_alloc(size_t n, struct par p) {
  sample_t *x = malloc(sizeof *x);
  if (x == NULL)
    return NULL;

  x->p    = p;
  x->data = gsl_matrix_alloc(n, 2);
  if (x->data == NULL) {
    // don't leak the struct if the matrix is missing
    free(x);
    return NULL;
  }

  return x;
}

/* Delete a sample: releases the point matrix
 * and then the sample itself.
 * Like `free`, passing NULL is a no-op.
 */
void sample_t_free(sample_t *x) {
  if (x == NULL)
    return;

  gsl_matrix_free(x->data);
  free(x);
}


/* `generate_normal(r, n, p)` draws `n` points
 * from a bivariate gaussian distribution of
 * parameters `p`, using the generator `r`.
 *
 * The returned sample keeps a copy of `*p` and
 * must be released with `sample_t_free`.
 */
sample_t* generate_normal(
  gsl_rng *r, size_t n, struct par *p) {
  sample_t *sample = sample_t_alloc(n, *p);

  for (size_t row = 0; row < n; row++) {
    /* Draw a correlated pair (dx,dy) from
     * the gaussian centred in the origin...
     */
    double dx, dy;
    gsl_ran_bivariate_gaussian(
      r, p->sigma_x, p->sigma_y, p->rho, &dx, &dy);

    /* ...and store it translated to (x₀,y₀) */
    gsl_matrix_set(sample->data, row, 0, dx + p->x0);
    gsl_matrix_set(sample->data, row, 1, dy + p->y0);
  }

  return sample;
}


/* Builds the 2×2 covariance matrix
 *
 *       ⎡  σx²    ρσxσy ⎤
 *   Σ = ⎢               ⎥
 *       ⎣ ρσxσy    σy²  ⎦
 *
 * from the standard parameters (σ, ρ)
 * of a bivariate gaussian.
 * The caller owns the returned matrix.
 */
gsl_matrix* normal_cov(struct par *p) {
  const double sx = p->sigma_x;
  const double sy = p->sigma_y;
  const double off_diag = p->rho * sx * sy;

  gsl_matrix *sigma = gsl_matrix_alloc(2, 2);

  // first row
  gsl_matrix_set(sigma, 0, 0, pow(sx, 2));
  gsl_matrix_set(sigma, 0, 1, off_diag);
  // second row
  gsl_matrix_set(sigma, 1, 0, off_diag);
  gsl_matrix_set(sigma, 1, 1, pow(sy, 2));

  return sigma;
}


/* Builds the mean vector μ = (x₀, y₀)
 * of a bivariate gaussian.
 * The caller owns the returned vector.
 */
gsl_vector* normal_mean(struct par *p) {
  const double centre[2] = { p->x0, p->y0 };

  gsl_vector *mean = gsl_vector_alloc(2);
  for (size_t k = 0; k < 2; k++)
    gsl_vector_set(mean, k, centre[k]);

  return mean;
}


/* `fisher_proj(c1, c2)` computes the direction
 * of the Fisher projection, i.e. the one which
 * maximises the separation between the two classes.
 * The projection vector w is given by
 *
 *   w = Sw⁻¹ (μ₂ - μ₁)
 *
 * where Sw = Σ₁ + Σ₂ is the so-called within-class
 * covariance matrix.
 *
 * Σ and μ are built from the parameters stored
 * in each sample, not estimated from the points.
 * The vector w is not normalised and is owned
 * by the caller.
 */
gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) {
  /* Accumulate Sw = Σ₁ + Σ₂ in-place */
  gsl_matrix *sw     = normal_cov(&c1->p);
  gsl_matrix *sigma2 = normal_cov(&c2->p);
  gsl_matrix_add(sw, sigma2);
  gsl_matrix_free(sigma2);

  /* Replace Sw with its inverse Sw⁻¹.
   * Note: a covariance matrix is symmetric and
   * positive-definite, so Cholesky is appropriate.
   */
  gsl_linalg_cholesky_decomp(sw);
  gsl_linalg_cholesky_invert(sw);

  /* delta = μ₂ - μ₁ */
  gsl_vector *delta = normal_mean(&c2->p);
  gsl_vector *mean1 = normal_mean(&c1->p);
  gsl_vector_sub(delta, mean1);
  gsl_vector_free(mean1);

  /* Finally w = Sw⁻¹ delta.
   * dgemv is the BLAS routine for the (d)ouble
   * precision, (ge)neral (m)atrix-(v)ector product
   *
   *   y := α op(A)x + βy
   *
   * here with op = identity, α = 1 and β = 0.
   */
  gsl_vector *w = gsl_vector_alloc(2);
  gsl_blas_dgemv(CblasNoTrans, 1, sw, delta, 0, w);

  // free memory
  gsl_vector_free(delta);
  gsl_matrix_free(sw);

  return w;
}


/* Computes the determinant of a matrix A
 * from its Cholesky decomposition A = LLᵀ,
 * stored in `LL`.
 * Since |A| = |L|² and L is triangular, this
 * is the square of the product of the
 * diagonal elements of `LL`.
 */
double gsl_linalg_cholesky_det(gsl_matrix *LL) {
  gsl_vector_view diagonal = gsl_matrix_diagonal(LL);

  // |L| = product of the diagonal
  double root_det = 1;
  for (size_t k = 0; k < LL->size1; k++) {
    root_det *= gsl_vector_get(&diagonal.vector, k);
  }

  return root_det * root_det;
}


/* `fisher_cut(ratio, w, c1, c2)` computes
 * the threshold (cut), on the line given by
 * `w`, that discriminates the classes `c1`, `c2`;
 * with `ratio` being the ratio of their prior
 * probabilities, α = p(c₁)/p(c₂).
 *
 * The cut is fixed by the condition of the
 * posterior probability being the same
 * for each class:
 *
 *  P(c₁|x)    p(x|c₁)⋅p(c₁)
 *  ------- = --------------- = 1;
 *  P(c₂|x)    p(x|c₂)⋅p(c₂)
 *
 * together with x = t⋅w.
 *
 * If p(x|c) is a bivariate normal PDF, taking
 * -2 log of the condition gives the quadratic
 *
 *   a t² - 2b t + c = 0
 *
 * where
 *
 *  1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
 *  2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
 *  3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log(|Σ₁|/|Σ₂|) - 2 log(α)
 *
 * of which the following root is taken:
 *
 *   t = (b/a) + √((b/a)² - c/a);
 *
 * If the classes have the same covariance then
 * a = 0 and the equation is linear: t = c/(2b).
 *
 * The value returned is t⋅(w,w), that is the
 * scalar product (w, x) at the cut point x = t⋅w.
 * It is NaN when the quadratic has no real roots
 * and not finite when a = b = 0.
 */
double fisher_cut(
  double ratio,
  gsl_vector *w,
  sample_t *c1, sample_t *c2) {

  /* Construct the covariances of each class... */
  gsl_matrix *cov1 = normal_cov(&c1->p);
  gsl_matrix *cov2 = normal_cov(&c2->p);

  /* and the mean values */
  gsl_vector *mu1 = normal_mean(&c1->p);
  gsl_vector *mu2 = normal_mean(&c2->p);

  /* Temporary vector/matrix for
   * intermediate results.
   */
  gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2);
  gsl_vector *vtemp = gsl_vector_alloc(w->size);

  /* Invert Σ₁ and Σ₂ in-place:
   * we only need the inverse matrices
   * in the steps to follow.
   */
  gsl_linalg_cholesky_decomp(cov1);
  gsl_linalg_cholesky_decomp(cov2);
  // the determinants must be read from the Cholesky
  // factors, before the inversion overwrites them
  double det1 = gsl_linalg_cholesky_det(cov1);
  double det2 = gsl_linalg_cholesky_det(cov2);
  gsl_linalg_cholesky_invert(cov1);
  gsl_linalg_cholesky_invert(cov2);

  /* Compute the first term:
   *
   *  a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
   *
   */
  // mtemp = Σ₁⁻¹ - Σ₂⁻¹
  gsl_matrix_memcpy(mtemp, cov1);
  gsl_matrix_sub(mtemp, cov2);

  // vtemp = mtemp ⋅ w
  // (with β = 0 the old content of vtemp is discarded)
  gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp);

  // a = (w, vtemp)
  double a; gsl_blas_ddot(w, vtemp, &a);

  /* Compute the second term:
   *
   *  b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
   *
   */
  // vtemp = Σ₂⁻¹μ₂
  // vtemp = Σ₁⁻¹μ₁ - vtemp
  gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2,  0, vtemp);
  gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp);

  // b = (w, vtemp)
  double b; gsl_blas_ddot(w, vtemp, &b);

  /* Compute the last term:
   *
   *  c = log(|Σ₁|/|Σ₂|) - 2 log(α) + (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂)
   *
   */
  double c, temp;
  c = log(det1 / det2) - 2*log(ratio);

  gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1,  0, vtemp);
  gsl_blas_ddot(mu1, vtemp, &temp);
  c += temp;

  gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2,  0, vtemp);
  gsl_blas_ddot(mu2, vtemp, &temp);
  c -= temp;

  /* To express the threshold as a value of the
   * projection (w, x) we have to multiply t
   * by (w, w) = |w|², since x = t⋅w.
   */
  double w_sq; gsl_blas_ddot(w, w, &w_sq);

  // free memory
  gsl_vector_free(mu1);
  gsl_vector_free(mu2);
  gsl_vector_free(vtemp);
  gsl_matrix_free(cov1);
  gsl_matrix_free(cov2);
  gsl_matrix_free(mtemp);

  /* Solve a t² - 2b t + c = 0.
   * With equal covariances the quadratic term
   * vanishes exactly: avoid dividing by zero and
   * solve the remaining linear equation instead.
   */
  double t;
  if (a == 0)
    t = c / (2*b);
  else
    t = (b/a) + sqrt(pow(b/a, 2) - c/a);

  return t * w_sq;
}


/* `fisher_cut2(ratio, w, c1, c2)` computes
 * the threshold (cut), on the line given by
 * `w`, that discriminates the classes `c1`, `c2`;
 * with `ratio` being the ratio of their prior
 * probabilities, α = p(c₁)/p(c₂).
 *
 * The cut is fixed by the condition of the
 * posterior probability being the same
 * for each class:
 *
 *  P(c₁|x)    p(x|c₁)⋅p(c₁)
 *  ------- = --------------- = 1;
 *  P(c₂|x)    p(x|c₂)⋅p(c₂)
 *
 * where p(x|c) is the probability for point x
 * along the fisher projection line. If the classes
 * are bivariate gaussian then p(x|c) is simply
 * given by a normal distribution with
 *
 *   mean M = (w,μ),  variance S² = (w,Σw)
 *
 * Taking the logarithm of α⋅p(t|c₁) = p(t|c₂) and
 * multiplying by 2S₁²S₂² gives the quadratic
 *
 *   a t² - 2b t + c = 0
 *
 * where
 *
 *  1. a = S₁² - S₂²
 *  2. b = M₂S₁² - M₁S₂²
 *  3. c = M₂²S₁² - M₁²S₂² + S₁²S₂² (2 log(α) + log(S₂²/S₁²))
 *
 * The log(S₂²/S₁²) term comes from the different
 * normalisation 1/√(2πS²) of the two PDFs.
 * The solution taken is
 *
 *   t = (b/a) + √((b/a)² - c/a);
 *
 * which is always the larger of the two roots.
 * NOTE(review): this is the root lying between the
 * projected means when class 1 is the narrower one
 * and M₁ < M₂, as in `main`; confirm it is the
 * intended one for other configurations.
 *
 * If S₁² = S₂² then a = 0 and the equation is
 * linear: t = c/(2b).
 *
 * The cut is a value of the projection (w, x).
 * It is NaN when the quadratic has no real roots
 * and not finite when a = b = 0.
 */
double fisher_cut2(
  double ratio,
  gsl_vector *w,
  sample_t *c1, sample_t *c2) {

  gsl_vector *vtemp = gsl_vector_alloc(w->size);

  /* Construct the covariances of each class... */
  gsl_matrix *cov1 = normal_cov(&c1->p);
  gsl_matrix *cov2 = normal_cov(&c2->p);

  /* and the mean values */
  gsl_vector *mu1 = normal_mean(&c1->p);
  gsl_vector *mu2 = normal_mean(&c2->p);

  /* Project the distribution onto the
   * w line to get a 1D gaussian
   */
  // means: M = (w, μ)
  double m1; gsl_blas_ddot(w, mu1, &m1);
  double m2; gsl_blas_ddot(w, mu2, &m2);

  // variances: S² = (w, Σw)
  gsl_blas_dgemv(CblasNoTrans, 1, cov1, w,  0, vtemp);
  double var1; gsl_blas_ddot(w, vtemp, &var1);
  gsl_blas_dgemv(CblasNoTrans, 1, cov2, w,  0, vtemp);
  double var2; gsl_blas_ddot(w, vtemp, &var2);

  double a = var1 - var2;
  // note the minus sign: b comes from expanding
  // S₁²(t - M₂)² - S₂²(t - M₁)²
  double b = m2*var1 - m1*var2;
  double c = m2*m2*var1 - m1*m1*var2
           + var1*var2 * (2*log(ratio) + log(var2/var1));

  // free memory
  gsl_vector_free(mu1);
  gsl_vector_free(mu2);
  gsl_vector_free(vtemp);
  gsl_matrix_free(cov1);
  gsl_matrix_free(cov2);

  /* With equal projected variances the quadratic
   * term vanishes exactly: avoid dividing by zero
   * and solve the remaining linear equation.
   */
  if (a == 0)
    return c / (2*b);

  return (b/a) + sqrt(pow(b/a, 2) - c/a);
}


/* Generates two classes of bivariate gaussian
 * points (signal and noise), computes their
 * Fisher linear discriminant and prints:
 *
 *  - on stderr, a human readable summary;
 *  - on stdout, the projection vector w, the cut,
 *    the sample sizes and the two samples, to be
 *    read by a plotting script.
 *
 * The RNG type and seed can be set through the
 * environment, see `gsl_rng_env_setup`.
 */
int main(int argc, char **args) {
  // no command line options are supported
  (void)argc;
  (void)args;

  // initialize RNG
  gsl_rng_env_setup();
  gsl_rng *r = gsl_rng_alloc(gsl_rng_default);

  /* Generate two classes of normally
   * distributed 2D points with different
   * parameters: signal and noise.
   */
  struct par par_sig   = { 0, 0, 0.3, 0.3, 0.5 };
  struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 };

  // sample sizes
  size_t nsig   =  800;
  size_t nnoise = 1000;

  sample_t *signal = generate_normal(r, nsig, &par_sig);
  sample_t *noise  = generate_normal(r, nnoise, &par_noise);

  /* Fisher linear discriminant
   *
   * First calculate the direction w onto
   * which to project the data points. Then the
   * cut which determines the class of each
   * projected point. The ratio of the sample
   * sizes is used as the ratio of the priors.
   */
  gsl_vector *w = fisher_proj(signal, noise);
  double t_cut  = fisher_cut2(nsig / (double)nnoise,
                              w, signal, noise);

  fputs("# Linear Fisher discriminant\n\n", stderr);
  fprintf(stderr, "* w: [%.3f, %.3f]\n",
      gsl_vector_get(w, 0),
      gsl_vector_get(w, 1));
  fprintf(stderr, "* t_cut: %.3f\n", t_cut);

  gsl_vector_fprintf(stdout, w, "%g");
  printf("%f\n", t_cut);

  /* Print data to stdout for plotting.
   * Note: we print the sizes to be able
   * to tell the two matrices apart.
   * The sizes are size_t, hence %zu.
   */
  printf("%zu %zu %d\n", nsig, nnoise, 2);
  gsl_matrix_fprintf(stdout, signal->data, "%g");
  gsl_matrix_fprintf(stdout, noise->data,  "%g");

  // free memory
  gsl_vector_free(w);
  gsl_rng_free(r);
  sample_t_free(signal);
  sample_t_free(noise);

  return EXIT_SUCCESS;
}