ex-7: split into multiple files

2020-03-06 10:54:28 +00:00 · 2020-03-06 10:54:28 +00:00 · f3293ba808
commit f3293ba808
parent cf27610d53
6 changed files with 390 additions and 399 deletions
--- a/ex-7/common.c
+++ b/ex-7/common.c
@ -0,0 +1,46 @@
+#include "common.h"
+#include <gsl/gsl_randist.h>
+
+/* Create a sample of `n` points */
+sample_t* sample_t_alloc(size_t n, struct par p) {
+  sample_t *x = malloc(sizeof(sample_t));
+  x->p    = p;
+  x->data = gsl_matrix_alloc(n, 2);
+  return x;
+}
+
+/* Delete a sample */
+void sample_t_free(sample_t *x) {
+  gsl_matrix_free(x->data);
+  free(x);
+}
+
+
+/* `generate_normal(r, n, p)` will create
+ * a sample of `n` points, distributed
+ * according to a bivariate gaussian distribution
+ * of parameters `p`.
+ */
+sample_t* generate_normal(
+  gsl_rng *r, size_t n, struct par *p) {
+  sample_t *s = sample_t_alloc(n, *p);
+
+  for (size_t i = 0; i < n; i++) {
+    /* Generate a vector (x,y) with
+     * a standard (μ = 0) bivariate
+     * gaussian PDF.
+     */
+    double *x = gsl_matrix_ptr(s->data, i, 0);
+    double *y = gsl_matrix_ptr(s->data, i, 1);
+    gsl_ran_bivariate_gaussian(
+      r,
+      p->sigma_x, p->sigma_y, p->rho,               
+      x, y);
+
+    /* Shift the vector to (x₀,y₀) */
+    *x += p->x0;
+    *y += p->y0;
+  }
+
+  return s;
+}
--- a/ex-7/common.h
+++ b/ex-7/common.h
@ -0,0 +1,40 @@
+#pragma once
+
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_rng.h>
+
+/* Parameters for bivariate
+ * gaussian PDF
+ */
+struct par {
+  double x0;       // x mean
+  double y0;       // y mean
+  double sigma_x;  // x standard dev
+  double sigma_y;  // y standard dev
+  double rho;      // correlation: cov(x,y)/σx⋅σy
+};
+
+
+/* A sample of N 2D points is an
+ * N×2 matrix, with vectors as rows.
+ */
+typedef struct {
+  struct par p;
+  gsl_matrix *data;
+} sample_t;
+
+
+/* Create a sample of `n` points */
+sample_t* sample_t_alloc(size_t n, struct par p);
+
+
+/* Delete a sample */
+void sample_t_free(sample_t *x);
+
+
+/* `generate_normal(r, n, p)` will create
+ * a sample of `n` points, distributed
+ * according to a bivariate gaussian distribution
+ * of parameters `p`.
+ */
+sample_t* generate_normal(gsl_rng *r, size_t n, struct par *p);
--- a/ex-7/fisher.c
+++ b/ex-7/fisher.c
@ -0,0 +1,186 @@
+#include "fisher.h"
+#include <math.h>
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_linalg.h>
+
+/* Builds the covariance matrix Σ
+ * from the standard parameters (σ, ρ)
+ * of a bivariate gaussian.
+ */
+gsl_matrix* normal_cov(struct par *p) {
+  double var_x  = pow(p->sigma_x, 2);
+  double var_y  = pow(p->sigma_y, 2);
+  double cov_xy = p->rho * p->sigma_x * p->sigma_y;
+
+  gsl_matrix *cov = gsl_matrix_alloc(2, 2);
+  gsl_matrix_set(cov, 0, 0, var_x);
+  gsl_matrix_set(cov, 1, 1, var_y);
+  gsl_matrix_set(cov, 0, 1, cov_xy);
+  gsl_matrix_set(cov, 1, 0, cov_xy);
+
+  return cov;
+}
+
+
+/* Builds the mean vector of
+ * a bivariate gaussian.
+ */
+gsl_vector* normal_mean(struct par *p) {
+  gsl_vector *mu = gsl_vector_alloc(2);
+  gsl_vector_set(mu, 0, p->x0);
+  gsl_vector_set(mu, 1, p->y0);
+
+  return mu;
+}
+
+
+/* `fisher_proj(c1, c2)` computes the optimal
+ * projection map, which maximises the separation
+ * between the two classes.
+ * The projection vector w is given by
+ *
+ *   w = Sw⁻¹ (μ₂ - μ₁)
+ * 
+ * where Sw = Σ₁ + Σ₂ is the so-called within-class
+ * covariance matrix.
+ */
+gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) {
+  /* Construct the covariances of each class... */
+  gsl_matrix *cov1 = normal_cov(&c1->p);
+  gsl_matrix *cov2 = normal_cov(&c2->p);
+
+  /* and the mean values */
+  gsl_vector *mu1 = normal_mean(&c1->p);
+  gsl_vector *mu2 = normal_mean(&c2->p);
+
+  /* Compute the inverse of the within-class
+   * covariance Sw⁻¹.
+   * Note: by definition Σ is symmetrical and
+   * positive-definite, so Cholesky is appropriate.
+   */
+  gsl_matrix_add(cov1, cov2);
+  gsl_linalg_cholesky_decomp(cov1);
+  gsl_linalg_cholesky_invert(cov1);
+
+  /* Compute the difference of the means. */
+  gsl_vector *diff = gsl_vector_alloc(2);
+  gsl_vector_memcpy(diff, mu2);
+  gsl_vector_sub(diff, mu1);
+
+  /* Finally multiply diff by Sw.
+   * This uses the rather low-level CBLAS
+   * functions gsl_blas_dgemv:
+   *
+   *  ___ double          ___ 1 ___ nothing
+   * /                   /     /      
+   * dgemv computes y := α op(A)x + βy
+   *  \  \__matrix-vector           \____ 0
+   *   \__ A is symmetric
+   */
+  gsl_vector *w = gsl_vector_alloc(2);
+  gsl_blas_dgemv(
+    CblasNoTrans,  // do nothing on A
+    1,             // α = 1
+    cov1,          // matrix A
+    diff,          // vector x
+    0,             // β = 0
+    w);            // vector y
+
+  // free memory
+  gsl_matrix_free(cov1);
+  gsl_matrix_free(cov2);
+  gsl_vector_free(mu1);
+  gsl_vector_free(mu2);
+  gsl_vector_free(diff);
+
+  return w;
+}
+
+
+/* `fisher_cut(ratio, w, c1, c2)` computes
+ * the threshold (cut), on the line given by
+ * `w`, to discriminates the classes `c1`, `c2`;
+ * with `ratio` being the ratio of their prior
+ * probabilities.
+ *
+ * The cut is fixed by the condition of
+ * conditional probability being the
+ * same for each class:
+ *
+ *  P(c₁|x)    p(x|c₁)⋅p(c₁)
+ *  ------- = --------------- = 1;
+ *  P(c₂|x)    p(x|c₁)⋅p(c₂)
+ *
+ * where p(x|c) is the probability for point x
+ * along the fisher projection line. If the classes
+ * are bivariate gaussian then p(x|c) is simply
+ * given by a normal distribution:
+ *
+ *   Φ(μ=(w,μ), σ=(w,Σw))
+ *
+ * The solution is then
+ *
+ *   t = (b/a) + √((b/a)² - c/a);
+ *
+ * where
+ *
+ *  1. a = S₁² - S₂²
+ *  2. b = M₂S₁² - M₁S₂²
+ *  3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α)
+ *  4. α = p(c₁)/p(c₂)
+ *
+ */
+double fisher_cut(
+  double ratio,
+  gsl_vector *w,
+  sample_t *c1, sample_t *c2) {
+
+  /* Create a temporary vector variable */
+  gsl_vector *vtemp = gsl_vector_alloc(w->size);
+
+  /* Construct the covariances of each class... */
+  gsl_matrix *cov1 = normal_cov(&c1->p);
+  gsl_matrix *cov2 = normal_cov(&c2->p);
+
+  /* and the mean values */
+  gsl_vector *mu1 = normal_mean(&c1->p);
+  gsl_vector *mu2 = normal_mean(&c2->p);
+
+  /* Project the distribution onto the
+   * w line to get a 1D gaussian
+   */
+
+  /* Mean: mi = (w, μi) */
+  double m1; gsl_blas_ddot(w, mu1, &m1);
+  double m2; gsl_blas_ddot(w, mu2, &m2);
+
+  /* Variance: vari = (w, covi⋅w)
+   *
+   *   vtemp = covi⋅w
+   *   vari  = w⋅vtemp 
+   */
+  gsl_blas_dgemv(CblasNoTrans, 1, cov1, w,  0, vtemp);
+  double var1; gsl_blas_ddot(w, vtemp, &var1);
+  gsl_blas_dgemv(CblasNoTrans, 1, cov2, w,  0, vtemp);
+  double var2; gsl_blas_ddot(w, vtemp, &var2);
+
+  /* Solve the P(c₁|x) = P(c₂|x) equation:
+   *
+   *   ax² - 2bx + c = 0
+   *
+   * with a,b,c given as above.
+   *
+   * */
+  double a = var1 - var2;
+  double b = m2*var1 + m1*var2;
+  double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio);
+
+  // free memory
+  gsl_vector_free(mu1);
+  gsl_vector_free(mu2);
+  gsl_vector_free(vtemp);
+  gsl_matrix_free(cov1);
+  gsl_matrix_free(cov2);
+
+  return (b/a) + sqrt(pow(b/a, 2) - c/a);
+}
--- a/ex-7/fisher.h
+++ b/ex-7/fisher.h
@ -0,0 +1,66 @@
+#include "common.h"
+#include <gsl/gsl_matrix.h>
+
+/* Builds the covariance matrix Σ
+ * from the standard parameters (σ, ρ)
+ * of a bivariate gaussian.
+ */
+gsl_matrix* normal_cov(struct par *p);
+
+
+/* Builds the mean vector of
+ * a bivariate gaussian.
+ */
+gsl_vector* normal_mean(struct par *p);
+
+
+/* `fisher_proj(c1, c2)` computes the optimal
+ * projection map, which maximises the separation
+ * between the two classes.
+ * The projection vector w is given by
+ *
+ *   w = Sw⁻¹ (μ₂ - μ₁)
+ * 
+ * where Sw = Σ₁ + Σ₂ is the so-called within-class
+ * covariance matrix.
+ */
+gsl_vector* fisher_proj(sample_t *c1, sample_t *c2);
+
+
+/* `fisher_cut(ratio, w, c1, c2)` computes
+ * the threshold (cut), on the line given by
+ * `w`, to discriminates the classes `c1`, `c2`;
+ * with `ratio` being the ratio of their prior
+ * probabilities.
+ *
+ * The cut is fixed by the condition of
+ * conditional probability being the
+ * same for each class:
+ *
+ *  P(c₁|x)    p(x|c₁)⋅p(c₁)
+ *  ------- = --------------- = 1;
+ *  P(c₂|x)    p(x|c₁)⋅p(c₂)
+ *
+ * where p(x|c) is the probability for point x
+ * along the fisher projection line. If the classes
+ * are bivariate gaussian then p(x|c) is simply
+ * given by a normal distribution:
+ *
+ *   Φ(μ=(w,μ), σ=(w,Σw))
+ *
+ * The solution is then
+ *
+ *   t = (b/a) + √((b/a)² - c/a);
+ *
+ * where
+ *
+ *  1. a = S₁² - S₂²
+ *  2. b = M₂S₁² - M₁S₂²
+ *  3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α)
+ *  4. α = p(c₁)/p(c₂)
+ *
+ */
+double fisher_cut(
+  double ratio,
+  gsl_vector *w,
+  sample_t *c1, sample_t *c2);
--- a/ex-7/main.c
+++ b/ex-7/main.c
@ -1,383 +1,38 @@
 #include <stdio.h>
-#include <math.h>
-#include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
-#include <gsl/gsl_matrix.h>
-#include <gsl/gsl_linalg.h>
+#include <string.h>
+#include "fisher.h"

-/* Parameters for bivariate
- * gaussian PDF
- */
-struct par {
-  double x0;       // x mean
-  double y0;       // y mean
-  double sigma_x;  // x standard dev
-  double sigma_y;  // y standard dev
-  double rho;      // correlation: cov(x,y)/σx⋅σy
+/* Options for the program */
+struct options {
+  char *mode;
+  size_t nsig;
+  size_t nnoise;
 };


-/* A sample of N 2D points is an
- * N×2 matrix, with vectors as rows.
- */
-typedef struct {
-  struct par p;
-  gsl_matrix *data;
-} sample_t;
+int main(int argc, char **argv) {
+  /* Set default options */
+  struct options opts;
+  opts.mode   = "fisher";
+  opts.nsig   =  800;
+  opts.nnoise = 1000;

-
-/* Create a sample of `n` points */
-sample_t* sample_t_alloc(size_t n, struct par p) {
-  sample_t *x = malloc(sizeof(sample_t));
-  x->p    = p;
-  x->data = gsl_matrix_alloc(n, 2);
-  return x;
-}
-
-/* Delete a sample */
-void sample_t_free(sample_t *x) {
-  gsl_matrix_free(x->data);
-  free(x);
-}
-
-
-/* `generate_normal(r, n, p)` will create
- * a sample of `n` points, distributed
- * according to a bivariate gaussian distribution
- * of parameters `p`.
- */
-sample_t* generate_normal(
-  gsl_rng *r, size_t n, struct par *p) {
-  sample_t *s = sample_t_alloc(n, *p);
-
-  for (size_t i = 0; i < n; i++) {
-    /* Generate a vector (x,y) with
-     * a standard (μ = 0) bivariate
-     * gaussian PDF.
-     */
-    double *x = gsl_matrix_ptr(s->data, i, 0);
-    double *y = gsl_matrix_ptr(s->data, i, 1);
-    gsl_ran_bivariate_gaussian(
-      r,
-      p->sigma_x, p->sigma_y, p->rho,               
-      x, y);
-
-    /* Shift the vector to (x₀,y₀) */
-    *x += p->x0;
-    *y += p->y0;
+  /* Process CLI arguments */
+  for (size_t i = 1; i < argc; i++) {
+         if (!strcmp(argv[i], "-m")) opts.mode = argv[++i];
+    else if (!strcmp(argv[i], "-s")) opts.nsig = atol(argv[++i]);
+    else if (!strcmp(argv[i], "-n")) opts.nnoise = atol(argv[++i]);
+    else {
+      fprintf(stderr, "Usage: %s -[hiIntp]\n", argv[0]);
+      fprintf(stderr, "\t-h\tShow this message.\n");
+      fprintf(stderr, "\t-m MODE\tThe disciminant to use: 'fisher' for "
+                      "Fisher linear discriminant, 'percep' for perceptron.\n");
+      fprintf(stderr, "\t-s N\tThe number of events in signal class.\n");
+      fprintf(stderr, "\t-n N\tThe number of events in noise class.\n");
+      return EXIT_FAILURE;
+    }
  }

-  return s;
-}
-
-
-/* Builds the covariance matrix Σ
- * from the standard parameters (σ, ρ)
- * of a bivariate gaussian.
- */
-gsl_matrix* normal_cov(struct par *p) {
-  double var_x  = pow(p->sigma_x, 2);
-  double var_y  = pow(p->sigma_y, 2);
-  double cov_xy = p->rho * p->sigma_x * p->sigma_y;
-
-  gsl_matrix *cov = gsl_matrix_alloc(2, 2);
-  gsl_matrix_set(cov, 0, 0, var_x);
-  gsl_matrix_set(cov, 1, 1, var_y);
-  gsl_matrix_set(cov, 0, 1, cov_xy);
-  gsl_matrix_set(cov, 1, 0, cov_xy);
-
-  return cov;
-}
-
-
-/* Builds the mean vector of
- * a bivariate gaussian.
- */
-gsl_vector* normal_mean(struct par *p) {
-  gsl_vector *mu = gsl_vector_alloc(2);
-  gsl_vector_set(mu, 0, p->x0);
-  gsl_vector_set(mu, 1, p->y0);
-
-  return mu;
-}
-
-
-/* `fisher_proj(c1, c2)` computes the optimal
- * projection map, which maximises the separation
- * between the two classes.
- * The projection vector w is given by
- *
- *   w = Sw⁻¹ (μ₂ - μ₁)
- * 
- * where Sw = Σ₁ + Σ₂ is the so-called within-class
- * covariance matrix.
- */
-gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) {
-  /* Construct the covariances of each class... */
-  gsl_matrix *cov1 = normal_cov(&c1->p);
-  gsl_matrix *cov2 = normal_cov(&c2->p);
-
-  /* and the mean values */
-  gsl_vector *mu1 = normal_mean(&c1->p);
-  gsl_vector *mu2 = normal_mean(&c2->p);
-
-  /* Compute the inverse of the within-class
-   * covariance Sw⁻¹.
-   * Note: by definition Σ is symmetrical and
-   * positive-definite, so Cholesky is appropriate.
-   */
-  gsl_matrix_add(cov1, cov2);
-  gsl_linalg_cholesky_decomp(cov1);
-  gsl_linalg_cholesky_invert(cov1);
-
-  /* Compute the difference of the means. */
-  gsl_vector *diff = gsl_vector_alloc(2);
-  gsl_vector_memcpy(diff, mu2);
-  gsl_vector_sub(diff, mu1);
-
-  /* Finally multiply diff by Sw.
-   * This uses the rather low-level CBLAS
-   * functions gsl_blas_dgemv:
-   *
-   *  ___ double          ___ 1 ___ nothing
-   * /                   /     /      
-   * dgemv computes y := α op(A)x + βy
-   *  \  \__matrix-vector           \____ 0
-   *   \__ A is symmetric
-   */
-  gsl_vector *w = gsl_vector_alloc(2);
-  gsl_blas_dgemv(
-    CblasNoTrans,  // do nothing on A
-    1,             // α = 1
-    cov1,          // matrix A
-    diff,          // vector x
-    0,             // β = 0
-    w);            // vector y
-
-  // free memory
-  gsl_matrix_free(cov1);
-  gsl_matrix_free(cov2);
-  gsl_vector_free(mu1);
-  gsl_vector_free(mu2);
-  gsl_vector_free(diff);
-
-  return w;
-}
-
-
-/* Computes the determinant from the
- * Cholesky decomposition of matrix.
- * In this case it's simply the product
- * of the diagonal elements, squared.
- */
-double gsl_linalg_cholesky_det(gsl_matrix *LL) {
-  gsl_vector diag = gsl_matrix_diagonal(LL).vector;
-  double det = 1;
-  for (size_t i = 0; i < LL->size1; i++)
-    det *= gsl_vector_get(&diag, i);
-  return det * det;
-}
-
-
-/* `fisher_cut(ratio, w, c1, c2)` computes
- * the threshold (cut), on the line given by
- * `w`, to discriminates the classes `c1`, `c2`;
- * with `ratio` being the ratio of their prior
- * probabilities.
- *
- * The cut is fixed by the condition of
- * conditional probability being the
- * same for each class:
- *
- *  P(c₁|x)    p(x|c₁)⋅p(c₁)
- *  ------- = --------------- = 1;
- *  P(c₂|x)    p(x|c₁)⋅p(c₂)
- *
- * together with x = t⋅w.
- *
- * If p(x|c) is a bivariate normal PDF the
- * solution is found to be:
- *
- *   t = (b/a) + √((b/a)² - c/a);
- *
- * where
- *
- *  1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
- *  2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
- *  3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log|Σ₂|/log|Σ₁| - 2 log(α)
- *  4. α = p(c₁)/p(c₂)
- *
- */
-double fisher_cut(
-  double ratio,
-  gsl_vector *w,
-  sample_t *c1, sample_t *c2) {
-
-  /* Construct the covariances of each class... */
-  gsl_matrix *cov1 = normal_cov(&c1->p);
-  gsl_matrix *cov2 = normal_cov(&c2->p);
-
-  /* and the mean values */
-  gsl_vector *mu1 = normal_mean(&c1->p);
-  gsl_vector *mu2 = normal_mean(&c2->p);
-
-  /* Temporary vector/matrix for
-   * intermediate results.
-   */
-  gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2);
-  gsl_vector *vtemp = gsl_vector_alloc(w->size);
-
-  /* Invert Σ₁ and Σ₂ in-place:
-   * we only need the inverse matrices
-   * in the steps to follow.
-   */
-  gsl_linalg_cholesky_decomp(cov1);
-  gsl_linalg_cholesky_decomp(cov2);
-  // store determinant for later
-  double det1 = gsl_linalg_cholesky_det(cov1);
-  double det2 = gsl_linalg_cholesky_det(cov2);
-  gsl_linalg_cholesky_invert(cov1);
-  gsl_linalg_cholesky_invert(cov2);
-
-  /* Compute the first term:
-   *
-   *  a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w)
-   *
-   */
-  // mtemp = cov1 - cov2
-  gsl_matrix_memcpy(mtemp, cov1);
-  gsl_matrix_sub(mtemp, cov2);
-
-  // vtemp = mtemp ⋅ vtemp
-  gsl_vector_memcpy(vtemp, w);
-  gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp);
-
-  // a = (w, vtemp)
-  double a; gsl_blas_ddot(w, vtemp, &a);
-
-  /* Compute the second term:
-   *
-   *  b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂)
-   *
-   */
-  // vtemp = cov1 ⋅ mu1
-  // vtemp = cov2 ⋅ mu2 - vtemp
-  gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2,  0, vtemp);
-  gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp);
-
-  // b = (w, vtemp)
-  double b; gsl_blas_ddot(w, vtemp, &b);
-
-  /* Compute the last term:
-   *
-   *  c = log|Σ₂|/|Σ₁| + (μ₂, Σ₂⁻¹μ₂) - (μ₁, Σ₁⁻¹μ₁)
-   *
-   */
-  double c, temp;
-  c = log(det1 / det2) - 2*log(ratio);
-
-  gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1,  0, vtemp);
-  gsl_blas_ddot(mu1, vtemp, &temp);
-  c += temp;
-
-  gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2,  0, vtemp);
-  gsl_blas_ddot(mu2, vtemp, &temp);
-  c -= temp;
-
-  /* To get the thresold value we have to
-   * multiply t by |w| if not normalised
-   */
-  double norm; gsl_blas_ddot(w, w, &norm);
-
-  // free memory
-  gsl_vector_free(mu1);
-  gsl_vector_free(mu2);
-  gsl_vector_free(vtemp);
-  gsl_matrix_free(cov1);
-  gsl_matrix_free(cov2);
-  gsl_matrix_free(mtemp);
-
-  return ((b/a) + sqrt(pow(b/a, 2) - c/a)) * norm;
-}
-
-
-/* `fisher_cut2(ratio, w, c1, c2)` computes
- * the threshold (cut), on the line given by
- * `w`, to discriminates the classes `c1`, `c2`;
- * with `ratio` being the ratio of their prior
- * probabilities.
- *
- * The cut is fixed by the condition of
- * conditional probability being the
- * same for each class:
- *
- *  P(c₁|x)    p(x|c₁)⋅p(c₁)
- *  ------- = --------------- = 1;
- *  P(c₂|x)    p(x|c₁)⋅p(c₂)
- *
- * where p(x|c) is the probability for point x
- * along the fisher projection line. If the classes
- * are bivariate gaussian then p(x|c) is simply
- * given by a normal distribution:
- *
- *   Φ(μ=(w,μ), σ=(w,Σw))
- *
- * The solution is then
- *
- *   t = (b/a) + √((b/a)² - c/a);
- *
- * where
- *
- *  1. a = S₁² - S₂²
- *  2. b = M₂S₁² - M₁S₂²
- *  3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α)
- *  4. α = p(c₁)/p(c₂)
- *
- */
-double fisher_cut2(
-  double ratio,
-  gsl_vector *w,
-  sample_t *c1, sample_t *c2) {
-
-  gsl_vector *vtemp = gsl_vector_alloc(w->size);
-
-  /* Construct the covariances of each class... */
-  gsl_matrix *cov1 = normal_cov(&c1->p);
-  gsl_matrix *cov2 = normal_cov(&c2->p);
-
-  /* and the mean values */
-  gsl_vector *mu1 = normal_mean(&c1->p);
-  gsl_vector *mu2 = normal_mean(&c2->p);
-
-  /* Project the distribution onto the
-   * w line to get a 1D gaussian
-   */
-  // mean
-  double m1; gsl_blas_ddot(w, mu1, &m1);
-  double m2; gsl_blas_ddot(w, mu2, &m2);
-
-  // variances
-  gsl_blas_dgemv(CblasNoTrans, 1, cov1, w,  0, vtemp);
-  double var1; gsl_blas_ddot(w, vtemp, &var1);
-  gsl_blas_dgemv(CblasNoTrans, 1, cov2, w,  0, vtemp);
-  double var2; gsl_blas_ddot(w, vtemp, &var2);
-
-  double a = var1 - var2;
-  double b = m2*var1 + m1*var2;
-  double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio);
-
-  // free memory
-  gsl_vector_free(mu1);
-  gsl_vector_free(mu2);
-  gsl_vector_free(vtemp);
-  gsl_matrix_free(cov1);
-  gsl_matrix_free(cov2);
-
-  return (b/a) + sqrt(pow(b/a, 2) - c/a);
-}
-
-
-int main(int argc, char **args) {
  // initialize RNG
  gsl_rng_env_setup();
  gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
@ -389,38 +44,36 @@ int main(int argc, char **args) {
  struct par par_sig   = { 0, 0, 0.3, 0.3, 0.5 };
  struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 };

-  // sample sizes
-  size_t nsig   =  800;
-  size_t nnoise = 1000;
+  sample_t *signal = generate_normal(r, opts.nsig, &par_sig);
+  sample_t *noise  = generate_normal(r, opts.nnoise, &par_noise);

-  sample_t *signal = generate_normal(r, nsig, &par_sig);
-  sample_t *noise  = generate_normal(r, nnoise, &par_noise);
+  if (!strcmp(opts.mode, "fisher")) {
+    /* Fisher linear discriminant
+     *
+     * First calculate the direction w onto
+     * which project the data points. Then the
+     * cut which determines the class for each
+     * projected point.
+     */
+    double ratio = opts.nsig / (double)opts.nnoise;
+    gsl_vector *w = fisher_proj(signal, noise);
+    double t_cut  = fisher_cut(ratio, w, signal, noise);

-  /* Fisher linear discriminant
-   *
-   * First calculate the direction w onto
-   * which project the data points. Then the
-   * cut which determines the class for each
-   * projected point.
-   */
-  gsl_vector *w = fisher_proj(signal, noise);
-  double t_cut  = fisher_cut2(nsig / (double)nnoise,
-                              w, signal, noise);
+    fputs("# Linear Fisher discriminant\n\n", stderr);
+    fprintf(stderr, "* w: [%.3f, %.3f]\n",
+        gsl_vector_get(w, 0),
+        gsl_vector_get(w, 1));
+    fprintf(stderr, "* t_cut: %.3f\n", t_cut);

-  fputs("# Linear Fisher discriminant\n\n", stderr);
-  fprintf(stderr, "* w: [%.3f, %.3f]\n",
-      gsl_vector_get(w, 0),
-      gsl_vector_get(w, 1));
-  fprintf(stderr, "* t_cut: %.3f\n", t_cut);
-
-  gsl_vector_fprintf(stdout, w, "%g");
-  printf("%f\n", t_cut);
+    gsl_vector_fprintf(stdout, w, "%g");
+    printf("%f\n", t_cut);
+  }

  /* Print data to stdout for plotting.
-   * Note: we print the sizes  to be able
+   * Note: we print the sizes to be able
   * to set apart the two matrices.
   */
-  printf("%ld %ld %d\n", nsig, nnoise, 2);
+  printf("%ld %ld %d\n", opts.nsig, opts.nnoise, 2);
  gsl_matrix_fprintf(stdout, signal->data, "%g");
  gsl_matrix_fprintf(stdout, noise->data,  "%g");

--- a/4
+++ b/4
@ -12,7 +12,7 @@ ex-1/bin/%: ex-1/%.c
 ex-2/bin/%: ex-2/%.c
 	$(CCOMPILE)

-ex-3/bin/main: ex-3/common.c ex-3/likelihood.c ex-3/chisquared.c
+ex-3/bin/main: ex-3/main.c ex-3/common.c ex-3/likelihood.c ex-3/chisquared.c
 	$(CCOMPILE)

 ex-4/bin/main: ex-4/main.c
@ -24,7 +24,7 @@ ex-5/bin/%: ex-5/%.c
 ex-6/bin/main: ex-6/rl.c ex-6/fft.c
 	$(CCOMPILE)

-ex-7/main: ex-7/main.c
+ex-7/bin/main: ex-7/main.c ex-7/common.c ex-7/fisher.c
 	$(CCOMPILE)

 misc/pdfs: misc/pdfs.c