#include #include #include #include #include #include /* Parameters for bivariate * gaussian PDF */ struct par { double x0; // x mean double y0; // y mean double sigma_x; // x standard dev double sigma_y; // y standard dev double rho; // correlation: cov(x,y)/σx⋅σy }; /* A sample of N 2D points is an * N×2 matrix, with vectors as rows. */ typedef struct { struct par p; gsl_matrix *data; } sample_t; /* Create a sample of `n` points */ sample_t* sample_t_alloc(size_t n, struct par p) { sample_t *x = malloc(sizeof(sample_t)); x->p = p; x->data = gsl_matrix_alloc(n, 2); return x; } /* Delete a sample */ void sample_t_free(sample_t *x) { gsl_matrix_free(x->data); free(x); } /* `generate_normal(r, n, p)` will create * a sample of `n` points, distributed * according to a bivariate gaussian distribution * of parameters `p`. */ sample_t* generate_normal( gsl_rng *r, size_t n, struct par *p) { sample_t *s = sample_t_alloc(n, *p); for (size_t i = 0; i < n; i++) { /* Generate a vector (x,y) with * a standard (μ = 0) bivariate * gaussian PDF. */ double *x = gsl_matrix_ptr(s->data, i, 0); double *y = gsl_matrix_ptr(s->data, i, 1); gsl_ran_bivariate_gaussian( r, p->sigma_x, p->sigma_y, p->rho, x, y); /* Shift the vector to (x₀,y₀) */ *x += p->x0; *y += p->y0; } return s; } /* Builds the covariance matrix Σ * from the standard parameters (σ, ρ) * of a bivariate gaussian. */ gsl_matrix* normal_cov(struct par *p) { double var_x = pow(p->sigma_x, 2); double var_y = pow(p->sigma_y, 2); double cov_xy = p->rho * p->sigma_x * p->sigma_y; gsl_matrix *cov = gsl_matrix_alloc(2, 2); gsl_matrix_set(cov, 0, 0, var_x); gsl_matrix_set(cov, 1, 1, var_y); gsl_matrix_set(cov, 0, 1, cov_xy); gsl_matrix_set(cov, 1, 0, cov_xy); return cov; } /* Builds the mean vector of * a bivariate gaussian. */ gsl_vector* normal_mean(struct par *p) { gsl_vector *mu = gsl_vector_alloc(2); gsl_vector_set(mu, 0, p->x0); gsl_vector_set(mu, 1, p->y0); return mu; } /* `fisher_proj(c1, c2)` computes the optimal * projection map, which maximises the separation * between the two classes. * The projection vector w is given by * * w = Sw⁻¹ (μ₂ - μ₁) * * where Sw = Σ₁ + Σ₂ is the so-called within-class * covariance matrix. */ gsl_vector* fisher_proj(sample_t *c1, sample_t *c2) { /* Construct the covariances of each class... */ gsl_matrix *cov1 = normal_cov(&c1->p); gsl_matrix *cov2 = normal_cov(&c2->p); /* and the mean values */ gsl_vector *mu1 = normal_mean(&c1->p); gsl_vector *mu2 = normal_mean(&c2->p); /* Compute the inverse of the within-class * covariance Sw⁻¹. * Note: by definition Σ is symmetrical and * positive-definite, so Cholesky is appropriate. */ gsl_matrix_add(cov1, cov2); gsl_linalg_cholesky_decomp(cov1); gsl_linalg_cholesky_invert(cov1); /* Compute the difference of the means. */ gsl_vector *diff = gsl_vector_alloc(2); gsl_vector_memcpy(diff, mu2); gsl_vector_sub(diff, mu1); /* Finally multiply diff by Sw. * This uses the rather low-level CBLAS * functions gsl_blas_dgemv: * * ___ double ___ 1 ___ nothing * / / / * dgemv computes y := α op(A)x + βy * \ \__matrix-vector \____ 0 * \__ A is symmetric */ gsl_vector *w = gsl_vector_alloc(2); gsl_blas_dgemv( CblasNoTrans, // do nothing on A 1, // α = 1 cov1, // matrix A diff, // vector x 0, // β = 0 w); // vector y // free memory gsl_matrix_free(cov1); gsl_matrix_free(cov2); gsl_vector_free(mu1); gsl_vector_free(mu2); gsl_vector_free(diff); return w; } /* Computes the determinant from the * Cholesky decomposition of matrix. * In this case it's simply the product * of the diagonal elements, squared. */ double gsl_linalg_cholesky_det(gsl_matrix *LL) { gsl_vector diag = gsl_matrix_diagonal(LL).vector; double det = 1; for (size_t i = 0; i < LL->size1; i++) det *= gsl_vector_get(&diag, i); return det * det; } /* `fisher_cut(ratio, w, c1, c2)` computes * the threshold (cut), on the line given by * `w`, to discriminates the classes `c1`, `c2`; * with `ratio` being the ratio of their prior * probabilities. * * The cut is fixed by the condition of * conditional probability being the * same for each class: * * P(c₁|x) p(x|c₁)⋅p(c₁) * ------- = --------------- = 1; * P(c₂|x) p(x|c₁)⋅p(c₂) * * together with x = t⋅w. * * If p(x|c) is a bivariate normal PDF the * solution is found to be: * * t = (b/a) + √((b/a)² - c/a); * * where * * 1. a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w) * 2. b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂) * 3. c = (μ₁, Σ₁⁻¹μ₁) - (μ₂, Σ₂⁻¹μ₂) + log|Σ₂|/log|Σ₁| - 2 log(α) * 4. α = p(c₁)/p(c₂) * */ double fisher_cut( double ratio, gsl_vector *w, sample_t *c1, sample_t *c2) { /* Construct the covariances of each class... */ gsl_matrix *cov1 = normal_cov(&c1->p); gsl_matrix *cov2 = normal_cov(&c2->p); /* and the mean values */ gsl_vector *mu1 = normal_mean(&c1->p); gsl_vector *mu2 = normal_mean(&c2->p); /* Temporary vector/matrix for * intermediate results. */ gsl_matrix *mtemp = gsl_matrix_alloc(cov1->size1, cov1->size2); gsl_vector *vtemp = gsl_vector_alloc(w->size); /* Invert Σ₁ and Σ₂ in-place: * we only need the inverse matrices * in the steps to follow. */ gsl_linalg_cholesky_decomp(cov1); gsl_linalg_cholesky_decomp(cov2); // store determinant for later double det1 = gsl_linalg_cholesky_det(cov1); double det2 = gsl_linalg_cholesky_det(cov2); gsl_linalg_cholesky_invert(cov1); gsl_linalg_cholesky_invert(cov2); /* Compute the first term: * * a = (w, (Σ₁⁻¹ - Σ₂⁻¹)w) * */ // mtemp = cov1 - cov2 gsl_matrix_memcpy(mtemp, cov1); gsl_matrix_sub(mtemp, cov2); // vtemp = mtemp ⋅ vtemp gsl_vector_memcpy(vtemp, w); gsl_blas_dgemv(CblasNoTrans, 1, mtemp, w, 0, vtemp); // a = (w, vtemp) double a; gsl_blas_ddot(w, vtemp, &a); /* Compute the second term: * * b = (w, Σ₁⁻¹μ₁ - Σ₂⁻¹μ₂) * */ // vtemp = cov1 ⋅ mu1 // vtemp = cov2 ⋅ mu2 - vtemp gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp); gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, -1, vtemp); // b = (w, vtemp) double b; gsl_blas_ddot(w, vtemp, &b); /* Compute the last term: * * c = log|Σ₂|/|Σ₁| + (μ₂, Σ₂⁻¹μ₂) - (μ₁, Σ₁⁻¹μ₁) * */ double c, temp; c = log(det1 / det2) - 2*log(ratio); gsl_blas_dgemv(CblasNoTrans, 1, cov1, mu1, 0, vtemp); gsl_blas_ddot(mu1, vtemp, &temp); c += temp; gsl_blas_dgemv(CblasNoTrans, 1, cov2, mu2, 0, vtemp); gsl_blas_ddot(mu2, vtemp, &temp); c -= temp; /* To get the thresold value we have to * multiply t by |w| if not normalised */ double norm; gsl_blas_ddot(w, w, &norm); // free memory gsl_vector_free(mu1); gsl_vector_free(mu2); gsl_vector_free(vtemp); gsl_matrix_free(cov1); gsl_matrix_free(cov2); gsl_matrix_free(mtemp); return ((b/a) + sqrt(pow(b/a, 2) - c/a)) * norm; } /* `fisher_cut2(ratio, w, c1, c2)` computes * the threshold (cut), on the line given by * `w`, to discriminates the classes `c1`, `c2`; * with `ratio` being the ratio of their prior * probabilities. * * The cut is fixed by the condition of * conditional probability being the * same for each class: * * P(c₁|x) p(x|c₁)⋅p(c₁) * ------- = --------------- = 1; * P(c₂|x) p(x|c₁)⋅p(c₂) * * where p(x|c) is the probability for point x * along the fisher projection line. If the classes * are bivariate gaussian then p(x|c) is simply * given by a normal distribution: * * Φ(μ=(w,μ), σ=(w,Σw)) * * The solution is then * * t = (b/a) + √((b/a)² - c/a); * * where * * 1. a = S₁² - S₂² * 2. b = M₂S₁² - M₁S₂² * 3. c = M₂²S₁² - M₁²S₂² - 2S₁²S₂² log(α) * 4. α = p(c₁)/p(c₂) * */ double fisher_cut2( double ratio, gsl_vector *w, sample_t *c1, sample_t *c2) { gsl_vector *vtemp = gsl_vector_alloc(w->size); /* Construct the covariances of each class... */ gsl_matrix *cov1 = normal_cov(&c1->p); gsl_matrix *cov2 = normal_cov(&c2->p); /* and the mean values */ gsl_vector *mu1 = normal_mean(&c1->p); gsl_vector *mu2 = normal_mean(&c2->p); /* Project the distribution onto the * w line to get a 1D gaussian */ // mean double m1; gsl_blas_ddot(w, mu1, &m1); double m2; gsl_blas_ddot(w, mu2, &m2); // variances gsl_blas_dgemv(CblasNoTrans, 1, cov1, w, 0, vtemp); double var1; gsl_blas_ddot(w, vtemp, &var1); gsl_blas_dgemv(CblasNoTrans, 1, cov2, w, 0, vtemp); double var2; gsl_blas_ddot(w, vtemp, &var2); double a = var1 - var2; double b = m2*var1 + m1*var2; double c = m2*m2*var1 - m1*m1*var2 + 2*var1*var2 * log(ratio); // free memory gsl_vector_free(mu1); gsl_vector_free(mu2); gsl_vector_free(vtemp); gsl_matrix_free(cov1); gsl_matrix_free(cov2); return (b/a) + sqrt(pow(b/a, 2) - c/a); } int main(int argc, char **args) { // initialize RNG gsl_rng_env_setup(); gsl_rng *r = gsl_rng_alloc(gsl_rng_default); /* Generate two classes of normally * distributed 2D points with different * paramters: signal and noise. */ struct par par_sig = { 0, 0, 0.3, 0.3, 0.5 }; struct par par_noise = { 4, 4, 1.0, 1.0, 0.4 }; // sample sizes size_t nsig = 800; size_t nnoise = 1000; sample_t *signal = generate_normal(r, nsig, &par_sig); sample_t *noise = generate_normal(r, nnoise, &par_noise); /* Fisher linear discriminant * * First calculate the direction w onto * which project the data points. Then the * cut which determines the class for each * projected point. */ gsl_vector *w = fisher_proj(signal, noise); double t_cut = fisher_cut2(nsig / (double)nnoise, w, signal, noise); fputs("# Linear Fisher discriminant\n\n", stderr); fprintf(stderr, "* w: [%.3f, %.3f]\n", gsl_vector_get(w, 0), gsl_vector_get(w, 1)); fprintf(stderr, "* t_cut: %.3f\n", t_cut); gsl_vector_fprintf(stdout, w, "%g"); printf("%f\n", t_cut); /* Print data to stdout for plotting. * Note: we print the sizes to be able * to set apart the two matrices. */ printf("%ld %ld %d\n", nsig, nnoise, 2); gsl_matrix_fprintf(stdout, signal->data, "%g"); gsl_matrix_fprintf(stdout, noise->data, "%g"); // free memory gsl_rng_free(r); sample_t_free(signal); sample_t_free(noise); return EXIT_SUCCESS; }