#include "dist.h" /* Computes the earth mover's distance, aka 1-Wasserstein * metric, of two histograms. It gives the minimum cost * (weight * distance) associated to shifting around the * weights of the bins to turn one histogram into the other. * * For a 1D normalised histogram the distance can be proven * to be equal to the L¹ distance of the CDF. If the bins are * equally sized this can be futher simplified to give a simple * O(n) solution. */ double emd_between(gsl_histogram *a, gsl_histogram *b) { /* For the distance to make sense the * histograms must have the same area. */ normalise(a); normalise(b); /* Get the bin width used to compute the cost, * this assumes the histogram have equal ranges * and all bin widths are equal. */ double lower, upper; gsl_histogram_get_range(a, 0, &lower, &upper); double width = upper - lower; double sum = 0; double diff = 0; for (int i = 0; i < a->n; i++) { diff += gsl_histogram_get(a, i) - gsl_histogram_get(b, i); sum += fabs(diff); } /* Multiply the sum by the bin width to * compute the final cost */ return sum * width; } /* Normalise a histogram: ie rescaling the bin * weights to sum to 1 */ void normalise(gsl_histogram *h) { double sum = gsl_histogram_sum(h); gsl_histogram_scale(h, 1/sum); }