ex-6: initial work on deconvolution testing

This commit is contained in:
Michele Guerini Rocco 2020-05-11 00:13:51 +02:00
parent 19981225f2
commit b865dd83ac
10 changed files with 507 additions and 158 deletions

138
ex-6/common.c Normal file
View File

@ -0,0 +1,138 @@
#include <math.h>
#include <gsl/gsl_sf.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_blas.h>
#include "common.h"
/* Intensity of the EM field at a large distance
* from a circular aperture diffraction of a plane
* wave.
*
* The intensity is given as a function of θ,
* the diffraction angle. All the other physical
* parameters are controlled by the structure `param`.
*
* See Fraunhöfer diffraction on "Hect - Optics" for
* a derivation of the formula.
*/
double intensity(double theta, struct param p) {
double x = p.k * p.a * sin(theta);
double R = p.l / cos(theta);
/* The intensity function has an eliminable
* discontinuoty at the origin, which must
* be handled separately.
*/
if (fabs(x) < 1e-15) {
return 0.5 * pow(p.e/R * M_PI * pow(p.a, 2), 2);
}
double y = 2*M_PI*pow(p.a, 2) * gsl_sf_bessel_J1(x) / x;
return 0.5 * pow(p.e * y/R, 2);
}
/* `gaussian_kernel(bins, sigma)` generates a convolution
* kernel for the intensity histogram with `bins` bins.
*
* The kernel is a gaussian PDF with μ at the central bin
* and σ equal to `sigma`. The size is 6% of `bins` and
* the bin width is the same of the intensity histogram:
* π/2 / `bins`.
*/
gsl_histogram* gaussian_kernel(size_t bins, double sigma) {
/* Set the size to 6% of the histogram.
* Always choose n even to correctly center the
* gaussian in the middle.
*/
size_t n = ((double) bins * 6) / 100;
n = n % 2 ? n : n+1;
double dx = (M_PI/2) / bins;
gsl_histogram *res = gsl_histogram_alloc(n);
gsl_histogram_set_ranges_uniform(res, 0, dx * n);
/* The histogram will be such that
* the maximum falls in the central bin.
*/
long int offset = (res->n)/2;
for (long int i = 0; i < (long int)res->n; i++)
res->bin[i] = gsl_ran_gaussian_pdf(
((double)(i - offset)) * dx, sigma * dx);
return res;
}
/* `gsl_vector_convolve(a, b, res)` computes the linear
* convolution of two vectors. The resulting vector
* has size `a->n + b->n - 1` and is stored in `res`.
*/
int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res) {
size_t m = a->size;
size_t n = b->size;
/* Vector views for the overlap dot product */
gsl_vector_view va, vb;
/* Reverse `b` before convolving */
gsl_vector_reverse(b);
double dot;
size_t start_a, start_b, many;
for (size_t i = 0; i < m + n - 1; i++) {
/* Calculate the starting indices
* of the two vectors a,b overlap.
*/
if (i < n) start_a = 0;
else start_a = i - (n - 1);
if (i < n) start_b = (n - 1) - i;
else start_b = 0;
/* Calculate how many elements overlap */
if (i < n) many = i + 1;
else if (i >= m) many = n - (i - m) - 1;
else many = n;
/* Take the dot product of the overlap
* and store it in the resulting histogram
*/
va = gsl_vector_subvector(a, start_a, many);
vb = gsl_vector_subvector(b, start_b, many);
gsl_blas_ddot(&va.vector, &vb.vector, &dot);
gsl_vector_set(res, i, dot);
}
/* Put b back together */
gsl_vector_reverse(b);
return GSL_SUCCESS;
}
/* Convolves two histograms while keeping the
* original bin edges. This is a simple wrapper
* function around `gsl_vector_convolve`.
*/
gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b) {
gsl_histogram *res = gsl_histogram_calloc(a->n + b->n - 1);
/* Set the same edges as `a`*/
double max = gsl_histogram_max(a);
double min = gsl_histogram_min(a);
gsl_histogram_set_ranges_uniform(res, min, max);
/* Create vector views for everybody */
gsl_vector_view va = gsl_vector_view_array(a->bin, a->n);
gsl_vector_view vb = gsl_vector_view_array(b->bin, b->n);
gsl_vector_view vres = gsl_vector_view_array(res->bin, res->n);
gsl_vector_convolve(&va.vector, &vb.vector, &vres.vector);
return res;
}

51
ex-6/common.h Normal file
View File

@ -0,0 +1,51 @@
#pragma once
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_vector_double.h>
/* Intensity function parameters */
struct param {
double a; // aperture radius
double k; // wavenumber
double e; // wave amplitude
double l; // screen distance
};
/* Intensity of the EM field at a large distance
* from a circular aperture diffraction of a plane
* wave.
*
* The intensity is given as a function of θ,
* the diffraction angle. All the other physical
* parameters are controlled by the structure `param`.
*
* See Fraunhöfer diffraction on "Hect - Optics" for
* a derivation of the formula.
*/
double intensity(double theta, struct param p);
/* `gaussian_kernel(bins, sigma)` generates a convolution
* kernel for the intensity histogram with `bins` bins.
*
* The kernel is a gaussian PDF with μ at the central bin
* and σ equal to `sigma`. The size is 6% of `bins` and
* the bin width is the same of the intensity histogram:
* π/2 / `bins`.
*/
gsl_histogram* gaussian_kernel(size_t bins, double sigma);
/* `gsl_vector_convolve(a, b, res)` computes the linear
* convolution of two vectors. The resulting vector
* has size `a->n + b->n - 1` and is stored in `res`.
*/
int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res);
/* Convolves two histograms while keeping the
* original bin edges. This is a simple wrapper
* function around `gsl_vector_convolve`.
*/
gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b);

24
ex-6/dist-plot.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python
from pylab import *
import sys
rcParams['font.size'] = 12
n = int(input())
a, b, f = loadtxt(sys.stdin, unpack=True)
subplot(121)
title('FFT')
hist(a[:n], insert(b[:n], 0, a[0]), weights=f[:n],
histtype='stepfilled', color='#e3c5ca', edgecolor='#92182b')
ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
subplot(122)
title('RL')
hist(a[n:], insert(b[n:], 0, a[n]), weights=f[n:],
histtype='stepfilled', color='#e3c5ca', edgecolor='#92182b')
ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
tight_layout()
show()

48
ex-6/dist.c Normal file
View File

@ -0,0 +1,48 @@
#include "dist.h"
/* Computes the earth mover's distance, aka 1-Wasserstein
* metric, of two histograms. It gives the minimum cost
* (weight * distance) associated to shifting around the
* weights of the bins to turn one histogram into the other.
*
* For a 1D normalised histogram the distance can be proven
* to be equal to the L¹ distance of the CDF. If the bins are
* equally sized this can be futher simplified to give a simple
* O(n) solution.
*/
double edm_between(gsl_histogram *a, gsl_histogram *b) {
/* For the distance to make sense the
* histograms must have the same area.
*/
normalise(a);
normalise(b);
/* Get the bin width used to compute the cost,
* this assumes the histogram have equal ranges
* and all bin widths are equal.
*/
double lower, upper;
gsl_histogram_get_range(a, 0, &lower, &upper);
double width = upper - lower;
double sum = 0;
double diff = 0;
for (int i = 0; i < a->n; i++) {
diff += gsl_histogram_get(a, i) - gsl_histogram_get(b, i);
sum += fabs(diff);
}
/* Multiply the sum by the bin width to
* compute the final cost
*/
return sum * width;
}
/* Normalise a histogram: ie rescaling the bin
* weights to sum to 1
*/
void normalise(gsl_histogram *h) {
double sum = gsl_histogram_sum(h);
gsl_histogram_scale(h, 1/sum);
}

20
ex-6/dist.h Normal file
View File

@ -0,0 +1,20 @@
#include <math.h>
#include <gsl/gsl_histogram.h>
/* Computes the earth mover's distance, aka 1-Wasserstein
* metric, of two histograms. It gives the minimum cost
* (weight * distance) associated to shifting around the
* weights of the bins to turn one histogram into the other.
*
* For a 1D normalised histogram the distance can be proven
* to be equal to the L¹ distance of the CDF. If the bins are
* equally sized this can be futher simplified to give a simple
* O(n) solution.
*/
double edm_between(gsl_histogram *a, gsl_histogram *b);
/* Normalise a histogram: ie rescaling the bin
* weights to sum to 1
*/
void normalise(gsl_histogram *h);

View File

@ -2,7 +2,8 @@
#include <string.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sf.h>
#include "common.h"
#include "fft.h"
#include "rl.h"
@ -20,101 +21,6 @@ struct options {
};
/* Intensity function parameters */
struct param {
double a; // aperture radius
double k; // wavenumber
double e; // wave amplitude
double l; // screen distance
};
/* Intensity of the EM field at a large distance
* from a circular aperture diffraction of a plane
* wave.
*
* The intensity is given as a function of θ,
* the diffraction angle. All the other physical
* parameters are controlled by the structure `param`.
*
* See Fraunhöfer diffraction on "Hect - Optics" for
* a derivation of the formula.
*/
double intensity(double theta, struct param p) {
double x = p.k * p.a * sin(theta);
double R = p.l / cos(theta);
/* The intensity function has an eliminable
* discontinuoty at the origin, which must
* be handled separately.
*/
if (fabs(x) < 1e-15) {
return 0.5 * pow(p.e/R * M_PI * pow(p.a, 2), 2);
}
double y = 2*M_PI*pow(p.a, 2) * gsl_sf_bessel_J1(x) / x;
return 0.5 * pow(p.e * y/R, 2);
}
/* `gaussian_for(hist, sigma)` generates a histogram
* with the same bin width of `hist` but smaller,
* with a gaussian PDF with μ at the central bin
* and σ equal to `sigma`.
*/
gsl_histogram* gaussian_for(gsl_histogram *hist, double sigma) {
/* Set the size to 6% of the histogram.
* Always choose n even to correctly center the
* gaussian in the middle.
*/
size_t n = ((double) hist->n * 6) / 100;
n = n % 2 ? n : n+1;
/* Calculate the (single) bin width assuming
* the ranges are uniformely spaced
*/
double min, max;
gsl_histogram_get_range(hist, 0, &min, &max);
double dx = max - min;
gsl_histogram *res = gsl_histogram_alloc(n);
gsl_histogram_set_ranges_uniform(res, 0, dx * n);
/* The histogram will be such that
* the maximum falls in the central bin.
*/
long int offset = (res->n)/2;
for (long int i = 0; i < (long int)res->n; i++)
res->bin[i] = gsl_ran_gaussian_pdf(
((double)(i - offset)) * dx, sigma * dx);
return res;
}
/* Convolves two histograms while keeping the
* original bin edges. This is a simple wrapper
* function around `gsl_vector_convolve`.
*/
gsl_histogram* histogram_convolve(gsl_histogram *a, gsl_histogram *b) {
gsl_histogram *res = gsl_histogram_calloc(a->n + b->n - 1);
/* Set the same edges as `a`*/
double max = gsl_histogram_max(a);
double min = gsl_histogram_min(a);
gsl_histogram_set_ranges_uniform(res, min, max);
/* Create vector views for everybody */
gsl_vector_view va = gsl_vector_view_array(a->bin, a->n);
gsl_vector_view vb = gsl_vector_view_array(b->bin, b->n);
gsl_vector_view vres = gsl_vector_view_array(res->bin, res->n);
gsl_vector_convolve(&va.vector, &vb.vector, &vres.vector);
return res;
}
int show_help(char **argv) {
fprintf(stderr, "Usage: %s -[hcdoebsrmn]\n", argv[0]);
fprintf(stderr, " -h\t\tShow this message.\n");
@ -192,7 +98,7 @@ int main(int argc, char **argv) {
*/
fputs("\n# convolution\n", stderr);
fputs("1. generating gaussian kernel...", stderr);
gsl_histogram *kernel = gaussian_for(hist, opts.sigma);
gsl_histogram *kernel = gaussian_kernel(hist->n, opts.sigma);
fputs("done\n", stderr);
fputs("2. convolving...", stderr);

View File

@ -1,53 +1,7 @@
#include "rl.h"
#include <gsl/gsl_blas.h>
#include <math.h>
/* `gsl_vector_convolve(a, b, res)` computes the linear
* convolution of two vectors. The resulting vector
* has size `a->n + b->n - 1` and is stored in `res`.
*/
int gsl_vector_convolve(gsl_vector *a, gsl_vector *b, gsl_vector *res) {
size_t m = a->size;
size_t n = b->size;
/* Vector views for the overlap dot product */
gsl_vector_view va, vb;
/* Reverse `b` before convolving */
gsl_vector_reverse(b);
double dot;
size_t start_a, start_b, many;
for (size_t i = 0; i < m + n - 1; i++) {
/* Calculate the starting indices
* of the two vectors a,b overlap.
*/
if (i < n) start_a = 0;
else start_a = i - (n - 1);
if (i < n) start_b = (n - 1) - i;
else start_b = 0;
/* Calculate how many elements overlap */
if (i < n) many = i + 1;
else if (i >= m) many = n - (i - m) - 1;
else many = n;
/* Take the dot product of the overlap
* and store it in the resulting histogram
*/
va = gsl_vector_subvector(a, start_a, many);
vb = gsl_vector_subvector(b, start_b, many);
gsl_blas_ddot(&va.vector, &vb.vector, &dot);
gsl_vector_set(res, i, dot);
}
/* Put b back together */
gsl_vector_reverse(b);
return GSL_SUCCESS;
}
#include "common.h"
#include "rl.h"
/* Performs the Richardson-Lucy deconvolution.
@ -127,7 +81,6 @@ gsl_histogram* rl_deconvolve(
/* Set NaNs to zero */
for (size_t i = 0; i < rel_blur->size; i++)
if (isnan(gsl_vector_get(rel_blur, i))) {
fprintf(stderr, "gotcha: %ld!!\n", i);
double y = (i > 0)? gsl_vector_get(rel_blur, i - 1) : 1;
gsl_vector_set(rel_blur, i, y);
}

View File

@ -1,20 +1,10 @@
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_vector.h>
/* `rl_deconvolve(data, noise, rounds)`
* tries to deconvolve `noise` from
* `data` in `rounds` rounds.
* */
*/
gsl_histogram* rl_deconvolve(
gsl_histogram *data,
gsl_histogram *noise,
size_t rounds);
/* `convolve(a, b)` computes the linear convolution
* of two histograms. The convolted histogram
* has length `a->n + b->n - 1`.
*/
int gsl_vector_convolve(
gsl_vector *a,
gsl_vector *b,
gsl_vector *res);

217
ex-6/test.c Normal file
View File

@ -0,0 +1,217 @@
#include <math.h>
#include <string.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_statistics_double.h>
#include "common.h"
#include "fft.h"
#include "rl.h"
#include "dist.h"
/* Program options */
struct options {
size_t num_events;
size_t num_exps;
size_t bins;
double sigma;
size_t rounds;
const char* mode;
double noise;
};
/* A pair of `double`s */
typedef struct {
double a;
double b;
} pair_t;
int show_help(char **argv) {
fprintf(stderr, "Usage: %s -[hcdoebsrmn]\n", argv[0]);
fprintf(stderr, " -h\t\tShow this message.\n");
fprintf(stderr, " -c\t\tPrint the convolved histogram to stdout.\n");
fprintf(stderr, " -d\t\tAttempt and print the deconvolved histogram.\n");
fprintf(stderr, " -o\t\tPrint the original histogram to stdout.\n");
fprintf(stderr, " -e N\t\tThe number of events. (default: 50000)\n");
fprintf(stderr, " -b N\t\tThe number of θ bins. (default: 150)\n");
fprintf(stderr, " -s SIGMA\tThe sigma of gaussian kernel. "
"(default: 0.8)\n");
fprintf(stderr, " -r N\t\tThe number of RL deconvolution rounds."
"(default: 3)\n");
fprintf(stderr, " -m MODE\tThe deconvolution mode: 'fft' or 'rl'."
"(default: 'fft')\n");
fprintf(stderr, " -n SIGMA\tThe σ of the gaussian noise to add to "
"the convolution. (default: 0)\n");
return EXIT_SUCCESS;
}
/* Performs an experiment consisting in
*
* 1. Measuring the distribution I(θ) by reverse
* sampling from an RNG;
* 2. Convolving the I(θ) sample with a kernel
* to simulate the instrumentation response;
* 3. Applying a gaussian noise with σ=opts.noise
* 4. Deconvolving the result with both the
* Richardson-Lucy (RL) algorithm and the Fourier
* transform (FFT) method;
* 5. Computing the Earth Mover's Distance (EDM) between
* the deconvolution results and the original,
* uncorrupted sample.
*
* The function returns a pair of the FFT and RL
* distances, in this order.
*/
pair_t experiment(
struct options opts,
gsl_rng *r,
gsl_histogram *kernel) {
struct param p = { 0.01, 0.0001, 1e4, 1 };
/* Sample events following the intensity
* of a circular aperture diffraction I(θ)
* while producing a histogram.
*/
gsl_histogram* hist = gsl_histogram_alloc(opts.bins);
gsl_histogram_set_ranges_uniform(hist, 0, M_PI/2);
for (size_t i = 0; i < opts.num_events; i++){
double theta;
do {
// uniform sphere polar angle
theta = acos(1 - gsl_rng_uniform(r));
} while(intensity(0, p) * gsl_rng_uniform(r) > intensity(theta, p));
gsl_histogram_increment(hist, theta);
}
/* Convolve the hisogram with the kernel */
gsl_histogram *conv = histogram_convolve(hist, kernel);
/* Add gaussian noise with σ=opts.noise */
if (opts.noise > 0) {
for (size_t i = 0; i < conv->n; i++)
conv->bin[i] += conv->bin[i] * gsl_ran_gaussian(r, opts.noise);
}
/* Deconvolve the histogram with both methods: RL and FFT.
* The FFT is used as reference for the distance.
*/
gsl_histogram *fft_clean = fft_deconvolve(conv, kernel);
gsl_histogram *rl_clean = rl_deconvolve(conv, kernel, opts.rounds);
/* Compute the earth mover's distances from the original
* histogram and store add each one to the respective
* distance histogram.
*/
pair_t dist;
dist.a = edm_between(hist, fft_clean);
dist.b = edm_between(hist, rl_clean);
// free memory
gsl_histogram_free(hist);
gsl_histogram_free(conv);
gsl_histogram_free(fft_clean);
gsl_histogram_free(rl_clean);
return dist;
}
int main(int argc, char **argv) {
struct options opts;
/* Set default options */
opts.num_events = 50000;
opts.num_exps = 1000;
opts.bins = 150;
opts.sigma = 0.8;
opts.rounds = 3;
opts.mode = "fft";
opts.noise = 0;
/* Process CLI arguments */
for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-e")) opts.num_events = atol(argv[++i]);
else if (!strcmp(argv[i], "-x")) opts.num_exps = atol(argv[++i]);
else if (!strcmp(argv[i], "-b")) opts.bins = atol(argv[++i]);
else if (!strcmp(argv[i], "-s")) opts.sigma = atof(argv[++i]);
else if (!strcmp(argv[i], "-r")) opts.rounds = atol(argv[++i]);
else if (!strcmp(argv[i], "-n")) opts.noise = atof(argv[++i]);
else return show_help(argv);
}
/* Initialize an RNG. */
gsl_rng_env_setup();
gsl_rng *r = gsl_rng_alloc(gsl_rng_default);
/* Generate the gaussian kernel */
gsl_histogram *kernel = gaussian_kernel(opts.bins, opts.sigma);
/* Performs experiments and record the deconvolution
* EDM distances to the original sample.
*/
double* fft_dist = calloc(opts.num_exps, sizeof(double));
double* rl_dist = calloc(opts.num_exps, sizeof(double));
for (size_t i = 0; i < opts.num_exps; i++) {
pair_t dist = experiment(opts, r, kernel);
fft_dist[i] = dist.a;
rl_dist[i] = dist.b;
}
/* Compute some statistics of the EDM */
// 1 is the array stride.
double fft_mean = gsl_stats_mean(fft_dist, 1, opts.num_exps);
double fft_stdev = gsl_stats_sd(fft_dist, 1, opts.num_exps);
double fft_skew = gsl_stats_skew_m_sd(fft_dist, 1, opts.num_exps, fft_mean, fft_stdev);
double fft_min, fft_max; gsl_stats_minmax(&fft_min, &fft_max, fft_dist, 1, opts.num_exps);
double rl_mean = gsl_stats_mean(rl_dist, 1, opts.num_exps);
double rl_stdev = gsl_stats_sd(rl_dist, 1, opts.num_exps);
double rl_skew = gsl_stats_skew_m_sd(rl_dist, 1, opts.num_exps, rl_mean, rl_stdev);
double rl_min, rl_max; gsl_stats_minmax(&rl_min, &rl_max, rl_dist, 1, opts.num_exps);
/* Create EDM distance histograms.
* Since the distance depends wildly on the noise we can't
* set a fixed range and therefore use the above values.
*/
gsl_histogram *fft_hist = gsl_histogram_alloc(sqrt(opts.num_exps));
gsl_histogram *rl_hist = gsl_histogram_alloc(sqrt(opts.num_exps));
gsl_histogram_set_ranges_uniform(fft_hist, fft_min, fft_max);
gsl_histogram_set_ranges_uniform(rl_hist, rl_min, rl_max);
for (size_t i = 0; i < opts.num_exps; i++) {
gsl_histogram_increment(fft_hist, fft_dist[i]);
gsl_histogram_increment( rl_hist, rl_dist[i]);
}
// print results to stderr
fprintf(stderr, "# EDM distance\n\n");
fprintf(stderr, "## FFT deconvolution\n");
fprintf(stderr, "- mean: %.2e\n"
"- stdev: %.1e\n"
"- skew: %.2f\n", fft_mean, fft_stdev, fft_skew);
fprintf(stderr, "\n## RL deconvolution\n");
fprintf(stderr, "- mean: %.2e\n"
"- stdev: %.1e\n"
"- skew: %.2f\n", rl_mean, rl_stdev, rl_skew);
// print histograms to stdout
fprintf(stderr, "\n# EDM histogram\n");
fprintf(stdout, "%d\n", (int)sqrt(opts.num_exps));
gsl_histogram_fprintf(stdout, fft_hist, "%g", "%g");
gsl_histogram_fprintf(stdout, rl_hist, "%g", "%g");
// free memory
gsl_rng_free(r);
gsl_histogram_free(kernel);
gsl_histogram_free(fft_hist);
gsl_histogram_free(rl_hist);
free(fft_dist);
free(rl_dist);
return EXIT_SUCCESS;
}

View File

@ -31,7 +31,9 @@ ex-5/bin/%: ex-5/%.c
$(CCOMPILE)
ex-6: ex-6/bin/main
ex-6/bin/main: ex-6/main.c ex-6/rl.c ex-6/fft.c
ex-6/bin/main: ex-6/main.c ex-6/common.c ex-6/rl.c ex-6/fft.c
$(CCOMPILE)
ex-6/bin/test: ex-6/test.c ex-6/common.c ex-6/rl.c ex-6/fft.c ex-6/dist.c
$(CCOMPILE)
ex-7: ex-7/bin/main ex-7/bin/test