diff --git a/ex-1/kde-plot.py b/ex-1/kde-plot.py new file mode 100755 index 0000000..9cec239 --- /dev/null +++ b/ex-1/kde-plot.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +import numpy as np +import matplotlib.pyplot as plt + +from scipy.stats import moyal +from scipy.optimize import minimize + + +def gaussian_kde(dataset, points): + n = dataset.size + m = points.size + + var = np.var(dataset) * np.power(n*3/4, -2/5) * 0.4 + result = np.zeros(m) + + if m >= n: + for i in range(n): + diff = dataset[i] - points + result += np.exp(-diff**2/(2*var))/n + else: + for i in range(m): + diff = dataset - points[i] + result[i] = np.sum(np.exp(-diff**2 / (2*var))) + + return result / np.sqrt(2*np.pi*var) / n + + +def gaussian_kde_point(dataset, var, x): + var *= np.power(dataset.size * 3/4, -2/5) * 0.4 + val = np.sum(np.exp(-(x - dataset)**2 / (2*var))) / dataset.size + return val / np.sqrt(2*np.pi*var) + + +def hsm(x): + n = len(x) + while n > 3: + k = n//2 + diffs = x[k:] - x[:-k] + + i = np.where(diffs == np.min(diffs))[0] + i = int(np.mean(i)) + + if diffs[i] == 0: + return x[i] + else: + x = x[i:i+k] + n = len(x) + + if n == 3: + d = 2*x[1] - x[0] - x[2] + return np.mean(x[1:] if d > 0 else x[:2]) + + if n < 3: + return np.mean(x) + + +def bootstrap(f, x, n): + vals = np.empty(n) + for i in range(n): + s = np.random.choice(x, size=x.size) + vals[i] = f(np.sort(s)) + return vals + + +np.random.seed(1) +x = np.linspace(-2, 25, 400) +sample = moyal.rvs(size=5000, loc=4) + +s = np.argsort(sample) + +f = gaussian_kde(sample, x) + + +def kde(sample): + var = np.var(sample) + f = lambda x: -gaussian_kde_point(sample, var, x) + M = minimize(lambda x: f(x), hsm(sample)).x + return M + +m = bootstrap(hsm, sample, 500) +M = bootstrap(kde, sample, 500) + +print(m.mean(), m.std()) +print(M.mean(), M.std()) + +plt.figure() +plt.plot(x, moyal.pdf(x, loc=4), c='xkcd:gray', label='true PDF') +plt.plot(x, f, '#92182b', label='empirical PDF') +plt.plot(sample, np.zeros_like(sample), + '|', c='xkcd:navy') +#plt.axvline(x=m.mean(), c='r') +plt.axvline(x=M.mean(), c='xkcd:camel', label='mode') +plt.legend() +plt.show()