analistica/ex-1/plots/kde.py

96 lines
2.1 KiB
Python
Raw Permalink Normal View History

2020-04-26 00:00:06 +02:00
#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import moyal
from scipy.optimize import minimize
def gaussian_kde(dataset, points):
n = dataset.size
m = points.size
var = np.var(dataset) * np.power(n*3/4, -2/5) * 0.4
result = np.zeros(m)
if m >= n:
for i in range(n):
diff = dataset[i] - points
result += np.exp(-diff**2/(2*var))/n
else:
for i in range(m):
diff = dataset - points[i]
result[i] = np.sum(np.exp(-diff**2 / (2*var)))
return result / np.sqrt(2*np.pi*var) / n
def gaussian_kde_point(dataset, var, x):
var *= np.power(dataset.size * 3/4, -2/5) * 0.4
val = np.sum(np.exp(-(x - dataset)**2 / (2*var))) / dataset.size
return val / np.sqrt(2*np.pi*var)
def hsm(x):
n = len(x)
while n > 3:
k = n//2
diffs = x[k:] - x[:-k]
i = np.where(diffs == np.min(diffs))[0]
i = int(np.mean(i))
if diffs[i] == 0:
return x[i]
else:
x = x[i:i+k]
n = len(x)
if n == 3:
d = 2*x[1] - x[0] - x[2]
return np.mean(x[1:] if d > 0 else x[:2])
if n < 3:
return np.mean(x)
def bootstrap(f, x, n):
vals = np.empty(n)
for i in range(n):
s = np.random.choice(x, size=x.size)
vals[i] = f(np.sort(s))
return vals
np.random.seed(1)
x = np.linspace(-2, 25, 400)
sample = moyal.rvs(size=5000, loc=4)
s = np.argsort(sample)
f = gaussian_kde(sample, x)
def kde(sample):
var = np.var(sample)
f = lambda x: -gaussian_kde_point(sample, var, x)
M = minimize(lambda x: f(x), hsm(sample)).x
return M
m = bootstrap(hsm, sample, 500)
M = bootstrap(kde, sample, 500)
print(m.mean(), m.std())
print(M.mean(), M.std())
plt.figure()
plt.plot(x, moyal.pdf(x, loc=4), c='xkcd:gray', label='true PDF')
plt.plot(x, f, '#92182b', label='empirical PDF')
plt.plot(sample, np.zeros_like(sample),
'|', c='xkcd:navy')
plt.axvline(x=m.mean(), c='r')
2020-04-26 00:00:06 +02:00
plt.axvline(x=M.mean(), c='xkcd:camel', label='mode')
plt.legend()
plt.show()