352 lines
7.9 KiB
Python
352 lines
7.9 KiB
Python
|
# coding: utf-8
|
|||
|
|
|||
|
from __future__ import division, unicode_literals
|
|||
|
|
|||
|
# special functions
|
|||
|
from sympy.functions.special.gamma_functions import lowergamma, gamma
|
|||
|
from sympy.functions.special.error_functions import erf
|
|||
|
from mpmath import hyp2f1
|
|||
|
|
|||
|
# uncertainty propagation
|
|||
|
from uncertainties.core import UFloat
|
|||
|
from uncertainties import ufloat, wrap, correlated_values
|
|||
|
import uncertainties.unumpy as unp
|
|||
|
|
|||
|
# fit
|
|||
|
from numpy.polynomial.polynomial import polyfit
|
|||
|
|
|||
|
import sys
|
|||
|
import collections
|
|||
|
import types
|
|||
|
import numpy as np
|
|||
|
import string
|
|||
|
|
|||
|
|
|||
|
##
|
|||
|
## Variables
|
|||
|
##
|
|||
|
|
|||
|
def array(*args, **kwargs):
|
|||
|
"""
|
|||
|
Shorthand for 1-dimensional array
|
|||
|
"""
|
|||
|
return np.array(args)
|
|||
|
|
|||
|
|
|||
|
def uarray(err, *val):
|
|||
|
"""
|
|||
|
Shorthand for uncertain array with
|
|||
|
constant uncertainty
|
|||
|
"""
|
|||
|
return unp.uarray(val, [err])
|
|||
|
|
|||
|
|
|||
|
def nominal(x):
|
|||
|
"""
|
|||
|
Nominal value of n-dimensional uncertain array
|
|||
|
"""
|
|||
|
return unp.nominal_values(x)
|
|||
|
|
|||
|
|
|||
|
def sigma(x):
|
|||
|
"""
|
|||
|
Uncertainty of n-dimensional uncertain array
|
|||
|
"""
|
|||
|
return unp.std_devs(x)
|
|||
|
|
|||
|
|
|||
|
class sample(np.ndarray):
|
|||
|
"""
|
|||
|
Sample type (ndarray subclass)
|
|||
|
Given a data sample outputs many statistical properties:
|
|||
|
n: number of measures
|
|||
|
min: minimum value
|
|||
|
max: maximum value
|
|||
|
mean: sample mean
|
|||
|
med: median value
|
|||
|
var: sample variance
|
|||
|
std: sample standard deviation
|
|||
|
stdm: standard deviation of the mean
|
|||
|
"""
|
|||
|
__array_priority__ = 2
|
|||
|
|
|||
|
def __new__(cls, *sample):
|
|||
|
if isinstance(sample[0], types.GeneratorType):
|
|||
|
sample = list(sample[0])
|
|||
|
s = np.asarray(sample).view(cls)
|
|||
|
return s
|
|||
|
|
|||
|
def __str__(self):
|
|||
|
return format_measure(self.mean, self.stdm)
|
|||
|
|
|||
|
def __array_finalize__(self, obj):
|
|||
|
if obj is None:
|
|||
|
return
|
|||
|
obj = np.asarray(obj)
|
|||
|
self.n = len(obj)
|
|||
|
self.min = np.min(obj)
|
|||
|
self.max = np.max(obj)
|
|||
|
self.mean = np.mean(obj)
|
|||
|
self.med = np.median(obj)
|
|||
|
self.var = np.var(obj, ddof=1)
|
|||
|
self.std = np.sqrt(self.var)
|
|||
|
self.stdm = self.std / np.sqrt(self.n)
|
|||
|
|
|||
|
def val(self):
|
|||
|
"""
|
|||
|
Gives the measure at standard 68% CL as an uncertain float (X, S/√n)
|
|||
|
"""
|
|||
|
return ufloat(self.mean, self.stdm)
|
|||
|
|
|||
|
def tval(self):
|
|||
|
"""
|
|||
|
Gives the measure at 68% CL, calculated with the Student
|
|||
|
t-distribution, as an uncertain float (X, tS/√n)
|
|||
|
"""
|
|||
|
return ufloat(self.mean, t(0.68, self.n-1) * self.stdm)
|
|||
|
|
|||
|
|
|||
|
##
|
|||
|
## Normal distribution
|
|||
|
##
|
|||
|
|
|||
|
def phi(x, mu, sigma):
|
|||
|
"""
|
|||
|
Normal CDF
|
|||
|
computes the probability P(x<a) given X is a normally distributed
|
|||
|
random variable with mean μ and standard deviation σ.
|
|||
|
"""
|
|||
|
return float(1 + erf((x-mu) / (sigma*np.sqrt(2))))/2
|
|||
|
|
|||
|
|
|||
|
def p(mu, sigma, a, b=None):
|
|||
|
"""
|
|||
|
Normal CDF shorthand
|
|||
|
given a normally distributed random variable X, with mean μ
|
|||
|
and standard deviation σ, computes the probability P(a<X<b)
|
|||
|
or P(X<a) whether b is given.
|
|||
|
"""
|
|||
|
if b:
|
|||
|
return phi(b, mu, sigma) - phi(a, mu, sigma) # P(a<X<b)
|
|||
|
return phi(a, mu, sigma) # P(x<a)
|
|||
|
|
|||
|
|
|||
|
##
|
|||
|
## χ² distribution
|
|||
|
##
|
|||
|
|
|||
|
def chi(x, d):
|
|||
|
"""
|
|||
|
χ² CDF
|
|||
|
given a χ² distribution with degrees of freedom
|
|||
|
computes the probability P(x<X^2)
|
|||
|
"""
|
|||
|
return float(lowergamma(d/2, x/2)/gamma(d/2))
|
|||
|
|
|||
|
|
|||
|
def q(x, d):
|
|||
|
"""
|
|||
|
χ² 1-CDF
|
|||
|
given a χ² distribution with d degrees of freedom
|
|||
|
computes the probability P(X^2>x)
|
|||
|
"""
|
|||
|
return 1 - chi(x, d)
|
|||
|
|
|||
|
|
|||
|
def chi_squared(X, O, B, s=2):
|
|||
|
"""
|
|||
|
χ² test for a histogram
|
|||
|
given
|
|||
|
X: a normally distributed variable X
|
|||
|
O: ndarray of normalized absolute frequencies
|
|||
|
B: ndarray of bins delimiters
|
|||
|
s: number of constraints on X
|
|||
|
computes the probability P(χ²>χ₀²)
|
|||
|
"""
|
|||
|
N, M = X.n, len(O)
|
|||
|
d = M-1-s
|
|||
|
dX = np.diff(B)
|
|||
|
|
|||
|
E = [N*p(X.mean, X.std, B[k], B[k+1]) for k in range(M)]
|
|||
|
Chi = sum((N*O*dX - E)**2/E)
|
|||
|
|
|||
|
return q(Chi, d)
|
|||
|
|
|||
|
|
|||
|
def chi_squared_fit(X, Y, f, sigma=None, s=2):
|
|||
|
"""
|
|||
|
χ² test for fitted data
|
|||
|
given
|
|||
|
X: independent variable
|
|||
|
Y: dependent variable
|
|||
|
f: best fit function
|
|||
|
s: number of constraints on the data (optional)
|
|||
|
sigma: uncertainty on Y, number or ndarray (optional)
|
|||
|
"""
|
|||
|
if sigma is None:
|
|||
|
sigma_Y = np.mean([i.std for i in Y])
|
|||
|
else:
|
|||
|
sigma_Y = np.asarray(sigma).mean()
|
|||
|
Chi = sum(((Y - f(X))/sigma_Y)**2)
|
|||
|
return q(Chi, Y.size-s)
|
|||
|
|
|||
|
|
|||
|
##
|
|||
|
## Student t-distribution
|
|||
|
##
|
|||
|
|
|||
|
def t(p, n):
|
|||
|
"""
|
|||
|
Student's t-distribution quantile
|
|||
|
"""
|
|||
|
|
|||
|
from scipy.stats import t
|
|||
|
dist = t(n-1)
|
|||
|
tm, tp = dist.interval(p)
|
|||
|
return (abs(tm) + tp)/2
|
|||
|
|
|||
|
|
|||
|
##
|
|||
|
## Regressions
|
|||
|
##
|
|||
|
|
|||
|
def simple_linear(X, Y, sigma_Y):
|
|||
|
"""
|
|||
|
Linear regression of line Y=kX
|
|||
|
"""
|
|||
|
sigma = np.asarray(sigma_Y).mean()
|
|||
|
k = sum(X*Y) / sum(X**2)
|
|||
|
sigma_k = sigma / np.sqrt(sum(X**2))
|
|||
|
return ufloat(k, sigma_k)
|
|||
|
|
|||
|
|
|||
|
def linear(X, Y, sigma_Y):
|
|||
|
"""
|
|||
|
Linear regression of line Y=A+BX
|
|||
|
"""
|
|||
|
sigma = np.asarray(sigma_Y).mean()
|
|||
|
|
|||
|
N = len(Y)
|
|||
|
D = N*sum(X**2) - sum(X)**2
|
|||
|
A = (sum(X**2)*sum(Y) - sum(X)*sum(X*Y))/D
|
|||
|
B = (N*sum(X*Y) - sum(X)*sum(Y))/D
|
|||
|
|
|||
|
sigma_A = sigma * np.sqrt(sum(X**2)/D)
|
|||
|
sigma_B = sigma * np.sqrt(N/D)
|
|||
|
|
|||
|
return (ufloat(A, sigma_A),
|
|||
|
ufloat(B, sigma_B))
|
|||
|
|
|||
|
|
|||
|
def polynomial(X, Y, d=2, sigma_Y=None):
|
|||
|
"""
|
|||
|
d-th degree polynomial fit
|
|||
|
"""
|
|||
|
if sigma_Y is None:
|
|||
|
weights = None
|
|||
|
elif isinstance(sigma_Y, collections.Iterable):
|
|||
|
weights = 1/np.asarray(sigma_Y)
|
|||
|
else:
|
|||
|
weights = np.repeat(1/sigma_Y, len(X))
|
|||
|
coeff, cov = np.polyfit(X, Y, d, w=weights,
|
|||
|
cov=True, full=False)
|
|||
|
return correlated_values(coeff, cov)
|
|||
|
|
|||
|
|
|||
|
def curve(X, Y, f, sigmaY=None, guess=None, **kwargs):
|
|||
|
"""
|
|||
|
Any function fit
|
|||
|
"""
|
|||
|
from scipy.optimize import curve_fit
|
|||
|
|
|||
|
if sigmaY is None:
|
|||
|
weights = None
|
|||
|
elif isinstance(sigmaY, collections.Iterable):
|
|||
|
weights = np.asarray(sigmaY)
|
|||
|
else:
|
|||
|
weights = np.repeat(sigmaY, len(X))
|
|||
|
|
|||
|
coeff, cov = curve_fit(f, X, Y, sigma=weights, p0=guess, **kwargs)
|
|||
|
if sigmaY is None:
|
|||
|
return lambda x: f(x, *coeff), [ufloat(i, np.nan) for i in coeff]
|
|||
|
return lambda x: f(x, *coeff), correlated_values(coeff, cov)
|
|||
|
|
|||
|
|
|||
|
##
|
|||
|
## Misc
|
|||
|
##
|
|||
|
|
|||
|
def check_measures(X1, X2):
|
|||
|
"""
|
|||
|
Checks whether the results of two set of measures
|
|||
|
are compatible with each other. Gives the α-value
|
|||
|
α=1-P(X within tσ) where t is the weighted difference
|
|||
|
of X₁ and X₂
|
|||
|
"""
|
|||
|
t = np.abs(X1.n - X2.n)/np.sqrt(X1.s**2 + X2.s**2)
|
|||
|
return float(1 - erf(t/np.sqrt(2)))
|
|||
|
|
|||
|
|
|||
|
def combine_measures(*variables):
|
|||
|
"""
|
|||
|
Combines different (compatible) measures of the same
|
|||
|
quantity producing a new value with a smaller uncertainty
|
|||
|
than the individually taken ones
|
|||
|
"""
|
|||
|
W = np.array([1/i.s**2 for i in variables])
|
|||
|
X = np.array([i.n for i in variables])
|
|||
|
|
|||
|
best = sum(W*X)/sum(W)
|
|||
|
sigma = 1/np.sqrt(sum(W))
|
|||
|
|
|||
|
return ufloat(best, sigma)
|
|||
|
|
|||
|
|
|||
|
def format_measure(mean, sigma):
|
|||
|
"""
|
|||
|
Formats a measure in the standard declaration
|
|||
|
"""
|
|||
|
prec = int(np.log10(abs(sigma)))
|
|||
|
limit = 2-prec if prec < 0 else 2
|
|||
|
sigma = round(sigma, limit)
|
|||
|
mean = round(mean, limit)
|
|||
|
|
|||
|
return "{}±{}".format(mean, sigma)
|
|||
|
|
|||
|
|
|||
|
class WithEmptyFormatter(string.Formatter):
|
|||
|
int_type = (int, long) if sys.version_info < (3,) else (int)
|
|||
|
|
|||
|
def vformat(self, *args):
|
|||
|
self._automatic = None
|
|||
|
return super(WithEmptyFormatter, self).vformat(*args)
|
|||
|
|
|||
|
def get_value(self, key, args, kwargs):
|
|||
|
if key == '':
|
|||
|
if self._automatic is None:
|
|||
|
self._automatic = 0
|
|||
|
elif self._automatic == -1:
|
|||
|
raise ValueError("cannot switch from manual field specification "
|
|||
|
"to automatic field numbering")
|
|||
|
key = self._automatic
|
|||
|
self._automatic += 1
|
|||
|
elif isinstance(key, int_type):
|
|||
|
if self._automatic is None:
|
|||
|
self._automatic = -1
|
|||
|
elif self._automatic != -1:
|
|||
|
raise ValueError("cannot switch from automatic field numbering "
|
|||
|
"to manual field specification")
|
|||
|
return super(WithEmptyFormatter, self).get_value(key, args, kwargs)
|
|||
|
|
|||
|
|
|||
|
class MeasureFormatter(WithEmptyFormatter):
|
|||
|
def format_field(self, value, format_spec):
|
|||
|
if isinstance(value, UFloat):
|
|||
|
return value.format(format_spec+'P')
|
|||
|
else:
|
|||
|
return super(MeasureFormatter, self).format_field(value, format_spec)
|
|||
|
|
|||
|
mformat = MeasureFormatter().format
|
|||
|
|
|||
|
epsilon = 0.05
|