lab-II/lab.py
2018-03-18 18:02:21 +01:00

352 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf-8
from __future__ import division, unicode_literals
# special functions
from sympy.functions.special.gamma_functions import lowergamma, gamma
from sympy.functions.special.error_functions import erf
from mpmath import hyp2f1
# uncertainty propagation
from uncertainties.core import UFloat
from uncertainties import ufloat, wrap, correlated_values
import uncertainties.unumpy as unp
# fit
from numpy.polynomial.polynomial import polyfit
import sys
import collections
import types
import numpy as np
import string
##
## Variables
##
def array(*args, **kwargs):
"""
Shorthand for 1-dimensional array
"""
return np.array(args)
def uarray(err, *val):
"""
Shorthand for uncertain array with
constant uncertainty
"""
return unp.uarray(val, [err])
def nominal(x):
"""
Nominal value of n-dimensional uncertain array
"""
return unp.nominal_values(x)
def sigma(x):
"""
Uncertainty of n-dimensional uncertain array
"""
return unp.std_devs(x)
class sample(np.ndarray):
"""
Sample type (ndarray subclass)
Given a data sample outputs many statistical properties:
n: number of measures
min: minimum value
max: maximum value
mean: sample mean
med: median value
var: sample variance
std: sample standard deviation
stdm: standard deviation of the mean
"""
__array_priority__ = 2
def __new__(cls, *sample):
if isinstance(sample[0], types.GeneratorType):
sample = list(sample[0])
s = np.asarray(sample).view(cls)
return s
def __str__(self):
return format_measure(self.mean, self.stdm)
def __array_finalize__(self, obj):
if obj is None:
return
obj = np.asarray(obj)
self.n = len(obj)
self.min = np.min(obj)
self.max = np.max(obj)
self.mean = np.mean(obj)
self.med = np.median(obj)
self.var = np.var(obj, ddof=1)
self.std = np.sqrt(self.var)
self.stdm = self.std / np.sqrt(self.n)
def val(self):
"""
Gives the measure at standard 68% CL as an uncertain float (X, S/√n)
"""
return ufloat(self.mean, self.stdm)
def tval(self):
"""
Gives the measure at 68% CL, calculated with the Student
t-distribution, as an uncertain float (X, tS/√n)
"""
return ufloat(self.mean, t(0.68, self.n-1) * self.stdm)
##
## Normal distribution
##
def phi(x, mu, sigma):
"""
Normal CDF
computes the probability P(x<a) given X is a normally distributed
random variable with mean μ and standard deviation σ.
"""
return float(1 + erf((x-mu) / (sigma*np.sqrt(2))))/2
def p(mu, sigma, a, b=None):
"""
Normal CDF shorthand
given a normally distributed random variable X, with mean μ
and standard deviation σ, computes the probability P(a<X<b)
or P(X<a) whether b is given.
"""
if b:
return phi(b, mu, sigma) - phi(a, mu, sigma) # P(a<X<b)
return phi(a, mu, sigma) # P(x<a)
##
## χ² distribution
##
def chi(x, d):
"""
χ² CDF
given a χ² distribution with degrees of freedom
computes the probability P(x<X^2)
"""
return float(lowergamma(d/2, x/2)/gamma(d/2))
def q(x, d):
"""
χ² 1-CDF
given a χ² distribution with d degrees of freedom
computes the probability P(X^2>x)
"""
return 1 - chi(x, d)
def chi_squared(X, O, B, s=2):
"""
χ² test for a histogram
given
X: a normally distributed variable X
O: ndarray of normalized absolute frequencies
B: ndarray of bins delimiters
s: number of constraints on X
computes the probability P(χ²>χ₀²)
"""
N, M = X.n, len(O)
d = M-1-s
dX = np.diff(B)
E = [N*p(X.mean, X.std, B[k], B[k+1]) for k in range(M)]
Chi = sum((N*O*dX - E)**2/E)
return q(Chi, d)
def chi_squared_fit(X, Y, f, sigma=None, s=2):
"""
χ² test for fitted data
given
X: independent variable
Y: dependent variable
f: best fit function
s: number of constraints on the data (optional)
sigma: uncertainty on Y, number or ndarray (optional)
"""
if sigma is None:
sigma_Y = np.mean([i.std for i in Y])
else:
sigma_Y = np.asarray(sigma).mean()
Chi = sum(((Y - f(X))/sigma_Y)**2)
return q(Chi, Y.size-s)
##
## Student t-distribution
##
def t(p, n):
"""
Student's t-distribution quantile
"""
from scipy.stats import t
dist = t(n-1)
tm, tp = dist.interval(p)
return (abs(tm) + tp)/2
##
## Regressions
##
def simple_linear(X, Y, sigma_Y):
"""
Linear regression of line Y=kX
"""
sigma = np.asarray(sigma_Y).mean()
k = sum(X*Y) / sum(X**2)
sigma_k = sigma / np.sqrt(sum(X**2))
return ufloat(k, sigma_k)
def linear(X, Y, sigma_Y):
"""
Linear regression of line Y=A+BX
"""
sigma = np.asarray(sigma_Y).mean()
N = len(Y)
D = N*sum(X**2) - sum(X)**2
A = (sum(X**2)*sum(Y) - sum(X)*sum(X*Y))/D
B = (N*sum(X*Y) - sum(X)*sum(Y))/D
sigma_A = sigma * np.sqrt(sum(X**2)/D)
sigma_B = sigma * np.sqrt(N/D)
return (ufloat(A, sigma_A),
ufloat(B, sigma_B))
def polynomial(X, Y, d=2, sigma_Y=None):
"""
d-th degree polynomial fit
"""
if sigma_Y is None:
weights = None
elif isinstance(sigma_Y, collections.Iterable):
weights = 1/np.asarray(sigma_Y)
else:
weights = np.repeat(1/sigma_Y, len(X))
coeff, cov = np.polyfit(X, Y, d, w=weights,
cov=True, full=False)
return correlated_values(coeff, cov)
def curve(X, Y, f, sigmaY=None, guess=None, **kwargs):
"""
Any function fit
"""
from scipy.optimize import curve_fit
if sigmaY is None:
weights = None
elif isinstance(sigmaY, collections.Iterable):
weights = np.asarray(sigmaY)
else:
weights = np.repeat(sigmaY, len(X))
coeff, cov = curve_fit(f, X, Y, sigma=weights, p0=guess, **kwargs)
if sigmaY is None:
return lambda x: f(x, *coeff), [ufloat(i, np.nan) for i in coeff]
return lambda x: f(x, *coeff), correlated_values(coeff, cov)
##
## Misc
##
def check_measures(X1, X2):
"""
Checks whether the results of two set of measures
are compatible with each other. Gives the α-value
α=1-P(X within tσ) where t is the weighted difference
of X₁ and X₂
"""
t = np.abs(X1.n - X2.n)/np.sqrt(X1.s**2 + X2.s**2)
return float(1 - erf(t/np.sqrt(2)))
def combine_measures(*variables):
"""
Combines different (compatible) measures of the same
quantity producing a new value with a smaller uncertainty
than the individually taken ones
"""
W = np.array([1/i.s**2 for i in variables])
X = np.array([i.n for i in variables])
best = sum(W*X)/sum(W)
sigma = 1/np.sqrt(sum(W))
return ufloat(best, sigma)
def format_measure(mean, sigma):
"""
Formats a measure in the standard declaration
"""
prec = int(np.log10(abs(sigma)))
limit = 2-prec if prec < 0 else 2
sigma = round(sigma, limit)
mean = round(mean, limit)
return "{}±{}".format(mean, sigma)
class WithEmptyFormatter(string.Formatter):
int_type = (int, long) if sys.version_info < (3,) else (int)
def vformat(self, *args):
self._automatic = None
return super(WithEmptyFormatter, self).vformat(*args)
def get_value(self, key, args, kwargs):
if key == '':
if self._automatic is None:
self._automatic = 0
elif self._automatic == -1:
raise ValueError("cannot switch from manual field specification "
"to automatic field numbering")
key = self._automatic
self._automatic += 1
elif isinstance(key, int_type):
if self._automatic is None:
self._automatic = -1
elif self._automatic != -1:
raise ValueError("cannot switch from automatic field numbering "
"to manual field specification")
return super(WithEmptyFormatter, self).get_value(key, args, kwargs)
class MeasureFormatter(WithEmptyFormatter):
def format_field(self, value, format_spec):
if isinstance(value, UFloat):
return value.format(format_spec+'P')
else:
return super(MeasureFormatter, self).format_field(value, format_spec)
mformat = MeasureFormatter().format
epsilon = 0.05