# coding: utf-8 from __future__ import division, unicode_literals # special functions from sympy.functions.special.gamma_functions import lowergamma, gamma from sympy.functions.special.error_functions import erf from mpmath import hyp2f1 # uncertainty propagation from uncertainties.core import UFloat from uncertainties import ufloat, wrap, correlated_values import uncertainties.unumpy as unp # fit from numpy.polynomial.polynomial import polyfit import sys import collections import types import numpy as np import string ## ## Variables ## def array(*args, **kwargs): """ Shorthand for 1-dimensional array """ return np.array(args) def uarray(err, *val): """ Shorthand for uncertain array with constant uncertainty """ return unp.uarray(val, [err]) def nominal(x): """ Nominal value of n-dimensional uncertain array """ return unp.nominal_values(x) def sigma(x): """ Uncertainty of n-dimensional uncertain array """ return unp.std_devs(x) class sample(np.ndarray): """ Sample type (ndarray subclass) Given a data sample outputs many statistical properties: n: number of measures min: minimum value max: maximum value mean: sample mean med: median value var: sample variance std: sample standard deviation stdm: standard deviation of the mean """ __array_priority__ = 2 def __new__(cls, *sample): if isinstance(sample[0], types.GeneratorType): sample = list(sample[0]) s = np.asarray(sample).view(cls) return s def __str__(self): return format_measure(self.mean, self.stdm) def __array_finalize__(self, obj): if obj is None: return obj = np.asarray(obj) self.n = len(obj) self.min = np.min(obj) self.max = np.max(obj) self.mean = np.mean(obj) self.med = np.median(obj) self.var = np.var(obj, ddof=1) self.std = np.sqrt(self.var) self.stdm = self.std / np.sqrt(self.n) def val(self): """ Gives the measure at standard 68% CL as an uncertain float (X, S/√n) """ return ufloat(self.mean, self.stdm) def tval(self): """ Gives the measure at 68% CL, calculated with the Student t-distribution, as an uncertain float (X, tS/√n) """ return ufloat(self.mean, t(0.68, self.n-1) * self.stdm) ## ## Normal distribution ## def phi(x, mu, sigma): """ Normal CDF computes the probability P(xx) """ return 1 - chi(x, d) def chi_squared(X, O, B, s=2): """ χ² test for a histogram given X: a normally distributed variable X O: ndarray of normalized absolute frequencies B: ndarray of bins delimiters s: number of constraints on X computes the probability P(χ²>χ₀²) """ N, M = X.n, len(O) d = M-1-s dX = np.diff(B) E = [N*p(X.mean, X.std, B[k], B[k+1]) for k in range(M)] Chi = sum((N*O*dX - E)**2/E) return q(Chi, d) def chi_squared_fit(X, Y, f, sigma=None, s=2): """ χ² test for fitted data given X: independent variable Y: dependent variable f: best fit function s: number of constraints on the data (optional) sigma: uncertainty on Y, number or ndarray (optional) """ if sigma is None: sigma_Y = np.mean([i.std for i in Y]) else: sigma_Y = np.asarray(sigma).mean() Chi = sum(((Y - f(X))/sigma_Y)**2) return q(Chi, Y.size-s) ## ## Student t-distribution ## def t(p, n): """ Student's t-distribution quantile """ from scipy.stats import t dist = t(n-1) tm, tp = dist.interval(p) return (abs(tm) + tp)/2 ## ## Regressions ## def simple_linear(X, Y, sigma_Y): """ Linear regression of line Y=kX """ sigma = np.asarray(sigma_Y).mean() k = sum(X*Y) / sum(X**2) sigma_k = sigma / np.sqrt(sum(X**2)) return ufloat(k, sigma_k) def linear(X, Y, sigma_Y): """ Linear regression of line Y=A+BX """ sigma = np.asarray(sigma_Y).mean() N = len(Y) D = N*sum(X**2) - sum(X)**2 A = (sum(X**2)*sum(Y) - sum(X)*sum(X*Y))/D B = (N*sum(X*Y) - sum(X)*sum(Y))/D sigma_A = sigma * np.sqrt(sum(X**2)/D) sigma_B = sigma * np.sqrt(N/D) return (ufloat(A, sigma_A), ufloat(B, sigma_B)) def polynomial(X, Y, d=2, sigma_Y=None): """ d-th degree polynomial fit """ if sigma_Y is None: weights = None elif isinstance(sigma_Y, collections.Iterable): weights = 1/np.asarray(sigma_Y) else: weights = np.repeat(1/sigma_Y, len(X)) coeff, cov = np.polyfit(X, Y, d, w=weights, cov=True, full=False) return correlated_values(coeff, cov) def curve(X, Y, f, sigmaY=None, guess=None, **kwargs): """ Any function fit """ from scipy.optimize import curve_fit if sigmaY is None: weights = None elif isinstance(sigmaY, collections.Iterable): weights = np.asarray(sigmaY) else: weights = np.repeat(sigmaY, len(X)) coeff, cov = curve_fit(f, X, Y, sigma=weights, p0=guess, **kwargs) if sigmaY is None: return lambda x: f(x, *coeff), [ufloat(i, np.nan) for i in coeff] return lambda x: f(x, *coeff), correlated_values(coeff, cov) ## ## Misc ## def check_measures(X1, X2): """ Checks whether the results of two set of measures are compatible with each other. Gives the α-value α=1-P(X within tσ) where t is the weighted difference of X₁ and X₂ """ t = np.abs(X1.n - X2.n)/np.sqrt(X1.s**2 + X2.s**2) return float(1 - erf(t/np.sqrt(2))) def combine_measures(*variables): """ Combines different (compatible) measures of the same quantity producing a new value with a smaller uncertainty than the individually taken ones """ W = np.array([1/i.s**2 for i in variables]) X = np.array([i.n for i in variables]) best = sum(W*X)/sum(W) sigma = 1/np.sqrt(sum(W)) return ufloat(best, sigma) def format_measure(mean, sigma): """ Formats a measure in the standard declaration """ prec = int(np.log10(abs(sigma))) limit = 2-prec if prec < 0 else 2 sigma = round(sigma, limit) mean = round(mean, limit) return "{}±{}".format(mean, sigma) class WithEmptyFormatter(string.Formatter): int_type = (int, long) if sys.version_info < (3,) else (int) def vformat(self, *args): self._automatic = None return super(WithEmptyFormatter, self).vformat(*args) def get_value(self, key, args, kwargs): if key == '': if self._automatic is None: self._automatic = 0 elif self._automatic == -1: raise ValueError("cannot switch from manual field specification " "to automatic field numbering") key = self._automatic self._automatic += 1 elif isinstance(key, int_type): if self._automatic is None: self._automatic = -1 elif self._automatic != -1: raise ValueError("cannot switch from automatic field numbering " "to manual field specification") return super(WithEmptyFormatter, self).get_value(key, args, kwargs) class MeasureFormatter(WithEmptyFormatter): def format_field(self, value, format_spec): if isinstance(value, UFloat): return value.format(format_spec+'P') else: return super(MeasureFormatter, self).format_field(value, format_spec) mformat = MeasureFormatter().format epsilon = 0.05