lab-II/lab.py

# coding: utf-8

from __future__ import division, unicode_literals

# special functions
from sympy.functions.special.gamma_functions import lowergamma, gamma
from sympy.functions.special.error_functions import erf
from mpmath                                  import hyp2f1

# uncertainty propagation
from uncertainties.core   import UFloat
from uncertainties        import ufloat, wrap, correlated_values
import uncertainties.unumpy as unp

# fit
from numpy.polynomial.polynomial import polyfit

import sys
import collections
import types
import numpy as np
import string


##
## Variables
##

def array(*args, **kwargs):
  """
  Shorthand for 1-dimensional array
  """
  return np.array(args)


def uarray(err, *val):
  """
  Shorthand for uncertain array with
  constant uncertainty
  """
  return unp.uarray(val, [err])


def nominal(x):
  """
  Nominal value of n-dimensional uncertain array
  """
  return unp.nominal_values(x)


def sigma(x):
  """
  Uncertainty of n-dimensional uncertain array
  """
  return unp.std_devs(x)


class sample(np.ndarray):
  """
  Sample type (ndarray subclass)
  Given a data sample outputs many statistical properties:
  n:    number of measures
  min:  minimum value
  max:  maximum value
  mean: sample mean
  med:  median value
  var:  sample variance
  std:  sample standard deviation
  stdm: standard deviation of the mean
  """
  __array_priority__ = 2

  def __new__(cls, *sample):
    if isinstance(sample[0], types.GeneratorType):
       sample = list(sample[0])
    s = np.asarray(sample).view(cls)
    return s

  def __str__(self):
    return format_measure(self.mean, self.stdm)

  def __array_finalize__(self, obj):
    if obj is None:
      return
    obj = np.asarray(obj)
    self.n    = len(obj)
    self.min  = np.min(obj)
    self.max  = np.max(obj)
    self.mean = np.mean(obj)
    self.med  = np.median(obj)
    self.var  = np.var(obj, ddof=1)
    self.std  = np.sqrt(self.var)
    self.stdm = self.std / np.sqrt(self.n)

  def val(self):
    """
    Gives the measure at standard 68% CL as an uncertain float (X, S/√n)
    """
    return ufloat(self.mean, self.stdm)

  def tval(self):
    """
    Gives the measure at 68% CL, calculated with the Student
    t-distribution, as an uncertain float (X, tS/√n)
    """
    return ufloat(self.mean, t(0.68, self.n-1) * self.stdm)


##
## Normal distribution
##

def phi(x, mu, sigma):
  """
  Normal CDF
  computes the probability P(x<a) given X is a normally distributed
  random variable with mean μ and standard deviation σ.
  """
  return float(1 + erf((x-mu) / (sigma*np.sqrt(2))))/2


def p(mu, sigma, a, b=None):
  """
  Normal CDF shorthand
  given a normally distributed random variable X, with mean μ
  and standard deviation σ, computes the probability P(a<X<b)
  or P(X<a) whether b is given.
  """
  if b:
    return phi(b, mu, sigma) - phi(a, mu, sigma) # P(a<X<b)
  return phi(a, mu, sigma)                       # P(x<a)


##
## χ² distribution
##

def chi(x, d):
  """
  χ² CDF
  given a χ² distribution with degrees of freedom
  computes the probability P(x<X^2)
  """
  return float(lowergamma(d/2, x/2)/gamma(d/2))


def q(x, d):
  """
  χ² 1-CDF
  given a χ² distribution with d degrees of freedom
  computes the probability P(X^2>x)
  """
  return 1 - chi(x, d) 


def chi_squared(X, O, B, s=2):
  """
  χ² test for a histogram
  given
  X: a normally distributed variable X
  O: ndarray of normalized absolute frequencies
  B: ndarray of bins delimiters
  s: number of constraints on X
  computes the probability P(χ²>χ₀²)
  """
  N, M  = X.n, len(O)
  d  = M-1-s
  dX = np.diff(B)

  E   = [N*p(X.mean, X.std, B[k], B[k+1]) for k in range(M)]
  Chi = sum((N*O*dX - E)**2/E)

  return q(Chi, d)


def chi_squared_fit(X, Y, f, sigma=None, s=2):
  """
  χ² test for fitted data
  given
  X: independent variable
  Y: dependent variable
  f: best fit function
  s: number of constraints on the data (optional)
  sigma: uncertainty on Y, number or ndarray (optional)
  """
  if sigma is None:
    sigma_Y = np.mean([i.std for i in Y])
  else:
    sigma_Y = np.asarray(sigma).mean()
  Chi = sum(((Y - f(X))/sigma_Y)**2)
  return q(Chi, Y.size-s)


##
## Student t-distribution
##

def t(p, n):
  """
  Student's t-distribution quantile
  """

  from scipy.stats import t
  dist = t(n-1)
  tm, tp = dist.interval(p)
  return (abs(tm) + tp)/2


##
## Regressions
##

def simple_linear(X, Y, sigma_Y):
  """
  Linear regression of line Y=kX
  """
  sigma   = np.asarray(sigma_Y).mean()
  k       = sum(X*Y) / sum(X**2)
  sigma_k = sigma / np.sqrt(sum(X**2))
  return ufloat(k, sigma_k)


def linear(X, Y, sigma_Y):
  """
  Linear regression of line Y=A+BX
  """
  sigma = np.asarray(sigma_Y).mean()
   
  N = len(Y)
  D = N*sum(X**2) - sum(X)**2
  A = (sum(X**2)*sum(Y) - sum(X)*sum(X*Y))/D
  B = (N*sum(X*Y) - sum(X)*sum(Y))/D

  sigma_A = sigma * np.sqrt(sum(X**2)/D)
  sigma_B = sigma * np.sqrt(N/D)

  return (ufloat(A, sigma_A),
          ufloat(B, sigma_B))


def polynomial(X, Y, d=2, sigma_Y=None):
  """
  d-th degree polynomial fit
  """
  if sigma_Y is None:
    weights = None
  elif isinstance(sigma_Y, collections.Iterable):
    weights = 1/np.asarray(sigma_Y)
  else:
    weights = np.repeat(1/sigma_Y, len(X))
  coeff, cov = np.polyfit(X, Y, d, w=weights,
                          cov=True, full=False)
  return correlated_values(coeff, cov)


def curve(X, Y, f, sigmaY=None, guess=None, **kwargs):
  """
  Any function fit
  """
  from scipy.optimize import curve_fit

  if sigmaY is None:
    weights = None
  elif isinstance(sigmaY, collections.Iterable):
    weights = np.asarray(sigmaY)
  else:
    weights = np.repeat(sigmaY, len(X))

  coeff, cov = curve_fit(f, X, Y, sigma=weights, p0=guess, **kwargs)
  if sigmaY is None:
    return lambda x: f(x, *coeff), [ufloat(i, np.nan) for i in coeff]
  return lambda x: f(x, *coeff), correlated_values(coeff, cov)


##
## Misc
##

def check_measures(X1, X2):
  """
  Checks whether the results of two set of measures
  are compatible with each other. Gives the α-value
  α=1-P(X within tσ) where t is the weighted difference
  of X₁ and X₂
  """
  t = np.abs(X1.n - X2.n)/np.sqrt(X1.s**2 + X2.s**2)
  return float(1 - erf(t/np.sqrt(2)))


def combine_measures(*variables):
  """
  Combines different (compatible) measures of the same
  quantity producing a new value with a smaller uncertainty
  than the individually taken ones 
  """
  W = np.array([1/i.s**2 for i in variables])
  X = np.array([i.n for i in variables])

  best  = sum(W*X)/sum(W)
  sigma = 1/np.sqrt(sum(W))

  return ufloat(best, sigma)


def format_measure(mean, sigma):
  """
  Formats a measure in the standard declaration
  """
  prec  = int(np.log10(abs(sigma)))
  limit = 2-prec if prec < 0 else 2 
  sigma = round(sigma, limit)
  mean  = round(mean, limit)

  return "{}±{}".format(mean, sigma)


class WithEmptyFormatter(string.Formatter):
  int_type = (int, long) if sys.version_info < (3,) else (int)

  def vformat(self, *args):
    self._automatic = None
    return super(WithEmptyFormatter, self).vformat(*args)

  def get_value(self, key, args, kwargs):
    if key == '':
      if self._automatic is None:
        self._automatic = 0
      elif self._automatic == -1:
        raise ValueError("cannot switch from manual field specification "
                         "to automatic field numbering")
      key = self._automatic
      self._automatic += 1
    elif isinstance(key, int_type):
      if self._automatic is None:
        self._automatic = -1
      elif self._automatic != -1:
        raise ValueError("cannot switch from automatic field numbering "
                         "to manual field specification")
    return super(WithEmptyFormatter, self).get_value(key, args, kwargs)


class MeasureFormatter(WithEmptyFormatter):
  def format_field(self, value, format_spec):
    if isinstance(value, UFloat):
      return value.format(format_spec+'P')
    else:
      return super(MeasureFormatter, self).format_field(value, format_spec)

mformat = MeasureFormatter().format

epsilon = 0.05