ex-3: revised and typo-fixed
This commit is contained in:
parent
b865dd83ac
commit
4aa8eefaf9
12
ex-3/main.c
12
ex-3/main.c
@ -85,12 +85,16 @@ int main(int argc, char **argv) {
|
|||||||
fputs("# event sampling\n\n", stderr);
|
fputs("# event sampling\n\n", stderr);
|
||||||
fprintf(stderr, "generating %ld events...", opts.num_events);
|
fprintf(stderr, "generating %ld events...", opts.num_events);
|
||||||
struct event *e;
|
struct event *e;
|
||||||
|
// for (size_t i = 0; i < s.size; i++){
|
||||||
|
// e = &s.events[i];
|
||||||
|
// do {
|
||||||
|
// e->th = acos(1 - 2*gsl_rng_uniform(r));
|
||||||
|
// e->ph = 2 * M_PI * gsl_rng_uniform(r);
|
||||||
|
// } while(0.2 * gsl_rng_uniform(r) > distr(def_par, e));
|
||||||
for (size_t i = 0; i < s.size; i++){
|
for (size_t i = 0; i < s.size; i++){
|
||||||
e = &s.events[i];
|
e = &s.events[i];
|
||||||
do {
|
e->th = acos(1 - 2*gsl_rng_uniform(r));
|
||||||
e->th = acos(1 - 2*gsl_rng_uniform(r));
|
e->ph = 2 * M_PI * gsl_rng_uniform(r);
|
||||||
e->ph = 2 * M_PI * gsl_rng_uniform(r);
|
|
||||||
} while(0.2 * gsl_rng_uniform(r) > distr(def_par, e));
|
|
||||||
|
|
||||||
// update the histogram
|
// update the histogram
|
||||||
gsl_histogram2d_increment(hist, e->th, e->ph);
|
gsl_histogram2d_increment(hist, e->th, e->ph);
|
||||||
|
@ -9,7 +9,8 @@ bins = tuple(map(int, sys.stdin.readline().split()))
|
|||||||
xedges, yedges, counts = loadtxt(sys.stdin, unpack=True, usecols=[0,2,4])
|
xedges, yedges, counts = loadtxt(sys.stdin, unpack=True, usecols=[0,2,4])
|
||||||
counts = counts.reshape(bins)
|
counts = counts.reshape(bins)
|
||||||
|
|
||||||
plt.rcParams['font.size'] = 15
|
plt.rcParams['font.size'] = 30
|
||||||
|
tight_layout()
|
||||||
suptitle('Angular decay distribution')
|
suptitle('Angular decay distribution')
|
||||||
|
|
||||||
#subplot2grid((1, 3), (0, 0), colspan=2, aspect='equal')
|
#subplot2grid((1, 3), (0, 0), colspan=2, aspect='equal')
|
||||||
@ -27,4 +28,6 @@ norm = colorbar(fraction=0.023, pad=0.04).norm
|
|||||||
# x, y, z, rstride=1, cstride=1,
|
# x, y, z, rstride=1, cstride=1,
|
||||||
# facecolors=cm.viridis(norm(counts)))
|
# facecolors=cm.viridis(norm(counts)))
|
||||||
#axis('off')
|
#axis('off')
|
||||||
|
|
||||||
show()
|
show()
|
||||||
|
|
||||||
|
@ -89,3 +89,20 @@
|
|||||||
year={2008},
|
year={2008},
|
||||||
publisher={Citeseer}
|
publisher={Citeseer}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@misc{painless94,
|
||||||
|
title={An introduction to the conjugate gradient method without the
|
||||||
|
agonizing pain},
|
||||||
|
author={Shewchuk, Jonathan Richard and others},
|
||||||
|
year={1994},
|
||||||
|
pages={42},
|
||||||
|
publisher={Carnegie-Mellon University. Department of Computer Science}
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{Lou05,
|
||||||
|
title={A brief description of the levenberg-marquardt algorithm
|
||||||
|
implemented by levmar},
|
||||||
|
author={M. I. A. Lourakis},
|
||||||
|
year={2005},
|
||||||
|
journal={Matrix}
|
||||||
|
}
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -116,7 +116,7 @@ approx: 0.57225\ 72410\ 34058
|
|||||||
diff: 0.00495\ 84238\ 67473
|
diff: 0.00495\ 84238\ 67473
|
||||||
--------- -----------------------
|
--------- -----------------------
|
||||||
|
|
||||||
Table: Best esitimation of $\gamma$ using
|
Table: Best estimation of $\gamma$ using
|
||||||
the alternative formula. {#tbl:second}
|
the alternative formula. {#tbl:second}
|
||||||
|
|
||||||
Here, the problem lies in the binomial term: computing the factorial of a
|
Here, the problem lies in the binomial term: computing the factorial of a
|
||||||
|
@ -4,37 +4,34 @@
|
|||||||
|
|
||||||
A number of $N = 50'000$ points on the unit sphere, each representing a
|
A number of $N = 50'000$ points on the unit sphere, each representing a
|
||||||
particle detection event, must be generated according to the angular
|
particle detection event, must be generated according to the angular
|
||||||
probability distribution function $F_0$:
|
probability distribution function $F$:
|
||||||
|
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
F_0 (\theta, \phi) = \frac{3}{4 \pi} \Bigg[
|
F (\theta, \phi) = &\frac{3}{4 \pi} \Bigg[
|
||||||
\frac{1}{2} (1 - \alpha_0) + \frac{1}{2} (3\alpha_0 - 1) \cos^2(\theta)& \\
|
\frac{1}{2} (1 - \alpha) + \frac{1}{2} (3\alpha - 1) \cos^2(\theta) \\
|
||||||
- \beta_0 \sin^2(\theta) \cos(2 \phi)
|
&- \beta \sin^2(\theta) \cos(2 \phi)
|
||||||
- \gamma_0 \sqrt{2} \sin(2 \theta) \cos(\phi)& \Bigg]
|
- \gamma \sqrt{2} \sin(2 \theta) \cos(\phi) \Bigg]
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
where $\theta$ and $\phi$ are, respectively, the polar and azimuthal angles, and
|
where $\theta$ and $\phi$ are, respectively, the polar and azimuthal angles, and
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\alpha_0 = 0.65 \et \beta_0 = 0.06 \et \gamma_0 = -0.18
|
\alpha_0 = 0.65 \et \beta_0 = 0.06 \et \gamma_0 = -0.18
|
||||||
$$
|
$$
|
||||||
|
|
||||||
To generate the points a *try and catch* method was employed:
|
To generate the points a *try and catch* method was employed:
|
||||||
|
|
||||||
1. generate a point $(\theta , \phi)$ uniformly on a unit sphere
|
1. generate a point $(\theta , \phi)$ uniformly on a unit sphere,
|
||||||
2. generate a third value $Y$ uniformly in $[0, 1]$
|
2. generate a third value $Y$ uniformly in $[0, 1]$,
|
||||||
3. if @eq:requirement is satisfied save the point
|
3. if @eq:requirement is satisfied save the point,
|
||||||
4. repeat from 1.
|
4. repeat from 1.
|
||||||
|
|
||||||
$$
|
$$
|
||||||
Y < F_0(\theta, \phi)
|
Y < F(\theta, \phi)
|
||||||
$$ {#eq:requirement}
|
$$ {#eq:requirement}
|
||||||
|
|
||||||
To increase the efficiency of the procedure, the $Y$ values were actually
|
To increase the efficiency of the procedure, the $Y$ values were actually
|
||||||
generated in $[0, 0.2]$, since $\max F_0 = 0.179$ for the given parameters
|
generated in $[0, 0.2]$, since $\max F = 0.179$ for the given parameters
|
||||||
(an other option would have been generating the numbers in the range $[0 ; 1]$,
|
(by generating the numbers in the range $[0 ; 1]$, all the points with
|
||||||
but all the points with $Y_{\theta, \phi} > 0.179$ would have been rejected with
|
$Y_{\theta, \phi} > 0.179$ would have been rejected with the same
|
||||||
the same probability, namely 1).
|
probability, namely 1).
|
||||||
|
|
||||||
While $\phi$ can simply be generated uniformly between 0 and $2 \pi$, for
|
While $\phi$ can simply be generated uniformly between 0 and $2 \pi$, for
|
||||||
$\theta$ one has to be more careful: by generating evenly distributed numbers
|
$\theta$ one has to be more careful: by generating evenly distributed numbers
|
||||||
@ -48,21 +45,20 @@ easily generated by the GSL function `gsl_rng_uniform()`.
|
|||||||
|
|
||||||
<div id="fig:compare">
|
<div id="fig:compare">
|
||||||
![Uniformly distributed points with $\theta$ evenly distributed between
|
![Uniformly distributed points with $\theta$ evenly distributed between
|
||||||
0 and $\pi$.](images/histo-i-u.pdf){width=45%}
|
0 and $\pi$.](images/histo-i-u.pdf){width=50%}
|
||||||
![Points uniformly distributed on a spherical
|
![Points uniformly distributed on a spherical
|
||||||
surface.](images/histo-p-u.pdf){width=45%}
|
surface.](images/histo-p-u.pdf){width=50%}
|
||||||
|
|
||||||
![Sample generated according to $F$ with $\theta$ evenly distributed between
|
![Sample generated according to $F$ with $\theta$ evenly distributed between
|
||||||
0 and $\pi$.](images/histo-i-F.pdf){width=45%}
|
0 and $\pi$.](images/histo-i-F.pdf){width=50%}
|
||||||
![Sample generated according to $F$ with $\theta$ properly
|
![Sample generated according to $F$ with $\theta$ properly
|
||||||
distributed.](images/histo-p-F.pdf){width=45%}
|
distributed.](images/histo-p-F.pdf){width=50%}
|
||||||
|
|
||||||
Examples of samples. On the left, points with $\theta$ evenly distributed
|
Examples of samples. On the left, points with $\theta$ evenly distributed
|
||||||
between 0 and $\pi$; on the right, points with $\theta$ properly distributed.
|
between 0 and $\pi$; on the right, points with $\theta$ properly distributed.
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
The transformation can be found by imposing the angular PDF to be a constant:
|
The transformation can be found by imposing the angular PDF to be a constant:
|
||||||
|
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
\frac{d^2 P}{d \Omega^2} = \text{const} = \frac{1}{4 \pi}
|
\frac{d^2 P}{d \Omega^2} = \text{const} = \frac{1}{4 \pi}
|
||||||
\hspace{20pt} &\Longrightarrow \hspace{20pt}
|
\hspace{20pt} &\Longrightarrow \hspace{20pt}
|
||||||
@ -86,7 +82,6 @@ The transformation can be found by imposing the angular PDF to be a constant:
|
|||||||
|
|
||||||
If $\theta$ is chosen to grow together with $x$, then the absolute value can be
|
If $\theta$ is chosen to grow together with $x$, then the absolute value can be
|
||||||
omitted:
|
omitted:
|
||||||
|
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
\frac{d \theta}{dx} = \frac{2}{\sin(\theta)}
|
\frac{d \theta}{dx} = \frac{2}{\sin(\theta)}
|
||||||
\hspace{20pt} &\Longrightarrow \hspace{20pt}
|
\hspace{20pt} &\Longrightarrow \hspace{20pt}
|
||||||
@ -108,10 +103,11 @@ a single point, the effect of this omission is negligible.
|
|||||||
\vspace{30pt}
|
\vspace{30pt}
|
||||||
|
|
||||||
|
|
||||||
## Parameter estimation
|
## Parameters estimation
|
||||||
|
|
||||||
The sample must now be used to estimate the parameters $\alpha_0$,
|
The sample must now be used to estimate the parameters $\alpha$, $\beta$ and
|
||||||
$\beta_0$ and $\gamma_0$ of the angular distribution $F_0$.
|
$\gamma$ of the angular distribution $F$. The correct set will be referred to
|
||||||
|
as {$\alpha_0$, $\beta_0$, $\gamma_0$}.
|
||||||
|
|
||||||
|
|
||||||
### Maximum Likelihood method {#sec:MLM}
|
### Maximum Likelihood method {#sec:MLM}
|
||||||
@ -119,9 +115,7 @@ $\beta_0$ and $\gamma_0$ of the angular distribution $F_0$.
|
|||||||
The Maximum Likelihood method (MLM) is based on the observation that the best
|
The Maximum Likelihood method (MLM) is based on the observation that the best
|
||||||
estimate {$\bar{\alpha}$, $\bar{\beta}$, $\bar{\gamma}$} of the parameters
|
estimate {$\bar{\alpha}$, $\bar{\beta}$, $\bar{\gamma}$} of the parameters
|
||||||
{$\alpha$, $\beta$, $\gamma$} should maximize the Likelihood function $L$
|
{$\alpha$, $\beta$, $\gamma$} should maximize the Likelihood function $L$
|
||||||
defined in @eq:Like0, where the index $i$ runs over the sample and $F$ is the
|
defined in @eq:Like0, where the index $i$ runs over the sample:
|
||||||
function $F_0$ with free parameters $\alpha$, $\beta$ and $\gamma$:
|
|
||||||
|
|
||||||
$$
|
$$
|
||||||
L = \prod_i F(\theta_i, \phi_i) = \prod_i F_i
|
L = \prod_i F(\theta_i, \phi_i) = \prod_i F_i
|
||||||
$$ {#eq:Like0}
|
$$ {#eq:Like0}
|
||||||
@ -134,9 +128,8 @@ $\beta$, $\gamma$} = {$\alpha_0$, $\beta_0$, $\gamma_0$}. Thus, by viewing $F_i$
|
|||||||
as a function of the parameters, by maximizing $P = L$ with respect
|
as a function of the parameters, by maximizing $P = L$ with respect
|
||||||
to $\alpha$, $\beta$ and $\gamma$, a good estimate should be found.
|
to $\alpha$, $\beta$ and $\gamma$, a good estimate should be found.
|
||||||
Instead of actually maximising $L$, the function $-\ln(L)$
|
Instead of actually maximising $L$, the function $-\ln(L)$
|
||||||
was minimised, as minimisation methods as usually described in literature
|
was minimised, as minimisation methods are usually described in literature
|
||||||
and $\ln$ since it simplifies the math:
|
and the logarithm simplifies the math:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
L = \prod_i F_i \thus - \ln(L) = - \sum_{i} \ln(F_i)
|
L = \prod_i F_i \thus - \ln(L) = - \sum_{i} \ln(F_i)
|
||||||
$$ {#eq:Like}
|
$$ {#eq:Like}
|
||||||
@ -148,36 +141,36 @@ conjugate gradient Fletcher-Reeves algorithm, used in the solution, which requir
|
|||||||
the implementation of $-\ln(L)$ and its gradient.
|
the implementation of $-\ln(L)$ and its gradient.
|
||||||
|
|
||||||
To minimise a function $f$, given an initial guess point $x_0$, the gradient
|
To minimise a function $f$, given an initial guess point $x_0$, the gradient
|
||||||
$\nabla f$ is used to find the initial direction $v_0$ (the steepest descent) along
|
$\nabla f$ is used to find the initial direction $v_0$ (the steepest descent)
|
||||||
which a line minimisation is performed: the minimum occurs where the
|
along which a line minimisation is performed: the minimum $x_1$ occurs where
|
||||||
directional derivative along $v_0$ is zero:
|
the directional derivative along $v_0$ is zero:
|
||||||
$$
|
$$
|
||||||
\frac{\partial f}{\partial v_0} = \nabla f \cdot v_0 = 0
|
\frac{\partial f}{\partial v_0} = \nabla f \cdot v_0 = 0
|
||||||
$$
|
$$
|
||||||
|
|
||||||
or, equivalently, at the point where the gradient is orthogonal to $v_0$.
|
or, equivalently, at the point where the gradient is orthogonal to $v_0$.
|
||||||
After this first step the following procedure is iterated:
|
After this first step, the following procedure is iterated:
|
||||||
|
|
||||||
1. Find the steepest descent $v_n$.
|
1. find the steepest descent $v_n$ at $x_n$,
|
||||||
2. Compute the *conjugate* direction: $w_n = v_n + \beta_n w_{n-1}$
|
2. compute the *conjugate* direction: $w_n = v_n + \beta_n w_{n-1}$,
|
||||||
3. Perform a line minimisation along $w_n$
|
3. perform a line minimisation along $w_n$ and update the minimum $x_{n+1}$,
|
||||||
4. Update the position $x_{n+1} = x_n + \alpha_n w_n$
|
|
||||||
|
|
||||||
where $\alpha_n$ is line minimum and $\beta_n$ is given by the Fletcher-Reeves
|
Different formulas for $\beta_n$ have been developed, all equivalent for a
|
||||||
formula:
|
quadratic function, but not for nonlinear optimization: in such instances, the
|
||||||
|
best formula is a matter of heuristics [@painless94]. In this case, $\beta_n$ is
|
||||||
|
given by the Fletcher-Reeves formula:
|
||||||
$$
|
$$
|
||||||
\beta = \frac{\|\nabla f(x_n)\|^2}{\|\nabla f(x_{n-1})\|^2}
|
\beta_n = \frac{\|\nabla f(x_n)\|^2}{\|\nabla f(x_{n-1})\|^2}
|
||||||
$$
|
$$
|
||||||
|
|
||||||
The accuracy of each line minimisation is controlled the parameter `tol`,
|
The accuracy of each line minimisation is controlled with the parameter `tol`,
|
||||||
meaning:
|
meaning:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
w \cdot \nabla f < \text{tol} \, \|w\| \, \|\nabla f\|
|
w \cdot \nabla f < \text{tol} \, \|w\| \, \|\nabla f\|
|
||||||
$$
|
$$
|
||||||
|
|
||||||
The minimisation is quite unstable and this forced the starting point to be
|
The minimisation is quite unstable and this forced the starting point to be
|
||||||
taken very close to the solution, namely:
|
taken very close to the solution, namely:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\alpha_{sp} = 0.79 \et
|
\alpha_{sp} = 0.79 \et
|
||||||
\beta_{sp} = 0.02 \et
|
\beta_{sp} = 0.02 \et
|
||||||
@ -187,29 +180,28 @@ $$
|
|||||||
The overall minimisation ends when the norm of the gradient is smaller than $10^{-4}$.
|
The overall minimisation ends when the norm of the gradient is smaller than $10^{-4}$.
|
||||||
The program took 25 of the above iterations to reach the result shown in @eq:Like_res.
|
The program took 25 of the above iterations to reach the result shown in @eq:Like_res.
|
||||||
|
|
||||||
The Cramér-Rao bound states that the covariance matrix of parameters estimated
|
As regards the error estimation, the Cramér-Rao bound states that the covariance
|
||||||
by MLM is greater than the the inverse of the Hessian matrix of $-\log(L)$
|
matrix of parameters estimated by MLM is greater than the inverse of the
|
||||||
at the minimum. Thus, the Hessian matrix was computed
|
Hessian matrix of $-\log(L)$ at the minimum. Thus, the Hessian matrix was
|
||||||
analytically and inverted by a Cholesky decomposition, which states that
|
computed analytically and inverted by a Cholesky decomposition, which states
|
||||||
every positive definite symmetric square matrix $H$ can be written as the
|
that every positive definite symmetric square matrix $H$ can be written as the
|
||||||
product of a lower triangular matrix $L$ and its transpose $L^T$:
|
product of a lower triangular matrix $\Delta$ and its transpose $\Delta^T$:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
H = L \cdot L^T
|
H = \Delta \cdot \Delta^T
|
||||||
$$
|
$$
|
||||||
|
|
||||||
The Hessian matrix is, indeed, both symmetric and positive definite, when
|
The Hessian matrix is, indeed, both symmetric and positive definite, when
|
||||||
computed in a minimum. Once decomposed the inverse is given by
|
computed in a minimum. Once decomposed, the inverse is given by
|
||||||
|
$$
|
||||||
|
H^{-1} = (\Delta \cdot \Delta^T)^{-1} = (\Delta^{-1})^T \cdot \Delta^{-1}
|
||||||
|
$$
|
||||||
|
|
||||||
$$
|
where the inverse of $\Delta$ is easily computed, since it's a triangular
|
||||||
H^{-1} = (L \cdot L^T)^{-1} = (L^{-1})^T \cdot L^{-1}
|
matrix.
|
||||||
$$
|
|
||||||
The inverse of $L$ is easily computed since it's a triangular matrix.
|
|
||||||
|
|
||||||
The GSL library provides two functions `gsl_linalg_cholesky_decomp()` and
|
The GSL library provides two functions `gsl_linalg_cholesky_decomp()` and
|
||||||
`gsl_linalg_cholesky_invert()` to decompose and invert in-place a matrix.
|
`gsl_linalg_cholesky_invert()` to decompose and invert in-place a matrix.
|
||||||
The result is shown below:
|
The result is shown below:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\bar{\alpha_L} = 0.6541 \et
|
\bar{\alpha_L} = 0.6541 \et
|
||||||
\bar{\beta_L} = 0.06125 \et
|
\bar{\beta_L} = 0.06125 \et
|
||||||
@ -231,22 +223,24 @@ Hence:
|
|||||||
\gamma_L &= -0.1763 \pm 0.0016
|
\gamma_L &= -0.1763 \pm 0.0016
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
|
See @sec:res_comp for results compatibility.
|
||||||
|
|
||||||
|
|
||||||
### Least squares estimation
|
### Least squares estimation
|
||||||
|
|
||||||
Another method that can be used to estimate {$\alpha$, $\beta$, $\gamma$} is
|
Another method that can be used to estimate {$\alpha$, $\beta$, $\gamma$} is
|
||||||
the least squares (LSQ), which is a multidimensional minimisation
|
the least squares (LSQ), which is a multidimensional minimisation
|
||||||
specialised to the case of sum of squared functions called residuals.
|
specialised to the case of sum of squared functions called residuals.
|
||||||
The residuals can be anything in general but are usually interpreted as the
|
In general, the residuals can be anything but are usually interpreted as the
|
||||||
difference between an expected (fit) and an observed value. The least squares
|
difference between an expected (fit) and an observed value. The least squares
|
||||||
then correspond to the expected values that best fit the observations.
|
then correspond to the expected values that best fit the observations.
|
||||||
|
|
||||||
To apply the LSQ method, the data must be properly binned, meaning that each
|
To apply the LSQ method, the data must be properly binned, meaning that each
|
||||||
bin should contain a significant number of events (say greater than four or
|
bin should contain a significant number of events (say greater than four or
|
||||||
five): in this case, 30 bins for $\theta$ and 60 for $\phi$ turned out
|
five): in this case, 30 bins for $\theta$ and 60 for $\phi$ turned out
|
||||||
to be satisfactory. The expected values were given as the product of $N$, $F$
|
to be satisfactory. The expected values $E_{\theta, \phi}$ were given as the
|
||||||
computed in the geometric center of the bin and the solid angle enclosed by the
|
product of $N$, $F$ computed in the geometric center of the bin and the solid
|
||||||
bin itself, namely:
|
angle enclosed by the bin itself, namely:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
E_{\theta, \phi} = N F(\theta, \phi) \, \Delta \theta \, \Delta \phi \,
|
E_{\theta, \phi} = N F(\theta, \phi) \, \Delta \theta \, \Delta \phi \,
|
||||||
@ -254,11 +248,8 @@ $$
|
|||||||
$$
|
$$
|
||||||
|
|
||||||
Once the data are binned, the number of events in each bin plays the role of the
|
Once the data are binned, the number of events in each bin plays the role of the
|
||||||
observed value, while the expected one is given by the probability of finding
|
observed value. Then, the $\chi^2$, defined as follows, must be minimized with
|
||||||
an event in that bin multiplied by the total number of points, $N$. Then, the
|
respect to the three parameters to be estimated:
|
||||||
$\chi^2$, defined as follow, must be minimized with respect to the three
|
|
||||||
parameters to be estimated:
|
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\chi^2 = \sum_i f_i^2 = \|\vec f\|^2 \with f_i = \frac{O_i - E_i}{\sqrt{E_i}}
|
\chi^2 = \sum_i f_i^2 = \|\vec f\|^2 \with f_i = \frac{O_i - E_i}{\sqrt{E_i}}
|
||||||
$$
|
$$
|
||||||
@ -270,12 +261,11 @@ trust region method was used to minimize the $\chi^2$.
|
|||||||
In such methods, the $\chi^2$, its gradient and its Hessian matrix are computed
|
In such methods, the $\chi^2$, its gradient and its Hessian matrix are computed
|
||||||
in a series of points in the parameters space till the gradient and the matrix
|
in a series of points in the parameters space till the gradient and the matrix
|
||||||
reach given proper values, meaning that the minimum has been well approached,
|
reach given proper values, meaning that the minimum has been well approached,
|
||||||
where "well" is related to that values.
|
where "well" is related to those values.
|
||||||
If {$x_1 \dots x_p$} are the parameters which must be estimated, first $\chi^2$
|
If {$x_1 \dots x_p$} are the parameters which must be estimated, first $\chi^2$
|
||||||
is computed in the starting point $\vec{x_k}$, then its value in a point
|
is computed in a point $\vec{x_k}$, then its value in a point $\vec{x}_k +
|
||||||
$\vec{x}_k + \vec{\delta}$ is modelled by a function $m_k (\vec{\delta})$ which
|
\vec{\delta}$ is modelled by a function $m_k (\vec{\delta})$ which is the
|
||||||
is the second order Taylor expansion around the point $\vec{x}_k$, namely:
|
second order Taylor expansion around the point $\vec{x}_k$, namely:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\chi^2 (\vec{x}_k + \vec{\delta}) \sim m_k (\vec{\delta}) =
|
\chi^2 (\vec{x}_k + \vec{\delta}) \sim m_k (\vec{\delta}) =
|
||||||
\chi^2 (\vec{x}_k) + \nabla_k^T \vec{\delta} + \frac{1}{2}
|
\chi^2 (\vec{x}_k) + \nabla_k^T \vec{\delta} + \frac{1}{2}
|
||||||
@ -284,41 +274,48 @@ $$
|
|||||||
|
|
||||||
where $\nabla_k$ and $H_k$ are the gradient and the Hessian matrix of $\chi^2$
|
where $\nabla_k$ and $H_k$ are the gradient and the Hessian matrix of $\chi^2$
|
||||||
in the point $\vec{x}_k$ and the superscript $T$ stands for the transpose. The
|
in the point $\vec{x}_k$ and the superscript $T$ stands for the transpose. The
|
||||||
initial problem is reduced the so called trust-region subproblem (TRS), which
|
initial problem is reduced into the so called trust-region subproblem (TRS),
|
||||||
is the minimisation of $m_k(\vec{\delta})$ in a region where
|
which is the minimisation of $m_k(\vec{\delta})$ in a region where
|
||||||
$m_k(\vec{\delta})$ should be a good approximation of $\chi^2 (\vec{x}_k
|
$m_k(\vec{\delta})$ should be a good approximation of $\chi^2 (\vec{x}_k +
|
||||||
+ \vec{\delta})$, given by the condition:
|
\vec{\delta})$, given by the condition:
|
||||||
$$
|
$$
|
||||||
\|D_k \vec\delta\| < \Delta_k
|
\|D_k \vec\delta\| < \Delta_k
|
||||||
$$
|
$$
|
||||||
If $D_k = I$ the region is a hypersphere of radius $\Delta_k$
|
|
||||||
centered at $\vec{x}_k$ but can be deformed according to $D_k$. This is
|
|
||||||
necessary in the case one or more parameters have scale very different scales.
|
|
||||||
|
|
||||||
Given an initial point $x_0$, radius $\Delta_0$, scale matrix $D_0$
|
If $D_k = I$, the region is a hypersphere of radius $\Delta_k$ centered at
|
||||||
and step $\vec\delta_0$ the LSQ algorithm consist in the following iteration:
|
$\vec{x}_k$. In case one or more parameters have very different scales, the
|
||||||
|
region should be deformed according to a proper $D_k$.
|
||||||
|
|
||||||
1. Construct the function $m_k(\vec\delta)$.
|
Given an initial point $x_0$, the radius $\Delta_0$, the scale matrix $D_0$
|
||||||
2. Solve the TRS for step $\vec\delta_k$.
|
and step $\vec\delta_0$, the LSQ algorithm consists of the following iteration:
|
||||||
3. Check whether the solution actually decreases $\chi^2$
|
|
||||||
1. If positive increase the trust region radius $\Delta_{k+1} =
|
1. construct the function $m_k(\vec\delta)$,
|
||||||
\alpha\Delta_k$ and shift the position $\vec x_{k+1} = \vec x_k +
|
2. solve the TRS and find $x_k + \vec\delta_k$ which corresponds to the
|
||||||
\vec \delta_k$.
|
plausible minimum,
|
||||||
2. If negative decrease the radius $\Delta_{k+1} = \Delta_k/\beta$.
|
3. check whether the solution actually decreases $\chi^2$:
|
||||||
4. Repeat
|
1. if positive and converged (see below), stop;
|
||||||
|
1. if positive but not converged, it means that $m_k$ still well
|
||||||
|
approximates $\chi^2$ in the trust region which is therefore enlarged
|
||||||
|
by increasing the radius $\Delta_{k+1} = \alpha\Delta_k$ for a given
|
||||||
|
$\alpha$ and the position of the central point is shifted: $\vec
|
||||||
|
x_{k+1} = \vec x_k + \vec \delta_k$;
|
||||||
|
2. if negative, it means that $m_k$ does not approximate properly
|
||||||
|
$\chi^2$ in the trust region which is therefore decreased by reducing
|
||||||
|
the radius $\Delta_{k+1} = \Delta_k/\beta$ for a given $\beta$.
|
||||||
|
4. Repeat.
|
||||||
|
|
||||||
This method is advantageous compared to a general minimisation because
|
This method is advantageous compared to a general minimisation because
|
||||||
the TRS usually amounts to solving a linear system. There are many algorithms
|
the TRS usually amounts to solving a linear system. There are many algorithms
|
||||||
to solve the problem, in this case the Levenberg-Marquardt was used. It is based
|
to solve the problem, in this case the Levenberg-Marquardt was used. It is
|
||||||
on a theorem that proves the existence of a number $\mu_k$ such that
|
based on a theorem which proves the existence of a number $\mu_k$ such that:
|
||||||
\begin{align*}
|
$$
|
||||||
\Delta_k \mu_k = \|D_k \vec\delta_k\| &&
|
\mu_k (\Delta_k - \|D_k \vec\delta_k\|) = 0 \et
|
||||||
(H_k + \mu_k D_k^TD_k) \vec\delta_k = -\nabla_k
|
(H_k + \mu_k D_k^TD_k) \vec\delta_k = -\nabla_k
|
||||||
\end{align*}
|
$$
|
||||||
|
|
||||||
Using the approximation[^2] $H\approx J^TJ$, obtained by computing the Hessian
|
Using the approximation[^2] $H\approx J^TJ$, obtained by computing the Hessian
|
||||||
of the first-order Taylor expansion of $\chi^2$, $\vec\delta_k$ can
|
of the first-order Taylor expansion of $\chi^2$, $\vec\delta_k$ can
|
||||||
be found by solving the system
|
be found by solving the system:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\begin{cases}
|
\begin{cases}
|
||||||
J_k \vec{\delta_k} + \vec{f_k} = 0 \\
|
J_k \vec{\delta_k} + \vec{f_k} = 0 \\
|
||||||
@ -326,10 +323,13 @@ $$
|
|||||||
\end{cases}
|
\end{cases}
|
||||||
$$
|
$$
|
||||||
|
|
||||||
|
For more information, see [@Lou05].
|
||||||
|
|
||||||
[^2]: Here $J_{ij} = \partial f_i/\partial x_j$ is the Jacobian matrix of the
|
[^2]: Here $J_{ij} = \partial f_i/\partial x_j$ is the Jacobian matrix of the
|
||||||
vector-valued function $\vec f(\vec x)$.
|
vector-valued function $\vec f(\vec x)$.
|
||||||
|
|
||||||
The algorithm terminates if on of the following condition are satisfied:
|
The algorithm terminates if one of the following convergence conditions is
|
||||||
|
satisfied:
|
||||||
|
|
||||||
1. $|\delta_i| \leq \text{xtol} (|x_i| + \text{xtol})$ for every component
|
1. $|\delta_i| \leq \text{xtol} (|x_i| + \text{xtol})$ for every component
|
||||||
of $\vec \delta$.
|
of $\vec \delta$.
|
||||||
@ -337,11 +337,10 @@ The algorithm terminates if on of the following condition are satisfied:
|
|||||||
3. $\|\vec f(\vec x+ \vec\delta) - \vec f(\vec x)\| \leq \text{ftol}
|
3. $\|\vec f(\vec x+ \vec\delta) - \vec f(\vec x)\| \leq \text{ftol}
|
||||||
\cdot \max(\|\vec f(\vec x)\|, 1)$
|
\cdot \max(\|\vec f(\vec x)\|, 1)$
|
||||||
|
|
||||||
These tolerance have all been set to \SI{1e-8}{}. The program converged in 7
|
Here xtol, gtol and ftol are tolerance values, all set to \SI{1e-8}{}. The
|
||||||
iterations giving the results below. The covariance of the parameters can again
|
program converged in 7 iterations giving the results below. The covariance of
|
||||||
been estimated through the Hessian matrix at the minimum. The following results
|
the parameters can again be estimated through the Hessian matrix at the
|
||||||
were obtained:
|
minimum. The following results were obtained:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
\bar{\alpha_{\chi}} = 0.6537 \et
|
\bar{\alpha_{\chi}} = 0.6537 \et
|
||||||
\bar{\beta_{\chi}} = 0.05972 \et
|
\bar{\beta_{\chi}} = 0.05972 \et
|
||||||
@ -363,12 +362,14 @@ Hence:
|
|||||||
&\gamma_{\chi} = -0.1736 \pm 0.0016
|
&\gamma_{\chi} = -0.1736 \pm 0.0016
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
|
See @sec:res_comp for results compatibility.
|
||||||
|
|
||||||
### Results compatibility
|
|
||||||
|
### Results compatibility {#sec:res_comp}
|
||||||
|
|
||||||
In order to compare the values $x_L$ and $x_{\chi}$ obtained from both methods
|
In order to compare the values $x_L$ and $x_{\chi}$ obtained from both methods
|
||||||
with the correct ones (namely {$\alpha_0$, $\beta_0$, $\gamma_0$}), the
|
with the correct ones ({$\alpha_0$, $\beta_0$, $\gamma_0$}), the following
|
||||||
following compatibility t-test was applied:
|
compatibility t-test was applied:
|
||||||
|
|
||||||
$$
|
$$
|
||||||
p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with
|
p = 1 - \text{erf}\left(\frac{t}{\sqrt{2}}\right)\ \with
|
||||||
@ -380,6 +381,8 @@ where $i$ stands either for the MLM or LSQ parameters and $\sigma_{x_i}$ is the
|
|||||||
uncertainty of the value $x_i$. At 95% confidence level, the values are
|
uncertainty of the value $x_i$. At 95% confidence level, the values are
|
||||||
compatible if $p > 0.05$.
|
compatible if $p > 0.05$.
|
||||||
|
|
||||||
|
\vspace{30pt}
|
||||||
|
|
||||||
Likelihood results:
|
Likelihood results:
|
||||||
|
|
||||||
----------------------------
|
----------------------------
|
||||||
@ -417,8 +420,8 @@ arrangement of $F$ would be required in order to justify this outcome.
|
|||||||
## Isotropic hypothesis testing
|
## Isotropic hypothesis testing
|
||||||
|
|
||||||
What if the probability distribution function was isotropic? Could it be
|
What if the probability distribution function was isotropic? Could it be
|
||||||
compatible with the found results? If $F$ was isotropic, then $\alpha_I$,
|
compatible with the found results?
|
||||||
$\beta_I$ and $\gamma_I$ would be $1/3$, 0, and 0 respectively, since this
|
If $F$ was isotropic, then $\alpha_I$, $\beta_I$ and $\gamma_I$ would be $1/3$,
|
||||||
gives $F_I = 1/{4 \pi}$. The t-test gives a $p$-value approximately zero for all
|
0, and 0 respectively, since this gives $F_I = 1/{4 \pi}$. The t-test gives a
|
||||||
the three parameters, meaning that there is no compatibility at all with this
|
$p$-value approximately zero for all the three parameters, meaning that there is
|
||||||
hypothesis.
|
no compatibility at all with this hypothesis.
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
- cambiare simbolo convoluzione
|
- cambiare simbolo convoluzione
|
||||||
- aggiungere citazioni e referenze
|
- aggiungere citazioni e referenze
|
||||||
- rifare grafici senza bordino
|
- rifare grafici senza bordino
|
||||||
|
- leggere l'articolo di Lucy
|
||||||
|
Loading…
Reference in New Issue
Block a user