ex-2: add fast variant of brent-mcmillan

2020-05-23 17:11:41 +02:00 · 2020-05-23 17:11:41 +02:00 · 90ab77b676
commit 90ab77b676
parent a263c6a8da
2 changed files with 216 additions and 1 deletions
--- a/ex-2/fast.c
+++ b/ex-2/fast.c
@ -0,0 +1,215 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <gmp.h>
+
+/* The Euler-Mascheroni constant is here computed to
+ * arbitrary precision using GMP floating point numbers
+ * with the formula:
+ *
+ *    0 < |γ - U(N)/V(N)| < πexp(-4N)
+ *
+ * with:
+ *
+ *    U(N) = Σ_(k = 1)^+∞ (N^k/k!)² *(H(k) - log(N))
+ *    V(N) = Σ_(k = 0)^+∞ (N^k/k!)²
+ *    H(k) = Σ_(j = 1)^(k) (1/k), the k-th harmonic number
+ *
+ * If N = 2^p (to simplify the log) to get the result to D
+ * decimal digits we must have 2^(p+1) > D / log(π), which is
+ * satisfied by p = floor(log(D)/log(2)).
+ *
+ * source: http://www.numberworld.org/y-cruncher/internals/formulas.html
+ *
+ */
+
+
+/* Approximation of the Lambert W(x) function at large values
+ * where x = log(2) 10^(D/2). The expression of x has been
+ * inlined and optimized to avoid overflowing on large D.
+ *
+ * Note: from x>10 this is an overestimate of W(x), which is
+ * good for the application below.
+ *
+ * source: "On the Lambert W function", Corless et al.
+ */
+double lambertW_large(size_t digits) {
+  double L1 = log(log(2.0)) + log(10.0)*(double)digits/2.0;
+  double L2 = log(L1);
+  return L1 - L2  + L2/L1 + (L2 - 2)/(2*L1*L1)
+       + L2*(3*L2*L2*L2 - 22*L2*L2 + 36*L2 - 12)/(12*L1*L1*L1*L1);
+}
+
+
+/* `mpf_log2(z, D)`
+ * calculate log(2) to D decimal places
+ * using the series
+ *
+ *   log(2) = Σ_{k=1}^+∞ 1/(k 2^k)
+ *
+ * The error of the n-th partial sum is
+ * estimated as
+ *
+ *   Δn = 1/(n(n+2) 2^(n-1)).
+ *
+ * So to fix the first D decimal places
+ * we evaluate up to n terms such that
+ *
+ *   Δn < 1/10^D
+ *
+ * Then take the average of the upper and
+ * lower bounds of the series, given by
+ *
+ *  Sk + ak/(1 - ak+1/ak) < S < Sk + ak L/(1-L)
+ *
+ *  where L = lim k→∞ ak+1/ak = 1/2.
+ */
+void mpf_log2(mpf_t rop, size_t digits) {
+  mpf_t sum, term;
+  mpf_inits(sum, term, NULL);
+
+  fprintf(stderr, "[mpf_log2] decimal digits: %ld\n", digits);
+
+  /* Compute the number of required terms
+   * A couple of extra digits are added for safety.
+   */
+  size_t n = 2.0/log(2)*lambertW_large(digits+2);
+  fprintf(stderr, "[mpf_log2] series terms: %ld\n", n);
+
+  /* Compute the nth partial num */
+  for (size_t i=1; i<=n; i++) {
+    mpf_set_ui(term, 1);
+    mpf_mul_2exp(term, term, i);
+    mpf_mul_ui(term, term, i);
+    mpf_ui_div(term, 1, term);
+    mpf_add(sum, sum, term);
+  }
+
+  /* Compute upper/lower bounds */
+  mpf_t upp, low;
+  mpf_inits(upp, low, NULL);
+
+  // upp = sum + term
+  mpf_add(upp, sum, term);
+
+  // low = sum + term * n/(n+2)
+  mpf_mul_ui(low, term, n);
+  mpf_div_ui(low, low, n+2);
+  mpf_add(low, sum, low);
+
+  /* average the bounds */
+  mpf_add(sum, low, upp);
+  mpf_div_ui(sum, sum, 2);
+
+  /* Note: printf() rounds floating points to the number of digits
+   * in an implementation-dependent manner. To avoid printing
+   * (wrongly) rounded digits we increase the count by 1 and hide
+   * the last by overwiting with a space.
+   */
+  size_t d = digits>50? 50 : digits;
+  gmp_fprintf(stderr, "[mpf_log2] upper bound: %.*Ff\b \n", d+3, upp);
+  gmp_fprintf(stderr, "[mpf_log2] lower bound: %.*Ff\b \n", d+3, low);
+  gmp_fprintf(stderr, "[mpf_log2] log(2)=%.*Ff\b \n",  d+1, sum);
+
+  // return sum
+  mpf_set(rop, sum);
+
+  // free memory
+  mpf_clears(sum, term, NULL);
+  mpf_clears(upp, low, NULL);
+}
+
+
+void mpf_gamma(mpf_t rop, size_t digits, size_t prec) {
+  unsigned int p = log2(digits);
+  fprintf(stderr, "[mpf_gamma] p: %d\n", p);
+
+  // initialise variables
+  mpf_t A, B, a, b, stop, temp;
+  mpf_inits(A, B, a, b, stop, temp, NULL);
+
+  /* Smallest representable number */
+  mpf_set_ui(stop, 10);
+  mpf_pow_ui(stop, stop, digits);
+  mpf_ui_div(stop, 1, stop);
+  gmp_fprintf(stderr, "[mpf_gamma] stop=%Fe\n", stop);
+
+  // A = a0 = -log(2^p) = -plog(2)
+  mpf_log2(a, digits);
+  mpf_mul_ui(a, a, p);
+  mpf_neg(a, a);
+  mpf_set(A, a);
+
+  // B = b0 = 1
+  mpf_set_si(b, 1);
+  mpf_set_si(B, 1);
+
+  size_t k;
+  for(k = 1;; k++) {
+    // bk = bk-1 (n/k)^2
+    mpf_mul_2exp(b, b, 2*p);
+    mpf_div_ui(b, b, k);
+    mpf_div_ui(b, b, k);
+
+    // ak = ak-1 (n/k)^2
+    mpf_mul_2exp(a, a, 2*p);
+    mpf_div_ui(a, a, k);
+    mpf_div_ui(a, a, k);
+
+    // ak = ak - bk/k
+    mpf_div_ui(temp, b, k);
+    mpf_add(a, a, temp);
+
+    // A += ak, B += bk
+    mpf_add(A, A, a);
+    mpf_add(B, B, b);
+
+    /* Exit when both ak and bk are smaller
+     * than the asked precision (10^-D).
+     */
+    if (mpf_cmp(a, b) >= 0) {
+      if (mpf_cmp(a, stop) < 0) break;
+    } else {
+      if (mpf_cmp(b, stop) < 0) break;
+    }
+  }
+  fprintf(stderr, "[mpf_gamma] iterations: %ld\n", k);
+
+  // return A/B
+  mpf_div(rop, A, B);
+
+  // free memory
+  mpf_clears(A, B, a, b, temp, stop, NULL);
+}
+
+int main(int argc, char** argv) {
+  /* if no argument is given, show the usage */
+  if (argc != 2) {
+    fprintf(stderr, "usage: %s D\n", argv[0]);
+    fprintf(stderr, "Computes γ up to D decimal places.\n");
+    return EXIT_FAILURE;
+  }
+
+  /* Compute the required precision, ie. the number
+   * of bits of the mantissa. This can be found from:
+   *
+   *   10^d = 2^b  ⇒  b = dlog2(10)
+   */
+  size_t digits = atol(argv[1]);
+  size_t prec = digits * log2(10);
+  fprintf(stderr, "[main] precision: %gbit\n", (double)prec);
+
+  /* Set the default precision. This affects the mantissa
+   * size of all mpf_t variables initialised from now on.
+   */
+  mpf_set_default_prec(prec + 64);
+
+  mpf_t y; mpf_init(y);
+  mpf_gamma(y, digits, prec);
+
+  // again, trick to avoid rounding
+  gmp_printf("%.*Ff\b \n", digits+1, y);
+  mpf_clear(y);
+
+  return EXIT_SUCCESS;
+}
--- a/2
+++ b/2
@ -14,7 +14,7 @@ ex-1/bin/pdf: ex-1/pdf.c
 ex-1/bin/histo: ex-1/histo.c
 	$(CCOMPILE)

-ex-2: ex-2/bin/fancy ex-2/bin/fancier ex-2/bin/limit ex-2/bin/naive ex-2/bin/recip
+ex-2: ex-2/bin/fancy ex-2/bin/fancier ex-2/bin/fast ex-2/bin/limit ex-2/bin/naive ex-2/bin/recip
 ex-2/bin/%: ex-2/%.c
 	$(CCOMPILE)