From 90ab77b676a7340c6695971f6cb7f0fe6877339b Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Sat, 23 May 2020 17:11:41 +0200 Subject: [PATCH] ex-2: add fast variant of brent-mcmillan --- ex-2/fast.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++ makefile | 2 +- 2 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 ex-2/fast.c diff --git a/ex-2/fast.c b/ex-2/fast.c new file mode 100644 index 0000000..80deeec --- /dev/null +++ b/ex-2/fast.c @@ -0,0 +1,215 @@ +#include +#include +#include +#include + +/* The Euler-Mascheroni constant is here computed to + * arbitrary precision using GMP floating point numbers + * with the formula: + * + * 0 < |γ - U(N)/V(N)| < πexp(-4N) + * + * with: + * + * U(N) = Σ_(k = 1)^+∞ (N^k/k!)² *(H(k) - log(N)) + * V(N) = Σ_(k = 0)^+∞ (N^k/k!)² + * H(k) = Σ_(j = 1)^(k) (1/k), the k-th harmonic number + * + * If N = 2^p (to simplify the log) to get the result to D + * decimal digits we must have 2^(p+1) > D / log(π), which is + * satisfied by p = floor(log(D)/log(2)). + * + * source: http://www.numberworld.org/y-cruncher/internals/formulas.html + * + */ + + +/* Approximation of the Lambert W(x) function at large values + * where x = log(2) 10^(D/2). The expression of x has been + * inlined and optimized to avoid overflowing on large D. + * + * Note: from x>10 this is an overestimate of W(x), which is + * good for the application below. + * + * source: "On the Lambert W function", Corless et al. + */ +double lambertW_large(size_t digits) { + double L1 = log(log(2.0)) + log(10.0)*(double)digits/2.0; + double L2 = log(L1); + return L1 - L2 + L2/L1 + (L2 - 2)/(2*L1*L1) + + L2*(3*L2*L2*L2 - 22*L2*L2 + 36*L2 - 12)/(12*L1*L1*L1*L1); +} + + +/* `mpf_log2(z, D)` + * calculate log(2) to D decimal places + * using the series + * + * log(2) = Σ_{k=1}^+∞ 1/(k 2^k) + * + * The error of the n-th partial sum is + * estimated as + * + * Δn = 1/(n(n+2) 2^(n-1)). + * + * So to fix the first D decimal places + * we evaluate up to n terms such that + * + * Δn < 1/10^D + * + * Then take the average of the upper and + * lower bounds of the series, given by + * + * Sk + ak/(1 - ak+1/ak) < S < Sk + ak L/(1-L) + * + * where L = lim k→∞ ak+1/ak = 1/2. + */ +void mpf_log2(mpf_t rop, size_t digits) { + mpf_t sum, term; + mpf_inits(sum, term, NULL); + + fprintf(stderr, "[mpf_log2] decimal digits: %ld\n", digits); + + /* Compute the number of required terms + * A couple of extra digits are added for safety. + */ + size_t n = 2.0/log(2)*lambertW_large(digits+2); + fprintf(stderr, "[mpf_log2] series terms: %ld\n", n); + + /* Compute the nth partial num */ + for (size_t i=1; i<=n; i++) { + mpf_set_ui(term, 1); + mpf_mul_2exp(term, term, i); + mpf_mul_ui(term, term, i); + mpf_ui_div(term, 1, term); + mpf_add(sum, sum, term); + } + + /* Compute upper/lower bounds */ + mpf_t upp, low; + mpf_inits(upp, low, NULL); + + // upp = sum + term + mpf_add(upp, sum, term); + + // low = sum + term * n/(n+2) + mpf_mul_ui(low, term, n); + mpf_div_ui(low, low, n+2); + mpf_add(low, sum, low); + + /* average the bounds */ + mpf_add(sum, low, upp); + mpf_div_ui(sum, sum, 2); + + /* Note: printf() rounds floating points to the number of digits + * in an implementation-dependent manner. To avoid printing + * (wrongly) rounded digits we increase the count by 1 and hide + * the last by overwiting with a space. + */ + size_t d = digits>50? 50 : digits; + gmp_fprintf(stderr, "[mpf_log2] upper bound: %.*Ff\b \n", d+3, upp); + gmp_fprintf(stderr, "[mpf_log2] lower bound: %.*Ff\b \n", d+3, low); + gmp_fprintf(stderr, "[mpf_log2] log(2)=%.*Ff\b \n", d+1, sum); + + // return sum + mpf_set(rop, sum); + + // free memory + mpf_clears(sum, term, NULL); + mpf_clears(upp, low, NULL); +} + + +void mpf_gamma(mpf_t rop, size_t digits, size_t prec) { + unsigned int p = log2(digits); + fprintf(stderr, "[mpf_gamma] p: %d\n", p); + + // initialise variables + mpf_t A, B, a, b, stop, temp; + mpf_inits(A, B, a, b, stop, temp, NULL); + + /* Smallest representable number */ + mpf_set_ui(stop, 10); + mpf_pow_ui(stop, stop, digits); + mpf_ui_div(stop, 1, stop); + gmp_fprintf(stderr, "[mpf_gamma] stop=%Fe\n", stop); + + // A = a0 = -log(2^p) = -plog(2) + mpf_log2(a, digits); + mpf_mul_ui(a, a, p); + mpf_neg(a, a); + mpf_set(A, a); + + // B = b0 = 1 + mpf_set_si(b, 1); + mpf_set_si(B, 1); + + size_t k; + for(k = 1;; k++) { + // bk = bk-1 (n/k)^2 + mpf_mul_2exp(b, b, 2*p); + mpf_div_ui(b, b, k); + mpf_div_ui(b, b, k); + + // ak = ak-1 (n/k)^2 + mpf_mul_2exp(a, a, 2*p); + mpf_div_ui(a, a, k); + mpf_div_ui(a, a, k); + + // ak = ak - bk/k + mpf_div_ui(temp, b, k); + mpf_add(a, a, temp); + + // A += ak, B += bk + mpf_add(A, A, a); + mpf_add(B, B, b); + + /* Exit when both ak and bk are smaller + * than the asked precision (10^-D). + */ + if (mpf_cmp(a, b) >= 0) { + if (mpf_cmp(a, stop) < 0) break; + } else { + if (mpf_cmp(b, stop) < 0) break; + } + } + fprintf(stderr, "[mpf_gamma] iterations: %ld\n", k); + + // return A/B + mpf_div(rop, A, B); + + // free memory + mpf_clears(A, B, a, b, temp, stop, NULL); +} + +int main(int argc, char** argv) { + /* if no argument is given, show the usage */ + if (argc != 2) { + fprintf(stderr, "usage: %s D\n", argv[0]); + fprintf(stderr, "Computes γ up to D decimal places.\n"); + return EXIT_FAILURE; + } + + /* Compute the required precision, ie. the number + * of bits of the mantissa. This can be found from: + * + * 10^d = 2^b ⇒ b = dlog2(10) + */ + size_t digits = atol(argv[1]); + size_t prec = digits * log2(10); + fprintf(stderr, "[main] precision: %gbit\n", (double)prec); + + /* Set the default precision. This affects the mantissa + * size of all mpf_t variables initialised from now on. + */ + mpf_set_default_prec(prec + 64); + + mpf_t y; mpf_init(y); + mpf_gamma(y, digits, prec); + + // again, trick to avoid rounding + gmp_printf("%.*Ff\b \n", digits+1, y); + mpf_clear(y); + + return EXIT_SUCCESS; +} diff --git a/makefile b/makefile index 6ae6c9b..783009b 100644 --- a/makefile +++ b/makefile @@ -14,7 +14,7 @@ ex-1/bin/pdf: ex-1/pdf.c ex-1/bin/histo: ex-1/histo.c $(CCOMPILE) -ex-2: ex-2/bin/fancy ex-2/bin/fancier ex-2/bin/limit ex-2/bin/naive ex-2/bin/recip +ex-2: ex-2/bin/fancy ex-2/bin/fancier ex-2/bin/fast ex-2/bin/limit ex-2/bin/naive ex-2/bin/recip ex-2/bin/%: ex-2/%.c $(CCOMPILE)