analistica/ex-2/fancier.c

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <math.h>
#include <gsl/gsl_sf_lambert.h>
#include <gmp.h>
#include <time.h>

/* The Euler-Mascheroni constant is here computed to
 * arbitrary precision using GMP rational numbers
 * and the standard Brent-McMillan algorithm.
 *
 *    γ = A(N)/B(N) - C(N)/B(N)² - log(N)
 *
 * where:
 *
 *    A(N) = Σ_(k = 1)^(k_max) (N^k/k!)² * H(k)
 *    B(N) = Σ_(k = 0)^(k_max) (N^k/k!)²
 *    C(N) = 1/4N Σ_(k = 0)^(2N) ((2k)!)^3/((k!)^4*(16N))^2k
 *    H(k) = Σ_(j = 1)^(k) (1/k), the k-th harmonic number
 *
 * The error of the estimation is given by
 *
 *   Δ(N) = 5/12 √(2π) exp(-8N)/√x
 *
 * so, given a number D of decimal digits to compute we ask
 * that the error be smaller than 10^-D. The smallest integer
 * to solve the inequality can be proven to be
 *
 *   N = floor(W(5π/9 10^(2D + 1))/16) + 1
 *
 *  where W is the principal value of the Lambert W function.
 *
 * The number of series terms needed to reduce the error, due to
 * to less than 10^-D is α⋅2^p where α is the solution of:
 *
 *   α log(α) = 3 + α  ⇒  α = exp(W(3e) - 1) ≈ 4.97
 *
 * source: Precise error estimate of the Brent-McMillan algorithm for
 * the computation of Euler's constant, Jean-Pierre Demailly.
 */


/* Approximation of the Lambert W(x) function at large values
 * where x = 5π/9 10^(2D + 1). The expression of x has been
 * inlined and optimized to avoid overflowing on large D.
 *
 * Note: from x>10 this is an overestimate of W(x), which is
 * good for the application below.
 *
 * source: "On the Lambert W function", Corless et al.
 */
double lambertW_large(size_t digits) {
  double L1 = log(5*M_PI/9) + (2*digits + 1)*log(10);
  double L2 = log(L1);
  return L1 - L2  + L2/L1 + (L2 - 2)/(2*L1*L1)
       + L2*(3*L2*L2*L2 - 22*L2*L2 + 36*L2 - 12)/(12*L1*L1*L1*L1);
}

/* `mpq_dec_str(z, n)`
 * returns the decimal representation of the
 * rational z as a string, up to n digits.
 */
char* mpq_dec_str(mpq_t z, size_t mantissa_size) {
  mpz_t num, den, quot, rem;
  mpz_inits(num, den, quot, rem, NULL);

  // calculate integral part
  mpq_get_num(num, z);
  mpq_get_den(den, z);
  mpz_tdiv_qr(quot, rem, num, den);

  // store the integral part
  size_t integral_size = mpz_sizeinbase(quot, 10);
  char* repr = calloc(integral_size + 1 + mantissa_size + 1, sizeof(char));
  mpz_get_str(repr, 10, quot);

  // add decimal point
  repr[integral_size] = '.';

  // calculate the mantissa
  mpz_t cur;
  mpz_init(cur);
  size_t digit;
  for (size_t i = 0; i < mantissa_size; i++) {
    // calculate i-th digit as:
    // digit = (remainder * 10) / denominator
    // new_ramainder = (remainder * 10) % denominator
    mpz_set(cur, rem);
    mpz_mul_ui(cur, rem, 10);
    mpz_tdiv_qr(quot, rem, cur, den);

    // store digit in mantissa
    digit = mpz_get_ui(quot);
    sprintf(repr + integral_size + 1 + i, "%ld", digit);
  }

  // free memory
  mpz_clears(num, den, quot, rem, cur, NULL);

  return repr;
}


/* printf() like function with special conversion %q, which
 * prints the deciamal expansion of mpq_t rational numbers.
 * The second argument is the number of digits shown.
 */
void mpq_fprintf(FILE *fp, size_t digits, char* fmt, ...) {
  va_list args;
  va_start(args, fmt);

  char* str;
  char fmtc[2] = "%x";

  for (char* p = fmt; *p; p++) {
    if (*p != '%') {
      fprintf(fp, "%c", *p);
      continue;
    }
    switch (*++p) {
      case 's':
        str = va_arg(args, char*);
        fputs(str, fp);
        break;
      case 'l':
        if (*++p == 'd')
          fprintf(fp, "%ld", va_arg(args, size_t));
        break;
      case 'q':
        str = mpq_dec_str(*va_arg(args, mpq_t*), digits);
        fputs(str, fp);
        free(str);
        break;
      default:
        fmtc[1] = *p;
        gmp_fprintf(fp, fmtc, *va_arg(args, mpq_t*));
    }
  }
  va_end(args);
}


/* `mpq_log2(z, D)`
 * calculate log(2) to D decimal places
 * using the series
 *
 *   log(2) = Σ_{k=1} 1/(k 2^k)
 *
 * The error of the n-th partial sum is
 * estimated as
 *
 *   Δn = 1/(n(n+2) 2^(n-1)).
 *
 * So to fix the first D decimal places
 * we evaluate up to n terms such that
 *
 *   Δn < 1/10^D
 */
void mpq_log2(mpq_t rop, size_t digits) {
  mpq_t sum, term;
  mpq_inits(sum, term, NULL);

  fprintf(stderr, "[mpq_log2] decimal digits: %ld\n", digits);

  /* calculate n, the number of required terms */
  size_t n;
  mpz_t temp, prec, width; mpz_inits(temp, prec, width, NULL);
  // prec = 10^digits
  mpz_ui_pow_ui(prec, 10, digits + 2);

  for (n = 1;; n++) {
    // width = n*(n + 2)*2^(n - 1)
    mpz_set_ui(width, n);
    mpz_set_ui(temp, n);
    mpz_add_ui(temp, temp, 2);
    mpz_mul(width, width, temp);
    mpz_sub_ui(temp, temp, 3);
    mpz_mul_2exp(width, width, mpz_get_ui(temp));

    if (mpz_cmp(width, prec) > 0) break;
  }
  fprintf(stderr, "[mpq_log2] series terms: %ld\n", n);

  /* Compute the nth partial sum */
  for (size_t i=1; i<=n; i++) {
    mpq_set_ui(term, 1, 1);
    mpz_ui_pow_ui(mpq_denref(term), 2, i);
    mpz_mul_ui(mpq_denref(term), mpq_denref(term), i);
    mpq_add(sum, sum, term);
  }

  /* Compute upper/lower bounds */
  mpq_t upp, low;
  mpq_inits(upp, low, NULL);

  // upp = sum + term
  mpq_add(upp, sum, term);

  // low = sum + term * n/(n+2)
  mpz_mul_ui(mpq_numref(low), mpq_numref(term), n);
  mpz_mul_ui(mpq_denref(low), mpq_denref(term), n+2);
  mpq_canonicalize(low);
  mpq_add(low, sum, low);

  /* Average the two bounds */
  mpq_add(sum, upp, low);
  mpz_mul_ui(mpq_denref(sum), mpq_denref(sum), 2);
  mpq_canonicalize(sum);

  size_t d = digits > 50 ? 50 : digits;
  mpq_fprintf(stderr, d+2, "[mpq_log2] upper bound: %q...\n", &upp);
  mpq_fprintf(stderr, d+2, "[mpq_log2] lower bound: %q...\n", &low);
  mpq_fprintf(stderr, d,   "[mpq_log2] log(2)=%q...\n", &sum);

  // return sum
  mpq_set(rop, sum);

  // free memory
  mpq_clears(sum, term, upp, low, NULL);
  mpz_clears(temp, prec, width, NULL);
}


/* `mpq_log_ui(z, x, D)`
 * calculates the logarithm of x up
 * to D decimal places using the formula
 *
 *   log(x) = log(a(x) * 2^(n-1))
 *          = log(a) + (n-1) log(2)
 *
 * where 1<a<2 and n is the number of binary
 * digits of x.
 *
 * log(a) is computed from
 *
 *   log((1+y)/(1-y)) = 2Σ_{k=0} y^(2k+1)/(2k+1)
 *
 * where y = (a-1)/(a+1).
 *
 * The error of the n-th partial sum is
 * estimated as:
 *
 *   Δ(n, y) = 4y^(2n + 3) /
 *        (4n^2y^4 - 8n^2y^2 + 4n^2 + 4ny^4 - 12ny^2 + 8n + y^4 - 4y^2 + 3)
 *
 * So to fix the first D decimal places
 * we evaluate up to n terms such that
 *
 *   Δ(n, y) < 1/10^D
 */
void mpq_log_ui(mpq_t rop, size_t x, size_t digits) {
  /* calculate `a` such that x = a⋅2^(m-1) */
  mpq_t a;         mpq_init(a);
  mpz_t xz, power; mpz_inits(xz, power, NULL);
  mpz_set_ui(xz, x);

  // m = digits in base 2 of x
  size_t m = mpz_sizeinbase(xz, 2);
  mpz_ui_pow_ui(power, 2, m - 1);
  mpq_set_num(a, xz);
  mpq_set_den(a, power);

  fprintf(stderr, "[mpq_log_ui] binary digits: %ld\n", m);
  fprintf(stderr, "[mpq_log_ui] a=%g\n", mpq_get_d(a));

  // calculate log(2)*(m-1)
  mpq_t log2; mpq_init(log2);
  mpq_log2(log2, digits);

  mpz_mul_ui(mpq_numref(log2), mpq_numref(log2), m-1);
  mpq_canonicalize(log2);

  /* When x is a power of two a=1 and y=0.
   * The series for log(1+y/1-y) is not defined for y=0
   * so this case must be handled separately.
   */
  if (mpq_cmp_ui(a, 1, 1) == 0) {
    mpq_set(rop, log2);

    // free memory
    mpq_clears(a, log2, NULL);
    mpz_clears(xz, power, NULL);
    return;
  }

  /* calculate y = (a-1)/(a+1) */
  mpq_t y, temp; mpq_inits(y, temp, NULL);

  mpq_set(temp, a);
  // decrement: a/b -= b/b
  mpz_submul_ui(mpq_numref(temp),
                mpq_denref(temp), 1);
  mpq_set(y, temp);

  mpq_set(temp, a);
  // increment: a/b += b/b
  mpz_addmul_ui(mpq_numref(temp),
                mpq_denref(temp), 1);
  mpq_div(y, y, temp);

  fprintf(stderr, "[mpq_log_ui] y=%g\n", mpq_get_d(y));


  /* calculate number of required terms:
   * find smallest n such that Δ(n, y) < 1/10^digits.
   */
  mpq_t y4, y2, y2n, t1, t2;
  mpq_inits(y4, y2, y2n, t1, t2, NULL);

  // y4 = y^4
  mpz_pow_ui(mpq_numref(y4), mpq_numref(y), 4);
  mpz_pow_ui(mpq_denref(y4), mpq_denref(y), 4);

  // y2 = y^2
  mpz_pow_ui(mpq_numref(y2), mpq_numref(y), 2);
  mpz_pow_ui(mpq_denref(y2), mpq_denref(y), 2);

  size_t n;
  size_t p1, p2, p3;

  for(n = 0;; n++) {
    // polynomials in n
    p1 = 4*pow(n, 2) + 4*n + 1;
    p2 = 8*pow(n, 2) + 12*n - 4;
    p3 = 4*pow(n, 2) + 3;

    // t1 = y4 * p1
    mpz_mul_ui(mpq_numref(t1), mpq_numref(y4), p1);
    mpq_canonicalize(t1);
    // t2 = y2 * p2
    mpz_mul_ui(mpq_numref(t2), mpq_numref(y2), p2);
    mpq_canonicalize(t2);

    // t1 -= t2
    mpq_sub(t1, t1, t2);
    // t1 += p3
    mpz_addmul_ui(mpq_numref(t1), mpq_denref(t1), p3);

    // y2n = 4*y^(2n + 3)
    mpq_set(y2n, y);
    mpz_pow_ui(mpq_numref(y2n), mpq_numref(y2n), 2*n+3);
    mpz_pow_ui(mpq_denref(y2n), mpq_denref(y2n), 2*n+3);
    mpz_mul_ui(mpq_numref(y2n), mpq_numref(y2n), 4);
    mpq_canonicalize(y2n);

    // t2 = 10^digits * y2n
    mpq_set_ui(t2, 1, 1);
    mpz_ui_pow_ui(mpq_numref(t2), 10, digits + 1);
    mpq_mul(t2, t2, y2n);

    if (mpq_cmp(t1, t2) > 0) break;
  }

  fprintf(stderr, "[mpq_log_ui] series terms: %ld\n", n);
  fprintf(stderr, "[mpq_log_ui] decimal digits: %ld\n", digits);


  /* Compute the nth partial sum of
   * log(A) = log((1+y)/(1-y)) = 2 Σ_{k=0} y^(2k+1)/(2k+1)
   */
  mpq_t sum, term; mpq_inits(sum, term, NULL);

  for (size_t i = 0; i <= n; i++) {
    // numerator: y^(2i+1)
    mpq_set(temp, y);
    mpz_pow_ui(mpq_numref(temp), mpq_numref(temp), 2*i + 1);
    mpz_pow_ui(mpq_denref(temp), mpq_denref(temp), 2*i + 1);
    mpq_canonicalize(temp);
    mpq_set(term, temp);

    // denominator: 2i+1
    mpq_set_ui(temp, 2*i + 1, 1);
    mpq_div(term, term, temp);

    // sum the term
    mpq_add(sum, sum, term);
  }

  /* Compute the upper and lower bounds */
  mpq_t upp, low;
  mpq_inits(upp, low, NULL);

  // upp = sum + term * y2/(1-y2)
  mpq_mul(upp, term, y2);
  mpq_set_ui(temp, 1, 1);
  mpq_sub(temp, temp, y2);
  mpq_div(upp, upp, temp);
  mpq_add(upp, sum, upp);

  // low = sum + term / ((2*n + 3)/(2*n + 1)*y^-2 - 1)
  mpq_set_ui(temp, 2*n + 3, 2*n + 1);
  mpq_div(temp, temp, y2);
  mpz_sub(mpq_numref(temp), mpq_numref(temp), mpq_denref(temp));
  mpq_canonicalize(temp);
  mpq_div(low, term, temp);
  mpq_add(low, sum, low);

  /* Average the two bounds
   * Note: we don't divive the sum by 2 because we
   * actually need 2 Σ_{k=0} y^(2k+1)/(2k+1).
   */
  mpq_add(sum, low, upp);

  size_t d = digits > 50 ? 50 : digits;
  mpq_fprintf(stderr, d+2, "[mpq_log_ui] upper bound: %q...\n", upp);
  mpq_fprintf(stderr, d+2, "[mpq_log_ui] lower bound: %q...\n", low);
  mpq_fprintf(stderr, d,   "[mpq_log_ui] series: %q...\n", sum);

  // series += log(2)*(m-1)
  mpq_add(sum, sum, log2);
  mpq_set(rop, sum);

  mpq_fprintf(stderr, digits > 50 ? 50 : digits,
          "[mpq_log_ui] log(%ld)=%q...\n", x, sum);

  // free memory
  mpq_clears(a, y, temp, sum, term, log2,
             y4, y2, y2n, t1, t2, NULL);
  mpz_clears(xz, power, NULL);
}


/* A series */
void a_series(mpq_t rop, size_t N) {
  mpq_t sum, term, factq, harm, recip;
  mpz_t fact, power, stop;

  mpq_inits(sum, term, factq, harm, recip, NULL);
  mpz_inits(fact, power, stop, NULL);
  mpz_set_ui(stop, N);

  mpz_set_ui(fact, 1);
  mpz_set_ui(power, 1);
  size_t kmax = 4.97 * N;
  for (size_t k = 0; k <= kmax; k++) {
    if (k > 0) {
      mpz_mul_ui(fact, fact, k);
      mpz_mul(power, power, stop);
    }

    mpq_set_z(term, power);
    mpq_set_z(factq, fact);
    mpq_div(term, term, factq);
    mpq_mul(term, term, term);

    if (k > 0)
      mpq_set_ui(recip, 1, k);
    mpq_add(harm, harm, recip);
    mpq_mul(term, term, harm);
    mpq_add(sum, sum, term);
  }

  mpq_set(rop, sum);

  // free memory
  mpq_clears(sum, term, factq, harm, recip, NULL);
  mpz_clears(fact, power, stop, NULL);
}


/* B series */
void b_series(mpq_t rop, size_t N){
  mpq_t sum, term, factq;
  mpz_t fact, power, stop;

  mpq_inits(sum, term, factq, NULL);
  mpz_inits(fact, power, stop, NULL);
  mpz_set_ui(stop, N);

  mpz_set_ui(fact, 1);
  mpz_set_ui(power, 1);
  size_t kmax = 4.97 * N;
  for (size_t k = 0; k <= kmax; k++) {
    if (k > 0) {
      mpz_mul_ui(fact, fact, k);
      mpz_mul(power, power, stop);
    }

    mpq_set_z(term, power);
    mpq_set_z(factq, fact);
    mpq_div(term, term, factq);

    mpq_mul(term, term, term);
    mpq_add(sum, sum, term);
  }

  mpq_set(rop, sum);

  // free memory
  mpq_clears(sum, term, factq, NULL);
  mpz_clears(fact, power, stop, NULL);
}


/* C series */
void c_series(mpq_t rop, size_t N) {
  mpq_t sum, term, stop2, fact2q;
  mpz_t fact1, pfact1, fact2, pfact2, power, stop1;

  mpq_inits(sum, term, stop2, fact2q, NULL);
  mpz_inits(fact1, pfact1, fact2, pfact2, power, stop1, NULL);
  mpq_set_ui(stop2, 4*N, 1);

  mpz_set_ui(fact1, 1);
  mpz_set_ui(fact2, 1);
  mpz_set_ui(stop1, 1);

  for (size_t k = 0; k <= 2*N; k++) {
    if (k > 0) {
      mpz_mul_ui(fact1, fact1, 2*k * (2*k - 1));
      mpz_mul_ui(fact2, fact2, k);
      mpz_mul_ui(stop1, stop1, 16*N);
      mpz_mul_ui(stop1, stop1, 16*N);
    }
    mpz_pow_ui(pfact1, fact1, 3);
    mpz_pow_ui(pfact2, fact2, 4);
    mpz_mul(pfact2, pfact2, stop1);

    mpq_set_z(term, pfact1);
    mpq_set_z(fact2q, pfact2);

    mpq_div(term, term, fact2q);
    mpq_add(sum, sum, term);
  }
  mpq_div(sum, sum, stop2);

  mpq_set(rop, sum);

  // free memory
  mpq_clears(sum, term, stop2, fact2q, NULL);
  mpz_clears(fact1, pfact1, fact2, pfact2, power, stop1, NULL);
}


int main(int argc, char** argv) {
  /* if no argument is given, show the usage */
  if (argc != 2) {
    fprintf(stderr, "usage: %s D\n", argv[0]);
    fprintf(stderr, "Computes γ up to D decimal places.\n");
    return EXIT_FAILURE;
  }

  // requested decimal digits
  size_t D = atol(argv[1]);
  // Brent-McMillan number N
  size_t N = lambertW_large(D)/16 + 1;

  mpq_t exact, indicator;
  mpq_inits(exact, indicator, NULL);

  // value of γ up to 500 digits
  mpq_set_str(
    exact,
    "47342467929426395252720948664321583815681737620205"
    "78046578043524355342995304421105234825234769529727"
    "60710541774258570118818437511973945213031180449905"
    "77067110892216494927227098092412641670668896104172"
    "59380467495735805599918127376547804186055507379270"
    "09659478275476110597673948659342592173551684412254"
    "47940426607883070678658320829056222506449877887289"
    "02099638646695565284675455912794915138438516540912"
    "22061900908485016328626399040675802962645141413722"
    "567840383761005003563012969560659130635723002086605/"
    "82018681765164048732047980836753451023877954010252"
    "60062364748361667340168652059998708337602423525120"
    "45225158774173869894826877890589130978987229877889"
    "33367849273189687823618289122425446493605087108634"
    "04387981302669131224273324182166778131513056804533"
    "58955006355665628938266331979307689540884269372365"
    "76288367811322713649805442241450184023209087215891"
    "55369788474437679223152173114447113970483314961392"
    "48250188991402851129033493732164230227458717486395"
    "514436574417275149404197774547389507462779807727616",
    10);

  // number with decimal expansion equal to
  // 0123456789 periodic: useful to count digits
  mpq_set_str(
      indicator,
      "137174210/1111111111",
      10);

  fprintf(stderr, "[main] N: %ld\n", N);

  /* calculate series */
  mpq_t A, B, C;
  mpq_inits(A, B, C, NULL);

  clock_t begin, end;
  double time_a, time_b, time_c, time_log;

  begin = clock();
  a_series(A, N);
  end = clock();
  time_a = (double)(end - begin)/CLOCKS_PER_SEC;
  fprintf(stderr, "[main] a series time: %gs\n", time_a);

  begin = clock();
  b_series(B, N);
  end = clock();
  time_b = (double)(end - begin)/CLOCKS_PER_SEC;
  fprintf(stderr, "[main] b series time: %gs\n", time_b);

  begin = clock();
  c_series(C, N);
  end = clock();
  time_c = (double)(end - begin)/CLOCKS_PER_SEC;
  fprintf(stderr, "[main] c series time: %gs\n", time_c);

  /* calculate γ... */
  mpq_t gamma, corr, logn;
  mpq_inits(gamma, corr, logn, NULL);

  begin = clock();
  mpq_log_ui(logn, N, D);
  end = clock();
  time_log = (double)(end - begin)/CLOCKS_PER_SEC;
  fprintf(stderr, "[main] log(n) time: %gs\n", time_log);

  fprintf(stderr, "[main] total time: %gs\n",
          time_a + time_b + time_c + time_log);

  mpq_div(gamma, A, B);
  mpq_div(corr, C, B);
  mpq_div(corr, corr, B);
  mpq_sub(gamma, gamma, corr);
  mpq_sub(gamma, gamma, logn);

  /* calculate difference */
  mpq_t diff;
  mpq_init(diff);
  mpq_sub(diff, gamma, exact);

  /* print results */
  size_t d = D+1 > 100 ? 100 : D+1;

  mpq_fprintf(stderr, d, "[main] approx:\t%q...\n", gamma);
  mpq_fprintf(stderr, d, "[main] exact:\t%q...\n",  exact);
  mpq_fprintf(stderr, d, "[main] diff:\t%q...\n",   diff);
  mpq_fprintf(stderr, d, "[main] digits:\t%q\n",    indicator);

  mpq_fprintf(stdout, D, "%q\n", gamma);

  // free memory
  mpq_clears(A, B, C, gamma, corr, logn,
             exact, indicator, diff, NULL);

  return EXIT_SUCCESS;
}