#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <gmp.h>
#include <time.h>

/* The Euler-Mascheroni constant is here computed to
 * arbitrary precision using GMP floating point numbers
 * with the formula:
 *
 *    0 < |γ - U(N)/V(N)| < πexp(-4N)
 *
 * with:
 *
 *    U(N) = Σ_(k = 1)^+∞ (N^k/k!)² *(H(k) - log(N))
 *    V(N) = Σ_(k = 0)^+∞ (N^k/k!)²
 *    H(k) = Σ_(j = 1)^(k) (1/k), the k-th harmonic number
 *
 * If N = 2^p (to simplify the log) to get the result to D
 * decimal digits we must have 2^(p+1) > D / log(π), which is
 * satisfied by p = floor(log2(N log(10) + log(π))) - 1.
 *
 * The number of series terms needed to reduce the error, due to
 * to less than 10^-D is α⋅2^p where α is the solution of:
 *
 *   α log(α) = 3 + α  ⇒  α = exp(W(3e) - 1) ≈ 4.97
 *
 * source: http://www.numberworld.org/y-cruncher/internals/formulas.html
 *
 */


/* Approximation of the Lambert W(x) function at large values
 * where x = log(2) 10^(D/2). The expression of x has been
 * inlined and optimized to avoid overflowing on large D.
 *
 * Note: from x>10 this is an overestimate of W(x), which is
 * good for the application below.
 *
 * source: "On the Lambert W function", Corless et al.
 */
double lambertW_large(size_t digits) {
  double L1 = log(log(2.0)) + log(10.0)*(double)digits/2.0;
  double L2 = log(L1);
  return L1 - L2  + L2/L1 + (L2 - 2)/(2*L1*L1)
       + L2*(3*L2*L2*L2 - 22*L2*L2 + 36*L2 - 12)/(12*L1*L1*L1*L1);
}


/* `mpf_log2(z, D)`
 * calculate log(2) to D decimal places
 * using the series
 *
 *   log(2) = Σ_{k=1}^+∞ 1/(k 2^k)
 *
 * The error of the n-th partial sum is
 * estimated as
 *
 *   Δn = 1/(n(n+2) 2^(n-1)).
 *
 * So to fix the first D decimal places
 * we evaluate up to n terms such that
 *
 *   Δn < 1/10^D
 *
 * Then take the average of the upper and
 * lower bounds of the series, given by
 *
 *  Sk + ak/(1 - ak+1/ak) < S < Sk + ak L/(1-L)
 *
 *  where L = lim k→∞ ak+1/ak = 1/2.
 */
void mpf_log2(mpf_t rop, size_t digits) {
  mpf_t sum, term;
  mpf_inits(sum, term, NULL);

  fprintf(stderr, "[mpf_log2] decimal digits: %ld\n", digits);

  /* Compute the number of required terms
   * A couple of extra digits are added for safety.
   */
  size_t n = 2.0/log(2)*lambertW_large(digits+2);
  fprintf(stderr, "[mpf_log2] series terms: %ld\n", n);

  /* Compute the nth partial num */
  for (size_t i=1; i<=n; i++) {
    mpf_set_ui(term, 1);
    mpf_mul_2exp(term, term, i);
    mpf_mul_ui(term, term, i);
    mpf_ui_div(term, 1, term);
    mpf_add(sum, sum, term);
  }

  /* Compute upper/lower bounds */
  mpf_t upp, low;
  mpf_inits(upp, low, NULL);

  // upp = sum + term
  mpf_add(upp, sum, term);

  // low = sum + term * n/(n+2)
  mpf_mul_ui(low, term, n);
  mpf_div_ui(low, low, n+2);
  mpf_add(low, sum, low);

  /* average the bounds */
  mpf_add(sum, low, upp);
  mpf_div_ui(sum, sum, 2);

  /* Note: printf() rounds floating points to the number of digits
   * in an implementation-dependent manner. To avoid printing
   * (wrongly) rounded digits we increase the count by 1 and hide
   * the last by overwriting with a space.
   */
  size_t d = digits>50? 50 : digits;
  gmp_fprintf(stderr, "[mpf_log2] upper bound: %.*Ff\b \n", d+3, upp);
  gmp_fprintf(stderr, "[mpf_log2] lower bound: %.*Ff\b \n", d+3, low);
  gmp_fprintf(stderr, "[mpf_log2] log(2)=%.*Ff\b \n",  d+1, sum);

  // return sum
  mpf_set(rop, sum);

  // free memory
  mpf_clears(sum, term, NULL);
  mpf_clears(upp, low, NULL);
}


/* `mpf_gamma(rop, digits)` computes γ to `digits` decimal places
 * using the above described algorithm and set results to `rop`.
 */
void mpf_gamma(mpf_t rop, size_t digits) {
  /* The number N=2^p is fixed by requiring that the error
   * is smaller than the precision/digits required:
   *
   *   π exp(-4N) < 10^(-D)
   *
   * The first power of 2 solving the constraint is
   *
   *   p = floor(log2(N log(10) + log(π))) - 1
   */
  unsigned int p = log2(digits * log(10.0) + log(M_PI)) - 1;
  fprintf(stderr, "[mpf_gamma] p: %d\n", p);
  fprintf(stderr, "[mpf_gamma] log2(digits): %d\n", (int)log2(digits));

  /* The number of iterations needed to reduce the error, due to
   * ignoring the C/B² term, to less than 10^-D is α⋅2^p where α
   * is the solution of:
   *
   *   α log(α) = 1 + α  ⇒  α = exp(W(3e) - 1) ≈ 4.97
   *
   */
  size_t kmax = 3.59 * (1 << p);
  fprintf(stderr, "[mpf_gamma] iterations: %ld\n", kmax);

  // initialise variables
  mpf_t A, B, a, b, stop, temp;
  mpf_inits(A, B, a, b, stop, temp, NULL);

  // A = a0 = -log(2^p) = -plog(2)
  mpf_log2(a, digits);
  mpf_mul_ui(a, a, p);
  mpf_neg(a, a);
  mpf_set(A, a);

  // B = b0 = 1
  mpf_set_si(b, 1);
  mpf_set_si(B, 1);

  for(size_t k = 1; k <= kmax; k++) {
    // bk = bk-1 (n/k)^2
    mpf_mul_2exp(b, b, 2*p);
    mpf_div_ui(b, b, k);
    mpf_div_ui(b, b, k);

    // ak = ak-1 (n/k)^2
    mpf_mul_2exp(a, a, 2*p);
    mpf_div_ui(a, a, k);
    mpf_div_ui(a, a, k);

    // ak = ak - bk/k
    mpf_div_ui(temp, b, k);
    mpf_add(a, a, temp);

    // A += ak, B += bk
    mpf_add(A, A, a);
    mpf_add(B, B, b);
  }

  // return A/B
  mpf_div(rop, A, B);

  // free memory
  mpf_clears(A, B, a, b, temp, NULL);
}

int main(int argc, char** argv) {
  /* if no argument is given, show the usage */
  if (argc != 2) {
    fprintf(stderr, "usage: %s D\n", argv[0]);
    fprintf(stderr, "Computes γ up to D decimal places.\n");
    return EXIT_FAILURE;
  }

  /* Compute the required precision, ie. the number
   * of bits of the mantissa. This can be found from:
   *
   *   10^d = 2^b  ⇒  b = dlog2(10)
   */
  size_t digits = atol(argv[1]);
  size_t prec = digits * log2(10);
  fprintf(stderr, "[main] precision: %gbit\n", (double)prec);

  /* Set the default precision. This affects the mantissa
   * size of all mpf_t variables initialised from now on.
   */
  mpf_set_default_prec(prec + 64);

  mpf_t y; mpf_init(y);

  clock_t begin = clock();
  mpf_gamma(y, digits);
  clock_t end = clock();
  double time = (double)(end - begin)/CLOCKS_PER_SEC;
  fprintf(stderr, "[main] running time: %gs\n", time);

  // again, trick to avoid rounding
  gmp_printf("%.*Ff\b \n", digits+1, y);
  mpf_clear(y);

  return EXIT_SUCCESS;
}