Simple recursive parallel gmp-chudnovsky program using Cilk

David Carver dcarver at login1.longhorn
Tue Apr 13 18:38:28 CEST 2010


/*
Hi again!
Here is a simple recursive parallel gmp-chudnovsky program using Cilk or Cilk++.

The Cilk/Cilk++ compiler is based on GNU C, with a few extra keywords to control
parallelism and works with the normal GNU GMP library.  Using Cilk/Cilk++ 
instead of C with Pthreads or OpenMP makes recursive divide-and-conquer code
easier to implement, debug (see cilkscreen), and maybe faster.
More information about Cilk is at http://en.wikipedia.org/wiki/Cilk.

Also, this simple version of gmp-chudnovsky has the factorization performance 
enhancement removed for readability, so running with one core will not be as 
fast as the Hanhong Xue's original version. 

Enjoy,
David Carver
*/

/* Pi computation using Chudnovsky's algortithm.

 * Copyright 2002,2005 Hanhong Xue (macroxue at yahoo dot com)

 * Slightly modified 2005 by Torbjorn Granlund to allow more than 2G
   digits to be computed.

 * Modified 2010 by David Carver (dcarver at tacc dot utexas dot edu) to 
   demonstrate a parallel and fully recursive version of the gmp-chudnovsky 
   program, using cilk/cilk++.

   To compile with GMP:
   cilk++ -Wall -O2 -o pgmp-chudnovsky pgmp-chudnovsky.cilk -lgmp -lm -lmiser

   To run:
   ./pgmp-chudnovsky 1000 1

   or add the Cilk++ option "-cilk_set_worker_count=" to specify the number 
   of workers (hardware threads used). 

   ./pgmp-chudnovsky 1000 1 -cilk_set_worker_count=1

   To get help run the program with no options:
   ./pgmp-chudnovsky

   Syntax: ./pgmp-chudnovsky.cilk.bin <digits> <option>
         <digits> digits of pi to output
         <option> 0 - just run (default)
                  1 - output decimal digits to stdout

 * Redistribution and use in source and binary forms,with or without
 * modification,are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES,INCLUDING,BUT NOT LIMITED TO,THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,
 * SPECIAL,EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT,STRICT LIABILITY,OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include <gmp.h>
#include <cilk.h>

#define A   13591409
#define B   545140134
#define C   640320
#define D   12

#define BITS_PER_DIGIT   3.32192809488736234787
#define DIGITS_PER_ITER  14.1816474627254776555
#define DOUBLE_PREC      53

////////////////////////////////////////////////////////////////////////////

double wall_clock()
{
        struct timeval timeval;
        (void) gettimeofday (&timeval,NULL);
        return (timeval.tv_sec + (timeval.tv_usec / 1000000.0));
}

////////////////////////////////////////////////////////////////////////////

/* binary splitting */
void
bs(unsigned long a,unsigned long b,unsigned gflag,mpz_t pstack1,mpz_t qstack1,
   mpz_t gstack1)
{
  unsigned long mid;
  mpz_t pstack2,qstack2,gstack2;

  if (b-a==1) {

    /*
      g(b-1,b) = (6b-5)(2b-1)(6b-1)
      p(b-1,b) = b^3 * C^3 / 24
      q(b-1,b) = (-1)^b*g(b-1,b)*(A+Bb).
    */

    mpz_set_ui(pstack1,b);
    mpz_mul_ui(pstack1,pstack1,b);
    mpz_mul_ui(pstack1,pstack1,b);
    mpz_mul_ui(pstack1,pstack1,(C/24)*(C/24));
    mpz_mul_ui(pstack1,pstack1,C*24);

    mpz_set_ui(gstack1,2*b-1);
    mpz_mul_ui(gstack1,gstack1,6*b-1);
    mpz_mul_ui(gstack1,gstack1,6*b-5);

    mpz_set_ui(qstack1,b);
    mpz_mul_ui(qstack1,qstack1,B);
    mpz_add_ui(qstack1,qstack1,A);
    mpz_mul   (qstack1,qstack1,gstack1);
    if (b%2)
      mpz_neg(qstack1,qstack1);

  } else {

    /*
      p(a,b) = p(a,m) * p(m,b)
      g(a,b) = g(a,m) * g(m,b)
      q(a,b) = q(a,m) * p(m,b) + q(m,b) * g(a,m)
    */

    mid = a+(b-a)*0.5224;     /* tuning parameter */

    mpz_init(pstack2);
    mpz_init(qstack2);
    mpz_init(gstack2);

    cilk_spawn bs(mid,b,gflag,pstack2,qstack2,gstack2);
    bs(a,mid,1,pstack1,qstack1,gstack1);
    cilk_sync;

    mpz_mul(pstack1,pstack1,pstack2);
    mpz_mul(qstack1,qstack1,pstack2);
    mpz_mul(qstack2,qstack2,gstack1);
    mpz_add(qstack1,qstack1,qstack2);

    if (gflag) {
      mpz_mul(gstack1,gstack1,gstack2);
    }

    mpz_clear(pstack2);
    mpz_clear(qstack2);
    mpz_clear(gstack2);
  }
}

int
cilk_main(int argc,char *argv[])
{
  mpf_t  pi,qi;
  mpz_t   pstack,qstack,gstack;
  long int d=100,out=0,terms;
  unsigned long psize,qsize;
  char *prog_name;
  clock_t begin,end;
  double wbegin,wend;

  prog_name = argv[0];

  if (argc==1) {
    fprintf(stderr,"\nSyntax: %s <digits> <option>\n",prog_name);
    fprintf(stderr,"      <digits> digits of pi to output\n");
    fprintf(stderr,"      <option> 0 - just run (default)\n");
    fprintf(stderr,"               1 - output decimal digits to stdout\n");
    exit(1);
  }
  if (argc>1)
    d = strtoul(argv[1],0,0);
  if (argc>2)
    out = atoi(argv[2]);

  terms = d/DIGITS_PER_ITER;

  fprintf(stderr,"#terms=%ld workers=%d\n",terms,cilk::current_worker_count());

  begin = clock();
  wbegin = wall_clock();

  mpz_init(pstack);
  mpz_init(qstack);
  mpz_init(gstack);

  /* begin binary splitting process */

  if (terms<=0) {
    mpz_set_ui(pstack,1);
    mpz_set_ui(qstack,0);
    mpz_set_ui(gstack,1);
  } else {
    bs(0,terms,0,pstack,qstack,gstack);
  }

  end = clock();
  wend = wall_clock();
  fprintf(stderr,"bs         cputime = %6.3f  wallclock = %6.3f\n",
    (double)(end-begin)/CLOCKS_PER_SEC,(wend-wbegin));
  fflush(stderr);

  mpz_clear(gstack);

  /* prepare to convert integers to floats */

  mpf_set_default_prec((long int)(d*BITS_PER_DIGIT+16));

  /*
	  p*(C/D)*sqrt(C)
    pi = -----------------
	     (q+A*p)
  */

  psize = mpz_sizeinbase(pstack,10);
  qsize = mpz_sizeinbase(qstack,10);

  mpz_addmul_ui(qstack,pstack,A);
  mpz_mul_ui(pstack,pstack,C/D);

  mpf_init(pi);
  mpf_set_z(pi,pstack);
  mpz_clear(pstack);

  mpf_init(qi);
  mpf_set_z(qi,qstack);
  mpz_clear(qstack);

  /* final step */

  mpf_div(qi,pi,qi);
  mpf_sqrt_ui(pi,C);
  mpf_mul(qi,qi,pi);

  end = clock();
  wend = wall_clock();

  /* output Pi and timing statistics */

  fprintf(stderr,"total      cputime = %6.3f  wallclock = %6.3f\n",
    (double)(end-begin)/CLOCKS_PER_SEC,(wend-wbegin));
  fflush(stderr);

  printf("   P size=%ld digits (%f)\n"
	 "   Q size=%ld digits (%f)\n",
	 psize,(double)psize/d,qsize,(double)qsize/d);

  if (out&1)  {
    printf("pi(0,%ld)=\n",terms);
    mpf_out_str(stdout,10,d+2,qi);
    printf("\n");
  }

  /* free float resources */

  mpf_clear(pi);
  mpf_clear(qi);

  exit (0);
}


More information about the gmp-discuss mailing list