# Binomial improvements

Robert Gerbicz robert.gerbicz at gmail.com
Sat Dec 10 00:11:35 CET 2005

```Hi!

I'm Robert Gerbicz from Hungary.
Here is an improvement for binomial coefficients.
By Legendre's theorem it's easy to compute the prime factorization of
n! so we can also
compute the factorization of binomial(n,k). Note that every prime power divisors
of binomial(n,k) is at most n ( you can prove this from Legendre's theorem ).
Then we can compute the product by binary splitting method.

If k is small then the computation of prime factorization is hard, so
I defined a
break even point: if k*log(n)<n then I use another method: it's easy to cancel
all numbers from the denominator . ( because if there are x numbers up to k
that are divisible by p^j then there are at least x numbers from n-k+1 to n
that are divisible by p^j , so by sieving we can remove factor of p
from every p^j-th
number in the numerator). After this sieving we can use also binary
splitting method
to compute the product.

In the second case the remaining numbers are not relative primes but there are
at most k<=n/log(n) remaining numbers and Pi(n) is about n/log(n) so in both
cases there are less than about n/log(n) remaining numbers ( in the first case
the numbers are relative primes , these are prime powers).

I've used also a theorem: if n>75 then Pi(n)<n/(log(n)-1.5) is true.

Some timings ( on a P4 Celeron 1.7 GHz )

robertgerbicz at linux:~/gmp-4.1.4> ./a.out 400000 200000
mpz_bin_uiui took 27750ms
mpz_bin_uiui2 took 80ms
robertgerbicz at linux:~/gmp-4.1.4> ./a.out 400000 100000
mpz_bin_uiui took 10570ms
mpz_bin_uiui2 took 60ms
robertgerbicz at linux:~/gmp-4.1.4> ./a.out 400000 50000
mpz_bin_uiui took 3400ms
mpz_bin_uiui2 took 40ms
robertgerbicz at linux:~/gmp-4.1.4> ./a.out 400000 25000
mpz_bin_uiui took 1030ms
mpz_bin_uiui2 took 30ms
robertgerbicz at linux:~/gmp-4.1.4> ./a.out 800000 400000
mpz_bin_uiui took 120130ms
mpz_bin_uiui2 took 190ms
robertgerbicz at linux:~/gmp-4.1.4> ./a.out 2000000 10000
mpz_bin_uiui took 260ms
mpz_bin_uiui2 took 10ms
robertgerbicz at linux:~/gmp-4.1.4> ./a.out 2000000 1000000
mpz_bin_uiui took 891730ms
mpz_bin_uiui2 took 650ms

It's more than 1300 times faster for binomial(2000000,1000000)
GMP can use my code.

// computation of binomial(n,k) by a new method
// Written by Robert Gerbicz
// Binary splitting algorithm from Paul Zimmermann

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <math.h>
#include "gmp.h"

int cputime (void);
void mpz_bin_uiui2 (mpz_ptr, unsigned long int, unsigned long int );
void mpz_bin_uiui3 (mpz_ptr, unsigned long int *, unsigned long int );
void mpz_bin_uiui4 (unsigned long int *,unsigned long int *,unsigned
long int & ,
unsigned long int,unsigned long int, unsigned long int);
void mpz_bin_uiui5 (unsigned long int *, unsigned long int, unsigned
long int &);
void mpz_bin_uiui6 (unsigned long int *, unsigned long int *,unsigned long int,
unsigned long int, unsigned long int, unsigned long int &, unsigned long int);

int cputime ()
{
struct rusage rus;

getrusage (0, &rus);
return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
}

void mpz_bin_uiui2 (mpz_ptr result, unsigned long int n, unsigned long int k)

{
unsigned long int size,size1,num_primes=0,kk=k;
unsigned long int i,sp=0,*a,*q,u=( int ) sqrt(n)+1;

if(kk>n)
{
mpz_set_ui(result,0);
return;
}

if(kk>=n-kk) kk=n-kk;

if(kk==0)
{
mpz_set_ui(result,1);
return;
}

if(kk==1)
{
mpz_set_ui(result,n);
return;
}

if((int) kk*log(n)>n)
{
if(n<75) size=25;
else size=( int ) (( double ) n/(log(n)-1.5));
if(n<75) size1=25;
else size1=(int) (( double ) u/(log(u)-1.5));
q = (unsigned long int*) malloc ((size1+1) * sizeof(unsigned long int));
a = (unsigned long int*) malloc ((size+1) * sizeof(unsigned long int));
mpz_bin_uiui5(q, u, num_primes);
mpz_bin_uiui6(a, q, num_primes, n, kk, sp, size1);
mpz_bin_uiui3 (result, a, sp);
}

else {
size1=kk;
size=kk+1;
q = (unsigned long int*) malloc ((size1+1) * sizeof(unsigned long int));
a = (unsigned long int*) malloc ((size+1) * sizeof(unsigned long int));
mpz_bin_uiui5 (q, kk, num_primes);
mpz_bin_uiui4 (a, q, sp, num_primes, n, kk);
mpz_bin_uiui3 (result, a, sp);
}

free(a);
free(q);
}

void  mpz_bin_uiui3 (mpz_ptr result, unsigned long int *a, unsigned long int n)
{
unsigned long int i, n0, n1;
mpz_t tmp;

if (n == 0)
{
mpz_set_ui (result, 1);
return;
}

if (n <= 3)
{
mpz_set_ui (result, a[0]);
for (i = 1; i < n; i++)
mpz_mul_ui (result, result, a[i]);
return;
}

n0 = n / 2;
n1 = n - n0;
mpz_bin_uiui3 (result, a, n1);
mpz_init (tmp);
mpz_bin_uiui3 (tmp, a + n1, n0);
mpz_mul (result, result, tmp);
mpz_clear (tmp);
}

void mpz_bin_uiui4 (unsigned long int *a,unsigned long int *q,unsigned
long int & sp,
unsigned long int num_primes, unsigned long int n, unsigned long int k)

{
unsigned long int *w,i,j,start,p,m,h,z;

w = (unsigned long int*) malloc ((k+1) * sizeof(unsigned long int));

for(i=0;i<=k;i++) w[i]=1;

for(j=0;j<=num_primes;j++)
{
p=q[j],h=p;
while(h<=k)
{
start=(int) (n-k)/h*h-(n-k+1);
for(m=h;m<=k;m+=h)
w[start+m]*=p;
h*=p;
}
}

z=n-k+1;

for(i=0;i<k;i++)
if(w[i]<z+i) a[sp]=(int) (z+i)/w[i],sp++;

free(w);
}

void mpz_bin_uiui5 (unsigned long int *q, unsigned long int f,
unsigned long int & num_primes)

{
unsigned long int *w,i,j,p;

w = (unsigned long int*) malloc (((f-1)/2+1) * sizeof(unsigned long int));

num_primes=1,q[0]=2;

for(i=0;i<=(f-1)/2;i++) w[i]=1;

for(i=1;2*i<=(int) sqrt(f);i++)
{
if(w[i])
{
p=2*i+1;
for(j=(p*p-1)/2;j<=(f-1)/2;j+=p) w[j]=0;
}
}

for(i=1;i<=(f-1)/2;i++)
if(w[i]) q[num_primes++]=2*i+1;

num_primes--;
free(w);
}

void mpz_bin_uiui6 (unsigned long int *a,unsigned long int *q,unsigned
long int  num_primes,
unsigned long int n,unsigned long int k,unsigned long int &
sp,unsigned long int size1)

{
unsigned long int *c,*r,i,j,s,t,h,h1,h2,h3,u,w,pow,step,rem[32];

u = ( int ) sqrt(n)+1;
c = (unsigned long int*) malloc ((u+2) * sizeof(unsigned long int));
r = (unsigned long int*) malloc ((size1+1) * sizeof(unsigned long int));

for(i=0;i<=num_primes;i++)
{
s=q[i],pow=0,h1=n/s,h2=k/s,h3=(n-k)/s,w=1;
while(h1>0) pow+=h1-h2-h3,h1/=s,h2/=s,h3/=s;
if(pow)
{
t=1,w=0;
while(pow) w++,rem[w]=pow&1,pow>>=1;
for(j=w;j>0;j--)
{
t*=t;
if(rem[j]) t*=s;
}
a[sp]=t,sp++;
}
}

for(i=1;i<=num_primes;i++)  r[i]=(q[i]-1)>>1;

for(i=0;i<u;i++)  c[i]=1;

for(h=0;h<=(n-1)/2;h+=u)
{
for(i=1;i<=num_primes;i++)
{
step=q[i];
for(j=r[i];j<u;j+=step)
c[j]=0;
r[i]=j-u;
}

if(h==0) c[0]=0;

for(i=0;i<u;i++)
{
if (c[i])
{
s=2*(h+i)+1;
if(n/s-k/s-(n-k)/s) a[sp]=s,sp++;
}
c[i]=1;
}
}

free(c);
free(r);
}

main (int argc, char *argv[])

{
unsigned long int n = atoi (argv[1]);
unsigned long int k = atoi (argv[2]);
int st;

mpz_t b, b2;
mpz_init (b);
mpz_init (b2);

st = cputime ();
mpz_bin_uiui (b, n, k);
printf ("mpz_bin_uiui took %dms\n", cputime () - st);

st = cputime ();

mpz_set_ui(b2,1);
mpz_bin_uiui2 (b2, n, k);

printf ("mpz_bin_uiui2 took %dms\n", cputime () - st);

if (mpz_cmp(b,b2)!=0)
{
fprintf (stderr, "b and b2 differ\n");
exit (1);
}

mpz_clear(b);
mpz_clear(b2);

return 0;

}
```