Fwd: Factorial improvements
Paul Zimmermann
Paul.Zimmermann at loria.fr
Thu Sep 15 21:50:43 CEST 2005
From: Marsac Laurent <laurent.marsac at esil.univ-mrs.fr>
Date: Sep 14, 2005 10:19 PM
Subject: Factorial improvements
Hello,
I'm currently trying to improve n! calculation (mpz/fac_ui.c) and i just
want to know if anyone else is already working on it, or who is
responsible of that file ?
As described in the TODO list, i'm doing the prime factorization of n!,
and shiftings for factors of 2. The first results seems to show my
algorithm improves performances and i hope i could send more details
here ASAP.
--
Laurent Marsac
the best known algorithm is the one from Scho"nhage, Grotefeld and Vetter.
See the implementation below. Timings on a 1.7Ghz Athlon with gmp-4.1.4:
% ./fac_ui 100000
mpz_fac_ui took 439ms
mpz_fac_ui2 took 250ms
% ./fac_ui 1000000
mpz_fac_ui took 10641ms
mpz_fac_ui2 took 5717ms
% ./fac_ui 2000000
mpz_fac_ui took 27371ms
mpz_fac_ui2 took 13764ms
% ./fac_ui 5000000
mpz_fac_ui took 99720ms
mpz_fac_ui2 took 42751ms
Paul Zimmermann
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/resource.h>
#include "gmp.h"
/* implements algorithm page 226 of the book
"Fast Algorithms, A Multitape Turing Machine Implementation",
by A. Scho"nhage, A. F. W. Grotefeld and E. Vetter,
BI-Wissenschaftsverlag, 1994.
*/
int cputime (void);
void mpz_fac_ui4 (mpz_ptr, unsigned long int *, unsigned long int);
void mpz_fac_ui3 (mpz_ptr, unsigned long int *, unsigned long int *,
unsigned long int *, unsigned long int);
void mpz_fac_ui2 (mpz_ptr, unsigned long int);
int
cputime ()
{
struct rusage rus;
getrusage (0, &rus);
return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
}
/* result <- q[0]*q[1]*...*q[n-1] by binary splitting */
void
mpz_fac_ui4 (mpz_ptr result, unsigned long int *q, unsigned long int n)
{
unsigned long int i, n0, n1;
mpz_t tmp;
if (n == 0)
{
mpz_set_ui (result, 1);
return;
}
if (n <= 3)
{
mpz_set_ui (result, q[0]);
for (i = 1; i < n; i++)
mpz_mul_ui (result, result, q[i]);
return;
}
n0 = n / 2;
n1 = n - n0;
mpz_fac_ui4 (result, q, n1);
mpz_init (tmp);
mpz_fac_ui4 (tmp, q + n1, n0);
mpz_mul (result, result, tmp);
mpz_clear (tmp);
}
/* put p[0]^e[0]*...*p[n-1]^e[n-1] in result,
assuming e[0] >= e[1] >= ... >= e[n-1] > 0.
Uses auxiliary table q[0...n-1].
*/
void
mpz_fac_ui3 (mpz_ptr result, unsigned long int *p, unsigned long int *e,
unsigned long int *q, unsigned long int n)
{
unsigned long int i, j, mask;
mpz_t tmp;
/* find largest power of two <= the exponent of two */
for (mask = 1; e[0] >= mask << 1; mask <<= 1);
mpz_init (tmp);
mpz_set_ui (result, 1);
while (mask)
{
/* put part corresponding to bit mask set in q[] */
for (i = 0, j = 0; i < n; i++)
if (e[i] & mask)
q[j++] = p[i];
mpz_fac_ui4 (tmp, q, j);
mpz_mul (result, result, result);
mpz_mul (result, result, tmp);
mask >>= 1;
}
mpz_clear (tmp);
}
void
mpz_fac_ui2 (mpz_ptr result, unsigned long int n)
{
unsigned long int *p;
unsigned long int *e;
unsigned long int *q;
unsigned long int i, j, k;
if (n <= 1)
{
mpz_set_ui (result, 1);
return;
}
p = (unsigned long int*) malloc ((n + 1) * sizeof(unsigned long int));
e = (unsigned long int*) malloc ((n + 1) * sizeof(unsigned long int));
for (i = 2; i <= n; i++)
{
p[i] = i;
e[i] = 1;
}
for (i = 2; i <= n / 2; i++)
{
if (p[i] != 1) /* i is prime */
{
/* remove factor i in 2i, 3i, ... */
for (j = 2 * i; j <= n; j += i)
{
p[j] /= i;
e[i] ++;
}
/* remove factor i in i^2, 2i^2, ..., i^3, 2i^3, ... */
for (k = i; k <= n;)
{
/* check that k * i does not overflow */
if (k > (~(0UL)) / i)
break;
k *= i;
for (j = k; j <= n; j += k)
{
p[j] /= i;
e[i] ++;
}
}
}
}
/* shrink table */
for (i = 2, j = 0; i <= n; i++)
{
if (p[i] != 1)
{
p[j] = p[i];
e[j] = e[i];
j++;
}
}
q = (unsigned long int*) malloc (j * sizeof(unsigned long int));
mpz_fac_ui3 (result, p + 1, e + 1, q, j - 1);
/* treat apart exponent of two */
mpz_mul_2exp (result, result, e[0]);
free (q);
free (p);
free (e);
}
int
main (int argc, char *argv[])
{
int n = atoi (argv[1]);
mpz_t f, f2;
int st;
mpz_init (f);
mpz_init (f2);
st = cputime ();
mpz_fac_ui (f, n);
printf ("mpz_fac_ui took %dms\n", cputime () - st);
st = cputime ();
mpz_fac_ui2 (f2, n);
printf ("mpz_fac_ui2 took %dms\n", cputime () - st);
if (mpz_cmp (f, f2))
{
fprintf (stderr, "f and f2 differ\n");
exit (1);
}
mpz_clear (f);
mpz_clear (f2);
return 0;
}
More information about the gmp-discuss
mailing list