improved mpz_fac_ui code

Paul Zimmermann Paul.Zimmermann at loria.fr
Tue Apr 20 17:47:51 CEST 2004


Here is some improved mpz_fac_ui code, using the algorithm described 
in Chapter 6 of the following book:

@book{Schonhage94,
author= {A. Sch\"onhage and A. F. W. Grotefeld and E. Vetter},
title="Fast Algorithms, A Multitape {T}uring Machine Implementation",
publisher="BI-Wissenschaftsverlag",
year=1994
}

Some timings on my 3Ghz P4:

mermoz% ./a.out 100000
mpz_fac_ui took 500ms
mpz_fac_ui2 took 300ms

Note that the tail function (mpz_fac_ui4) can still be improved using
binary splitting, and powers of two should be treated separately.

Paul

#include <stdio.h>
#include <stdlib.h>
#include "gmp.h"
#include "cputime.h"

/* result <- q[0]*q[1]*...*q[n-1] */
void
mpz_fac_ui4 (mpz_ptr result, unsigned long int *q, unsigned long int n)
{
  mpz_set_ui (result, q[0]);
  if (n > 1)
    {
      unsigned long int i;
      for (i = 1; i < n; i++)
        mpz_mul_ui (result, result, q[i]);
    }
}

/* put p[0]^e[0]*...*p[n-1]^e[n-1] in result,
   assuming e[0] >= e[1] >= ... >= e[n-1] > 0.

   Uses auxiliary table q[0...n-1].
*/
void
mpz_fac_ui3 (mpz_ptr result, unsigned long int *p, unsigned long int *e,
             unsigned long int *q, unsigned long int n)
{
  unsigned long int i, j, l, k;
  mpz_t *tmp;

  for (l = 1, i = e[0]; i > 1; l++, i/=2);
  tmp = (mpz_t*) malloc (l * sizeof (mpz_t));
  for (i = 0; i < l; i++)
    mpz_init (tmp[i]);
  k = 0;
  while (n)
    {
      /* put odd part in q[] */
      for (i = 0, j = 0; i < n; i++)
        if (e[i] % 2)
          q[j++] = p[i];
      mpz_fac_ui4 (tmp[k], q, j);
      for (i = 0; i < n && e[i] >= 2; i++)
        e[i] /= 2;
      n = i;
      k ++;
    }
  while (k > 1)
    {
      k--;
      mpz_mul (tmp[k], tmp[k], tmp[k]);
      mpz_mul (tmp[k-1], tmp[k-1], tmp[k]);
    }
  mpz_set (result, tmp[0]);
  for (i = 0; i < l; i++)
    mpz_clear (tmp[i]);
  free (tmp);
}

void
mpz_fac_ui2 (mpz_ptr result, unsigned long int n)
{
  unsigned long int *p;
  unsigned long int *e;
  unsigned long int *q;
  unsigned long int i, j, k;

  p = (unsigned long int*) malloc ((n + 1) * sizeof(unsigned long int));
  e = (unsigned long int*) malloc ((n + 1) * sizeof(unsigned long int));
  for (i = 2; i <= n; i++)
    {
      p[i] = i;
      e[i] = 1;
    }
  for (i = 2; i <= n / 2; i++)
    {
      if (p[i] != 1) /* i is prime */
        {
          /* remove factor i in 2i, 3i, ... */
          for (j = 2 * i; j <= n; j += i)
            {
              p[j] /= i;
              e[i] ++;
            }
          /* remove factor i in i^2, 2i^2, ..., i^3, 2i^3, ... */
          for (k = i * i; k <= n; k *= i)
            for (j = k; j <= n; j += k)
              {
                p[j] /= i;
                e[i] ++;
              }
        }
    }
  /* shrink table */
  for (i = 2, j = 0; i <= n; i++)
    {
      if (p[i] != 1)
        {
          p[j] = p[i];
          e[j] = e[i];
          j++;
        }
    }
  q = (unsigned long int*) malloc (j * sizeof(unsigned long int));
  mpz_fac_ui3 (result, p, e, q, j);
  free (q);
  free (p);
  free (e);
}

int
main (int argc, char *argv[])
{
  int n = atoi (argv[1]);
  mpz_t f, f2;
  int st;

  mpz_init (f);
  mpz_init (f2);

  st = cputime ();
  mpz_fac_ui (f, n);
  printf ("mpz_fac_ui took %dms\n", cputime () - st);

  st = cputime ();
  mpz_fac_ui2 (f2, n);
  printf ("mpz_fac_ui2 took %dms\n", cputime () - st);

  if (mpz_cmp (f, f2))
    {
      fprintf (stderr, "f and f2 differ\n");
      printf ("f="); mpz_out_str (stdout, 10, f); printf ("\n");
      printf ("f2="); mpz_out_str (stdout, 10, f2); printf ("\n");
      exit (1);
    }

  mpz_clear (f);
  mpz_clear (f2);
}


More information about the gmp-devel mailing list