mpz_powm_ui(...) is MUCH slower than mpz_powm(...)

Will Galway galway at math.uiuc.edu
Tue Oct 2 04:02:34 CEST 2012


Of course what I mean is that on the architecture I'm running on, for my 
particular application, using mpz_powm(...) is roughly twice as fast as 
using the equivalent mpz_powm_ui(...).  I'll leave the decision as to 
whether or not this is a "bug" to you.

I'm using a Core i7-870 processor and 10 GB of memory, running Ubuntu 
Linux.  On this machine, with the gcc compiler, an unsigned long integer 
is 64-bits.  ALL of my variables are 64-bit integers--I'm using GMP to 
get around the problem of working with 128-bit intermediate values when 
computing modular powers.

Further details about the system I'm running on are given following my 
signature.  I've also attached two files: "benchmark-powm.c" should 
illustrate the behavior on systems similar to mine.  The comments at the 
start of the code give instructions on how to compile and run the 
program.  The attached file "LOG-benchmark-powm-1e8.txt" gives the 
output of running the program, running 10^8 iterations.

Please let me know if you'd like me to provide any further information.

-- Regards, Will Galway

I'm using a "standard" installation of gmp-5.0.5, viz. one installed 
from the gmp distribution root directory with the commands:
  > ./configure
  > make
  > make check
  > sudo make install

(And, of course, all tests were passed after running "make check".)

Here are more details about the environment I was running under.

  > gcc -v
  Using built-in specs.
  Target: x86_64-linux-gnu
  Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro 
4.4.4-14ubuntu5.1' 
--with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs 
--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr 
--program-suffix=-4.4 --enable-shared --enable-multiarch 
--enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib 
--without-included-gettext --enable-threads=posix 
--with-gxx-include-dir=/usr/include/c++/4.4 --libdir=/usr/lib 
--enable-nls --with-sysroot=/ --enable-clocale=gnu 
--enable-libstdcxx-debug --enable-objc-gc --disable-werror 
--with-arch-32=i686 --with-tune=generic --enable-checking=release 
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
  Thread model: posix
  gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5.1)

  > uname -a
  Linux MyMachine 2.6.35-32-generic #67-Ubuntu SMP Mon Mar 5 19:39:49 
UTC 2012 x86_64 GNU/Linux

  >  ./config.guess
  coreinhm-unknown-linux-gnu

  > ./configfsf.guess
  x86_64-unknown-linux-gnu


-------------- next part --------------
/*
 * Time-stamp: <benchmark-powm.c:  01-Oct-2012 21:36:20 EDT galway at math.uiuc.edu>
 *
 * Compare the speed of mpz_powm_ui(...) against that of mpz_powm(...).
 *
 * To compile:
 *   gcc benchmark-powm.c -O3 -o benchmark-powm -lgmp -lrt
 *
 * To run 10^8 iterations of the timing loops, use:
 *   benchmark-powm 100000000
 *
 */

#define _POSIX_C_SOURCE 199309L /* See man entry for clock_gettime(2) */

#include <stdio.h>
#include <stdlib.h>
#include <gmp.h>
#include <time.h>

/*
 * typedefs for 64-bit and 32-bit unsigned integers on the architecture tested.
 */
typedef unsigned long int  Uns64; /* 64-bit integer */
typedef unsigned int       Uns32; /* 32-bit integer */

/* Global "working variables" for the code below.  */
static mpz_t N_mpz, A_mpz, TWO_mpz, TMP_mpz;

/* ********************************************************************** */

/* 
 * Return the "current" CPU time in seconds as a floating-point (double) number.
 * This value starts at zero at an arbitrary time in the past.  See the man
 * entry for clock_getres(...) for further information.
 */
double Current_CPU_Time()
{
  int errflag;
  struct timespec cpu_time;
  double result;

  /* Ignore errflag for now.  */
  errflag = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu_time);
  result = cpu_time.tv_sec + cpu_time.tv_nsec/1.0e9;

  return result;
}

/*
 * Return 2^a modulo n.  This "slow" version uses mpz_powm_ui(...).
 */
Uns64
pow2mod_ui(Uns64 a, Uns64 n)
{
  mpz_set_ui(N_mpz, n);
  /* TMP_mpz = 2^a modulo n */
  mpz_powm_ui(TMP_mpz, TWO_mpz, a, N_mpz);

  return mpz_get_ui(TMP_mpz);
}

/*
 * Return 2^a modulo n.  This "fast" version uses mpz_powm(...) instead of
 * mpz_powm_ui(...).
 */
Uns64
pow2mod_mpz(Uns64 a, Uns64 n)
{
  mpz_set_ui(N_mpz, n);
  mpz_set_ui(A_mpz, a);
  /* TMP_mpz = 2^a modulo n */
  mpz_powm(TMP_mpz, TWO_mpz, A_mpz, N_mpz);

  return mpz_get_ui(TMP_mpz);
}

int
main(int argc, char *argv[])
{
  Uns64 i;
  Uns64 test_count;
  Uns64 rslt;
  double time_0, time_1;

  if ((argc != 2) || (sscanf(argv[1], "%lu", &test_count) != 1)) {
    printf("Usage is:  \"benchmark-powm testcount\".\n");
    return EXIT_FAILURE;
  }

  /* Initialize global mpz_t variables.  */
  mpz_init_set_ui(TWO_mpz,2);
  mpz_init2(N_mpz, 64);
  mpz_init2(A_mpz, 64);
  mpz_init2(TMP_mpz, 64);

  /* Loop using  mpz_powm_ui(...) to find pow2mod. */
  time_0 = Current_CPU_Time();
  printf("pow2mod_ui ...\n");
  for (i = 0; i < test_count; i++) {
    rslt = pow2mod_ui(30010, 30011 + i*30010);
    if (rslt == 1) {
      printf("%lu\n",  30011 + i*30010);
    }
  }
  time_1 = Current_CPU_Time();
  printf("Time for pow2mod_ui: %4.2f sec., %lu (%0.3g) tests, %0.3g tests/sec.\n\n",
         time_1 - time_0,
         test_count, (double)test_count,
         test_count/(time_1 - time_0)
         );

  /* Loop using  mpz_powm(...) to find pow2mod. */
  time_0 = Current_CPU_Time();
  printf("pow2mod_mpz ...\n");
  for (i = 0; i < test_count; i++) {
    rslt = pow2mod_mpz(30010, 30011 + i*30010);
    if (rslt == 1) {
      printf("%lu\n", 30011 + i*30010);
    }
  }
  time_1 = Current_CPU_Time();
  printf("Time for pow2mod_mpz: %4.2f sec., %lu (%0.3g) tests, %0.3g tests/sec.\n\n",
         time_1 - time_0,
         test_count, (double)test_count,
         test_count/(time_1 - time_0)
         );

  mpz_clear(N_mpz);
  mpz_clear(TWO_mpz);
  mpz_clear(A_mpz);
  mpz_clear(TMP_mpz);

  return EXIT_SUCCESS;
}
-------------- next part --------------
pow2mod_ui ...
30011
46935641
1408585522051
Time for pow2mod_ui: 148.15 sec., 100000000 (1e+08) tests, 6.75e+05 tests/sec.

pow2mod_mpz ...
30011
46935641
1408585522051
Time for pow2mod_mpz: 61.22 sec., 100000000 (1e+08) tests, 1.63e+06 tests/sec.



More information about the gmp-bugs mailing list