mpz_powm_ui(...) is MUCH slower than mpz_powm(...)
Will Galway
galway at math.uiuc.edu
Tue Oct 2 04:02:34 CEST 2012
Of course what I mean is that on the architecture I'm running on, for my
particular application, using mpz_powm(...) is roughly twice as fast as
using the equivalent mpz_powm_ui(...). I'll leave the decision as to
whether or not this is a "bug" to you.
I'm using a Core i7-870 processor and 10 GB of memory, running Ubuntu
Linux. On this machine, with the gcc compiler, an unsigned long integer
is 64-bits. ALL of my variables are 64-bit integers--I'm using GMP to
get around the problem of working with 128-bit intermediate values when
computing modular powers.
Further details about the system I'm running on are given following my
signature. I've also attached two files: "benchmark-powm.c" should
illustrate the behavior on systems similar to mine. The comments at the
start of the code give instructions on how to compile and run the
program. The attached file "LOG-benchmark-powm-1e8.txt" gives the
output of running the program, running 10^8 iterations.
Please let me know if you'd like me to provide any further information.
-- Regards, Will Galway
I'm using a "standard" installation of gmp-5.0.5, viz. one installed
from the gmp distribution root directory with the commands:
> ./configure
> make
> make check
> sudo make install
(And, of course, all tests were passed after running "make check".)
Here are more details about the environment I was running under.
> gcc -v
Using built-in specs.
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro
4.4.4-14ubuntu5.1'
--with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs
--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr
--program-suffix=-4.4 --enable-shared --enable-multiarch
--enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib
--without-included-gettext --enable-threads=posix
--with-gxx-include-dir=/usr/include/c++/4.4 --libdir=/usr/lib
--enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-objc-gc --disable-werror
--with-arch-32=i686 --with-tune=generic --enable-checking=release
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5.1)
> uname -a
Linux MyMachine 2.6.35-32-generic #67-Ubuntu SMP Mon Mar 5 19:39:49
UTC 2012 x86_64 GNU/Linux
> ./config.guess
coreinhm-unknown-linux-gnu
> ./configfsf.guess
x86_64-unknown-linux-gnu
-------------- next part --------------
/*
* Time-stamp: <benchmark-powm.c: 01-Oct-2012 21:36:20 EDT galway at math.uiuc.edu>
*
* Compare the speed of mpz_powm_ui(...) against that of mpz_powm(...).
*
* To compile:
* gcc benchmark-powm.c -O3 -o benchmark-powm -lgmp -lrt
*
* To run 10^8 iterations of the timing loops, use:
* benchmark-powm 100000000
*
*/
#define _POSIX_C_SOURCE 199309L /* See man entry for clock_gettime(2) */
#include <stdio.h>
#include <stdlib.h>
#include <gmp.h>
#include <time.h>
/*
* typedefs for 64-bit and 32-bit unsigned integers on the architecture tested.
*/
typedef unsigned long int Uns64; /* 64-bit integer */
typedef unsigned int Uns32; /* 32-bit integer */
/* Global "working variables" for the code below. */
static mpz_t N_mpz, A_mpz, TWO_mpz, TMP_mpz;
/* ********************************************************************** */
/*
* Return the "current" CPU time in seconds as a floating-point (double) number.
* This value starts at zero at an arbitrary time in the past. See the man
* entry for clock_getres(...) for further information.
*/
double Current_CPU_Time()
{
int errflag;
struct timespec cpu_time;
double result;
/* Ignore errflag for now. */
errflag = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu_time);
result = cpu_time.tv_sec + cpu_time.tv_nsec/1.0e9;
return result;
}
/*
* Return 2^a modulo n. This "slow" version uses mpz_powm_ui(...).
*/
Uns64
pow2mod_ui(Uns64 a, Uns64 n)
{
mpz_set_ui(N_mpz, n);
/* TMP_mpz = 2^a modulo n */
mpz_powm_ui(TMP_mpz, TWO_mpz, a, N_mpz);
return mpz_get_ui(TMP_mpz);
}
/*
* Return 2^a modulo n. This "fast" version uses mpz_powm(...) instead of
* mpz_powm_ui(...).
*/
Uns64
pow2mod_mpz(Uns64 a, Uns64 n)
{
mpz_set_ui(N_mpz, n);
mpz_set_ui(A_mpz, a);
/* TMP_mpz = 2^a modulo n */
mpz_powm(TMP_mpz, TWO_mpz, A_mpz, N_mpz);
return mpz_get_ui(TMP_mpz);
}
int
main(int argc, char *argv[])
{
Uns64 i;
Uns64 test_count;
Uns64 rslt;
double time_0, time_1;
if ((argc != 2) || (sscanf(argv[1], "%lu", &test_count) != 1)) {
printf("Usage is: \"benchmark-powm testcount\".\n");
return EXIT_FAILURE;
}
/* Initialize global mpz_t variables. */
mpz_init_set_ui(TWO_mpz,2);
mpz_init2(N_mpz, 64);
mpz_init2(A_mpz, 64);
mpz_init2(TMP_mpz, 64);
/* Loop using mpz_powm_ui(...) to find pow2mod. */
time_0 = Current_CPU_Time();
printf("pow2mod_ui ...\n");
for (i = 0; i < test_count; i++) {
rslt = pow2mod_ui(30010, 30011 + i*30010);
if (rslt == 1) {
printf("%lu\n", 30011 + i*30010);
}
}
time_1 = Current_CPU_Time();
printf("Time for pow2mod_ui: %4.2f sec., %lu (%0.3g) tests, %0.3g tests/sec.\n\n",
time_1 - time_0,
test_count, (double)test_count,
test_count/(time_1 - time_0)
);
/* Loop using mpz_powm(...) to find pow2mod. */
time_0 = Current_CPU_Time();
printf("pow2mod_mpz ...\n");
for (i = 0; i < test_count; i++) {
rslt = pow2mod_mpz(30010, 30011 + i*30010);
if (rslt == 1) {
printf("%lu\n", 30011 + i*30010);
}
}
time_1 = Current_CPU_Time();
printf("Time for pow2mod_mpz: %4.2f sec., %lu (%0.3g) tests, %0.3g tests/sec.\n\n",
time_1 - time_0,
test_count, (double)test_count,
test_count/(time_1 - time_0)
);
mpz_clear(N_mpz);
mpz_clear(TWO_mpz);
mpz_clear(A_mpz);
mpz_clear(TMP_mpz);
return EXIT_SUCCESS;
}
-------------- next part --------------
pow2mod_ui ...
30011
46935641
1408585522051
Time for pow2mod_ui: 148.15 sec., 100000000 (1e+08) tests, 6.75e+05 tests/sec.
pow2mod_mpz ...
30011
46935641
1408585522051
Time for pow2mod_mpz: 61.22 sec., 100000000 (1e+08) tests, 1.63e+06 tests/sec.
More information about the gmp-bugs
mailing list