mpn_mul_fft type overflow issue

Wed Sep 18 11:37:52 CEST 2013

Hello GMP developers. I wasn't quite sure if this message should
be sent to gmp-bugs or to gmp-devel.

Since this isn't the first issue due to types of internal variables
(e.g. http://gmplib.org/list-archives/gmp-bugs/2009-July/001538.html)
I thought it might deserve some discussion. Apologies if that's not
the case.

I have isolated a problem with internal types in mpn_mul for huge
operands on 64 bit machines. This is present in GMP 5.1.2 and also
seems to be present in the gmp development version. You will need
a machine with several hundred GB of RAM to reproduce this.

The essential issue is in mpn_mul_fft_decompose. Variables K and l
are passed in is as "int" type. A local variable is then computed as:

   mp_size_t Kl = K * l;

This causes internal allocation to overflow here:

       tmp = TMP_ALLOC_LIMBS(Kl + 1);

leading to an unrecoverable error:

GNU MP: Cannot allocate memory (size=18446744056529682440)

I have attached a gdb session that illustrates the problem as the
file gdb-example. I was a bit conservative in the build of GMP
completely disabling assembly code to allow debugging - you
might want to tweak that to make it run faster.

The test program I used in the debug session is also attached as
mpn_mul_test.c.

I have also prepared a patch against GMP 5.1.2 that is attached as
patch-mul_fft. This attempts to remove uses of int, unsigned int etc
replacing with scalable types.

I hope you find this useful.

Best wishes, Mark

Mark Sofroniou
Wolfram Research

-------------- next part --------------
# configured GMP 5.1.2 as:

./configure --disable-shared --enable-assert \
--enable-alloca=debug --disable-assembly CFLAGS=-g

# compile and link mpn_mul_test.c

# this passes

a.out 4294967296

# this fails

a.out 4831838208

# here is a gdb session illustrating the problem in the failed case

Starting program: /Developer/marks/GMP/mul/a.out 
GNU MP: Cannot allocate memory (size=18446744056529682440)

Program received signal SIGABRT, Aborted.
0x0000003609e328a5 in raise (sig=6)
    at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
64	  return INLINE_SYSCALL (tgkill, 3, pid, selftid, sig);
(gdb) up
#1  0x0000003609e34085 in abort () at abort.c:92
92	      raise (SIGABRT);
(gdb) 
#2  0x00000000004254e4 in __gmp_default_allocate (size=18446744056529682440)
    at memory.c:48
48	      abort ();
(gdb) 
#3  0x000000000041ff67 in __gmp_tmp_debug_alloc (file=0x429e0f "mul_fft.c", 
    line=642, dummy=1, markp=0x7fffffffce48, 
    decl_name=0x42a0af "__tmp_marker", size=18446744056529682440)
    at tal-debug.c:100
100	  p->block = (*__gmp_allocate_func) (size);
(gdb) 
#4  0x0000000000424037 in mpn_mul_fft_decompose (A=0x7fdff3fda010, 
    Ap=0x630370, K=2048, nprime=2099200, n=0x7ffbf7feb010, nl=2147483648, 
    l=1048576, Mp=65600, T=0x7fe7f5fdf010) at mul_fft.c:642
642	      tmp = TMP_ALLOC_LIMBS(Kl + 1);
(gdb) p Kl
$1 = -2147483648
(gdb) p K
$2 = 2048
(gdb) p l
$3 = 1048576
(gdb) p K*l
$4 = -2147483648
(gdb) p (size_t) (Kl+1)
$6 = 18446744071562067969
(gdb) p K*((mp_size_t) l)
$5 = 2147483648

<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /Developer/marks/GMP/mul/a.out...done.
(gdb) r
Starting program: /Developer/marks/GMP/mul/a.out 
mul_fft.c:745: GNU MP assertion failed: (pla) >= 0

Program received signal SIGABRT, Aborted.
0x0000003609e328a5 in raise (sig=6)



r 8589934592 /* failed */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mpn_mul_test.c
Type: text/x-csrc
Size: 3789 bytes
Desc: not available
URL: <http://gmplib.org/list-archives/gmp-devel/attachments/20130918/ee05f04a/attachment.bin>
-------------- next part --------------
diff -r 266c28da4680 mpn/generic/mul_fft.c

--- a/mpn/generic/mul_fft.c	Wed Jul 17 17:42:56 2013 +0200
+++ b/mpn/generic/mul_fft.c	Wed Sep 18 03:55:03 2013 -0500
@@ -67,8 +67,8 @@
 static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
 				       mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
 				       mp_size_t, mp_size_t, int **, mp_ptr, int);
-static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, int, mp_srcptr,
-				   mp_size_t, int, int, mp_ptr);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, mp_size_t, mp_srcptr,
+				   mp_size_t, mp_size_t, mp_size_t, mp_ptr);
 
 
 /* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
@@ -192,9 +192,9 @@
    r and a must have n+1 limbs, and not overlap.
 */
 static void
-mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n)
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_size_t d, mp_size_t n)
 {
-  int sh;
+  mp_size_t sh;
   mp_limb_t cc, rd;
 
   sh = d % GMP_NUMB_BITS;
@@ -283,7 +283,7 @@
    Assumes a and b are semi-normalized.
 */
 static inline void
-mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
 {
   mp_limb_t c, x;
 
@@ -314,7 +314,7 @@
    Assumes a and b are semi-normalized.
 */
 static inline void
-mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
 {
   mp_limb_t c, x;
 
@@ -428,7 +428,8 @@
 
   if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
     {
-      int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+      int k, K2, maxLK;
+      mp_size_t N, Nprime2, nprime2, M2, Mp2, l;
       int **fft_l;
       mp_ptr *Ap, *Bp, A, B, T;
 
@@ -445,10 +446,10 @@
       /* we should ensure that nprime2 is a multiple of the next K */
       if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
 	{
-	  unsigned long K3;
+	  mp_limb_t K3;
 	  for (;;)
 	    {
-	      K3 = 1L << mpn_fft_best_k (nprime2, sqr);
+	      K3 = (mp_limb_t) 1 << mpn_fft_best_k (nprime2, sqr);
 	      if ((nprime2 & (K3 - 1)) == 0)
 		break;
 	      nprime2 = (nprime2 + K3 - 1) & -K3;
@@ -492,7 +493,7 @@
     {
       mp_ptr a, b, tp, tpn;
       mp_limb_t cc;
-      int n2 = 2 * n;
+      mp_size_t n2 = 2 * n;
       tp = TMP_ALLOC_LIMBS (n2);
       tpn = tp + n;
       TRACE (printf ("  mpn_mul_n %d of %ld limbs\n", K, n));
@@ -570,7 +571,7 @@
 static void
 mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n)
 {
-  int i;
+  mp_size_t i;
 
   ASSERT (r != a);
   i = 2 * n * GMP_NUMB_BITS - k;
@@ -585,13 +586,13 @@
    Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
    then {rp,n}=0.
 */
-static int
+static mp_size_t
 mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
 {
   mp_size_t l;
-  long int m;
+  mp_size_t m;
   mp_limb_t cc;
-  int rpn;
+  mp_size_t rpn;
 
   ASSERT ((n <= an) && (an <= 3 * n));
   m = an - 2 * n;
@@ -625,8 +626,8 @@
    We must have nl <= 2*K*l.
 */
 static void
-mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n,
-		       mp_size_t nl, int l, int Mp, mp_ptr T)
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, mp_size_t nprime, mp_srcptr n,
+		       mp_size_t nl, mp_size_t l, mp_size_t Mp, mp_ptr T)
 {
   int i, j;
   mp_ptr tmp;
@@ -712,7 +713,8 @@
 		      mp_size_t nprime, mp_size_t l, mp_size_t Mp,
 		      int **fft_l, mp_ptr T, int sqr)
 {
-  int K, i, pla, lo, sh, j;
+  int K, i, j;
+  mp_size_t lo, pla, sh;
   mp_ptr p;
   mp_limb_t cc;
 
@@ -792,10 +794,10 @@
 }
 
 /* return the lcm of a and 2^k */
-static unsigned long int
-mpn_mul_fft_lcm (unsigned long int a, unsigned int k)
+static mp_limb_t
+mpn_mul_fft_lcm (mp_limb_t a, mp_limb_t k)
 {
-  unsigned long int l = k;
+  mp_limb_t l = k;
 
   while (a % 2 == 0 && k > 0)
     {
@@ -832,7 +834,7 @@
   K = 1 << k;
   M = N >> k;	/* N = 2^k M */
   l = 1 + (M - 1) / GMP_NUMB_BITS;
-  maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+  maxLK = mpn_mul_fft_lcm ((mp_limb_t) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
 
   Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
   /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
@@ -842,10 +844,10 @@
   /* we should ensure that recursively, nprime is a multiple of the next K */
   if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
     {
-      unsigned long K2;
+      mp_limb_t K2;
       for (;;)
 	{
-	  K2 = 1L << mpn_fft_best_k (nprime, sqr);
+	  K2 = (mp_limb_t) 1 << mpn_fft_best_k (nprime, sqr);
 	  if ((nprime & (K2 - 1)) == 0)
 	    break;
 	  nprime = (nprime + K2 - 1) & -K2;