mpn_mul_fft type overflow issue
Mark Sofroniou
marks at wolfram.com
Mon Sep 23 12:24:18 CEST 2013
Attached is a revised patch for mul_fft.
I have tested it to confirm that both mpn_mul and mpn_tdiv_qr now
work with numbers of size 2^32 limbs.
Regards, Mark
-------------- next part --------------
diff -r 266c28da4680 mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c Wed Jul 17 17:42:56 2013 +0200
+++ b/mpn/generic/mul_fft.c Mon Sep 23 05:14:02 2013 -0500
@@ -67,8 +67,8 @@
static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
mp_size_t, mp_size_t, int **, mp_ptr, int);
-static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, int, mp_srcptr,
- mp_size_t, int, int, mp_ptr);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, mp_size_t, mp_srcptr,
+ mp_size_t, mp_size_t, mp_size_t, mp_ptr);
/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
@@ -192,9 +192,9 @@
r and a must have n+1 limbs, and not overlap.
*/
static void
-mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n)
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_size_t d, mp_size_t n)
{
- int sh;
+ mp_size_t sh;
mp_limb_t cc, rd;
sh = d % GMP_NUMB_BITS;
@@ -283,7 +283,7 @@
Assumes a and b are semi-normalized.
*/
static inline void
-mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
{
mp_limb_t c, x;
@@ -314,7 +314,7 @@
Assumes a and b are semi-normalized.
*/
static inline void
-mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
{
mp_limb_t c, x;
@@ -428,12 +428,14 @@
if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
{
- int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+ mp_size_t K2, maxLK;
+ mp_size_t N, Nprime2, nprime2, M2, Mp2, l;
+ int k;
int **fft_l;
mp_ptr *Ap, *Bp, A, B, T;
k = mpn_fft_best_k (n, sqr);
- K2 = 1 << k;
+ K2 = (mp_size_t) 1 << k;
ASSERT_ALWAYS((n & (K2 - 1)) == 0);
maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
M2 = n * GMP_NUMB_BITS >> k;
@@ -445,10 +447,10 @@
/* we should ensure that nprime2 is a multiple of the next K */
if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
{
- unsigned long K3;
+ size_t K3;
for (;;)
{
- K3 = 1L << mpn_fft_best_k (nprime2, sqr);
+ K3 = (size_t) 1 << mpn_fft_best_k (nprime2, sqr);
if ((nprime2 & (K3 - 1)) == 0)
break;
nprime2 = (nprime2 + K3 - 1) & -K3;
@@ -470,7 +472,7 @@
fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
mpn_fft_initl (fft_l, k);
- TRACE (printf ("recurse: %ldx%ld limbs -> %d times %dx%d (%1.2f)\n", n,
+ TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n,
n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
for (i = 0; i < K; i++, ap++, bp++)
{
@@ -492,7 +494,7 @@
{
mp_ptr a, b, tp, tpn;
mp_limb_t cc;
- int n2 = 2 * n;
+ mp_size_t n2 = 2 * n;
tp = TMP_ALLOC_LIMBS (n2);
tpn = tp + n;
TRACE (printf (" mpn_mul_n %d of %ld limbs\n", K, n));
@@ -570,7 +572,7 @@
static void
mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n)
{
- int i;
+ mp_size_t i;
ASSERT (r != a);
i = 2 * n * GMP_NUMB_BITS - k;
@@ -585,13 +587,11 @@
Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
then {rp,n}=0.
*/
-static int
+static mp_size_t
mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
{
- mp_size_t l;
- long int m;
+ mp_size_t l, m, rpn;
mp_limb_t cc;
- int rpn;
ASSERT ((n <= an) && (an <= 3 * n));
m = an - 2 * n;
@@ -625,10 +625,11 @@
We must have nl <= 2*K*l.
*/
static void
-mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n,
- mp_size_t nl, int l, int Mp, mp_ptr T)
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, mp_size_t nprime, mp_srcptr n,
+ mp_size_t nl, mp_size_t l, mp_size_t Mp, mp_ptr T)
{
- int i, j;
+ int i;
+ mp_size_t j;
mp_ptr tmp;
mp_size_t Kl = K * l;
TMP_DECL;
@@ -712,7 +713,8 @@
mp_size_t nprime, mp_size_t l, mp_size_t Mp,
int **fft_l, mp_ptr T, int sqr)
{
- int K, i, pla, lo, sh, j;
+ int K, i, j;
+ mp_size_t lo, pla, sh;
mp_ptr p;
mp_limb_t cc;
@@ -792,10 +794,10 @@
}
/* return the lcm of a and 2^k */
-static unsigned long int
-mpn_mul_fft_lcm (unsigned long int a, unsigned int k)
+static mp_size_t
+mpn_mul_fft_lcm (mp_size_t a, mp_size_t k)
{
- unsigned long int l = k;
+ mp_size_t l = k;
while (a % 2 == 0 && k > 0)
{
@@ -812,7 +814,8 @@
mp_srcptr m, mp_size_t ml,
int k)
{
- int K, maxLK, i;
+ int K, i;
+ mp_size_t maxLK;
mp_size_t N, Nprime, nprime, M, Mp, l;
mp_ptr *Ap, *Bp, A, T, B;
int **fft_l;
@@ -832,27 +835,27 @@
K = 1 << k;
M = N >> k; /* N = 2^k M */
l = 1 + (M - 1) / GMP_NUMB_BITS;
- maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+ maxLK = mpn_mul_fft_lcm ((mp_size_t) GMP_NUMB_BITS, (mp_size_t) k); /* lcm (GMP_NUMB_BITS, 2^k) */
Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
/* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
nprime = Nprime / GMP_NUMB_BITS;
- TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%d, Np=%ld, np=%ld\n",
+ TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n",
N, K, M, l, maxLK, Nprime, nprime));
/* we should ensure that recursively, nprime is a multiple of the next K */
if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
{
- unsigned long K2;
+ size_t K2;
for (;;)
{
- K2 = 1L << mpn_fft_best_k (nprime, sqr);
+ K2 = (size_t) 1 << mpn_fft_best_k (nprime, sqr);
if ((nprime & (K2 - 1)) == 0)
break;
nprime = (nprime + K2 - 1) & -K2;
Nprime = nprime * GMP_LIMB_BITS;
/* warning: since nprime changed, K2 may change too! */
}
- TRACE (printf ("new maxLK=%d, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+ TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
}
ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
More information about the gmp-devel
mailing list