Allocation for toom33
Niels Möller
nisse at lysator.liu.se
Sun Oct 25 23:33:17 CET 2009
Torbjorn Granlund <tg at gmplib.org> writes:
> nisse at lysator.liu.se (Niels Möller) writes:
>
> Ok, it should be easy to increase the size claimed by
> mpn_toom33_mul_itch by n+1 (and then hope that it still works with
> current callers that bypass the itch function).
Below is a patch to do that. But due to the recursion, the increase is
actually 1.5 n (+ logterm which is supposedly still small).
> Those naughty callers deserve to have their wrists slapped!
I'll leave that to you... I seemed to remember that one offending
function was mpn_mul, but it looks like it does the right thing.
Regards,
/Niels
diff -r bd0e76eee038 gmp-impl.h
--- a/gmp-impl.h Sat Oct 24 23:03:26 2009 +0200
+++ b/gmp-impl.h Sun Oct 25 22:48:37 2009 +0100
@@ -1021,35 +1021,7 @@
For now, assume n is arbitrarily large. */
#define MPN_KARA_MUL_N_TSIZE(n) (2*(n) + 2*GMP_LIMB_BITS)
#define MPN_KARA_SQR_N_TSIZE(n) (2*(n) + 2*GMP_LIMB_BITS)
-
-/* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is
- unavailable, but just 2n + o(n) if mpn_sublsh1_n is available. It is hard
- to pin down the value of o(n), since it is a complex function of
- MUL_TOOM3_THRESHOLD and n. Normally toom3 is used between kara and fft; in
- that case o(n) will be really limited. If toom3 is used for arbitrarily
- large operands, o(n) will be larger. These definitions handle operands of
- up to 8956264246117233 limbs. A single multiplication using toom3 on the
- fastest hardware currently (2008) would need 10 million years, which
- suggests that these limits are acceptable. */
-#if WANT_FFT
-#if HAVE_NATIVE_mpn_sublsh1_n
-#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 63)
-#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 63)
-#else
-#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 2*(n/3) + 63)
-#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 2*(n/3) + 63)
-#endif
-#else /* WANT_FFT */
-#if HAVE_NATIVE_mpn_sublsh1_n
-#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 255)
-#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 255)
-#else
-#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 2*(n/3) + 255)
-#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 2*(n/3) + 255)
-#endif
-#define MPN_TOOM44_MAX_N 285405
-#endif /* WANT_FFT */
-
+
/* need 2 so that n2>=1 */
#define MPN_KARA_MUL_N_MINSIZE 2
#define MPN_KARA_SQR_N_MINSIZE 2
@@ -4300,10 +4272,9 @@
static inline mp_size_t
mpn_toom33_mul_itch (mp_size_t an, mp_size_t bn)
{
- /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
- mpn_toom_interpolate_5pts only needs scratch otherwise. */
+ /* Can probably be trimmed to 2 an + O(log an). */
mp_size_t n = (an + 2) / (size_t) 3;
- return 6 * n + GMP_NUMB_BITS;
+ return 15 * n / 2 + GMP_NUMB_BITS;
}
static inline mp_size_t
@@ -4372,10 +4343,10 @@
static inline mp_size_t
mpn_toom3_sqr_itch (mp_size_t an)
{
- /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
- mpn_toom_interpolate_5pts only needs scratch otherwise. */
+ /* Same as mpn_toom33_mul_itch. Can probably be trimmed to 2 an +
+ O(log an). */
mp_size_t n = (an + 2) / (size_t) 3;
- return 6 * n + GMP_NUMB_BITS;
+ return 15 * n / 2 + GMP_NUMB_BITS;
}
static inline mp_size_t
diff -r bd0e76eee038 mpn/generic/mul_n.c
--- a/mpn/generic/mul_n.c Sat Oct 24 23:03:26 2009 +0200
+++ b/mpn/generic/mul_n.c Sun Oct 25 22:48:37 2009 +0100
@@ -713,7 +713,7 @@
mp_ptr ws;
TMP_SDECL;
TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (n));
+ ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
mpn_toom33_mul (p, a, n, b, n, ws);
TMP_SFREE;
}
@@ -782,7 +782,7 @@
mp_ptr ws;
TMP_SDECL;
TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
+ ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
mpn_toom3_sqr_n (p, a, n, ws);
TMP_SFREE;
}
diff -r bd0e76eee038 mpn/generic/toom33_mul.c
--- a/mpn/generic/toom33_mul.c Sat Oct 24 23:03:26 2009 +0200
+++ b/mpn/generic/toom33_mul.c Sun Oct 25 22:48:37 2009 +0100
@@ -1,4 +1,4 @@
-/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
+/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
size. Or more accurately, bn <= an < (3/2)bn.
Contributed to the GNU project by Torbjorn Granlund.
@@ -78,7 +78,6 @@
mp_ptr gp;
mp_ptr as1, asm1, as2;
mp_ptr bs1, bsm1, bs2;
- TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -97,9 +96,7 @@
ASSERT (0 < s && s <= n);
ASSERT (0 < t && t <= n);
- TMP_MARK;
-
- as1 = TMP_SALLOC_LIMBS (n + 1); /* Should be (pp + 4 * n + 4), but we need 5n+5<=4n+s+t i.e. s+t>n+4*/
+ as1 = scratch + 4*n + 4;
asm1 = scratch + 2 * n + 2;
as2 = pp + n + 1;
@@ -217,7 +214,7 @@
#define vinf (pp + 4 * n) /* s+t */
#define vm1 scratch /* 2n+1 */
#define v2 (scratch + 2 * n + 1) /* 2n+2 */
-#define scratch_out (scratch + 4 * n + 4)
+#define scratch_out (scratch + 5 * n + 5)
/* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
@@ -279,6 +276,4 @@
TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, 1^vm1_neg, vinf0);
-
- TMP_FREE;
}
diff -r bd0e76eee038 mpn/generic/toom3_sqr.c
--- a/mpn/generic/toom3_sqr.c Sat Oct 24 23:03:26 2009 +0200
+++ b/mpn/generic/toom3_sqr.c Sun Oct 25 22:48:37 2009 +0100
@@ -72,7 +72,6 @@
mp_limb_t cy, vinf0;
mp_ptr gp;
mp_ptr as1, asm1, as2;
- TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -84,9 +83,7 @@
ASSERT (0 < s && s <= n);
- TMP_MARK;
-
- as1 = TMP_SALLOC_LIMBS (n + 1); /* Should be (pp + 4 * n + 4), but we need 5n+5<=4n+s+t i.e. s+t>n+4*/
+ as1 = scratch + 4 * n + 4;
asm1 = scratch + 2 * n + 2;
as2 = pp + n + 1;
@@ -145,7 +142,7 @@
#define vinf (pp + 4 * n) /* s+s */
#define vm1 scratch /* 2n+1 */
#define v2 (scratch + 2 * n + 1) /* 2n+2 */
-#define scratch_out (scratch + 4 * n + 4)
+#define scratch_out (scratch + 5 * n + 5)
/* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
@@ -205,6 +202,4 @@
TOOM3_SQR_N_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */
mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 1, vinf0);
-
- TMP_FREE;
}
More information about the gmp-devel
mailing list