[Gmp-commit] /var/hg/gmp-5.0: toom3_itch: support any recursion depth.

Thu Feb 9 20:55:23 CET 2012

details:   /var/hg/gmp-5.0/rev/cd740e23c8a3
changeset: 13566:cd740e23c8a3
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Feb 09 20:39:58 2012 +0100
description:
toom3_itch: support any recursion depth.

diffstat:

 ChangeLog      |   5 +++++
 gmp-impl.h     |  13 +++++++++----
 tests/refmpn.c |   6 +++---
 3 files changed, 17 insertions(+), 7 deletions(-)

diffs (65 lines):

diff -r 4849e62500a9 -r cd740e23c8a3 ChangeLog

--- a/ChangeLog	Thu Feb 09 18:22:15 2012 +0100
+++ b/ChangeLog	Thu Feb 09 20:39:58 2012 +0100
@@ -1,3 +1,8 @@
+2012-02-09 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* gmp-impl.h (mpn_toom3*_itch): Support any recursion depth.
+	* tests/refmpn.c (refmpn_mul): Restore tight allocations.
+
 2012-02-09  Marc Glisse  <marc.glisse at inria.fr>
 
 	* gmp-impl.h (ABS_CAST): New macro.
diff -r 4849e62500a9 -r cd740e23c8a3 gmp-impl.h
--- a/gmp-impl.h	Thu Feb 09 18:22:15 2012 +0100
+++ b/gmp-impl.h	Thu Feb 09 20:39:58 2012 +0100
@@ -4521,12 +4521,17 @@
 #define mpn_toom2_sqr_itch(an) \
   (2 * ((an) + GMP_NUMB_BITS))
 
-/* Can probably be trimmed to 2 an + O(log an). */
+/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
+   We use 3an + C, so that we can use a smaller constant.
+ */
 #define mpn_toom33_mul_itch(an, bn) \
-  ((5 * (an) >> 1) + GMP_NUMB_BITS)
+  (3 * (an) + GMP_NUMB_BITS)
 #define mpn_toom3_sqr_itch(an) \
-  ((5 * (an) >> 1) + GMP_NUMB_BITS)
-
+  (3 * (an) + GMP_NUMB_BITS)
+
+/* toom33/toom3: Scratch need is 8an/3 + 13k, k is the recursion depth.
+   We use 3an + C, so that we can use a smaller constant.
+ */
 #define mpn_toom44_mul_itch(an, bn) \
   (3 * (an) + GMP_NUMB_BITS)
 #define mpn_toom4_sqr_itch(an) \
diff -r 4849e62500a9 -r cd740e23c8a3 tests/refmpn.c
--- a/tests/refmpn.c	Thu Feb 09 18:22:15 2012 +0100
+++ b/tests/refmpn.c	Thu Feb 09 20:39:58 2012 +0100
@@ -1488,21 +1488,21 @@
   if (vn < TOOM4_THRESHOLD)
     {
       /* In the mpn_toom33_mul range, use mpn_toom22_mul.  */
-      tn = 3 * vn + mpn_toom22_mul_itch (vn, vn);
+      tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
       tp = refmpn_malloc_limbs (tn);
       mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
     }
   else if (vn < FFT_THRESHOLD)
     {
       /* In the mpn_toom44_mul range, use mpn_toom33_mul.  */
-      tn = 3 * vn + mpn_toom33_mul_itch (vn, vn);
+      tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
       tp = refmpn_malloc_limbs (tn);
       mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
     }
   else
     {
       /* Finally, for the largest operands, use mpn_toom44_mul.  */
-      tn = 3 * vn + mpn_toom44_mul_itch (vn, vn);
+      tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
       tp = refmpn_malloc_limbs (tn);
       mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
     }