Small toom43 cleanup

Niels Möller nisse at lysator.liu.se
Tue Oct 20 16:41:35 CEST 2009


I've adapted toom32_mul to use a proper itch/scratch interface. I also
simplified the allocation by requiring that s + t >= 5, so that 5
temporary values of n+1 limbs each fit in the product area. That
should be true except for very small inputs (I think an >= 25 or bn >=
19 is sufficient).

Marco, Torbjörn, does this look ok?

Regards,
/Niels

diff -r e61371bc8820 gmp-impl.h
--- a/gmp-impl.h	Tue Oct 20 16:18:13 2009 +0200
+++ b/gmp-impl.h	Tue Oct 20 16:34:15 2009 +0200
@@ -4303,6 +4303,14 @@
 }
 
 static inline mp_size_t
+mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (unsigned long) 3);
+
+  return 6*n + 3 + 1;
+}
+
+static inline mp_size_t
 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
 {
   mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
diff -r e61371bc8820 mpn/generic/toom43_mul.c
--- a/mpn/generic/toom43_mul.c	Tue Oct 20 16:18:13 2009 +0200
+++ b/mpn/generic/toom43_mul.c	Tue Oct 20 16:34:15 2009 +0200
@@ -55,10 +55,6 @@
   mp_size_t n, s, t;
   enum toom6_flags flags = toom6_all_pos;
   mp_limb_t cy;
-  mp_ptr as1, as2;
-/*   mp_ptr as1, asm1, as2, asm2; */
-/*   mp_ptr bs1, bsm1, bs2, bsm2; */
-  TMP_DECL;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -76,7 +72,10 @@
   ASSERT (0 < s && s <= n);
   ASSERT (0 < t && t <= n);
 
-  TMP_MARK;
+  /* This is true whenever an >= 25 or bn >= 19, I think. It
+     guarantees that we can fit 5 values of size n+1 in the product
+     area. */
+  ASSERT (s+t >= 5);
 
 #define v0    pp				/* 2n */
 #define vm1   (scratch)				/* 2n+1 */
@@ -90,35 +89,11 @@
 #define asm2  (scratch + 4 * n + 4)		/* n+1 */
 #define bsm2  (pp + n + 1)			/* n+1 */
 #define bs2   (pp + 2 * n + 2)			/* n+1 */
+#define as2   (pp + 3 * n + 3)			/* n+1 */
+#define as1   (pp + 4 * n + 4)			/* n+1 */
 
- /* Alloc one more byte, because products will overwrite 2n+2
-    limbs. If n=1 asm2 reaches scratch+5n+5=scratch+10 and the last
-    byte is used anyway. */
-  scratch = TMP_SALLOC_LIMBS (6 * n + 3 + 1);
-
-  /* #define as2   (pp + 3 * n + 3)			/\* n+1 *\/ */
-  /* n+(n-2)<=n+(s+t), so the next test is always false if n>2.
-     Consider replacing it with the above define if threshold is big
-     enough.
-  */
-  if (n+s+t < 4)
-    as2 = TMP_SALLOC_LIMBS (n + 1);
-  else
-    as2 = (pp + 3 * n + 3);
-
-  /* #define as1   (pp + 4 * n + 4)			/\* n+1 *\/ */
-  /* (n-2)<=(s+t), so the next test is always false if n>6.
-     Consider replacing it with the above define if threshold is big
-     enough.
-  */
-  if (s+t < 5)
-    as1 = TMP_SALLOC_LIMBS (n + 1);
-  else
-    as1 = (pp + 4 * n + 4);
-
-/*   bsm1 = TMP_SALLOC_LIMBS (n + 1); */
-/*   bs2 = TMP_SALLOC_LIMBS (n + 1); */
-/*   bsm2 = TMP_SALLOC_LIMBS (n + 1); */
+  /* Total sccratch need is 6 * n + 3 + 1; we allocate one extra
+     limb, because products will overwrite 2n+2 limbs. */
 
 #define a0a2  scratch
 #define b0b2  scratch
@@ -296,6 +271,4 @@
 #undef b0
 #undef b1
 #undef b2
-
-  TMP_FREE;
 }


More information about the gmp-devel mailing list