[Gmp-commit] /home/hgfiles/gmp: Trim toom22,32,43 selection code.

mercurial at gmplib.org mercurial at gmplib.org
Thu Dec 17 17:04:33 CET 2009


details:   /home/hgfiles/gmp/rev/0c83a92cb85e
changeset: 13113:0c83a92cb85e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 17 17:04:30 2009 +0100
description:
Trim toom22,32,43 selection code.

diffstat:

 ChangeLog                |   6 ++++++
 mpn/generic/mul.c        |  47 ++++++++++++++++-------------------------------
 mpn/generic/toom22_mul.c |  10 +++++++++-
 3 files changed, 31 insertions(+), 32 deletions(-)

diffs (121 lines):

diff -r e1f87ead7745 -r 0c83a92cb85e ChangeLog
--- a/ChangeLog	Thu Dec 17 16:42:54 2009 +0100
+++ b/ChangeLog	Thu Dec 17 17:04:30 2009 +0100
@@ -1,5 +1,11 @@
 2009-12-17  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/generic/mul.c: Move allocation of ws to where it is used.
+	Identify toom22, 32, 42, in that order (in two places).  Use midline
+	between toom22, 32, 42.
+	* mpn/generic/toom22_mul.c (TOOM22_MUL_MN_REC): Call also
+	mpn_toom32_mul.
+
 	* doc/gmp.texi: Update References section.  Update Contributors
 	section.  Misc updates.
 
diff -r e1f87ead7745 -r 0c83a92cb85e mpn/generic/mul.c
--- a/mpn/generic/mul.c	Thu Dec 17 16:42:54 2009 +0100
+++ b/mpn/generic/mul.c	Thu Dec 17 17:04:30 2009 +0100
@@ -134,12 +134,9 @@
 	   (!TOOM33_OK (un, vn) && BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD * 3 / 2)))
     {
       /* Loop over toom42, then choose toom42, toom32, or toom22 */
-      mp_ptr ws;
       mp_ptr scratch;
       TMP_DECL; TMP_MARK;
 
-#define WSALL (4 * vn)
-      ws = TMP_SALLOC_LIMBS (WSALL + 1);
 
 #define ITCH ((un + vn) * 4 + 100)
       scratch = TMP_ALLOC_LIMBS (ITCH + 1);
@@ -147,6 +144,9 @@
       if (un >= 3 * vn)
 	{
 	  mp_limb_t cy;
+	  mp_ptr ws;
+#define WSALL (4 * vn)
+	  ws = TMP_SALLOC_LIMBS (WSALL + 1);
 
 	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
 	  un -= 2 * vn;
@@ -164,40 +164,25 @@
 	      prodp += 2 * vn;
 	    }
 
-	  /* FIXME: Test these in opposite order, following the philosophy of
-	     minimizing the relative overhead.  */
-	  if (5 * un > 9 * vn)
-	    {
-	      mpn_toom42_mul (ws, up, un, vp, vn, scratch);
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
-	  else if (9 * un > 10 * vn)
-	    {
-	      mpn_toom32_mul (ws, up, un, vp, vn, scratch);
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
+	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
 	  else
-	    {
-	      mpn_toom22_mul (ws, up, un, vp, vn, scratch);
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
+	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+	  cy = mpn_add_n (prodp, prodp, ws, vn);
+	  MPN_COPY (prodp + vn, ws + vn, un);
+	  mpn_incr_u (prodp + vn, cy);
 	}
       else
 	{
-	  /* FIXME: Test these in opposite order, following the philosophy of
-	     minimizing the relative overhead.  */
-	  if (5 * un > 9 * vn)
-	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
-	  else if (9 * un > 10 * vn)
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
 	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
 	  else
-	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
 	}
       TMP_FREE;
     }
diff -r e1f87ead7745 -r 0c83a92cb85e mpn/generic/toom22_mul.c
--- a/mpn/generic/toom22_mul.c	Thu Dec 17 16:42:54 2009 +0100
+++ b/mpn/generic/toom22_mul.c	Thu Dec 17 17:04:30 2009 +0100
@@ -57,13 +57,21 @@
       mpn_toom22_mul (p, a, n, b, n, ws);				\
   } while (0)
 
+/* Normally, this calls mul_basecase or toom22_mul.  But when when the fraction
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
+   relative unbalance will become a larger and larger relative unbalance with
+   each recursion (the difference s-t will be invariant over recursive calls).
+   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
+   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
 #define TOOM22_MUL_MN_REC(p, a, an, b, bn, ws)				\
   do {									\
     if (! MAYBE_mul_toom22						\
 	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
       mpn_mul_basecase (p, a, an, b, bn);				\
+    else if (4 * an < 5 * bn)						\
+      mpn_toom22_mul (p, a, an, b, bn, ws);				\
     else								\
-      mpn_toom22_mul (p, a, an, b, bn, ws);				\
+      mpn_toom32_mul (p, a, an, b, bn, ws);				\
   } while (0)
 
 void


More information about the gmp-commit mailing list