[Gmp-commit] /home/hgfiles/gmp: 3 new changesets

Tue Dec 22 12:14:32 CET 2009

details:   /home/hgfiles/gmp/rev/135c13a8b7c8
changeset: 13180:135c13a8b7c8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Dec 22 12:12:40 2009 +0100
description:
(MIN_AN): Set to MUL_TOOM6H_THRESHOLD.

details:   /home/hgfiles/gmp/rev/e45db982f8ba
changeset: 13181:e45db982f8ba
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Dec 22 12:13:39 2009 +0100
description:
Trivial merge.

details:   /home/hgfiles/gmp/rev/86a38a5e0a39
changeset: 13182:86a38a5e0a39
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Dec 22 12:14:29 2009 +0100
description:
Retune.

diffstat:

 ChangeLog                     |  17 +++++++++
 gmp-impl.h                    |  36 ++++++++++--------
 mpn/generic/sbpi1_div_q.c     |  80 ++----------------------------------------
 mpn/generic/sbpi1_divappr_q.c |   7 +++-
 mpn/x86_64/gmp-mparam.h       |   2 +-
 tests/mpn/t-toom33.c          |   2 +-
 tests/mpn/t-toom44.c          |   2 +
 tests/mpn/t-toom6h.c          |   4 +-
 8 files changed, 55 insertions(+), 95 deletions(-)

diffs (265 lines):

diff -r af80158a951d -r 86a38a5e0a39 ChangeLog

--- a/ChangeLog	Tue Dec 22 04:40:56 2009 +0100
+++ b/ChangeLog	Tue Dec 22 12:14:29 2009 +0100
@@ -1,5 +1,22 @@
+2009-12-22  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/sbpi1_div_q.c (mpn_sbpi1_div_q): Use udiv_qr_3by2.
+	Intended to change nothing after preprocessing.
+
+	* mpn/generic/sbpi1_divappr_q.c (mpn_sbpi1_divappr_q): For the
+	last call to udiv_qr_3by2, avoid using memory locations as output
+	parameters, and revert to explicitly copying n1 and n0 to memory.
+
+	* gmp-impl.h (udiv_qr_3by2): Tweaked to expand to precisely the
+	same code as was used before the introduction of this macro.
+	Eliminated some local variables, instead do multiple updates to
+	the output parameters.
+
 2009-12-22  Torbjorn Granlund  <tege at gmplib.org>
 
+	* tests/mpn/t-toom6h.c (MIN_AN): Set to MUL_TOOM6H_THRESHOLD to avoid
+	invalid recursive sizes.
+
 	* tests/mpn/t-bdiv.c: Get itch function calls right.
 
 	* mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Rewrite.
diff -r af80158a951d -r 86a38a5e0a39 gmp-impl.h
--- a/gmp-impl.h	Tue Dec 22 04:40:56 2009 +0100
+++ b/gmp-impl.h	Tue Dec 22 12:14:29 2009 +0100
@@ -2726,33 +2726,37 @@
    >= B^2 / 2 and n < d B. di is the inverse
 
      floor ((B^3 - 1) / (d0 + d1 B)) - B.
+
+
+   NOTE: Output variables are updated multiple times. Only some inputs
+   and outputs may overlap.                                              
 */
 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
   do {									\
-    mp_limb_t _q1, _q0, _r1, _r0, _t1, _t0, _mask;			\
-    umul_ppmm (_q1, _q0, (n2), (dinv));					\
-    add_ssaaaa (_q1, _q0, _q1, _q0, (n2), (n1));			\
+    mp_limb_t _q0, _t1, _t0, _mask;					\
+    umul_ppmm ((q), _q0, (n2), (dinv));					\
+    add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
 									\
     /* Compute the two most significant limbs of n - q'd */		\
-    _r1 = (n1) - _q1 * (d1);						\
-    sub_ddmmss (_r1, _r0, _r1, (n0), (d1), (d0));			\
-    umul_ppmm (_t1, _t0, _q1, (d0));					\
-    sub_ddmmss (_r1, _r0, _r1, _r0, _t1, _t0);				\
-    _q1++;								\
+    (r1) = (n1) - (d1) * (q);						\
+    (r0) = (n0);							\
+    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));			\
+    umul_ppmm (_t1, _t0, (d0), (q));					\
+    sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
+    (q)++;								\
 									\
     /* Conditionally adjust q and the remainders */			\
-    _mask = - (mp_limb_t) (_r1 >= _q0);					\
-    _q1 += _mask;							\
-    add_ssaaaa (_r1, _r0, _r1, _r0, _mask & (d1), _mask & (d0));	\
-    if (UNLIKELY (_r1 >= (d1)))						\
+    _mask = - (mp_limb_t) ((r1) >= _q0);				\
+    (q) += _mask;							\
+    add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0));	\
+    if (UNLIKELY ((r1) >= (d1)))					\
       {									\
-	if (_r1 > (d1) || _r0 >= (d0))					\
+	if ((r1) > (d1) || (r0) >= (d0))				\
 	  {								\
-	    _q1++;							\
-	    sub_ddmmss (_r1, _r0, _r1, _r0, (d1), (d0));		\
+	    (q)++;							\
+	    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));		\
 	  }								\
       }									\
-    (q) = _q1; (r1) = _r1; (r0) = _r0;					\
   } while (0)
 
 #ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
diff -r af80158a951d -r 86a38a5e0a39 mpn/generic/sbpi1_div_q.c
--- a/mpn/generic/sbpi1_div_q.c	Tue Dec 22 04:40:56 2009 +0100
+++ b/mpn/generic/sbpi1_div_q.c	Tue Dec 22 12:14:29 2009 +0100
@@ -40,9 +40,8 @@
   mp_limb_t n1, n0;
   mp_limb_t d1, d0;
   mp_limb_t cy, cy1;
-  mp_limb_t q, q0;
-  mp_limb_t t1, t0;
-  mp_limb_t mask, flag;
+  mp_limb_t q;
+  mp_limb_t flag;
 
   mp_size_t dn_orig = dn;
   mp_srcptr dp_orig = dp;
@@ -87,30 +86,7 @@
 	}
       else
 	{
-	  umul_ppmm (q, q0, n1, dinv);
-	  add_ssaaaa (q, q0, q, q0, n1, np[1]);
-
-	  /* Compute the two most significant limbs of n - q'd */
-	  n1 = np[1] - d1 * q;
-	  n0 = np[0];
-	  sub_ddmmss (n1, n0, n1, n0, d1, d0);
-	  umul_ppmm (t1, t0, d0, q);
-	  sub_ddmmss (n1, n0, n1, n0, t1, t0);
-	  q++;
-
-	  /* Conditionally adjust q and the remainders */
-	  mask = - (mp_limb_t) (n1 >= q0);
-	  q += mask;
-	  add_ssaaaa (n1, n0, n1, n0, mask & d1, mask & d0);
-
-	  if (UNLIKELY (n1 >= d1))
-	    {
-	      if (n1 > d1 || n0 >= d0)
-		{
-		  q++;
-		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
-		}
-	    }
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
 
 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
 
@@ -156,30 +132,7 @@
 	    }
 	  else
 	    {
-	      umul_ppmm (q, q0, n1, dinv);
-	      add_ssaaaa (q, q0, q, q0, n1, np[1]);
-
-	      /* Compute the two most significant limbs of n - q'd */
-	      n1 = np[1] - d1 * q;
-	      n0 = np[0];
-	      sub_ddmmss (n1, n0, n1, n0, d1, d0);
-	      umul_ppmm (t1, t0, d0, q);
-	      sub_ddmmss (n1, n0, n1, n0, t1, t0);
-	      q++;
-
-	      /* Conditionally adjust q and the remainders */
-	      mask = - (mp_limb_t) (n1 >= q0);
-	      q += mask;
-	      add_ssaaaa (n1, n0, n1, n0, mask & d1, mask & d0);
-
-	      if (UNLIKELY (n1 >= d1))
-		{
-		  if (n1 > d1 || n0 >= d0)
-		    {
-		      q++;
-		      sub_ddmmss (n1, n0, n1, n0, d1, d0);
-		    }
-		}
+	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
 
 	      cy = mpn_submul_1 (np - dn, dp, dn, q);
 
@@ -223,30 +176,7 @@
 	}
       else
 	{
-	  umul_ppmm (q, q0, n1, dinv);
-	  add_ssaaaa (q, q0, q, q0, n1, np[1]);
-
-	  /* Compute the two most significant limbs of n - q'd */
-	  n1 = np[1] - d1 * q;
-	  n0 = np[0];
-	  sub_ddmmss (n1, n0, n1, n0, d1, d0);
-	  umul_ppmm (t1, t0, d0, q);
-	  sub_ddmmss (n1, n0, n1, n0, t1, t0);
-	  q++;
-
-	  /* Conditionally adjust q and the remainders */
-	  mask = - (mp_limb_t) (n1 >= q0);
-	  q += mask;
-	  add_ssaaaa (n1, n0, n1, n0, mask & d1, mask & d0);
-
-	  if (UNLIKELY (n1 >= d1))
-	    {
-	      if (n1 > d1 || n0 >= d0)
-		{
-		  q++;
-		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
-		}
-	    }
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
 
 	  np[0] = n0;
 	  np[1] = n1;
diff -r af80158a951d -r 86a38a5e0a39 mpn/generic/sbpi1_divappr_q.c
--- a/mpn/generic/sbpi1_divappr_q.c	Tue Dec 22 04:40:56 2009 +0100
+++ b/mpn/generic/sbpi1_divappr_q.c	Tue Dec 22 12:14:29 2009 +0100
@@ -174,11 +174,16 @@
 	}
       else
 	{
-	  udiv_qr_3by2 (q, np[1], np[0], n1, np[1], np[0], d1, d0, dinv);
+	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+	  np[1] = n1;
+	  np[0] = n0;
 	}
 
       *--qp = q;
     }
 
+  ASSERT_ALWAYS (np[1] == n1);
+
   return qh;
 }
diff -r af80158a951d -r 86a38a5e0a39 mpn/x86_64/gmp-mparam.h
--- a/mpn/x86_64/gmp-mparam.h	Tue Dec 22 04:40:56 2009 +0100
+++ b/mpn/x86_64/gmp-mparam.h	Tue Dec 22 12:14:29 2009 +0100
@@ -59,7 +59,7 @@
 #define REDC_1_TO_REDC_2_THRESHOLD       34
 #define REDC_2_TO_REDC_N_THRESHOLD       99
 
-#define MATRIX22_STRASSEN_THRESHOLD      33
+#define MATRIX22_STRASSEN_THRESHOLD      20
 #define HGCD_THRESHOLD                  144
 #define GCD_DC_THRESHOLD                501
 #define GCDEXT_DC_THRESHOLD             521
diff -r af80158a951d -r 86a38a5e0a39 tests/mpn/t-toom33.c
--- a/tests/mpn/t-toom33.c	Tue Dec 22 04:40:56 2009 +0100
+++ b/tests/mpn/t-toom33.c	Tue Dec 22 12:14:29 2009 +0100
@@ -2,7 +2,7 @@
 #define mpn_toomMN_mul_itch mpn_toom33_mul_itch
 
 /* Smaller sizes not supported; may lead to recursive calls to
- * mpn_toom22_mul with invalid input size. */
+   toom22_mul with invalid input size. */
 #define MIN_AN MUL_TOOM33_THRESHOLD
 #define MIN_BN(an) (1 + 2*(((an)+2)/(size_t) 3))
 
diff -r af80158a951d -r 86a38a5e0a39 tests/mpn/t-toom44.c
--- a/tests/mpn/t-toom44.c	Tue Dec 22 04:40:56 2009 +0100
+++ b/tests/mpn/t-toom44.c	Tue Dec 22 12:14:29 2009 +0100
@@ -1,6 +1,8 @@
 #define mpn_toomMN_mul mpn_toom44_mul
 #define mpn_toomMN_mul_itch mpn_toom44_mul_itch
 
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul or toom33_mul with invalid input size. */
 #define MIN_AN MUL_TOOM44_THRESHOLD
 #define MIN_BN(an) (1 + 3*(((an)+3)>>2))
 
diff -r af80158a951d -r 86a38a5e0a39 tests/mpn/t-toom6h.c
--- a/tests/mpn/t-toom6h.c	Tue Dec 22 04:40:56 2009 +0100
+++ b/tests/mpn/t-toom6h.c	Tue Dec 22 12:14:29 2009 +0100
@@ -1,7 +1,9 @@
 #define mpn_toomMN_mul mpn_toom6h_mul
 #define mpn_toomMN_mul_itch mpn_toom6h_mul_itch
 
-#define MIN_AN 46
+/* Smaller sizes not supported; may lead to recursive calls to
+   toom22_mul, toom33_mul, or toom44_mul with invalid input size. */
+#define MIN_AN MUL_TOOM6H_THRESHOLD
 #define MIN_BN(an) (MAX ((an*3)>>3, 42) )
 
 #include "toom-shared.h"