[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

mercurial at gmplib.org
Sun May 16 09:14:12 CEST 2010


details:   /home/hgfiles/gmp/rev/ebdf8aa713d5
changeset: 13637:ebdf8aa713d5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 08:45:51 2010 +0200
description:
Completely finish MOD_1_N tuning before tuning MOD_1U_TO_MOD_1_1_THRESHOLD.

details:   /home/hgfiles/gmp/rev/569a84988892
changeset: 13638:569a84988892
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 09:06:56 2010 +0200
description:
Do sizeof tests for 32-bit and 64-bit ABI.

details:   /home/hgfiles/gmp/rev/f06e8533c7b3
changeset: 13639:f06e8533c7b3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 09:10:18 2010 +0200
description:
Minor cleanup.

details:   /home/hgfiles/gmp/rev/8211734e5ef1
changeset: 13640:8211734e5ef1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 09:14:05 2010 +0200
description:
Major overhaul of sparc64 mod_1.

diffstat:

 ChangeLog              |   18 +++
 configure.in           |    2 +
 mpn/generic/divrem_1.c |   33 +++---
 mpn/generic/mod_1.c    |   13 +-
 mpn/generic/mod_1_1.c  |   12 +-
 mpn/generic/mod_1_2.c  |   16 +--
 mpn/generic/mod_1_3.c  |   14 +--
 mpn/generic/mod_1_4.c  |   16 +--
 mpn/sparc64/mod_1.c    |   59 ++++++++++++-
 mpn/sparc64/mod_1_4.c  |  221 +++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/sparc64/sparc64.h  |   18 +++
 tune/tuneup.c          |   12 +-
 12 files changed, 359 insertions(+), 75 deletions(-)

diffs (truncated from 669 to 300 lines):

diff -r beaa098c547e -r 8211734e5ef1 ChangeLog
--- a/ChangeLog	Fri May 14 11:40:58 2010 +0200
+++ b/ChangeLog	Sun May 16 09:14:05 2010 +0200
@@ -1,3 +1,21 @@
+2010-05-16  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/sparc64/mod_1.c: Rewrite.
+	* mpn/sparc64/sparc64.h (umul_ppmm_s): New macro.
+	* mpn/sparc64/mod_1_4.c: New file.
+
+	* mpn/generic/divrem_1.c: Minor cleanup.
+	* mpn/generic/mod_1.c: Likewise.
+	* mpn/generic/mod_1_1.c: Likewise.
+	* mpn/generic/mod_1_2.c: Likewise.
+	* mpn/generic/mod_1_3.c: Likewise.
+	* mpn/generic/mod_1_4.c: Likewise.
+
+	* configure.in (ia64-hpux): Do sizeof tests for 32-bit and 64-bit ABI.
+
+	* tune/tuneup.c (tune_mod_1): Completely finish MOD_1_N tuning before
+	tuning MOD_1U_TO_MOD_1_1_THRESHOLD.
+
 2010-05-14  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/redc_2.c: Use asm code just for GNU C.
diff -r beaa098c547e -r 8211734e5ef1 configure.in
--- a/configure.in	Fri May 14 11:40:58 2010 +0200
+++ b/configure.in	Sun May 16 09:14:05 2010 +0200
@@ -683,6 +683,7 @@
     abilist="64"
     GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
     SPEED_CYCLECOUNTER_OBJ=ia64.lo
+    any_testlist="sizeof-long-4"
 
     case $host_cpu in
       itanium)   path="ia64/itanium  ia64" ;;
@@ -716,6 +717,7 @@
         # let us use whatever seems to work.
         #
         abilist="32 64"
+        any_64_testlist="sizeof-long-8"
 
         cclist_32="gcc cc"
         path_32="ia64"
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/divrem_1.c
--- a/mpn/generic/divrem_1.c	Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/divrem_1.c	Sun May 16 09:14:05 2010 +0200
@@ -156,7 +156,7 @@
   else
     {
       /* Most significant bit of divisor == 0.  */
-      int norm;
+      int cnt;
 
       /* Skip a division if high < divisor (high quotient 0).  Testing here
 	 before normalizing will still skip as often as possible.  */
@@ -178,28 +178,28 @@
 	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
 	goto plain;
 
-      count_leading_zeros (norm, d);
-      d <<= norm;
-      r <<= norm;
+      count_leading_zeros (cnt, d);
+      d <<= cnt;
+      r <<= cnt;
 
       if (UDIV_NEEDS_NORMALIZATION
 	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
 	{
+	  mp_limb_t nshift;
 	  if (un != 0)
 	    {
 	      n1 = up[un - 1] << GMP_NAIL_BITS;
-	      r |= (n1 >> (GMP_LIMB_BITS - norm));
+	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
 	      for (i = un - 2; i >= 0; i--)
 		{
 		  n0 = up[i] << GMP_NAIL_BITS;
-		  udiv_qrnnd (*qp, r, r,
-			      (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
-			      d);
+		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+		  udiv_qrnnd (*qp, r, r, nshift, d);
 		  r >>= GMP_NAIL_BITS;
 		  qp--;
 		  n1 = n0;
 		}
-	      udiv_qrnnd (*qp, r, r, n1 << norm, d);
+	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
@@ -209,27 +209,26 @@
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
-	  return r >> norm;
+	  return r >> cnt;
 	}
       else
 	{
-	  mp_limb_t  dinv;
+	  mp_limb_t  dinv, nshift;
 	  invert_limb (dinv, d);
 	  if (un != 0)
 	    {
 	      n1 = up[un - 1] << GMP_NAIL_BITS;
-	      r |= (n1 >> (GMP_LIMB_BITS - norm));
+	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
 	      for (i = un - 2; i >= 0; i--)
 		{
 		  n0 = up[i] << GMP_NAIL_BITS;
-		  udiv_qrnnd_preinv (*qp, r, r,
-				     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
-				     d, dinv);
+		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
 		  r >>= GMP_NAIL_BITS;
 		  qp--;
 		  n1 = n0;
 		}
-	      udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
+	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
@@ -239,7 +238,7 @@
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
-	  return r >> norm;
+	  return r >> cnt;
 	}
     }
 }
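
For readers skimming the diff: both loops above handle an unnormalized
divisor by shifting d and the running remainder left by cnt bits and
forming each shifted dividend limb from two adjacent source limbs; the
cleanup simply names that value nshift instead of spelling the expression
inside the udiv_qrnnd call.  A self-contained sketch of the same pattern,
assuming nail-free 64-bit limbs and using a 128-bit divide where GMP would
use udiv_qrnnd (all names here are invented for illustration):

#include <stdint.h>

/* Remainder of up[un-1..0] mod d, for unnormalized d (high bit 0),
   d != 0, un >= 1.  Mirrors the nshift pattern from the diff.  */
uint64_t
mod_1_unnorm_sketch (const uint64_t *up, int un, uint64_t d)
{
  int cnt, i;
  uint64_t r, n1, n0, nshift;

  cnt = __builtin_clzll (d);	/* GCC builtin; >= 1 since d's high bit is 0 */
  d <<= cnt;

  n1 = up[un - 1];
  r = n1 >> (64 - cnt);		/* bits shifted out of the top limb */

  for (i = un - 2; i >= 0; i--)
    {
      n0 = up[i];
      nshift = (n1 << cnt) | (n0 >> (64 - cnt));
      r = (uint64_t) ((((unsigned __int128) r << 64) | nshift) % d);
      n1 = n0;
    }
  r = (uint64_t) ((((unsigned __int128) r << 64) | (n1 << cnt)) % d);

  return r >> cnt;		/* undo the normalization */
}
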
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1.c
--- a/mpn/generic/mod_1.c	Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1.c	Sun May 16 09:14:05 2010 +0200
@@ -118,12 +118,12 @@
   if (UDIV_NEEDS_NORMALIZATION
       && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
     {
+      mp_limb_t nshift;
       for (i = un - 2; i >= 0; i--)
 	{
 	  n0 = up[i] << GMP_NAIL_BITS;
-	  udiv_qrnnd (dummy, r, r,
-		      (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
-		      d);
+	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+	  udiv_qrnnd (dummy, r, r, nshift, d);
 	  r >>= GMP_NAIL_BITS;
 	  n1 = n0;
 	}
@@ -133,15 +133,14 @@
     }
   else
     {
-      mp_limb_t inv;
+      mp_limb_t inv, nshift;
       invert_limb (inv, d);
 
       for (i = un - 2; i >= 0; i--)
 	{
 	  n0 = up[i] << GMP_NAIL_BITS;
-	  udiv_qrnnd_preinv (dummy, r, r,
-			     (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
-			     d, inv);
+	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+	  udiv_qrnnd_preinv (dummy, r, r, nshift, d, inv);
 	  r >>= GMP_NAIL_BITS;
 	  n1 = n0;
 	}
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c	Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_1.c	Sun May 16 09:14:05 2010 +0200
@@ -67,15 +67,9 @@
   B1modb = bmodb[2];
   B2modb = bmodb[3];
 
-#if 1
-  umul_ppmm (ph, pl, ap[n - 1], B1modb);
+  rl = ap[n - 1];
+  umul_ppmm (ph, pl, rl, B1modb);
   add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
-#else
-  /* FIXME: We could avoid the above multiply when n > 2, i.e., we're about to
-     enter the loop.  But the post loop code assumes rh is reduced.  */
-  rh = ap[n - 1];
-  rl = ap[n - 2];
-#endif
 
   for (i = n - 3; i >= 0; i -= 1)
     {
@@ -90,8 +84,8 @@
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
+  cnt = bmodb[1];
   bi = bmodb[0];
-  cnt = bmodb[1];
 
   if (LIKELY (cnt != 0))
     rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
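
The entry code rewritten above folds the two most significant limbs with
one widening multiply instead of a division, using the precomputed residue
B1modb = B mod b (B = 2^GMP_LIMB_BITS): ap[n-1]*B + ap[n-2] is congruent
mod b to ap[n-1]*B1modb + ap[n-2], and the high limb of that sum is
already below b, which is what the post-loop code relies on.  A standalone
illustration of the step, assuming 64-bit limbs (the function name is
invented here):

#include <stdint.h>

/* Replace the two-limb value a1*B + a0 by the congruent (mod b)
   two-limb value a1*b1modb + a0, where b1modb = B mod b, B = 2^64.
   Since a1*b1modb + a0 <= (B-1)*(b-1) + (B-1) < B*b, the high limb
   of the result is already reduced below b.  */
static void
fold_top_limbs (uint64_t a1, uint64_t a0, uint64_t b1modb,
                uint64_t *rh, uint64_t *rl)
{
  unsigned __int128 p = (unsigned __int128) a1 * b1modb + a0;
  *rh = (uint64_t) (p >> 64);
  *rl = (uint64_t) p;
}
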
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_2.c
--- a/mpn/generic/mod_1_2.c	Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_2.c	Sun May 16 09:14:05 2010 +0200
@@ -9,7 +9,7 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -123,19 +123,13 @@
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  bi = cps[0];
-  cnt = cps[1];
-
-#if 1
   umul_ppmm (rh, cl, rh, B1modb);
   add_ssaaaa (rh, rl, rh, rl, 0, cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
-  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
-		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
-  ASSERT (q <= 2);	/* optimize for small quotient? */
-#endif
-
   udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 
   return r >> cnt;
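
This branch (and the matching ones in mod_1_3.c and mod_1_4.c below)
finishes with udiv_qrnnd_preinv, a 2/1 division by the normalized
(shifted) b using the precomputed reciprocal bi = floor((B^2-1)/b) - B.
A sketch of that primitive under those assumptions, written with 128-bit
arithmetic where GMP uses umul_ppmm/add_ssaaaa; this follows the standard
multiply-and-correct scheme, not GMP's exact macro text:

#include <stdint.h>

/* q = floor ((nh*B + nl) / d) and r = remainder, for normalized d
   (high bit set), nh < d, di = floor ((B*B - 1) / d) - B, B = 2^64.  */
static void
udiv_qrnnd_preinv_sketch (uint64_t *q, uint64_t *r,
                          uint64_t nh, uint64_t nl,
                          uint64_t d, uint64_t di)
{
  unsigned __int128 t;
  uint64_t qh, ql, rr;

  t = (unsigned __int128) nh * di;		  /* quotient estimate */
  t += ((unsigned __int128) (nh + 1) << 64) | nl; /* add n, bump high part */
  qh = (uint64_t) (t >> 64);
  ql = (uint64_t) t;

  rr = nl - qh * d;	/* candidate remainder, computed mod 2^64 */
  if (rr > ql)		/* estimate was one too large */
    {
      qh--;
      rr += d;
    }
  if (rr >= d)		/* rare: one too small */
    {
      qh++;
      rr -= d;
    }
  *q = qh;
  *r = rr;
}
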
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_3.c
--- a/mpn/generic/mod_1_3.c	Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_3.c	Sun May 16 09:14:05 2010 +0200
@@ -130,19 +130,13 @@
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  bi = cps[0];
-  cnt = cps[1];
-
-#if 1
   umul_ppmm (rh, cl, rh, B1modb);
   add_ssaaaa (rh, rl, rh, rl, 0, cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
-  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
-		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
-  ASSERT (q <= 3);	/* optimize for small quotient? */
-#endif
-
   udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 
   return r >> cnt;
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_4.c
--- a/mpn/generic/mod_1_4.c	Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_4.c	Sun May 16 09:14:05 2010 +0200
@@ -1,4 +1,4 @@
-/* mpn_mod_1s_3p (ap, n, b, cps)
+/* mpn_mod_1s_4p (ap, n, b, cps)
    Divide (ap,,n) by b.  Return the single-limb remainder.
    Requires that d < B / 4.
 
@@ -143,19 +143,13 @@
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  bi = cps[0];
-  cnt = cps[1];
-
-#if 1
   umul_ppmm (rh, cl, rh, B1modb);
   add_ssaaaa (rh, rl, rh, rl, 0, cl);
+
+  cnt = cps[1];
+  bi = cps[0];
+
   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
-  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
-		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
-  ASSERT (q <= 4);	/* optimize for small quotient? */
-#endif
-
   udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 

