[Gmp-commit] /var/hg/gmp: 2 new changesets

Tue Aug 6 15:16:17 UTC 2019

details:   /var/hg/gmp/rev/ce85410f1fa3
changeset: 17789:ce85410f1fa3
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Aug 06 16:54:40 2019 +0200
description:
New function mpn_gcd_11

details:   /var/hg/gmp/rev/cdf9e11a028b
changeset: 17790:cdf9e11a028b
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Aug 06 17:16:07 2019 +0200
description:
tune/speed: Support mpn_gcd_11.

diffstat:

 ChangeLog            |  14 +++++++++
 configure.ac         |   3 +-
 gmp-h.in             |   3 ++
 mpn/generic/gcd_1.c  |  45 ++----------------------------
 mpn/generic/gcd_11.c |  75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tune/common.c        |   5 +++
 tune/speed.c         |   1 +
 tune/speed.h         |   5 +++
 8 files changed, 109 insertions(+), 42 deletions(-)

diffs (252 lines):

diff -r 4c6ae64fa612 -r cdf9e11a028b ChangeLog

--- a/ChangeLog	Mon Aug 05 11:07:24 2019 +0200
+++ b/ChangeLog	Tue Aug 06 17:16:07 2019 +0200
@@ -1,3 +1,17 @@
+2019-08-06  Niels Möller  <nisse at lysator.liu.se>
+
+	* tune/common.c (speed_mpn_gcd_11): New function.
+	* tune/speed.h (speed_mpn_gcd_11): Declare it.
+	(SPEED_ROUTINE_MPN_GCD_11): New macro.
+	* tune/speed.c (routine): Add mpn_gcd_11.
+
+	* configure.ac (gmp_mpn_functions): Added gcd_11. Also add
+	HAVE_NATIVE_mpn_gcd_11.
+	* mpn/generic/gcd_11.c (mpn_gcd_11): New file and function,
+	extracted from mpn_gcd_1.
+	* gmp-h.in (mpn_gcd_11): Declare it.
+	* mpn/generic/gcd_1.c (mpn_gcd_1): Adapted to call mpn_gcd_11.
+
 2019-07-30  Niels Möller  <nisse at lysator.liu.se>
 
 	From Seth Troisi:
diff -r 4c6ae64fa612 -r cdf9e11a028b configure.ac
--- a/configure.ac	Mon Aug 05 11:07:24 2019 +0200
+++ b/configure.ac	Tue Aug 06 17:16:07 2019 +0200
@@ -2970,7 +2970,7 @@
   rootrem sqrtrem sizeinbase get_str set_str compute_powtab		   \
   scan0 scan1 popcount hamdist cmp zero_p				   \
   perfsqr perfpow strongfibo						   \
-  gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step				   \
+  gcd_11 gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step			   \
   gcdext_lehmer								   \
   div_q tdiv_qr jacbase jacobi_2 jacobi get_d				   \
   matrix22_mul matrix22_mul1_inverse_vector				   \
@@ -3559,6 +3559,7 @@
 #undef HAVE_NATIVE_mpn_divrem_1c
 #undef HAVE_NATIVE_mpn_divrem_2
 #undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_gcd_11
 #undef HAVE_NATIVE_mpn_hamdist
 #undef HAVE_NATIVE_mpn_invert_limb
 #undef HAVE_NATIVE_mpn_ior_n
diff -r 4c6ae64fa612 -r cdf9e11a028b gmp-h.in
--- a/gmp-h.in	Mon Aug 05 11:07:24 2019 +0200
+++ b/gmp-h.in	Tue Aug 06 17:16:07 2019 +0200
@@ -1516,6 +1516,9 @@
 #define mpn_gcd __MPN(gcd)
 __GMP_DECLSPEC mp_size_t mpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
 
+#define mpn_gcd_11 __MPN(gcd_11)
+__GMP_DECLSPEC mp_limb_t mpn_gcd_11 (mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
+
 #define mpn_gcd_1 __MPN(gcd_1)
 __GMP_DECLSPEC mp_limb_t mpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
 
diff -r 4c6ae64fa612 -r cdf9e11a028b mpn/generic/gcd_1.c
--- a/mpn/generic/gcd_1.c	Mon Aug 05 11:07:24 2019 +0200
+++ b/mpn/generic/gcd_1.c	Tue Aug 06 17:16:07 2019 +0200
@@ -1,6 +1,6 @@
 /* mpn_gcd_1 -- mpn and limb greatest common divisor.
 
-Copyright 1994, 1996, 2000, 2001, 2009, 2012 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -70,7 +70,7 @@
 	goto done;
 
       count_trailing_zeros (c, ulimb);
-      ulimb = (ulimb >> 1) >> c;
+      ulimb >>= c;
     }
   else
     {
@@ -92,48 +92,11 @@
 	    goto done;
 
 	  count_trailing_zeros (c, ulimb);
-	  ulimb = (ulimb >> 1) >> c;
-	}
-      else
-	{
-	  ASSERT (ulimb & 1);
-	  ulimb >>= 1;
+	  ulimb >>= c;
 	}
     }
 
-  ASSERT (vlimb & 1);
-  vlimb >>= 1;
-
-  /* In this loop, we represent the odd numbers ulimb and vlimb
-     without the redundant least significant one bit. This reduction
-     in size by one bit ensures that the high bit of t, below, is set
-     if and only if vlimb > ulimb. */
-  while (ulimb != vlimb)
-    {
-      mp_limb_t t;
-      mp_limb_t vgtu;
-
-      t = ulimb - vlimb;
-      vgtu = LIMB_HIGHBIT_TO_MASK (t);
-
-      /* v <-- min (u, v) */
-      vlimb += (vgtu & t);
-
-      /* u <-- |u - v| */
-      ulimb = (t ^ vgtu) - vgtu;
-
-      count_trailing_zeros (c, t);
-      /* We have c <= GMP_LIMB_BITS - 2 here, so that
-
-	   ulimb >>= (c + 1);
-
-	 would be safe. But unlike the addition c + 1, a separate
-	 shift by 1 is independent of c, and can be executed in
-	 parallel with count_trailing_zeros. */
-      ulimb = (ulimb >> 1) >> c;
-    }
-
-  vlimb = (vlimb << 1) | 1;
+  vlimb = mpn_gcd_11 (ulimb, vlimb);
 
  done:
   return vlimb << zero_bits;
diff -r 4c6ae64fa612 -r cdf9e11a028b mpn/generic/gcd_11.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/gcd_11.c	Tue Aug 06 17:16:07 2019 +0200
@@ -0,0 +1,75 @@
+/* mpn_gcd_11 -- limb greatest common divisor.
+
+Copyright 1994, 1996, 2000, 2001, 2009, 2012, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if !HAVE_NATIVE_mpn_gcd_11
+mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+  ASSERT (u & v & 1);  /* both operands must be odd */
+
+  /* In this loop, we represent the odd numbers u and v
+     without the redundant least significant one bit. This reduction
+     in size by one bit ensures that the high bit of t, below, is set
+     if and only if v > u. */
+
+  u >>= 1;
+  v >>= 1;
+
+  while (u != v)
+    {
+      mp_limb_t t;
+      mp_limb_t vgtu;
+      int c;
+
+      t = u - v;
+      vgtu = LIMB_HIGHBIT_TO_MASK (t);
+
+      /* v <-- min (u, v) */
+      v += (vgtu & t);
+
+      /* u <-- |u - v| */
+      u = (t ^ vgtu) - vgtu;
+
+      count_trailing_zeros (c, t);
+      /* We have c <= GMP_LIMB_BITS - 2 here, so that
+
+	   u >>= (c + 1);
+
+	 would be safe. But unlike the addition c + 1, a separate
+	 shift by 1 is independent of c, and can be executed in
+	 parallel with count_trailing_zeros. */
+      u = (u >> 1) >> c;
+    }
+  return (u << 1) + 1;  /* restore the implicit low one bit */
+}
+#endif /* !HAVE_NATIVE_mpn_gcd_11 */
diff -r 4c6ae64fa612 -r cdf9e11a028b tune/common.c
--- a/tune/common.c	Mon Aug 05 11:07:24 2019 +0200
+++ b/tune/common.c	Tue Aug 06 17:16:07 2019 +0200
@@ -1717,6 +1717,11 @@
   SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);
 }
 double
+speed_mpn_gcd_11 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_GCD_11 (mpn_gcd_11);
+}
+double
 speed_mpn_gcd_1N (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
diff -r 4c6ae64fa612 -r cdf9e11a028b tune/speed.c
--- a/tune/speed.c	Mon Aug 05 11:07:24 2019 +0200
+++ b/tune/speed.c	Tue Aug 06 17:16:07 2019 +0200
@@ -295,6 +295,7 @@
   { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
 
   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
+  { "mpn_gcd_11",        speed_mpn_gcd_11, FLAG_R_OPTIONAL },
   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
 
   { "mpn_gcd",           speed_mpn_gcd                    },
diff -r 4c6ae64fa612 -r cdf9e11a028b tune/speed.h
--- a/tune/speed.h	Mon Aug 05 11:07:24 2019 +0200
+++ b/tune/speed.h	Tue Aug 06 17:16:07 2019 +0200
@@ -223,6 +223,7 @@
 double speed_mpn_hgcd_reduce_2 (struct speed_params *);
 double speed_mpn_gcd (struct speed_params *);
 double speed_mpn_gcd_1 (struct speed_params *);
+double speed_mpn_gcd_11 (struct speed_params *);
 double speed_mpn_gcd_1N (struct speed_params *);
 double speed_mpn_gcdext (struct speed_params *);
 double speed_mpn_gcdext_double (struct speed_params *);
@@ -2820,6 +2821,10 @@
 #define SPEED_ROUTINE_MPN_GCD_1(function)				\
   SPEED_ROUTINE_MPN_GCD_1_CALL( , function (&px[j-1], 1, py[j-1]))
 
+#define SPEED_ROUTINE_MPN_GCD_11(function)				\
+  SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1),		\
+			       function (px[j-1], py[j-1]))
+
 #define SPEED_ROUTINE_MPN_JACBASE(function)				\
   SPEED_ROUTINE_MPN_GCD_1_CALL						\
     ({									\