[Gmp-commit] /var/hg/gmp: New implementation of mpn_gcd_22, with less branches.

mercurial at gmplib.org mercurial at gmplib.org
Fri Aug 16 22:15:14 UTC 2019


details:   /var/hg/gmp/rev/6938e1d9fdf1
changeset: 17821:6938e1d9fdf1
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Sat Aug 17 00:14:58 2019 +0200
description:
New implementation of mpn_gcd_22, with less branches.

diffstat:

 ChangeLog            |    5 ++
 mpn/generic/gcd_22.c |  108 +++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 84 insertions(+), 29 deletions(-)

diffs (138 lines):

diff -r a905169c7cdb -r 6938e1d9fdf1 ChangeLog
--- a/ChangeLog	Fri Aug 16 14:38:08 2019 +0200
+++ b/ChangeLog	Sat Aug 17 00:14:58 2019 +0200
@@ -1,3 +1,8 @@
+2019-08-17  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/gcd_22.c (mpn_gcd_22): New implementation with less
+	branches.
+
 2019-08-16 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/brootinv.c: Shorten computations, using even exponent.
diff -r a905169c7cdb -r 6938e1d9fdf1 mpn/generic/gcd_22.c
--- a/mpn/generic/gcd_22.c	Fri Aug 16 14:38:08 2019 +0200
+++ b/mpn/generic/gcd_22.c	Sat Aug 17 00:14:58 2019 +0200
@@ -39,43 +39,93 @@
 mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0)
 {
   mp_double_limb_t g;
-  ASSERT (u0 & 1);
-  ASSERT (v0 & 1);
+  ASSERT (u0 & v0 & 1);
+
+  /* Implicit least significant bit */
+  u0 = (u0 >> 1) | (u1 << (GMP_LIMB_BITS - 1));
+  u1 >>= 1;
 
-  /* Check for u0 != v0 needed to ensure that argument to
-   * count_trailing_zeros is non-zero. */
-  while (u1 != v1 && u0 != v0)
+  v0 = (v0 >> 1) | (v1 << (GMP_LIMB_BITS - 1));
+  v1 >>= 1;
+
+  while (u1 || v1) /* u1 == 0 can happen at most twice per call */
     {
-      unsigned long int r;
-      if (u1 > v1)
+      mp_limb_t vgtu, t1, t0;
+      sub_ddmmss (t1, t0, u1, u0, v1, v0);
+      vgtu = LIMB_HIGHBIT_TO_MASK(t1);
+
+      if (UNLIKELY (t0 == 0))
 	{
-	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
-	  count_trailing_zeros (r, u0);
-	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
-	  u1 >>= r;
+	  if (t1 == 0)
+	    {
+	      g.d1 = (u1 << 1) | (u0 >> (GMP_LIMB_BITS - 1));
+	      g.d0 = (u0 << 1) | 1;
+	      return g;
+	    }
+	  int c;
+	  count_trailing_zeros (c, t1);
+
+	  /* v1 = min (u1, v1) */
+	  v1 += (vgtu & t1);
+	  /* u0 = |u1 - v1| */
+	  u0 = (t1 ^ vgtu) - vgtu;
+	  ASSERT (c < GMP_LIMB_BITS - 1);
+	  u0 >>= c + 1;
+	  u1 = 0;
 	}
-      else  /* u1 < v1.  */
+      else
 	{
-	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
-	  count_trailing_zeros (r, v0);
-	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
-	  v1 >>= r;
+	  int c;
+	  count_trailing_zeros (c, t0);
+	  c++;
+	  /* V <-- min (U, V).
+
+	     Assembly version should use cmov. Another alternative,
+	     avoiding carry propagation, would be
+
+	     v0 += vgtu & t0; v1 += vtgu & (u1 - v1);
+	  */
+	  add_ssaaaa (v1, v0, v1, v0, vgtu & t1, vgtu & t0);
+	  /* U  <--  |U - V|
+	     No carry handling needed in this conditional negation,
+	     since t0 != 0. */
+	  u0 = (t0 ^ vgtu) - vgtu;
+	  u1 = t1 ^ vgtu;
+	  if (UNLIKELY (c == GMP_LIMB_BITS))
+	    {
+	      u0 = u1;
+	      u1 = 0;
+	    }
+	  else
+	    {
+	      u0 = (u0 >> c) | (u1 << (GMP_LIMB_BITS - c));
+	      u1 >>= c;
+	    }
 	}
     }
+  while ((v0 | u0) & GMP_LIMB_HIGHBIT)
+    { /* At most two iterations */
+      mp_limb_t vgtu, t0;
+      int c;
+      sub_ddmmss (vgtu, t0, 0, u0, 0, v0);
+      if (UNLIKELY (t0 == 0))
+	{
+	  g.d1 = u0 >> (GMP_LIMB_BITS - 1);
+	  g.d0 = (u0 << 1) | 1;
+	  return g;
+	}
 
-  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
-  if (u1 == v1 && u0 == v0)
-    {
-      g.d0 = u0; g.d1 = u1;
-      return g;
+      /* v <-- min (u, v) */
+      v0 += (vgtu & t0);
+
+      /* u <-- |u - v| */
+      u0 = (t0 ^ vgtu) - vgtu;
+
+      count_trailing_zeros (c, t0);
+      u0 = (u0 >> 1) >> c;
     }
-  else {
-    mp_limb_t t[2];
-    t[0] = u0; t[1] = u1;
 
-    v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
-    g.d0 = mpn_gcd_1 (t, 1 + (u1 != 0), v0);
-    g.d1 = 0;
-    return g;
-  }
+  g.d0 = mpn_gcd_11 ((u0 << 1) + 1, (v0 << 1) + 1);
+  g.d1 = 0;
+  return g;
 }


More information about the gmp-commit mailing list