[Gmp-commit] /var/hg/gmp: 6 new changesets

Sat Apr 25 18:04:50 UTC 2015

details:   /var/hg/gmp/rev/2ffac95e6a34
changeset: 16588:2ffac95e6a34
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Apr 25 18:59:59 2015 +0200
description:
mpn/generic/invert.c: Split add in the correction test.

details:   /var/hg/gmp/rev/cd75c281aa5f
changeset: 16589:cd75c281aa5f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Apr 25 19:05:23 2015 +0200
description:
mpn/generic/invertappr.c: Use CNST_LIMB and general cleanup

details:   /var/hg/gmp/rev/f6667ca34e99
changeset: 16590:f6667ca34e99
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Apr 25 19:07:43 2015 +0200
description:
mpn/generic/invertappr.c: Remove branches in the B^nm-1 case

details:   /var/hg/gmp/rev/10557fe06670
changeset: 16591:10557fe06670
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Apr 25 19:10:19 2015 +0200
description:
mpn/generic/invertappr.c: Unroll the check loop.

details:   /var/hg/gmp/rev/e12ce06545be
changeset: 16592:e12ce06545be
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Apr 25 19:12:37 2015 +0200
description:
mpn/generic/invertappr.c: Use rsblsh1.

details:   /var/hg/gmp/rev/391231d8dc7d
changeset: 16593:391231d8dc7d
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Apr 25 20:04:46 2015 +0200
description:
ChangeLog

diffstat:

 ChangeLog                |   7 ++-
 mpn/generic/invert.c     |  11 ++++--
 mpn/generic/invertappr.c |  81 +++++++++++++++++++++++++++--------------------
 3 files changed, 58 insertions(+), 41 deletions(-)

diffs (192 lines):

diff -r 3fcee237e6d5 -r 391231d8dc7d ChangeLog

--- a/ChangeLog	Sat Apr 25 17:59:53 2015 +0200
+++ b/ChangeLog	Sat Apr 25 20:04:46 2015 +0200
@@ -1,10 +1,13 @@
 2015-04-25 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
+	* mpn/generic/invert.c: Split add in the correction test.
+	* mpn/generic/invertappr.c: Cleanup of loops and branches.
+
 	* mpn/generic/hgcd_reduce.c: Use TMP_ALLOC_LIMBS_3.
+	* mpn/generic/powm.c: Use TMP_ALLOC_LIMBS_2.
+	* mpn/generic/rootrem.c: Likewise.
 	* mpn/generic/remove.c: Remove redundant #ifdef.
 	* mpn/generic/perfpow.c: Likewise.
-	* mpn/generic/powm.c: Use TMP_ALLOC_LIMBS_2.
-	* mpn/generic/rootrem.c: Likewise.
 
 2015-04-21    <torbjorng at google.com>
 
diff -r 3fcee237e6d5 -r 391231d8dc7d mpn/generic/invert.c
--- a/mpn/generic/invert.c	Sat Apr 25 17:59:53 2015 +0200
+++ b/mpn/generic/invert.c	Sat Apr 25 20:04:46 2015 +0200
@@ -6,7 +6,7 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
 
-Copyright (C) 2007, 2009, 2010, 2012, 2014 Free Software Foundation, Inc.
+Copyright (C) 2007, 2009, 2010, 2012, 2014-2015 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -80,9 +80,12 @@
       if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
 	/* Code to detect and correct the "off by one" approximation. */
 	mpn_mul_n (scratch, ip, dp, n);
-	ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
-	if (! mpn_add (scratch, scratch, 2*n, dp, n))
-	  MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
+	e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
+	if (LIKELY(e)) /* The high part can not give a carry by itself. */
+	  e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
+	/* If the value was wrong (no carry), correct it (increment). */
+	e ^= CNST_LIMB (1);
+	MPN_INCR_U (ip, n, e);
       }
   }
 }
diff -r 3fcee237e6d5 -r 391231d8dc7d mpn/generic/invertappr.c
--- a/mpn/generic/invertappr.c	Sat Apr 25 17:59:53 2015 +0200
+++ b/mpn/generic/invertappr.c	Sat Apr 25 20:04:46 2015 +0200
@@ -12,7 +12,7 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
 
-Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright (C) 2007, 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -107,10 +107,10 @@
     xp = tp + n + 2;				/* 2 * n limbs */
 
     /* n > 1 here */
-    i = n - 1;
+    i = n;
     do
-      xp[i] = GMP_NUMB_MAX;
-    while (--i >= 0);
+      xp[--i] = GMP_NUMB_MAX;
+    while (i);
     mpn_com (xp + n, dp, n);
 
     /* Now xp contains B^2n - {dp,n}*B^n - 1 */
@@ -126,7 +126,7 @@
 	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
       else
 	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
-      MPN_DECR_U(ip, n, 1);
+      MPN_DECR_U(ip, n, CNST_LIMB (1));
       return 1;
     }
   }
@@ -182,8 +182,8 @@
   rn = n;
   do {
     *sizp = rn;
-    rn = ((rn) >> 1) + 1;
-    sizp ++;
+    rn = (rn >> 1) + 1;
+    ++sizp;
   } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
 
   /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
@@ -221,44 +221,53 @@
       /* FIXME: We do only need {xp,n+1}*/
       mpn_mul (xp, dp - n, n, ip - rn, rn);
       mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
-      method = 1; /* Remember we used (truncated) product */
-      /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
-    } else { /* Use B^n-1 wraparound */
+      method = CNST_LIMB (1); /* Remember we truncated, Mod B^(n+1) */
+      /* We computed (truncated) {xp,n+1} <- 1.{ip,rn} * 0.{dp,n} */
+    } else { /* Use B^mn-1 wraparound */
       mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
       /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
       /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
       /* Add dp*B^rn mod (B^mn-1) */
       ASSERT (n >= mn - rn);
-      xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
-      cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
-      MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
-      ASSERT (n + rn >=  mn);
-      /* Subtract B^{rn+n} */
-      MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
-      if (xp[mn])
-	MPN_INCR_U (xp, mn, xp[mn] - 1);
-      else
-	MPN_DECR_U (xp, mn, 1);
-      method = 0; /* Remember we are working Mod B^m-1 */
+      cy = mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
+      cy = mpn_add_nc (xp, xp, dp - (n - (mn - rn)), n - (mn - rn), cy);
+      /* Subtract B^{rn+n}, maybe only compensate the carry*/
+      xp[mn] = CNST_LIMB (1); /* set a limit for DECR_U */
+      MPN_DECR_U (xp + rn + n - mn, 2 * mn + 1 - rn - n, CNST_LIMB (1) - cy);
+      MPN_DECR_U (xp, mn, CNST_LIMB (1) - xp[mn]); /* if DECR_U eroded xp[mn] */
+      method = CNST_LIMB (0); /* Remember we are working Mod B^mn-1 */
     }
 
-    if (xp[n] < 2) { /* "positive" residue class */
-      cy = 1;
-      while (xp[n] || mpn_cmp (xp, dp - n, n)>0) {
-	xp[n] -= mpn_sub_n (xp, xp, dp - n, n);
-	cy ++;
+    if (xp[n] < CNST_LIMB (2)) { /* "positive" residue class */
+      cy = xp[n]; /* 0 <= cy <= 1 here. */
+#if ! USE_MUL_N
+      xp[n] = CNST_LIMB (0);
+#endif
+      if (cy++ && !mpn_sub_n (xp, xp, dp - n, n)) {
+	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+	++cy;
+      } /* 1 <= cy <= 2 here. */
+#if HAVE_NATIVE_mpn_rsblsh1_n
+      if (mpn_cmp (xp, dp - n, n) > 0) {
+	ASSERT_NOCARRY (mpn_rsblsh1_n (xp, xp, dp - n, n));
+	++cy;
+      } else
+	ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
+#else
+      if (mpn_cmp (xp, dp - n, n) > 0) {
+	ASSERT_NOCARRY (mpn_sub_n (xp, xp, dp - n, n));
+	++cy;
       }
-      MPN_DECR_U(ip - rn, rn, cy);
-      ASSERT (cy <= 4); /* at most 3 cycles for the while above */
       ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
-      ASSERT (xp[n] == 0);
+#endif
+      MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 3 here. */
     } else { /* "negative" residue class */
       mpn_com (xp, xp, n + 1);
       MPN_INCR_U(xp, n + 1, method);
-      ASSERT (xp[n] <= 1);
+      ASSERT (xp[n] <= CNST_LIMB (1));
 #if USE_MUL_N
       if (xp[n]) {
-	MPN_INCR_U(ip - rn, rn, 1);
+	MPN_INCR_U(ip - rn, rn, CNST_LIMB (1));
 	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
       }
 #endif
@@ -271,14 +280,16 @@
     rp[2*rn] = 0;
     mpn_mul (rp, xp + n - rn, rn + xp[n], ip - rn, rn);
 #endif
-    /* We need _only_ the carry from the next addition  */
-    /* Anyway 2rn-n <= 2... we don't need to optimise.  */
     cy = mpn_add_n (rp + rn, rp + rn, xp + n - rn, 2*rn - n);
     cy = mpn_add_nc (ip - n, rp + 3*rn - n, xp + rn, n - rn, cy);
-    MPN_INCR_U (ip - rn, rn, cy + (1-USE_MUL_N)*(rp[2*rn] + xp[n]));
+#if USE_MUL_N
+    MPN_INCR_U (ip - rn, rn, cy);
+#else
+    MPN_INCR_U (ip - rn, rn, cy + rp[2*rn] + xp[n]);
+#endif
     if (sizp == sizes) { /* Get out of the cycle */
       /* Check for possible carry propagation from below. */
-      cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - 7; /* Be conservative. */
+      cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
 /*    cy = mpn_add_1 (rp + rn, rp + rn, 2*rn - n, 4); */
       break;
     }