[Gmp-commit] /var/hg/gmp: 11 new changesets

Tue Jun 9 23:08:11 UTC 2015

details:   /var/hg/gmp/rev/504ee88ff200
changeset: 16687:504ee88ff200
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Jun 09 23:35:42 2015 +0200
description:
mpn/generic/rootrem.c: avoid division if result is 1.

details:   /var/hg/gmp/rev/1c55c6695535
changeset: 16688:1c55c6695535
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Jun 09 23:50:59 2015 +0200
description:
mpn/generic/rootrem.c: Shorten last iteration.

details:   /var/hg/gmp/rev/c1b981e3d6b7
changeset: 16689:c1b981e3d6b7
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Jun 09 23:57:29 2015 +0200
description:
mpn/generic/rootrem.c: Don't compute reminder if we know it's null.

details:   /var/hg/gmp/rev/d1e6615b326a
changeset: 16690:d1e6615b326a
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 00:16:10 2015 +0200
description:
mpn/generic/rootrem.c: Hardwire first step, move it last.

details:   /var/hg/gmp/rev/48cbff6d466d
changeset: 16691:48cbff6d466d
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 00:23:59 2015 +0200
description:
mpn/generic/rootrem.c: Hardwire second step, move it last.

details:   /var/hg/gmp/rev/72f4970bc407
changeset: 16692:72f4970bc407
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 00:27:47 2015 +0200
description:
mpn/generic/rootrem.c: Hardcode third step, move it last.

details:   /var/hg/gmp/rev/a97e01f7c3ce
changeset: 16693:a97e01f7c3ce
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 00:31:13 2015 +0200
description:
mpn/generic/rootrem.c: Hardcode sixth step, move it last.

details:   /var/hg/gmp/rev/f4d21a1a55f6
changeset: 16694:f4d21a1a55f6
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 00:35:52 2015 +0200
description:
mpn/generic/rootrem.c: fuse step 4 and step 5.

details:   /var/hg/gmp/rev/a79897f38d35
changeset: 16695:a79897f38d35
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 00:39:20 2015 +0200
description:
mpn/generic/rootrem.c: fuse step 4, 5, and 7.

details:   /var/hg/gmp/rev/8c7fb6c38fd0
changeset: 16696:8c7fb6c38fd0
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 01:01:44 2015 +0200
description:
mpn/generic/rootrem.c: Don't divide if result will be discarded

details:   /var/hg/gmp/rev/343444ab162a
changeset: 16697:343444ab162a
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jun 10 01:03:22 2015 +0200
description:
mpn/generic/rootrem.c: Indent

diffstat:

 mpn/generic/rootrem.c |  239 +++++++++++++++++++++++++++----------------------
 1 files changed, 131 insertions(+), 108 deletions(-)

diffs (truncated from 321 to 300 lines):

diff -r a79ef80f3630 -r 343444ab162a mpn/generic/rootrem.c

--- a/mpn/generic/rootrem.c	Tue Jun 09 17:16:08 2015 +0200
+++ b/mpn/generic/rootrem.c	Wed Jun 10 01:03:22 2015 +0200
@@ -139,16 +139,14 @@
   unsigned long b, kk;
   unsigned long sizes[GMP_NUMB_BITS + 1];
   int ni, i;
-  int c;
+  int c, perf_pow;
   int logk;
   TMP_DECL;
 
   MPN_SIZEINBASE_2EXP(unb, up, un, 1);
   /* unb is the number of bits of the input U */
-  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
-  /* xnb is the number of bits of the root R */
 
-  if (xnb == 1) /* root is 1 */
+  if (unb <= k) /* root is 1 */
     {
       if (remp == NULL)
 	un -= (*up == CNST_LIMB (1)); /* Non-zero iif {up,un} > 1 */
@@ -161,12 +159,17 @@
       rootp[0] = 1;
       return un;
     }
+  /* if (unb - k <= k/2) // root is 2 */
+
+  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
+  /* xnb is the number of bits of the root R */
 
   /* We initialize the algorithm with a 1-bit approximation to zero: since we
      know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
      r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
   kk = k * (xnb - 1);		/* number of truncated bits in the input */
   rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
+  --kk;
 
   for (logk = 1; ((k - 1) >> logk) != 0; logk++)
     ;
@@ -190,7 +193,7 @@
 	b = sizes[ni] - 1;	/* add just one bit at a time */
       ni++;
     } while (b != 0);
-  sizes[ni] = 0;
+
   ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
   /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
      sizes[i] <= 2 * sizes[i+1].
@@ -219,15 +222,16 @@
   sp = rootp;
 
   MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
-  mpn_sub_1 (rp, rp, rn, 1);	/* subtract the initial approximation: since
+  mpn_sub_1 (rp, rp, rn, 2);	/* subtract the initial approximation: since
 				   the non-truncated part is less than 2^k, it
 				   is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
-  sp[0] = 1;			/* initial approximation */
+  sp[0] = save = 2;		/* initial approximation */
   sn = 1;			/* it has one limb */
 
-  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
+  wp[0] = k; /* k * {sp,sn}^(k-1) = 1 */
   wn = 1;
   i = ni;
+  b = bn = 1;
   do
     {
       /* 1: loop invariant:
@@ -237,15 +241,103 @@
 	 {wp, wn} = {sp, sn}^(k-1)
 	 kk = number of truncated bits of the input
       */
-      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
-				      iteration */
 
+      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
+      if (UNLIKELY (rn < wn))
+	{
+	  MPN_ZERO (sp, bn);
+	}
+      else
+	{
+	  qn = rn - wn; /* expected quotient size */
+	  if (qn <= bn) { /* Divide only if result is not too big. */
+	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
+	    qn += qp[qn] != 0;
+	  }
+
+      /* 5: current buffers: {sp,sn}, {qp,qn}.
+	 Note: {rp,rn} is not needed any more since we'll compute it from
+	 scratch at the end of the loop.
+       */
+
+      /* the quotient should be smaller than 2^b, since the previous
+	 approximation was correctly rounded toward zero */
+	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
+			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
+	    {
+	      for (qn = 1; qn < bn; ++qn)
+		sp[qn - 1] = GMP_NUMB_MAX;
+	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS-1 - ((b-1) % GMP_NUMB_BITS));
+	    }
+	  else
+	    {
+      /* 7: current buffers: {sp,sn}, {qp,qn} */
+
+      /* Combine sB and q to form sB + q.  */
+	      MPN_COPY (sp, qp, qn);
+	      MPN_ZERO (sp + qn, bn - qn);
+	    }
+	}
+      sp[b / GMP_NUMB_BITS] |= save;
+
+      /* 8: current buffer: {sp,sn} */
+
+      if (--i == 0)
+	break;
+
+      /* Since each iteration treats b bits from the root and thus k*b bits
+	 from the input, and we already considered b bits from the input,
+	 we now have to take another (k-1)*b bits from the input. */
+      kk -= (k - 1) * b; /* remaining input bits */
+      /* {rp, rn} = floor({up, un} / 2^kk) */
+      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
+      rn = un - kk / GMP_NUMB_BITS;
+      rn -= rp[rn - 1] == 0;
+
+      /* 9: current buffers: {sp,sn}, {rp,rn} */
+
+     for (c = 0;; c++)
+	{
+	  /* Compute S^k in {qp,qn}. */
+	  /* W <- S^(k-1) for the next iteration,
+	     and S^k = W * S. */
+	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
+	  mpn_mul (qp, wp, wn, sp, sn);
+	  qn = wn + sn;
+	  qn -= qp[qn - 1] == 0;
+
+	  perf_pow = 1;
+	  /* if S^k > floor(U/2^kk), the root approximation was too large */
+	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
+	    MPN_DECR_U (sp, sn, 1);
+	  else
+	    break;
+	}
+
+      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
+
+      ASSERT_ALWAYS (c <= 1);
+      ASSERT_ALWAYS (rn >= qn);
+
+      /* R = R - Q = floor(U/2^kk) - S^k */
+      if (perf_pow != 0)
+	{
+	  mpn_sub (rp, rp, rn, qp, qn);
+	  MPN_NORMALIZE (rp, rn);
+	}
+      else
+	{
+	  rn = 1;
+	  rp[0] = 0;
+	}
+      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
       /* Reinsert a low zero limb if we normalized away the entire remainder */
-      if (rn == 0)
-	{
-	  rp[0] = 0;
-	  rn = 1;
-	}
+      rn += (rn == 0);
+
+      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in the
+				      next iteration */
 
       /* first multiply the remainder by 2^b */
       MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
@@ -292,41 +384,6 @@
       wp[wn] = cy;
       wn += cy != 0;
 
-      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
-
-      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
-      if (rn < wn)
-	{
-	  qn = 0;
-	}
-      else
-	{
-	  qn = rn - wn; /* expected quotient size */
-	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
-	  qn += qp[qn] != 0;
-	}
-
-      /* 5: current buffers: {sp,sn}, {qp,qn}.
-	 Note: {rp,rn} is not needed any more since we'll compute it from
-	 scratch at the end of the loop.
-       */
-
-      /* Number of limbs used by b bits, when least significant bit is
-	 aligned to least limb */
-      bn = (b - 1) / GMP_NUMB_BITS + 1;
-
-      /* the quotient should be smaller than 2^b, since the previous
-	 approximation was correctly rounded toward zero */
-      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
-		      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
-	{
-	  qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
-	  MPN_ZERO (qp, qn);
-	  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
-	  MPN_DECR_U (qp, qn, 1);
-	  qn -= qp[qn - 1] == 0;
-	}
-
       /* 6: current buffers: {sp,sn}, {qp,qn} */
 
       /* multiply the root approximation by 2^b */
@@ -338,74 +395,40 @@
 	  sn++;
 	}
 
-      /* 7: current buffers: {sp,sn}, {qp,qn} */
+      save = sp[b / GMP_NUMB_BITS];
 
-      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
-				   above, q is set to 2^b-1, which has
-				   exactly bn limbs */
+      /* Number of limbs used by b bits, when least significant bit is
+	 aligned to least limb */
+      bn = (b - 1) / GMP_NUMB_BITS + 1;
 
-      /* Combine sB and q to form sB + q.  */
-      save = sp[b / GMP_NUMB_BITS];
-      MPN_COPY (sp, qp, qn);
-      MPN_ZERO (sp + qn, bn - qn);
-      sp[b / GMP_NUMB_BITS] |= save;
+    } while (1);
 
-      /* 8: current buffer: {sp,sn} */
-
-      /* Since each iteration treats b bits from the root and thus k*b bits
-	 from the input, and we already considered b bits from the input,
-	 we now have to take another (k-1)*b bits from the input. */
-      kk -= (k - 1) * b; /* remaining input bits */
-      /* {rp, rn} = floor({up, un} / 2^kk) */
-      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
-      rn = un - kk / GMP_NUMB_BITS;
-      rn -= rp[rn - 1] == 0;
-
-      /* 9: current buffers: {sp,sn}, {rp,rn} */
-
-     for (c = 0;; c++)
+  /* otherwise we have rn > 0, thus the return value is ok */
+  if (!approx || sp[0] <= CNST_LIMB (1))
+    {
+      for (c = 0;; c++)
 	{
 	  /* Compute S^k in {qp,qn}. */
-	  if (i == 1)
-	    {
-	      /* Last iteration: we don't need W anymore. */
-	      /* mpn_pow_1 requires that both qp and wp have enough space to
-		 store the result {sp,sn}^k + 1 limb */
-	      approx = approx && (sp[0] > 1);
-	      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
-	    }
-	  else
-	    {
-	      /* W <- S^(k-1) for the next iteration,
-		 and S^k = W * S. */
-	      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
-	      mpn_mul (qp, wp, wn, sp, sn);
-	      qn = wn + sn;
-	      qn -= qp[qn - 1] == 0;
-	    }
+	  /* Last iteration: we don't need W anymore. */
+	  /* mpn_pow_1 requires that both qp and wp have enough
+	     space to store the result {sp,sn}^k + 1 limb */
+	  qn = mpn_pow_1 (qp, sp, sn, k, wp);
 
-	  /* if S^k > floor(U/2^kk), the root approximation was too large */
-	  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
+	  perf_pow = 1;
+	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
 	    MPN_DECR_U (sp, sn, 1);
 	  else
 	    break;
+	};
+
+      rn = perf_pow != 0;
+      if (rn != 0 && remp != NULL)
+	{
+	  mpn_sub (remp, up, un, qp, qn);
+	  rn = un;
+	  MPN_NORMALIZE (remp, rn);