[Gmp-commit] /var/hg/gmp: 2 new changesets

Sat Oct 8 20:15:16 CEST 2011

details:   /var/hg/gmp/rev/2d3bdff1bc32
changeset: 14280:2d3bdff1bc32
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Sat Oct 08 20:05:27 2011 +0200
description:
Fixed comment typo.

details:   /var/hg/gmp/rev/6dc7b095171a
changeset: 14281:6dc7b095171a
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Sat Oct 08 20:15:06 2011 +0200
description:
Fixed extra_bits book-keeping in hgcd_appr.

diffstat:

 ChangeLog               |  11 +++++
 mpn/generic/hgcd2.c     |   2 +-
 mpn/generic/hgcd_appr.c |  91 ++++++++++++++++++++++++++++++++----------------
 tests/mpn/t-hgcd_appr.c |  42 ++++-----------------
 4 files changed, 82 insertions(+), 64 deletions(-)

diffs (247 lines):

diff -r d9cbbb07c55c -r 6dc7b095171a ChangeLog

--- a/ChangeLog	Fri Oct 07 15:35:38 2011 +0200
+++ b/ChangeLog	Sat Oct 08 20:15:06 2011 +0200
@@ -1,3 +1,14 @@
+2011-10-08  Niels Möller  <nisse at lysator.liu.se>
+
+	* tests/mpn/t-hgcd_appr.c (hgcd_appr_valid_p): Adjusted the
+	allowed margin of non-minimality for hgcd_appr.
+
+	* mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Fixed handling of
+	extra_bits, starting at zero, to ensure that we don't produce too
+	small remainders. Added a final reduction loop when we we
+	otherwise terminate with extra_bits > 0, to make the returned
+	remainders closer to minimal.
+
 2011-10-07  Torbjorn Granlund  <tege at gmplib.org>
 
 	* longlong.h (s390): Add 32-bit zarch umul_ppmm and udiv_qrnnd.
diff -r d9cbbb07c55c -r 6dc7b095171a mpn/generic/hgcd2.c
--- a/mpn/generic/hgcd2.c	Fri Oct 07 15:35:38 2011 +0200
+++ b/mpn/generic/hgcd2.c	Sat Oct 08 20:15:06 2011 +0200
@@ -199,7 +199,7 @@
 
 /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
    matrix M. Returns 1 if we make progress, i.e. can perform at least
-   one subtraction. Otherwise returns zero.. */
+   one subtraction. Otherwise returns zero. */
 
 /* FIXME: Possible optimizations:
 
diff -r d9cbbb07c55c -r 6dc7b095171a mpn/generic/hgcd_appr.c
--- a/mpn/generic/hgcd_appr.c	Fri Oct 07 15:35:38 2011 +0200
+++ b/mpn/generic/hgcd_appr.c	Sat Oct 08 20:15:06 2011 +0200
@@ -29,13 +29,6 @@
 
 #define TRACE 0
 
-#define MPN_BITCOUNT(bits, n, high) do {			\
-    int __cnt;							\
-    ASSERT ((n) > 0);						\
-    count_leading_zeros (__cnt, (high));			\
-    (bits) = (n) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);	\
-  } while (0)
-
 /* Scratch need: Copies of inputs, and n limbs for hgcd_appr_step,
    total 3*n limbs. */
 mp_size_t
@@ -48,7 +41,6 @@
 mpn_hgcd_appr (mp_srcptr ap, mp_srcptr bp, mp_size_t n,
 	       struct hgcd_matrix *M, mp_ptr tp)
 {
-  mp_bitcnt_t nbits;
   unsigned extra_bits;
   mp_size_t s;
   mp_limb_t mask;
@@ -61,20 +53,18 @@
 
   ASSERT (mask > 0);
 
-  MPN_BITCOUNT (nbits, n, mask);
-
-  /* We aim for reduction of to size sbits, where sbits = nbits/2 + 1
-     = GMP_NUMB_BITS * s - extra_bits. We keep track of extra_bits
-     (and implicitly sbits) to decide when to discard the least
-     significant limbs, but we can't actually reduce to size less then
-     s limbs, since that may give matrix entries exceeding n-s limb
-     and overflow the current allocation for the matrix entries. */
+  if (n <= 2)
+    /* Implies s = n. A fairly uninteresting case but exercised by the
+       random inputs of the testsuite. */
+    return 0;
+    
+  /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
+     we discard some of the least significant limbs, we must keep one
+     additional bit to account for the truncation error. We maintain
+     the GMP_NUMB_BITS * s - extra_bits as the current target size. */
      
   s = n/2 + 1;
-  extra_bits = GMP_NUMB_BITS * s - (nbits/2 + 1);
-
-  ASSERT (extra_bits >= GMP_NUMB_BITS/2 - 1);
-  ASSERT (extra_bits < 3*GMP_NUMB_BITS/2);
+  extra_bits = 0;
 
   /* FIXME: Could avoid copying now, and do it when applying the first
      hgcd2 matrix. */
@@ -83,8 +73,8 @@
   MPN_COPY (vp, bp, n);
 
 #if TRACE
-  fprintf (stderr, "hgcd_appr: In: n = %u, s = %u, nbits = %u, extra = %u\n",
-	   (unsigned) n, (unsigned) s, (unsigned) nbits, (unsigned) extra_bits);
+  fprintf (stderr, "hgcd_appr: In: n = %u, s = %u\n",
+	   (unsigned) n, (unsigned) s);
 #endif
   while (n > 2)
     {
@@ -94,10 +84,11 @@
       fprintf (stderr, "loop: n = %u\n", (unsigned) n);
 #endif
       ASSERT (n > s);
+      ASSERT (n <= 2*s);
 
       nn = mpn_hgcd_step (n, up, vp, s, M, tp);
       if (!nn)
-	return success;
+	break;
 
       n = nn;
       success = 1;
@@ -111,10 +102,6 @@
 	 rather than just sbits <-- sbits - p. This adjustment makes
 	 the produced matrix sligthly smaller than it could be. */
 
-      /* FIXME: Simplify condition. For a start, do we always have
-
-	   2*s > n?
-      */
       if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
 	{
 	  mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
@@ -134,8 +121,6 @@
 		  || mpn_zero_p (vp + s + 1, n - s - 1))
 		continue;
 
-	      /* FIXME: We should arrange to reduce by extra_bits at
-		 the end, before returning. */
 	      extra_bits = GMP_NUMB_BITS - 1;
 	      s++;
 	    }
@@ -160,15 +145,61 @@
 	   (unsigned) n, (unsigned) s, (unsigned) extra_bits);
 #endif
 
-  if (n == 2 && s == 1)
+  if (extra_bits > 0)
+    {
+      /* We can get here only of we have dropped at least one of the
+	 least significant bits, so we can decrement up and vp. We can
+	 then shift left extra bits using mpn_shiftr. */
+      /* NOTE: In the unlikely case that n is large, it would be
+	 preferable to do an initial subdiv step to reduce the size
+	 before shifting, but that would mean duplicating
+	 mpn_gcd_subdiv_step with a bit count rather than a limb
+	 count. */
+      up--; vp--;
+      up[0] = mpn_rshift (up+1, up+1, n, GMP_NUMB_BITS - extra_bits);
+      vp[0] = mpn_rshift (vp+1, vp+1, n, GMP_NUMB_BITS - extra_bits);
+      n += (up[n] | vp[n]) > 0;
+
+      ASSERT (success);
+
+      while (n > 2)
+	{
+	  mp_size_t nn;
+
+	  ASSERT (n > s);
+	  ASSERT (n <= 2*s);
+
+#if TRACE
+      fprintf (stderr, "extra loop: n = %u\n", (unsigned) n);
+#endif
+	  nn = mpn_hgcd_step (n, up, vp, s, M, tp);
+#if TRACE
+	  fprintf (stderr, "extra bit reduction: %s\n",
+		   nn ? "success" : "fail");
+#endif
+	  
+	  if (!nn)
+	    return 1;
+
+	  n = nn;
+	}
+    }
+
+  if (n == 2)
     {
       struct hgcd_matrix1 M1;
+      ASSERT (s == 1);
+
       if (mpn_hgcd2 (up[1], up[0], vp[1], vp[0], &M1))
 	{
+#if TRACE
+	  fprintf (stderr, "final hgcd2 succeeded\n");
+#endif
 	  /* Multiply M <- M * M1 */
 	  mpn_hgcd_matrix_mul_1 (M, &M1, tp);
 	  success = 1;
 	}
     }
+
   return success;
 }
diff -r d9cbbb07c55c -r 6dc7b095171a tests/mpn/t-hgcd_appr.c
--- a/tests/mpn/t-hgcd_appr.c	Fri Oct 07 15:35:38 2011 +0200
+++ b/tests/mpn/t-hgcd_appr.c	Sat Oct 08 20:15:06 2011 +0200
@@ -492,41 +492,17 @@
       return 0;
     }
 
+  /* We lose one bit each time we discard the least significant limbs.
+     That can happen at most s * (GMP_NUMB_BITS) / (GMP_NUMB_BITS - 1)
+     times. */
+
+  margin = (n/2+1) * GMP_NUMB_BITS / (GMP_NUMB_BITS - 1);
+
   if (abits > dbits)
-    fprintf (stderr, "n = %u: sbits = %u: ref #(r0-r1): %u, appr #(r0-r1): %u excess: %d\n",
+    fprintf (stderr, "n = %u: sbits = %u: ref #(r0-r1): %u, appr #(r0-r1): %u excess: %d, margin: %u\n",
 	     (unsigned) n, (unsigned) s*GMP_NUMB_BITS,
-	     (unsigned) dbits, (unsigned) abits, (int) abits - s * GMP_NUMB_BITS);
-
-    
-  /* We discard the low limb at most about n/2 times. Each time we
-     lose one bit, but we start with at least GMP_NUMB_BITS/2 - 1
-     extra bits. After consuming these, the number of lost bits are
-     rounded up to a limb.
-     
-     FIXME: Currently, it seems we some times get a bit
-     larger abits than this, so either the analysis or the
-     implementation gets something wrong. */
-
-  /* problematic cases:
-
-       Seed GMP_CHECK_RANDOMIZE=682513484 (include this in bug reports)
-       n = 881: sbits = 28224: ref #(r0-r1): 27999, appr #(r0-r1): 32264
-       appr |r0 - r1| much larger than minimal (by 4040 bits, margin = 1762 bits)
-
-       Seed GMP_CHECK_RANDOMIZE=1469435584 (include this in bug reports)
-       n = 465: sbits = 14912: ref #(r0-r1): 14066, appr #(r0-r1): 17991
-       appr |r0 - r1| much larger than minimal (by 3079 bits, margin = 930 bits)
-
-       Seed GMP_CHECK_RANDOMIZE=2485812869 (include this in bug reports)
-       n = 77: sbits = 2496: ref #(r0-r1): 2495, appr #(r0-r1): 2554 excess: 58
-       appr |r0 - r1| much larger than minimal (by 58 bits, margin = 6 bits)
-
-       
-  */
-  if (n < GMP_NUMB_BITS)
-    margin = 0;
-  else
-    margin = n/2 + GMP_NUMB_BITS/2;
+	     (unsigned) dbits, (unsigned) abits,
+	     (int) abits - s * GMP_NUMB_BITS, (unsigned) margin);
 
   if (abits > s*GMP_NUMB_BITS + margin)
     {