[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

Wed Jan 6 18:45:02 CET 2010

details:   /home/hgfiles/gmp/rev/0565206e7a2e
changeset: 13331:0565206e7a2e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jan 06 18:43:50 2010 +0100
description:
Properly handle np=scratch.  Fix critical typo.

details:   /home/hgfiles/gmp/rev/745b770b85b2
changeset: 13332:745b770b85b2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jan 06 18:44:56 2010 +0100
description:
Use mpn_div_q.

diffstat:

 ChangeLog             |  10 ++++++-
 mpn/generic/div_q.c   |  50 +++++++++++++++++++++++++-----------
 mpn/generic/rootrem.c |  69 +++++++++-----------------------------------------
 mpz/tdiv_q.c          |  22 ++++++++--------
 4 files changed, 67 insertions(+), 84 deletions(-)

diffs (truncated from 319 to 300 lines):

diff -r b18efb1708be -r 745b770b85b2 ChangeLog

--- a/ChangeLog	Wed Jan 06 09:53:32 2010 +0100
+++ b/ChangeLog	Wed Jan 06 18:44:56 2010 +0100
@@ -1,6 +1,14 @@
+2010-01-06  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/div_q.c: Properly handle np=scratch.  Fix critical typo
+	in final adjustment code.  Misc cleanups.
+
+	* mpn/generic/rootrem.c: Use mpn_div_q.
+	* mpz/tdiv_q.c: Likewise.
+
 2010-01-06 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
-	* mpn/generic/toom8h_mul.c: Avoid overlflows of mp_size_t.
+	* mpn/generic/toom8h_mul.c: Avoid overflows of mp_size_t.
 
 2010-01-06  Torbjorn Granlund  <tege at gmplib.org>
 
diff -r b18efb1708be -r 745b770b85b2 mpn/generic/div_q.c
--- a/mpn/generic/div_q.c	Wed Jan 06 09:53:32 2010 +0100
+++ b/mpn/generic/div_q.c	Wed Jan 06 18:44:56 2010 +0100
@@ -43,27 +43,41 @@
      No overlap between the N, D, and Q areas.
 
    This division function does not clobber its input operands, since it is
-   intended to support average-O(qn) divison, and for that to be effective, it
+   intended to support average-O(qn) division, and for that to be effective, it
    cannot put requirements on callers to copy a O(nn) operand.
 
    If a caller does not care about the value of {np,nn+1} after calling this
    function, it should pass np also for the scratch argument.  This function
    will then save some time and space by avoiding allocation and copying.
-   (FIXME: Is this a good design?)
+   (FIXME: Is this a good design?  We only really save any copying for
+   already-normalised divisors, which should be rare.  It also prevents us from
+   reasonably asking for all scratch space we need.)
 
    We write nn-dn+1 limbs for the quotient, but return void.  Why not return
    the most significant quotient limb?  Look at the 4 main code blocks below
    (consisting of an outer if-else where each arm contains an if-else). It is
    tricky for the first code block, since the mpn_*_div_q calls will typically
-   generate all nn-dn+1 and return 0.  I don't see how to fix that unless we
-   generate the most significant quotient limb here, before calling
-   mpn_*_div_q.
+   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
+   we generate the most significant quotient limb here, before calling
+   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
+   critical division case (the SB sub-case in particular) copying is not a good
+   idea.
+
+   It might make sense to split the if-else parts of the (qn + FUDGE
+   >= dn) blocks into separate functions, since we could promise quite
+   different things to callers in these two cases.  The 'then' case
+   benefits from np=scratch, and it could perhaps even tolerate qp=np,
+   saving some headache for many callers.
+
+   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
+   operands, we do not reuse the huge scratch for adjustments.  This can be a
+   serious waste of memory for the largest operands.
 */
 
 /* FUDGE determines when to try getting an approximate quotient from the upper
    parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
    for the code to be correct.  */
-#define FUDGE 2
+#define FUDGE 5			/* FIXME: tune this */
 
 #define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
 #define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
@@ -106,18 +120,18 @@
     {
       /* |________________________|
                           |_______|  */
+      new_np = scratch;
+
       dh = dp[dn - 1];
       if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
 	{
 	  count_leading_zeros (cnt, dh);
 
-	  new_np = scratch;
-	  new_dp = TMP_ALLOC_LIMBS (dn);
-
 	  cy = mpn_lshift (new_np, np, nn, cnt);
 	  new_np[nn] = cy;
 	  new_nn = nn + (cy != 0);
 
+	  new_dp = TMP_ALLOC_LIMBS (dn);
 	  mpn_lshift (new_dp, dp, dn, cnt);
 
 	  if (dn == 2)
@@ -148,7 +162,6 @@
 	}
       else  /* divisior is already normalised */
 	{
-	  new_np = scratch;
 	  if (new_np != np)
 	    MPN_COPY (new_np, np, nn);
 
@@ -184,19 +197,24 @@
                 |_________________|  */
       tp = TMP_ALLOC_LIMBS (qn + 1);
 
+      new_np = scratch;
+      new_nn = 2 * qn + 1;
+      if (new_np == np)
+	/* We need {np,nn} to remain untouched until the final adjustment, so
+	   we need to allocate separate space for new_np.  */
+	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
+
       dh = dp[dn - 1];
       if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
 	{
 	  count_leading_zeros (cnt, dh);
 
-	  new_np = scratch;
-	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
-	  new_nn = 2 * qn + 1;
-
 	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
 	  new_np[new_nn] = cy;
+
 	  new_nn += (cy != 0);
 
+	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
 	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
 	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
 
@@ -258,11 +276,11 @@
 	}
 
       MPN_COPY (qp, tp + 1, qn);
-      if (tp[0] <= 2)
+      if (tp[0] <= 4)
         {
 	  mp_size_t rn;
 
-          rp = scratch;
+          rp = TMP_ALLOC_LIMBS (dn + qn);
           mpn_mul (rp, dp, dn, tp + 1, qn);
 	  rn = dn + qn;
 	  rn -= rp[rn - 1] == 0;
diff -r b18efb1708be -r 745b770b85b2 mpn/generic/rootrem.c
--- a/mpn/generic/rootrem.c	Wed Jan 06 09:53:32 2010 +0100
+++ b/mpn/generic/rootrem.c	Wed Jan 06 18:44:56 2010 +0100
@@ -8,7 +8,7 @@
    ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -26,11 +26,8 @@
 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* FIXME:
-   (a) Once there is a native mpn_tdiv_q function in GMP (division without
-       remainder), replace the quick-and-dirty implementation below by it.
-   (b) The implementation below is not optimal when remp == NULL, since the
-       complexity is M(n) where n is the input size, whereas it should be
-       only M(n/k) on average.
+     This implementation is not optimal when remp == NULL, since the complexity
+     is M(n), whereas it should be M(n/k) on average.
 */
 
 #include <stdio.h>		/* for NULL */
@@ -41,8 +38,6 @@
 
 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
 				       mp_limb_t, int);
-static void mpn_tdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t,
-			mp_srcptr, mp_size_t);
 
 #define MPN_RSHIFT(cy,rp,up,un,cnt) \
   do {									\
@@ -124,7 +119,7 @@
 mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
 		      mp_limb_t k, int approx)
 {
-  mp_ptr qp, rp, sp, wp;
+  mp_ptr qp, rp, sp, wp, scratch;
   mp_size_t qn, rn, sn, wn, nl, bn;
   mp_limb_t save, save2, cy;
   unsigned long int unb; /* number of significant bits of {up,un} */
@@ -150,9 +145,15 @@
   qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
 					of R/(k*S^(k-1)), and S^k */
   if (remp == NULL)
-    rp = TMP_ALLOC_LIMBS (un);     /* will contain the remainder */
+    {
+      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
+      scratch = rp;			 /* used by mpn_div_q */
+    }
   else
-    rp = remp;
+    {
+      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
+      rp = remp;
+    }
   sp = rootp;
   wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
 					and temporary for mpn_pow_1 */
@@ -297,7 +298,7 @@
 	     The quotient needs rn-wn+1 limbs, thus quotient+remainder
 	     need altogether rn+1 limbs. */
 	  tp = qp + qn + 1;	/* put remainder in Q buffer */
-	  mpn_tdiv_q (qp, tp, 0, rp, rn, wp, wn);
+	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
 	  qn += qp[qn] != 0;
 	}
 
@@ -405,47 +406,3 @@
   TMP_FREE;
   return rn;
 }
-
-/* return the quotient Q = {np, nn} divided by {dp, dn} only */
-static void
-mpn_tdiv_q (mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_srcptr np, mp_size_t nn,
-	    mp_srcptr dp, mp_size_t dn)
-{
-  mp_size_t qn = nn - dn; /* expected quotient size is qn+1 */
-  mp_size_t cut;
-
-  ASSERT_ALWAYS (qxn == 0);
-  if (dn <= qn + 3)
-    {
-      mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn);
-    }
-  else
-    {
-      mp_ptr tp;
-      TMP_DECL;
-      TMP_MARK;
-      tp = TMP_ALLOC_LIMBS (qn + 2);
-      cut = dn - (qn + 3);
-      /* perform a first division with divisor cut to dn-cut=qn+3 limbs
-	 and dividend to nn-(cut-1) limbs, i.e. the quotient will be one
-	 limb more than the final quotient.
-	 The quotient will have qn+2 < dn-cut limbs,
-	 and the remainder dn-cut = qn+3 limbs. */
-      mpn_tdiv_qr (tp, rp, 0, np + cut - 1, nn - cut + 1, dp + cut, dn - cut);
-      /* let Q' be the quotient of B * {np, nn} by {dp, dn} [qn+2 limbs]
-	 and T  be the approximation of Q' computed above, where
-	 B = 2^GMP_NUMB_BITS.
-	 We have Q' <= T <= Q'+1, and since floor(Q'/B) = Q, we have
-	 Q = floor(T/B), unless the last limb of T only consists of zeroes. */
-      if (tp[0] != 0)
-	{
-	  /* simply truncate one limb of T */
-	  MPN_COPY (qp, tp + 1, qn + 1);
-	}
-      else /* too bad: perform the expensive division */
-	{
-	  mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn);
-	}
-      TMP_FREE;
-    }
-}
diff -r b18efb1708be -r 745b770b85b2 mpz/tdiv_q.c
--- a/mpz/tdiv_q.c	Wed Jan 06 09:53:32 2010 +0100
+++ b/mpz/tdiv_q.c	Wed Jan 06 18:44:56 2010 +0100
@@ -1,6 +1,6 @@
 /* mpz_tdiv_q -- divide two integers and produce a quotient.
 
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010 Free Software Foundation,
 Inc.
 
 This file is part of the GNU MP Library.
@@ -27,7 +27,7 @@
 {
   mp_size_t ql;
   mp_size_t ns, ds, nl, dl;
-  mp_ptr np, dp, qp, rp;
+  mp_ptr np, dp, qp;
   TMP_DECL;
 
   ns = SIZ (num);
@@ -49,14 +49,9 @@
 
   TMP_MARK;
   qp = PTR (quot);
-  rp = TMP_ALLOC_LIMBS (dl);
   np = PTR (num);
   dp = PTR (den);
 
-  /* FIXME: We should think about how to handle the temporary allocation.
-     Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
-     allocate temp space.  */
-
   /* Copy denominator to temporary space if it overlaps with the quotient.  */
   if (dp == qp)
     {
@@ -69,12 +64,17 @@
   if (np == qp)
     {