[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Dec 22 01:48:03 CET 2009


details:   /home/hgfiles/gmp/rev/49c36e32d2ae
changeset: 13171:49c36e32d2ae
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Dec 22 01:42:14 2009 +0100
description:
Simplify, trim allocation.

details:   /home/hgfiles/gmp/rev/9c995e3f6f20
changeset: 13172:9c995e3f6f20
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Dec 22 01:46:02 2009 +0100
description:
Rewrite resp simplify itch functions.

diffstat:

 ChangeLog                |   8 ++++++++
 mpn/generic/bdiv_qr.c    |  21 +++++----------------
 mpn/generic/mu_bdiv_q.c  |  46 ++++++++++++++++++++++++++++++++++------------
 mpn/generic/mu_bdiv_qr.c |  39 ++++++++++++++++++---------------------
 4 files changed, 65 insertions(+), 49 deletions(-)

diffs (219 lines):

diff -r b75c1e49113f -r 9c995e3f6f20 ChangeLog
--- a/ChangeLog	Mon Dec 21 22:09:09 2009 +0100
+++ b/ChangeLog	Tue Dec 22 01:46:02 2009 +0100
@@ -1,3 +1,11 @@
+2009-12-22  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Rewrite.
+	* mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Simplify.
+
+	* mpn/generic/bdiv_qr.c (mpn_bdiv_qr): Simplify, don't allocate.
+	(mpn_bdiv_qr_itch): Conditionalise on MU_BDIV_QR_THRESHOLD.
+
 2009-12-21  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/sbpi1_div_q.c: Fix fixup code for to work for qn = 0.
diff -r b75c1e49113f -r 9c995e3f6f20 mpn/generic/bdiv_qr.c
--- a/mpn/generic/bdiv_qr.c	Mon Dec 21 22:09:09 2009 +0100
+++ b/mpn/generic/bdiv_qr.c	Tue Dec 22 01:46:02 2009 +0100
@@ -44,7 +44,6 @@
       BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
     {
       MPN_COPY (tp, np, nn);
-      tp[nn] = 0;
       binvert_limb (di, dp[0]);  di = -di;
       rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
       MPN_COPY (rp, tp + nn - dn, dn);
@@ -52,23 +51,13 @@
   else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
     {
       MPN_COPY (tp, np, nn);
-      tp[nn] = 0;
       binvert_limb (di, dp[0]);  di = -di;
       rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
       MPN_COPY (rp, tp + nn - dn, dn);
     }
   else
     {
-      mp_size_t itch;
-      mp_ptr scratch_out;
-      TMP_DECL;
-      TMP_MARK;
-      itch = mpn_mu_bdiv_qr_itch (nn, dn);
-      scratch_out = TMP_BALLOC_LIMBS (itch);
-      MPN_COPY (tp, np, nn);
-      tp[nn] = 0;
-      rh = mpn_mu_bdiv_qr (qp, rp, tp, nn, dp, dn, scratch_out);
-      TMP_FREE;
+      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
     }
 
   return rh;
@@ -77,8 +66,8 @@
 mp_size_t
 mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
 {
-  mp_size_t itch;
-  itch = mpn_mu_bdiv_qr_itch (nn, dn);
-  itch = MAX (itch, nn + 1);
-  return itch;
+  if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+    return nn;
+  else
+    return  mpn_mu_bdiv_qr_itch (nn, dn);
 }
diff -r b75c1e49113f -r 9c995e3f6f20 mpn/generic/mu_bdiv_q.c
--- a/mpn/generic/mu_bdiv_q.c	Mon Dec 21 22:09:09 2009 +0100
+++ b/mpn/generic/mu_bdiv_q.c	Tue Dec 22 01:46:02 2009 +0100
@@ -101,12 +101,11 @@
 	 should reduce itch to perhaps 3dn.
        */
 
-      ip = scratch;			/* in limbs */
-      rp = scratch + in;		/* dn limbs */
-      tp = scratch + in + dn;		/* dn + in limbs FIXME: mpn_fft_next_size */
-      scratch += in;			/* Roughly 2in+1 limbs */
+      ip = scratch;		/* in limbs */
+      rp = scratch + in;	/* MAX(binvert_itch(in),dn) limbs */
+      tp = scratch + in + dn;	/* MAX(next_size(dn,k),dn+in) limbs */
 
-      mpn_binvert (ip, dp, in, scratch);
+      mpn_binvert (ip, dp, in, rp);
 
       cy = 0;
 
@@ -209,11 +208,10 @@
       /* Compute half-sized inverse.  */
       in = qn - (qn >> 1);
 
-      ip = scratch;			/* ceil(qn/2) limbs */
-      rp = scratch + in;		/* ceil(qn/2)+qn limbs */
-      scratch += in;			/* 2*ceil(qn/2)+2 */
+      ip = scratch;		/* in limbs */
+      rp = scratch + in;	/* MAX(binvert_itch(in),next_size(qn,k),qn+in) limbs */
 
-      mpn_binvert (ip, dp, in, scratch);
+      mpn_binvert (ip, dp, in, rp);
 
       mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
 #if WANT_FFT
@@ -240,16 +238,40 @@
 mp_size_t
 mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
 {
-  mp_size_t qn;
+  mp_size_t qn, in, m, itch_invert;
+  mp_size_t b;
 
   qn = nn;
 
   if (qn > dn)
     {
-      return 4 * dn;		/* FIXME FIXME FIXME need mpn_fft_next_size */
+      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
+        {
+          m = mpn_fft_next_size (dn, mpn_fft_best_k (dn, 0));
+          m = MAX (dn + in, m);
+        }
+      else
+#endif
+	m = dn + in;
+      itch_invert = mpn_binvert_itch (in);
+      return in + MAX (itch_invert, dn + m);
     }
   else
     {
-      return 2 * qn + 1 + 2;	/* FIXME FIXME FIXME need mpn_fft_next_size */
+      in = qn - (qn >> 1);
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
+        {
+          m = mpn_fft_next_size (qn, mpn_fft_best_k (qn, 0));
+          m = MAX (qn + in, m);
+        }
+      else
+#endif
+        m = qn + in;
+      itch_invert = mpn_binvert_itch (in);
+      return in + MAX (itch_invert, m);
     }
 }
diff -r b75c1e49113f -r 9c995e3f6f20 mpn/generic/mu_bdiv_qr.c
--- a/mpn/generic/mu_bdiv_qr.c	Mon Dec 21 22:09:09 2009 +0100
+++ b/mpn/generic/mu_bdiv_qr.c	Tue Dec 22 01:46:02 2009 +0100
@@ -100,11 +100,10 @@
 	 should reduce itch to perhaps 3dn.
        */
 
-      ip = scratch;			/* in limbs */
-      tp = scratch + in;		/* dn + in limbs FIXME: mpn_fft_next_size */
-      scratch += in;			/* Roughly 2in+1 limbs */
+      ip = scratch;		/* in limbs */
+      tp = scratch + in;	/* MAX(binvert_itch(in),next_size(dn,k),dn+in) limbs */
 
-      mpn_binvert (ip, dp, in, scratch);
+      mpn_binvert (ip, dp, in, tp);
 
       MPN_COPY (rp, np, dn);
       np += dn;
@@ -205,11 +204,10 @@
       /* Compute half-sized inverse.  */
       in = qn - (qn >> 1);
 
-      ip = scratch;			/* ceil(qn/2) limbs */
-      tp = scratch + in;		/* dn + in limbs FIXME: mpn_fft_next_size */
-      scratch += in;			/* 2*ceil(qn/2)+2 */
+      ip = scratch;		/* in limbs */
+      tp = scratch + in;	/* MAX(binvert_itch(in),next_size(dn,k),dn+in) limbs */
 
-      mpn_binvert (ip, dp, in, scratch);
+      mpn_binvert (ip, dp, in, tp);
 
       mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
 #if WANT_FFT
@@ -269,7 +267,7 @@
 mp_size_t
 mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
 {
-  mp_size_t qn, in, m, itch_invert, itch;
+  mp_size_t qn, in, m, itch_invert;
   mp_size_t b;
 
   qn = nn - dn;
@@ -278,22 +276,21 @@
     {
       b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
       in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-      return in + mpn_binvert_itch (in);
     }
   else
     {
       in = qn - (qn >> 1);
+    }
+
 #if WANT_FFT
-      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
-	{
-	  m = mpn_fft_next_size (dn, mpn_fft_best_k (dn, 0));
-	  m = MAX (dn + in, m);
-	}
-      else
+  if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
+    {
+      m = mpn_fft_next_size (dn, mpn_fft_best_k (dn, 0));
+      m = MAX (dn + in, m);
+    }
+  else
 #endif
-	m = dn + in;
-      itch_invert = mpn_binvert_itch (in);
-      itch = MAX (itch_invert, m);
-      return in + itch;
-    }
+    m = dn + in;
+  itch_invert = mpn_binvert_itch (in);
+  return in + MAX (itch_invert, m);
 }


More information about the gmp-commit mailing list