[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Dec 21 22:00:31 CET 2009


details:   /home/hgfiles/gmp/rev/eb85af8f7f2b
changeset: 13165:eb85af8f7f2b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 21 20:12:43 2009 +0100
description:
Remove a spurious mpn_sub_1.

details:   /home/hgfiles/gmp/rev/3df646ab3b85
changeset: 13166:3df646ab3b85
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 21 20:14:31 2009 +0100
description:
Trivial merge.

details:   /home/hgfiles/gmp/rev/fb0247ca2316
changeset: 13167:fb0247ca2316
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 21 21:57:23 2009 +0100
description:
Make mpn_dcpi1_div* work for all intended operands.

details:   /home/hgfiles/gmp/rev/a801906739e5
changeset: 13168:a801906739e5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 21 22:00:07 2009 +0100
description:
Merge: Make mpn_dcpi1_div* work for all intended operands.

diffstat:

 ChangeLog                     |  21 ++++++++-
 gmp-impl.h                    |  27 +++++++++++-
 mpn/generic/dcpi1_bdiv_q.c    |   1 -
 mpn/generic/dcpi1_div_q.c     |   4 +-
 mpn/generic/dcpi1_div_qr.c    |  22 ++-------
 mpn/generic/dcpi1_divappr_q.c |  95 ++++++++++++++++++++++++++++++++++--------
 mpn/generic/toom6h_mul.c      |  22 +--------
 tune/common.c                 |  10 ++++
 tune/speed.c                  |   2 +
 tune/speed.h                  |  13 +++++
 10 files changed, 157 insertions(+), 60 deletions(-)

diffs (truncated from 430 to 300 lines):

diff -r bb6e89b2f60d -r a801906739e5 ChangeLog
--- a/ChangeLog	Mon Dec 21 18:04:06 2009 +0100
+++ b/ChangeLog	Mon Dec 21 22:00:07 2009 +0100
@@ -1,13 +1,21 @@
 2009-12-21  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/generic/dcpi1_divappr_q.c: Handle qn = 1 and qn = 2 for initial
+	quotient block (code block copied from dcpi1_div_qr.c).
+
+	* mpn/generic/dcpi1_div_qr.c: Rewrite singular case giving q limb of
+	GMP_NUMB_MAX.  Remove an impossible qn = 0 case.
+
+	* mpn/generic/dcpi1_bdiv_q.c: Remove a spurious mpn_sub_1.
+
 	* mpn/generic/mul.c: Put back call to mpn_mul_n.
 
 	* tune/tuneup.c (all): Call tune_mulmod_bnm1 before tuning fft due to
 	dependency on mulmod_bnm1 from both mul_fft_mul and from mullo_n.
 
-	* mpn/generic/dcpi1_divappr_q.c: Add parameter ASSERTs.
-	* mpn/generic/dcpi1_div_q.c: Likewise.
-	* mpn/generic/dcpi1_div_qr.c: Change ASSERT to enforce dn >= 4.
+	* mpn/generic/dcpi1_divappr_q.c: ASSERT that dn >= 6 and nn > dn.
+	* mpn/generic/dcpi1_div_q.c: ASSERT that dn >= 6 and nn-dn >= 3.
+	* mpn/generic/dcpi1_div_qr.c: ASSERT that dn >= 6 and nn-dn >= 3.
 
 	* mpn/generic/bdiv_q_1.c (mpn_pi1_bdiv_q_1): Renamed from
 	mpn_bdiv_q_1_pi1.
@@ -23,6 +31,13 @@
 
 2009-12-21  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
+	* gmp-impl.h (mpn_toom6h_mul_itch): New inline function.
+	(MUL_TOOM6H_THRESHOLD): Default value.
+	(SQR_TOOM6_THRESHOLD): Default value.
+	* mpn/generic/toom6h_mul.c: Remove definitions moved to gmp-impl.h.
+	* tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+	mpn_toom6h_mul and mpn_toom6_sqr speed.
+
 	* mpn/generic/toom63_mul.c: Remove unused TMP_*.
 
 	* mpn/generic/toom_eval_pm2rexp.c: New file.
diff -r bb6e89b2f60d -r a801906739e5 gmp-impl.h
--- a/gmp-impl.h	Mon Dec 21 18:04:06 2009 +0100
+++ b/gmp-impl.h	Mon Dec 21 22:00:07 2009 +0100
@@ -1048,6 +1048,9 @@
 #define MPN_TOOM44_MUL_MINSIZE   30
 #define MPN_TOOM4_SQR_MINSIZE    30
 
+#define MPN_TOOM6H_MUL_MINSIZE   46
+#define MPN_TOOM6_SQR_MINSIZE    46
+
 #define MPN_TOOM32_MUL_MINSIZE   10
 #define MPN_TOOM42_MUL_MINSIZE   10
 #define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
@@ -1137,8 +1140,8 @@
 #define   mpn_toom6h_mul __MPN(toom6h_mul)
 __GMP_DECLSPEC void      mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
 
-#define   mpn_toom6h_sqr __MPN(toom6h_sqr)
-__GMP_DECLSPEC void      mpn_toom6h_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void      mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 
 #define   mpn_fft_best_k __MPN(fft_best_k)
 __GMP_DECLSPEC int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
@@ -1669,6 +1672,14 @@
 #define MUL_TOOM44_THRESHOLD            300
 #endif
 
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD            350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
 #endif
@@ -4439,6 +4450,18 @@
 #define mpn_toom4_sqr_itch(an) \
   (3 * (an) + GMP_NUMB_BITS)
 
+#define mpn_toom6_sqr_itch(n)					\
+( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
+   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
+       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
+  mp_size_t estimatedN;
+  estimatedN = (an + bn) / (size_t) 10 + 1;
+  return mpn_toom6_sqr_itch( estimatedN * 6 );
+}
+
 static inline mp_size_t
 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
 {
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_bdiv_q.c
--- a/mpn/generic/dcpi1_bdiv_q.c	Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_bdiv_q.c	Mon Dec 21 22:00:07 2009 +0100
@@ -135,7 +135,6 @@
 	  np += dn;
 	  qn -= dn;
 	}
-      mpn_sub_1 (np + dn, np + dn, qn, cy);
       mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
     }
   else
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_div_q.c
--- a/mpn/generic/dcpi1_div_q.c	Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_div_q.c	Mon Dec 21 22:00:07 2009 +0100
@@ -39,8 +39,8 @@
 
   TMP_MARK;
 
-  ASSERT (dn >= 4);
-  ASSERT (nn > dn);
+  ASSERT (dn >= 6);
+  ASSERT (nn - dn >= 3);
   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
 
   tp = TMP_SALLOC_LIMBS (nn + 1);
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_div_qr.c
--- a/mpn/generic/dcpi1_div_qr.c	Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_div_qr.c	Mon Dec 21 22:00:07 2009 +0100
@@ -89,8 +89,8 @@
 
   TMP_MARK;
 
-  ASSERT (dn >= 4);
-  ASSERT (nn > dn);
+  ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
+  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
 
   tp = TMP_SALLOC_LIMBS (dn);
@@ -132,10 +132,9 @@
 
 	  if (UNLIKELY (n2 == d1) && n1 == d0)
 	    {
-	      q = GMP_NUMB_MAX;
-	      cy = mpn_submul_1 (np - dn, dp, dn, q);
-	      ASSERT (cy == n1);
-	      ASSERT (np[-1] <= d1);
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+	      ASSERT (cy == n2);
 	    }
 	  else
 	    {
@@ -170,7 +169,7 @@
 	{
 	  /* Do a 2qn / qn division */
 	  if (qn == 2)
-	    qh = mpn_divrem_2 (qp, 0L, np - qn, 4, dp - qn); /* FIXME: obsolete function. Use 5/3 division? */
+	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
 	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
 	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
 	  else
@@ -207,15 +206,6 @@
     }
   else
     {
-      if (qn == 0)
-	{
-	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
-	  if (qh)
-	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
-	  TMP_FREE;
-	  return qh;
-	}
-
       qp -= qn;			/* point at low limb of next quotient block */
       np -= qn;			/* point in the middle of partial remainder */
 
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_divappr_q.c
--- a/mpn/generic/dcpi1_divappr_q.c	Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_divappr_q.c	Mon Dec 21 22:00:07 2009 +0100
@@ -26,6 +26,7 @@
 
 #include "gmp.h"
 #include "gmp-impl.h"
+#include "longlong.h"
 
 
 mp_limb_t
@@ -81,7 +82,7 @@
 
   TMP_MARK;
 
-  ASSERT (dn >= 4);
+  ASSERT (dn >= 6);
   ASSERT (nn > dn);
   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
 
@@ -104,29 +105,87 @@
       np -= qn;			/* point in the middle of partial remainder */
 
       /* Perform the typically smaller block first.  */
-      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
-	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+      if (qn == 1)
+	{
+	  mp_limb_t q, n2, n1, n0, d1, d0;
+
+	  /* Handle qh up front, for simplicity. */
+	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+	  if (qh)
+	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+	  /* A single iteration of schoolbook: One 3/2 division,
+	     followed by the bignum update and adjustment. */
+	  n2 = np[0];
+	  n1 = np[-1];
+	  n0 = np[-2];
+	  d1 = dp[-1];
+	  d0 = dp[-2];
+
+	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+	  if (UNLIKELY (n2 == d1) && n1 == d0)
+	    {
+	      q = GMP_NUMB_MASK;
+	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+	      ASSERT (cy == n2);
+	    }
+	  else
+	    {
+	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+	      if (dn > 2)
+		{
+		  mp_limb_t cy, cy1;
+		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+		  cy1 = n0 < cy;
+		  n0 = (n0 - cy) & GMP_NUMB_MASK;
+		  cy = n1 < cy1;
+		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
+		  np[-2] = n0;
+
+		  if (UNLIKELY (cy != 0))
+		    {
+		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+		      qh -= (q == 0);
+		      q = (q - 1) & GMP_NUMB_MASK;
+		    }
+		}
+	      else
+		np[-2] = n0;
+
+	      np[-1] = n1;
+	    }
+	  qp[0] = q;
+	}
       else
-	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+	{
+	  if (qn == 2)
+	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
+	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+	  else
+	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
 
-      if (qn != dn)
-	{
-	  if (qn > dn - qn)
-	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
-	  else
-	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+	  if (qn != dn)
+	    {
+	      if (qn > dn - qn)
+		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	      else
+		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
 
-	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
-	  if (qh != 0)
-	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	      if (qh != 0)
+		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
 
-	  while (cy != 0)
-	    {
-	      qh -= mpn_sub_1 (qp, qp, qn, 1);
-	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+	      while (cy != 0)
+		{
+		  qh -= mpn_sub_1 (qp, qp, qn, 1);
+		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+		}
 	    }
 	}
-


More information about the gmp-commit mailing list