[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Dec 21 22:00:31 CET 2009
details: /home/hgfiles/gmp/rev/eb85af8f7f2b
changeset: 13165:eb85af8f7f2b
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 21 20:12:43 2009 +0100
description:
Remove a spurious mpn_sub_1.
details: /home/hgfiles/gmp/rev/3df646ab3b85
changeset: 13166:3df646ab3b85
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 21 20:14:31 2009 +0100
description:
Trivial merge.
details: /home/hgfiles/gmp/rev/fb0247ca2316
changeset: 13167:fb0247ca2316
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 21 21:57:23 2009 +0100
description:
Make mpn_dcpi1_div* work for all intended operands.
details: /home/hgfiles/gmp/rev/a801906739e5
changeset: 13168:a801906739e5
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 21 22:00:07 2009 +0100
description:
Merge: Make mpn_dcpi1_div* work for all intended operands.
diffstat:
ChangeLog | 21 ++++++++-
gmp-impl.h | 27 +++++++++++-
mpn/generic/dcpi1_bdiv_q.c | 1 -
mpn/generic/dcpi1_div_q.c | 4 +-
mpn/generic/dcpi1_div_qr.c | 22 ++-------
mpn/generic/dcpi1_divappr_q.c | 95 ++++++++++++++++++++++++++++++++++--------
mpn/generic/toom6h_mul.c | 22 +--------
tune/common.c | 10 ++++
tune/speed.c | 2 +
tune/speed.h | 13 +++++
10 files changed, 157 insertions(+), 60 deletions(-)
diffs (truncated from 430 to 300 lines):
diff -r bb6e89b2f60d -r a801906739e5 ChangeLog
--- a/ChangeLog Mon Dec 21 18:04:06 2009 +0100
+++ b/ChangeLog Mon Dec 21 22:00:07 2009 +0100
@@ -1,13 +1,21 @@
2009-12-21 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/generic/dcpi1_divappr_q.c: Handle qn = 1 and qn = 2 for initial
+ quotient block (code block copied from dcpi1_div_qr.c).
+
+ * mpn/generic/dcpi1_div_qr.c: Rewrite singular case giving q limb of
+ GMP_NUMB_MAX. Remove an impossible qn = 0 case.
+
+ * mpn/generic/dcpi1_bdiv_q.c: Remove a spurious mpn_sub_1.
+
* mpn/generic/mul.c: Put back call to mpn_mul_n.
* tune/tuneup.c (all): Call tune_mulmod_bnm1 before tuning fft due to
dependency on mulmod_bnm1 from both mul_fft_mul and from mullo_n.
- * mpn/generic/dcpi1_divappr_q.c: Add parameter ASSERTs.
- * mpn/generic/dcpi1_div_q.c: Likewise.
- * mpn/generic/dcpi1_div_qr.c: Change ASSERT to enforce dn >= 4.
+ * mpn/generic/dcpi1_divappr_q.c: ASSERT that dn >= 6 and nn > dn.
+ * mpn/generic/dcpi1_div_q.c: ASSERT that dn >= 6 and nn-dn >= 3.
+ * mpn/generic/dcpi1_div_qr.c: ASSERT that dn >= 6 and nn-dn >= 3.
* mpn/generic/bdiv_q_1.c (mpn_pi1_bdiv_q_1): Renamed from
mpn_bdiv_q_1_pi1.
@@ -23,6 +31,13 @@
2009-12-21 Marco Bodrato <bodrato at mail.dm.unipi.it>
+ * gmp-impl.h (mpn_toom6h_mul_itch): New inline function.
+ (MUL_TOOM6H_THRESHOLD): Default value.
+ (SQR_TOOM6_THRESHOLD): Default value.
+ * mpn/generic/toom6h_mul.c: Remove definitions moved to gmp-impl.h.
+ * tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+ mpn_toom6h_mul and mpn_toom6_sqr speed.
+
* mpn/generic/toom63_mul.c: Remove unused TMP_*.
* mpn/generic/toom_eval_pm2rexp.c: New file.
diff -r bb6e89b2f60d -r a801906739e5 gmp-impl.h
--- a/gmp-impl.h Mon Dec 21 18:04:06 2009 +0100
+++ b/gmp-impl.h Mon Dec 21 22:00:07 2009 +0100
@@ -1048,6 +1048,9 @@
#define MPN_TOOM44_MUL_MINSIZE 30
#define MPN_TOOM4_SQR_MINSIZE 30
+#define MPN_TOOM6H_MUL_MINSIZE 46
+#define MPN_TOOM6_SQR_MINSIZE 46
+
#define MPN_TOOM32_MUL_MINSIZE 10
#define MPN_TOOM42_MUL_MINSIZE 10
#define MPN_TOOM43_MUL_MINSIZE 49 /* ??? */
@@ -1137,8 +1140,8 @@
#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
-#define mpn_toom6h_sqr __MPN(toom6h_sqr)
-__GMP_DECLSPEC void mpn_toom6h_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
@@ -1669,6 +1672,14 @@
#define MUL_TOOM44_THRESHOLD 300
#endif
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD 350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
#endif
@@ -4439,6 +4450,18 @@
#define mpn_toom4_sqr_itch(an) \
(3 * (an) + GMP_NUMB_BITS)
+#define mpn_toom6_sqr_itch(n) \
+( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
+ MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
+ mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
+ mp_size_t estimatedN;
+ estimatedN = (an + bn) / (size_t) 10 + 1;
+ return mpn_toom6_sqr_itch( estimatedN * 6 );
+}
+
static inline mp_size_t
mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
{
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_bdiv_q.c
--- a/mpn/generic/dcpi1_bdiv_q.c Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_bdiv_q.c Mon Dec 21 22:00:07 2009 +0100
@@ -135,7 +135,6 @@
np += dn;
qn -= dn;
}
- mpn_sub_1 (np + dn, np + dn, qn, cy);
mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
}
else
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_div_q.c
--- a/mpn/generic/dcpi1_div_q.c Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_div_q.c Mon Dec 21 22:00:07 2009 +0100
@@ -39,8 +39,8 @@
TMP_MARK;
- ASSERT (dn >= 4);
- ASSERT (nn > dn);
+ ASSERT (dn >= 6);
+ ASSERT (nn - dn >= 3);
ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
tp = TMP_SALLOC_LIMBS (nn + 1);
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_div_qr.c
--- a/mpn/generic/dcpi1_div_qr.c Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_div_qr.c Mon Dec 21 22:00:07 2009 +0100
@@ -89,8 +89,8 @@
TMP_MARK;
- ASSERT (dn >= 4);
- ASSERT (nn > dn);
+ ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */
+ ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */
ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
tp = TMP_SALLOC_LIMBS (dn);
@@ -132,10 +132,9 @@
if (UNLIKELY (n2 == d1) && n1 == d0)
{
- q = GMP_NUMB_MAX;
- cy = mpn_submul_1 (np - dn, dp, dn, q);
- ASSERT (cy == n1);
- ASSERT (np[-1] <= d1);
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+ ASSERT (cy == n2);
}
else
{
@@ -170,7 +169,7 @@
{
/* Do a 2qn / qn division */
if (qn == 2)
- qh = mpn_divrem_2 (qp, 0L, np - qn, 4, dp - qn); /* FIXME: obsolete function. Use 5/3 division? */
+ qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
else
@@ -207,15 +206,6 @@
}
else
{
- if (qn == 0)
- {
- qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
- if (qh)
- mpn_sub_n (np - dn, np - dn, dp - dn, dn);
- TMP_FREE;
- return qh;
- }
-
qp -= qn; /* point at low limb of next quotient block */
np -= qn; /* point in the middle of partial remainder */
diff -r bb6e89b2f60d -r a801906739e5 mpn/generic/dcpi1_divappr_q.c
--- a/mpn/generic/dcpi1_divappr_q.c Mon Dec 21 18:04:06 2009 +0100
+++ b/mpn/generic/dcpi1_divappr_q.c Mon Dec 21 22:00:07 2009 +0100
@@ -26,6 +26,7 @@
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
mp_limb_t
@@ -81,7 +82,7 @@
TMP_MARK;
- ASSERT (dn >= 4);
+ ASSERT (dn >= 6);
ASSERT (nn > dn);
ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
@@ -104,29 +105,87 @@
np -= qn; /* point in the middle of partial remainder */
/* Perform the typically smaller block first. */
- if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
- qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+ if (qn == 1)
+ {
+ mp_limb_t q, n2, n1, n0, d1, d0;
+
+ /* Handle qh up front, for simplicity. */
+ qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
+ if (qh)
+ ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
+
+ /* A single iteration of schoolbook: One 3/2 division,
+ followed by the bignum update and adjustment. */
+ n2 = np[0];
+ n1 = np[-1];
+ n0 = np[-2];
+ d1 = dp[-1];
+ d0 = dp[-2];
+
+ ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
+
+ if (UNLIKELY (n2 == d1) && n1 == d0)
+ {
+ q = GMP_NUMB_MASK;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
+ ASSERT (cy == n2);
+ }
+ else
+ {
+ udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
+
+ if (dn > 2)
+ {
+ mp_limb_t cy, cy1;
+ cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
+
+ cy1 = n0 < cy;
+ n0 = (n0 - cy) & GMP_NUMB_MASK;
+ cy = n1 < cy1;
+ n1 = (n1 - cy1) & GMP_NUMB_MASK;
+ np[-2] = n0;
+
+ if (UNLIKELY (cy != 0))
+ {
+ n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
+ qh -= (q == 0);
+ q = (q - 1) & GMP_NUMB_MASK;
+ }
+ }
+ else
+ np[-2] = n0;
+
+ np[-1] = n1;
+ }
+ qp[0] = q;
+ }
else
- qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
+ {
+ if (qn == 2)
+ qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
+ else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
+ else
+ qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
- if (qn != dn)
- {
- if (qn > dn - qn)
- mpn_mul (tp, qp, qn, dp - dn, dn - qn);
- else
- mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
- cy = mpn_sub_n (np - dn, np - dn, tp, dn);
- if (qh != 0)
- cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
- while (cy != 0)
- {
- qh -= mpn_sub_1 (qp, qp, qn, 1);
- cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
}
}
-
More information about the gmp-commit
mailing list