[Gmp-commit] /home/hgfiles/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Dec 8 22:15:03 CET 2009
details: /home/hgfiles/gmp/rev/5d223deacdd5
changeset: 13014:5d223deacdd5
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Dec 08 22:08:44 2009 +0100
description:
(mpn_mulmod_bnm1_next_size): Rewrite.
details: /home/hgfiles/gmp/rev/f83433191794
changeset: 13015:f83433191794
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Dec 08 22:14:35 2009 +0100
description:
Add measuring of mpn_mulmod_bnm1 at rounded size.
details: /home/hgfiles/gmp/rev/93009728474a
changeset: 13016:93009728474a
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Dec 08 22:15:00 2009 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 34 ++++++
mpn/generic/bdiv_q.c | 12 +-
mpn/generic/bdiv_qr.c | 12 +-
mpn/generic/gcdext.c | 76 ++++++++++++--
mpn/generic/gcdext_1.c | 238 +++++++++++++++++++++++++++++++++++++++++++++++
mpn/generic/mulmod_bnm1.c | 23 ++--
tests/mpn/Makefile.am | 2 +-
tests/mpn/t-bdiv.c | 167 +++++++++++++++++++++++++++++++++
tune/common.c | 6 +
tune/speed.c | 1 +
tune/speed.h | 35 ++++++-
11 files changed, 567 insertions(+), 39 deletions(-)
diffs (truncated from 786 to 300 lines):
diff -r 415fa264bb46 -r 93009728474a ChangeLog
--- a/ChangeLog Tue Dec 08 10:54:47 2009 +0100
+++ b/ChangeLog Tue Dec 08 22:15:00 2009 +0100
@@ -1,5 +1,33 @@
+2009-12-08 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]: Use
+ table lookup for count_trailing_zeros. Binary algorithm still
+ disabled by default.
+
+ * mpn/generic/gcdext.c (divexact): Local definition of divexact,
+ using mpn_bdiv_q.
+ (compute_v): Use it.
+
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-bdiv.
+
+ * tests/mpn/t-bdiv.c: New file.
+
+ * mpn/generic/bdiv_q.c (mpn_bdiv_q): Fixed bad quotient length,
+ should have qn == nn.
+
+ * mpn/generic/bdiv_qr.c (mpn_bdiv_qr): Pass correct nn length to
+ the lower-level functions.
+
2009-12-08 Torbjorn Granlund <tege at gmplib.org>
+ * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED): New define.
+ * tune/common.c (speed_mpn_mulmod_bnm1_rounded): New function.
+ * tune/speed.c (routine): Add mpn_mulmod_bnm1_rounded for measuring
+ mpn_mulmod_bnm1 at recommended sizes.
+
+ * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1_next_size): Rewrite.
+ (mpn_bc_mulmod_bnm1): Use mpn_add_n instead of mpn_add.
+
* tune/speed.c (routine): Add mpn_invert.
* tune/tuneup.c (tune_invert): New function.
@@ -103,6 +131,12 @@
2009-12-03 Niels Möller <nisse at lysator.liu.se>
+ * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]:
+ Added various masking tricks.
+
+ * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]:
+ Reimplemented binary gcdext, with proper canonicalization.
+
* mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Handle v == 0
from mpn_gcdext_1.
* mpn/generic/gcdext_1.c (mpn_gcdext_1): Allow inputs with a < b,
diff -r 415fa264bb46 -r 93009728474a mpn/generic/bdiv_q.c
--- a/mpn/generic/bdiv_q.c Tue Dec 08 10:54:47 2009 +0100
+++ b/mpn/generic/bdiv_q.c Tue Dec 08 22:15:00 2009 +0100
@@ -37,24 +37,22 @@
mp_ptr tp)
{
mp_limb_t di;
- mp_size_t qn = nn - dn + 1;
- dn = MIN (qn, dn);
if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
{
- MPN_COPY (tp, np, qn);
+ MPN_COPY (tp, np, nn);
binvert_limb (di, dp[0]); di = -di;
- mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, di);
}
else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
{
- MPN_COPY (tp, np, qn);
+ MPN_COPY (tp, np, nn);
binvert_limb (di, dp[0]); di = -di;
- mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, di);
}
else
{
- mpn_mu_bdiv_q (qp, np, qn, dp, dn, tp);
+ mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
}
return;
}
diff -r 415fa264bb46 -r 93009728474a mpn/generic/bdiv_qr.c
--- a/mpn/generic/bdiv_qr.c Tue Dec 08 10:54:47 2009 +0100
+++ b/mpn/generic/bdiv_qr.c Tue Dec 08 22:15:00 2009 +0100
@@ -46,16 +46,16 @@
MPN_COPY (tp, np, nn);
tp[nn] = 0;
binvert_limb (di, dp[0]); di = -di;
- rh = mpn_sbpi1_bdiv_qr (qp, tp, nn + 1, dp, dn, di);
- MPN_COPY (rp, tp + nn + 1 - dn, dn);
+ rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+ MPN_COPY (rp, tp + nn - dn, dn);
}
else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
{
MPN_COPY (tp, np, nn);
tp[nn] = 0;
binvert_limb (di, dp[0]); di = -di;
- rh = mpn_dcpi1_bdiv_qr (qp, tp, nn + 1, dp, dn, di);
- MPN_COPY (rp, tp + nn + 1 - dn, dn);
+ rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
+ MPN_COPY (rp, tp + nn - dn, dn);
}
else
{
@@ -63,11 +63,11 @@
mp_ptr scratch_out;
TMP_DECL;
TMP_MARK;
- itch = mpn_mu_bdiv_qr_itch (nn + 1, dn);
+ itch = mpn_mu_bdiv_qr_itch (nn, dn);
scratch_out = TMP_BALLOC_LIMBS (itch);
MPN_COPY (tp, np, nn);
tp[nn] = 0;
- rh = mpn_mu_bdiv_qr (qp, rp, tp, nn + 1, dp, dn, scratch_out);
+ rh = mpn_mu_bdiv_qr (qp, rp, tp, nn, dp, dn, scratch_out);
TMP_FREE;
}
diff -r 415fa264bb46 -r 93009728474a mpn/generic/gcdext.c
--- a/mpn/generic/gcdext.c Tue Dec 08 10:54:47 2009 +0100
+++ b/mpn/generic/gcdext.c Tue Dec 08 22:15:00 2009 +0100
@@ -85,6 +85,65 @@
return n;
}
+static void
+divexact (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ unsigned shift;
+ mp_size_t qn;
+ mp_ptr tp;
+ TMP_DECL;
+
+ ASSERT (dn > 0);
+ ASSERT (nn >= dn);
+ ASSERT (dp[dn-1] > 0);
+ ASSERT (np[nn-1] > 0);
+
+ qn = nn + 1 - dn;
+
+ while (dp[0] == 0)
+ {
+ ASSERT (np[0] == 0);
+ dp++;
+ np++;
+ dn--;
+ nn--;
+ }
+ count_trailing_zeros (shift, dp[0]);
+
+ TMP_MARK;
+ if (shift > 0)
+ {
+ tp = TMP_ALLOC_LIMBS (dn);
+ mpn_rshift (tp, dp, dn, shift);
+ dp = tp;
+
+ /* FIXME: It's sufficient to get the qn least significant
+ limbs. */
+ tp = TMP_ALLOC_LIMBS (nn);
+ mpn_rshift (tp, np, nn, shift);
+ np = tp;
+ }
+ else
+ {
+ mp_ptr tp = TMP_ALLOC_LIMBS (qn);
+ MPN_COPY (tp, np, qn);
+ np = tp;
+ }
+ if (nn > qn)
+ nn = qn;
+ if (dn > qn)
+ dn = qn;
+
+ if (qn > nn)
+ MPN_ZERO (qp + nn, qn - nn);
+
+ tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (nn, dn));
+ mpn_bdiv_q (qp, np, nn, dp, dn, tp);
+ TMP_FREE;
+}
+
#define COMPUTE_V_ITCH(n) (2*(n) + 1)
/* Computes |v| = |(g - u a)| / b, where u may be positive or
@@ -146,21 +205,9 @@
vn = size + 1 - bn;
ASSERT (vn <= n + 1);
- /* FIXME: Use divexact. Or do the entire calculation mod 2^{n *
- GMP_NUMB_BITS}. */
- mpn_tdiv_qr (vp, tp, 0, tp, size, bp, bn);
+ divexact (vp, tp, size, bp, bn);
vn -= (vp[vn-1] == 0);
- /* Remainder must be zero */
-#if WANT_ASSERT
- {
- mp_size_t i;
- for (i = 0; i < bn; i++)
- {
- ASSERT (tp[i] == 0);
- }
- }
-#endif
return vn;
}
@@ -181,7 +228,8 @@
For the lehmer call after the loop, Let T denote
GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
- + 1 for v and 2T + 1 scratch space. In all, 7T + 3 is sufficient.
+ for u, T+1 for v and 2T + 1 scratch space. In all, 7T + 3 is
+ sufficient for both operations.
*/
diff -r 415fa264bb46 -r 93009728474a mpn/generic/gcdext_1.c
--- a/mpn/generic/gcdext_1.c Tue Dec 08 10:54:47 2009 +0100
+++ b/mpn/generic/gcdext_1.c Tue Dec 08 22:15:00 2009 +0100
@@ -22,6 +22,243 @@
#include "gmp-impl.h"
#include "longlong.h"
+#ifndef GCDEXT_1_USE_BINARY
+#define GCDEXT_1_USE_BINARY 0
+#endif
+
+#ifndef GCDEXT_1_BINARY_METHOD
+#define GCDEXT_1_BINARY_METHOD 2
+#endif
+
+#ifndef USE_ZEROTAB
+#define USE_ZEROTAB 1
+#endif
+
+#if GCDEXT_1_USE_BINARY
+
+#if USE_ZEROTAB
+static unsigned char zerotab[0x40] = {
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+#endif
+
+mp_limb_t
+mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
+ mp_limb_t u, mp_limb_t v)
+{
+ /* Maintain
+
+ U = t1 u + t0 v
+ V = s1 u + s0 v
+
+ where U, V are the inputs (without any shared power of two),
+ and the matris has determinant ± 2^{shift}.
+ */
+ mp_limb_t s0 = 1;
+ mp_limb_t t0 = 0;
+ mp_limb_t s1 = 0;
+ mp_limb_t t1 = 1;
+ mp_limb_t ug;
+ mp_limb_t vg;
+ mp_limb_t ugh;
+ mp_limb_t vgh;
+ unsigned zero_bits;
+ unsigned shift;
+ unsigned i;
+#if GCDEXT_1_BINARY_METHOD == 2
+ mp_limb_t det_sign;
+#endif
+
+ ASSERT (u > 0);
+ ASSERT (v > 0);
+
+ count_trailing_zeros (zero_bits, u | v);
+ u >>= zero_bits;
+ v >>= zero_bits;
+
+ if ((u & 1) == 0)
+ {
+ count_trailing_zeros (shift, u);
+ u >>= shift;
+ t1 <<= shift;
+ }
+ else if ((v & 1) == 0)
+ {
+ count_trailing_zeros (shift, v);
+ v >>= shift;
+ s0 <<= shift;
+ }
+ else
More information about the gmp-commit
mailing list