[Gmp-commit] /home/hgfiles/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Dec 27 00:30:14 CET 2009
details: /home/hgfiles/gmp/rev/4f8840c98c7e
changeset: 13229:4f8840c98c7e
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 26 12:42:48 2009 +0100
description:
(TOOM22_MUL_REC): New name for TOOM22_MUL_MN_REC.
details: /home/hgfiles/gmp/rev/40248ad41cab
changeset: 13230:40248ad41cab
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 27 00:25:58 2009 +0100
description:
Tune BMOD_1_TO_MOD_1_THRESHOLD instead of MODEXACT_1_ODD_THRESHOLD.
details: /home/hgfiles/gmp/rev/b3a07d48af77
changeset: 13231:b3a07d48af77
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 27 00:30:10 2009 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 29 ++++++++++++++
gmp-impl.h | 11 ++--
mpn/generic/divis.c | 2 +-
mpn/generic/sbpi1_divappr_q.c | 5 +-
mpn/generic/toom22_mul.c | 4 +-
mpn/generic/toom32_mul.c | 88 ++++++++++++++++++++++--------------------
mpz/cong.c | 2 +-
mpz/cong_ui.c | 2 +-
mpz/divis_ui.c | 2 +-
tests/mpn/t-bdiv.c | 2 +-
tests/mpn/t-toom32.c | 6 +-
tune/tuneup.c | 19 ++------
12 files changed, 98 insertions(+), 74 deletions(-)
diffs (truncated from 440 to 300 lines):
diff -r 2f7eab694ee9 -r b3a07d48af77 ChangeLog
--- a/ChangeLog Sat Dec 26 12:39:10 2009 +0100
+++ b/ChangeLog Sun Dec 27 00:30:10 2009 +0100
@@ -1,3 +1,32 @@
+2009-12-27 Torbjorn Granlund <tege at gmplib.org>
+
+ * gmp-impl.h (MODEXACT_1_ODD_THRESHOLD): Remove.
+ (BMOD_1_TO_MOD_1_THRESHOLD): New parameter, with the reverse meaning of
+ MODEXACT_1_ODD_THRESHOLD.
+ (MPN_MOD_OR_MODEXACT_1_ODD): Use BMOD_1_TO_MOD_1_THRESHOLD.
+ * mpn/generic/divis.c, mpz/{cong.c,cong_ui.c,divis_ui.c}: Likewise.
+ * tune/tuneup.c (tune_modexact_1_odd): Tune BMOD_1_TO_MOD_1_THRESHOLD;
+ Do not assume native mpn_modexact_1_odd is faster than mpn_mod_1.
+ (tuned_speed_mpn_mod_1): Remove variable.
+
+ * mpn/generic/toom22_mul.c (TOOM22_MUL_REC): New name for
+ TOOM22_MUL_MN_REC.
+
+2009-12-26 Niels Möller <nisse at lysator.liu.se>
+
+ * tests/mpn/t-toom32.c (MIN_AN, MIN_BN, MAX_BN): Relax
+ requirements a bit.
+
+ * mpn/generic/toom32_mul.c (mpn_toom32_mul): Relax requirement on
+ input sizes, to support s+t>=n (used to be s+t>=n+2). Keep high
+ limbs of the evaluated values in scalar variables.
+
+ * mpn/generic/sbpi1_divappr_q.c (mpn_sbpi1_divappr_q): Remove
+ unused variables.
+
+ * mpn/generic/toom32_mul.c (mpn_toom32_mul): Fixed left-over use
+ of mpn_addsub_n which should be mpn_add_n_sub_n.
+
2009-12-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add new toom files (spotted by Torbjorn).
diff -r 2f7eab694ee9 -r b3a07d48af77 gmp-impl.h
--- a/gmp-impl.h Sat Dec 26 12:39:10 2009 +0100
+++ b/gmp-impl.h Sun Dec 27 00:30:10 2009 +0100
@@ -591,7 +591,6 @@
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
-#undef MODEXACT_1_ODD_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
@@ -2816,15 +2815,15 @@
/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
- plain mpn_divrem_1. Likewise MODEXACT_1_ODD_THRESHOLD for
+ plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
modexact are faster at all sizes, so the defaults are 0. Those CPUs
where this is not right have a tuned threshold. */
#ifndef DIVEXACT_1_THRESHOLD
#define DIVEXACT_1_THRESHOLD 0
#endif
-#ifndef MODEXACT_1_ODD_THRESHOLD
-#define MODEXACT_1_ODD_THRESHOLD 0
+#ifndef BMOD_1_TO_MOD_1_THRESHOLD
+#define BMOD_1_TO_MOD_1_THRESHOLD 10
#endif
#ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
@@ -2857,7 +2856,7 @@
#endif
#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
- (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD) \
+ (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
? mpn_modexact_1_odd (src, size, divisor) \
: mpn_mod_1 (src, size, divisor))
@@ -3675,7 +3674,7 @@
ASSERT (__b & 1); \
\
if ((GMP_NUMB_BITS % 2) != 0 \
- || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD)) \
+ || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
{ \
(a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
} \
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/divis.c
--- a/mpn/generic/divis.c Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/divis.c Sun Dec 27 00:30:10 2009 +0100
@@ -96,7 +96,7 @@
if (dn == 1)
{
- if (BELOW_THRESHOLD (an, MODEXACT_1_ODD_THRESHOLD))
+ if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
return mpn_mod_1 (ap, an, dlow) == 0;
count_trailing_zeros (twos, dlow);
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/sbpi1_divappr_q.c
--- a/mpn/generic/sbpi1_divappr_q.c Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/sbpi1_divappr_q.c Sun Dec 27 00:30:10 2009 +0100
@@ -41,9 +41,8 @@
mp_limb_t n1, n0;
mp_limb_t d1, d0;
mp_limb_t cy, cy1;
- mp_limb_t q, q0;
- mp_limb_t t1, t0;
- mp_limb_t mask, flag;
+ mp_limb_t q;
+ mp_limb_t flag;
ASSERT (dn > 2);
ASSERT (nn >= dn);
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/toom22_mul.c
--- a/mpn/generic/toom22_mul.c Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/toom22_mul.c Sun Dec 27 00:30:10 2009 +0100
@@ -63,7 +63,7 @@
each recursion (the difference s-t will be invariant over recursive calls).
Therefore, we need to call toom32_mul. FIXME: Suppress depending on
MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD. */
-#define TOOM22_MUL_MN_REC(p, a, an, b, bn, ws) \
+#define TOOM22_MUL_REC(p, a, an, b, bn, ws) \
do { \
if (! MAYBE_mul_toom22 \
|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD)) \
@@ -167,7 +167,7 @@
/* vm1, 2n limbs */
TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
- if (s > t) TOOM22_MUL_MN_REC (vinf, a1, s, b1, t, scratch_out);
+ if (s > t) TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
else TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
/* v0, 2n limbs */
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/toom32_mul.c
--- a/mpn/generic/toom32_mul.c Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/toom32_mul.c Sun Dec 27 00:30:10 2009 +0100
@@ -61,6 +61,7 @@
int vm1_neg;
mp_limb_t cy;
int hi;
+ mp_limb_t ap1_hi, bp1_hi;
#define a0 ap
#define a1 (ap + n)
@@ -68,14 +69,8 @@
#define b0 bp
#define b1 (bp + n)
- /* Required, to guarantee that s + t >= n + 2.
-
- Also implies that bn >= 9
-
- FIXME: Too strict? E.g., an = 15, bn = 9, ==> n = 5, s = 5, t = 4
- seem to be ok. */
-
- ASSERT (bn + 4 <= an && an + 14 <= 3*bn);
+ /* Required, to ensure that s + t >= n. */
+ ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
@@ -84,39 +79,39 @@
ASSERT (0 < s && s <= n);
ASSERT (0 < t && t <= n);
- ASSERT (s + t >= n + 2);
+ ASSERT (s + t >= n);
/* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
-#define ap1 (pp) /* n + 1 */
-#define bp1 (pp + n + 1) /* n + 1 */
-#define am1 (pp + 2*n + 2) /* n, most significant bit stored elsewhere */
-#define bm1 (pp + 3*n + 2) /* n */
+#define ap1 (pp) /* n, most significant limb in ap1_hi */
+#define bp1 (pp + n) /* n, most significant bit in bp1_hi */
+#define am1 (pp + 2*n) /* n, most significant bit in hi */
+#define bm1 (pp + 3*n) /* n */
#define v1 (scratch) /* 2n + 1 */
#define vm1 (pp) /* 2n + 1 */
-#define scratch_out (scratch + 2*n + 1) /* Currently unused. *(
+#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
/* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
- /* FIXME: Keep ap1[n] and bp1[n] in scalar variables. */
+ /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
/* Compute ap1 = a0 + a1 + a3, am1 = a0 - a1 + a3 */
- ap1[n] = mpn_add (ap1, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_addsub_n
- if (ap1[n] == 0 && mpn_cmp (ap1, a1, n) < 0)
+ ap1_hi = mpn_add (ap1, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
{
- ap1[n] = mpn_addsub_n (ap1, am1, a1, ap1, n) >> 1;
+ ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
hi = 0;
vm1_neg = 1;
}
else
{
- cy = mpn_addsub_n (ap1, am1, ap1, a1, n);
- hi = ap1[n] - (cy & 1);
- ap1[n] += (cy >> 1);
+ cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
+ hi = ap1_hi - (cy & 1);
+ ap1_hi += (cy >> 1);
vm1_neg = 0;
}
#else
- if (ap1[n] == 0 && mpn_cmp (ap1, a1, n) < 0)
+ if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
{
ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
hi = 0;
@@ -124,28 +119,28 @@
}
else
{
- hi = ap1[n] - mpn_sub_n (am1, ap1, a1, n);
+ hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
vm1_neg = 0;
}
- ap1[n] += mpn_add_n (ap1, ap1, a1, n);
+ ap1_hi += mpn_add_n (ap1, ap1, a1, n);
#endif
/* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
if (t == n)
{
-#if HAVE_NATIVE_mpn_addsub_n
+#if HAVE_NATIVE_mpn_add_n_sub_n
if (mpn_cmp (b0, b1, n) < 0)
{
- cy = mpn_addsub_n (bp1, bm1, b1, b0, n);
+ cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
vm1_neg ^= 1;
}
else
{
- cy = mpn_addsub_n (bp1, bm1, b0, b1, n);
+ cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
}
- bp1[n] = cy >> 1;
+ bp1_hi = cy >> 1;
#else
- bp1[n] = mpn_add_n (bp1, b0, b1, n);
+ bp1_hi = mpn_add_n (bp1, b0, b1, n);
if (mpn_cmp (b0, b1, n) < 0)
{
@@ -160,8 +155,8 @@
}
else
{
- /* FIXME: Should still use addsub for the main part. */
- bp1[n] = mpn_add (bp1, b0, n, b1, t);
+ /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
+ bp1_hi = mpn_add (bp1, b0, n, b1, t);
if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
{
@@ -176,21 +171,21 @@
}
TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
- if (ap1[n] == 1)
+ if (ap1_hi == 1)
{
- cy = bp1[n] + mpn_add_n (v1 + n, v1 + n, bp1, n);
+ cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n);
}
- else if (ap1[n] == 2)
+ else if (ap1_hi == 2)
{
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = 2 * bp1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
+ cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
#else
- cy = 2 * bp1[n] + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+ cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
#endif
}
else
cy = 0;
- if (bp1[n] != 0)
+ if (bp1_hi != 0)
cy += mpn_add_n (v1 + n, v1 + n, ap1, n);
v1[2 * n] = cy;
@@ -250,6 +245,8 @@
cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
+ /* FIXME: Can we get rid of this second vm1_neg conditional by
+ swapping the location of +1 and -1 values? */
if (vm1_neg)
{
cy = mpn_add_n (v1, v1, vm1, n);
@@ -299,10 +296,17 @@
More information about the gmp-commit
mailing list