[Gmp-commit] /home/hgfiles/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Dec 24 23:42:22 CET 2009
details: /home/hgfiles/gmp/rev/611a9d0b331e
changeset: 13212:611a9d0b331e
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 24 11:14:38 2009 +0100
description:
(nodist_EXTRA_libmpn_la_SOURCES): Add missing division files.
details: /home/hgfiles/gmp/rev/6e2403ec58e2
changeset: 13213:6e2403ec58e2
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 24 12:17:14 2009 +0100
description:
Get ASSERT right.
details: /home/hgfiles/gmp/rev/849ad411525a
changeset: 13214:849ad411525a
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 24 23:12:44 2009 +0100
description:
Trivial merge.
details: /home/hgfiles/gmp/rev/eba295d509c7
changeset: 13215:eba295d509c7
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 24 23:34:31 2009 +0100
description:
Remove declarations of some unused speed_* variables.
details: /home/hgfiles/gmp/rev/7443ba3eacc7
changeset: 13216:7443ba3eacc7
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 24 23:42:19 2009 +0100
description:
Completely respin tuning of mod_1 function family.
diffstat:
ChangeLog | 31 +++++++++++++++
gmp-impl.h | 91 +++++++++++++++++++++++++------------------
mpn/Makefile.am | 10 +++-
mpn/generic/mod_1.c | 24 ++++++----
mpn/generic/mod_1_2.c | 2 +-
mpn/generic/mod_1_3.c | 2 +-
mpn/generic/mod_1_4.c | 2 +-
mpn/generic/mul_n.c | 36 +++++++++--------
mpn/generic/perfsqr.c | 24 +++++-----
mpn/generic/sqr_n.c | 36 +++++++++--------
tune/mod_1_div.c | 4 +
tune/mod_1_inv.c | 4 +
tune/speed.h | 2 -
tune/tuneup.c | 103 ++++++++++++++++++++++++++++++++++++++++---------
14 files changed, 249 insertions(+), 122 deletions(-)
diffs (truncated from 703 to 300 lines):
diff -r 4689c4513d05 -r 7443ba3eacc7 ChangeLog
--- a/ChangeLog Thu Dec 24 11:09:20 2009 +0100
+++ b/ChangeLog Thu Dec 24 23:42:19 2009 +0100
@@ -1,5 +1,36 @@
2009-12-24 Torbjorn Granlund <tege at gmplib.org>
+ * tune/mod_1_div.c (MOD_1N_TO_MOD_1_1_THRESHOLD)
+ (MOD_1U_TO_MOD_1_1_THRESHOLD): Set.
+ * tune/mod_1_inv.c (MOD_1N_TO_MOD_1_1_THRESHOLD)
+ (MOD_1U_TO_MOD_1_1_THRESHOLD): Set.
+
+ * gmp-impl.h (USE_PREINV_MOD_1): Remove.
+ (MPN_MOD_OR_PREINV_MOD_1): Define to choose functions dynamically in
+ terms of PREINV_MOD_1_TO_MOD_1_THRESHOLD (used to choose statically
+ using USE_PREINV_MOD_1).
+ * mpn/generic/perfsqr.c (PERFSQR_MOD_PP): Corresponding updates.
+
+ * tune/tuneup.c (tune_mod_1): Rewrite.
+ * gmp-impl.h (MOD_1N_TO_MOD_1_1_THRESHOLD): New.
+ (MOD_1U_TO_MOD_1_1_THRESHOLD): New name for MOD_1_1_THRESHOLD.
+ (MOD_1_1_TO_MOD_1_2_THRESHOLD): New name for MOD_1_2_THRESHOLD.
+ (MOD_1_2_TO_MOD_1_4_THRESHOLD): New name for MOD_1_4_THRESHOLD.
+ * mpn/generic/mod_1.c: Corresponding updates.
+
+2009-12-24 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/generic/mul_n.c: Use also toom6h and toom8h.
+ * mpn/generic/sqr_n.c: Use also toom6 and toom8.
+ * gmp-impl.h: Initial support for tuning of Toom-6half and Toom-8half.
+ * tune/tuneup.c: Tune Toom-6half and Toom-8half thresholds.
+
+2009-12-24 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/mod_1_4.c: Get ASSERT right.
+ * mpn/generic/mod_1_3.c: Likewise.
+ * mpn/generic/mod_1_2.c: Likewise.
+
* mpn/generic/powm_sec.c: Use SQR_TOOM2_THRESHOLD as limit for a native
mpn_sqr_basecase, not TUNE_SQR_TOOM2_MAX.
diff -r 4689c4513d05 -r 7443ba3eacc7 gmp-impl.h
--- a/gmp-impl.h Thu Dec 24 11:09:20 2009 +0100
+++ b/gmp-impl.h Thu Dec 24 23:42:19 2009 +0100
@@ -589,7 +589,6 @@
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
-#undef USE_PREINV_MOD_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#undef MODEXACT_1_ODD_THRESHOLD
@@ -598,7 +597,6 @@
#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
#define USE_PREINV_DIVREM_1 0 /* no preinv */
-#define USE_PREINV_MOD_1 0 /* no preinv */
#define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* no preinv */
/* mpn/generic/mul_fft.c is not nails-capable. */
@@ -1035,10 +1033,6 @@
&& (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
-#if WANT_FFT
-#define MPN_TOOM44_MAX_N 285405
-#endif /* WANT_FFT */
-
#define MPN_TOOM22_MUL_MINSIZE 4
#define MPN_TOOM2_SQR_MINSIZE 4
@@ -2753,7 +2747,7 @@
NOTE: Output variables are updated multiple times. Only some inputs
- and outputs may overlap.
+ and outputs may overlap.
*/
#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
do { \
@@ -2789,16 +2783,12 @@
#endif
-/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
- the plain mpn_divrem_1. Likewise USE_PREINV_MOD_1 chooses between
- mpn_preinv_mod_1 and plain mpn_mod_1. The default for both is yes, since
- the few CISC chips where preinv is not good have defines saying so. */
+/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
+ plain mpn_divrem_1. The default is yes, since the few CISC chips where
+ preinv is not good have defines saying so. */
#ifndef USE_PREINV_DIVREM_1
#define USE_PREINV_DIVREM_1 1
#endif
-#ifndef USE_PREINV_MOD_1
-#define USE_PREINV_MOD_1 1
-#endif
#if USE_PREINV_DIVREM_1
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
@@ -2808,13 +2798,16 @@
mpn_divrem_1 (qp, xsize, ap, size, d)
#endif
-#if USE_PREINV_MOD_1
-#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
- mpn_preinv_mod_1 (src, size, divisor, inverse)
-#else
-#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
- mpn_mod_1 (src, size, divisor)
-#endif
+#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#endif
+
+/* This selection may seem backwards. The reason mpn_mod_1 typically takes
+ over for larger sizes is that it uses the mod_1_1 function. */
+#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
+ (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
+ ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
+ : mpn_mod_1 (src, size, divisor))
#ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
@@ -4207,6 +4200,14 @@
#define MUL_TOOM44_THRESHOLD mul_toom44_threshold
extern mp_size_t mul_toom44_threshold;
+#undef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
+extern mp_size_t mul_toom6h_threshold;
+
+#undef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
+extern mp_size_t mul_toom8h_threshold;
+
#undef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
extern mp_size_t mul_toom32_to_toom43_threshold;
@@ -4259,6 +4260,14 @@
#define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
extern mp_size_t sqr_toom4_threshold;
+#undef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
+extern mp_size_t sqr_toom6_threshold;
+
+#undef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
+extern mp_size_t sqr_toom8_threshold;
+
#undef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD sqr_fft_threshold
extern mp_size_t sqr_fft_threshold;
@@ -4369,29 +4378,29 @@
#define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
extern mp_size_t divrem_1_unnorm_threshold;
-#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
extern mp_size_t mod_1_norm_threshold;
-#undef MOD_1_UNNORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
extern mp_size_t mod_1_unnorm_threshold;
-#undef MOD_1_1_THRESHOLD
-#define MOD_1_1_THRESHOLD mod_1_1_threshold
-extern mp_size_t mod_1_1_threshold;
-
-#undef MOD_1_2_THRESHOLD
-#define MOD_1_2_THRESHOLD mod_1_2_threshold
-extern mp_size_t mod_1_2_threshold;
-
-#undef MOD_1_3_THRESHOLD
-#define MOD_1_3_THRESHOLD mod_1_3_threshold
-extern mp_size_t mod_1_3_threshold;
-
-#undef MOD_1_4_THRESHOLD
-#define MOD_1_4_THRESHOLD mod_1_4_threshold
-extern mp_size_t mod_1_4_threshold;
+#undef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
+extern mp_size_t mod_1n_to_mod_1_1_threshold;
+
+#undef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
+extern mp_size_t mod_1u_to_mod_1_1_threshold;
+
+#undef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
+extern mp_size_t mod_1_1_to_mod_1_2_threshold;
+
+#undef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
+extern mp_size_t mod_1_2_to_mod_1_4_threshold;
#if ! UDIV_PREINV_ALWAYS
#undef DIVREM_2_THRESHOLD
@@ -4442,6 +4451,10 @@
#define SQR_TOOM3_THRESHOLD_LIMIT 400
#define MUL_TOOM44_THRESHOLD_LIMIT 1000
#define SQR_TOOM4_THRESHOLD_LIMIT 1000
+#define MUL_TOOM6H_THRESHOLD_LIMIT 1100
+#define SQR_TOOM6_THRESHOLD_LIMIT 1100
+#define MUL_TOOM8H_THRESHOLD_LIMIT 1200
+#define SQR_TOOM8_THRESHOLD_LIMIT 1200
#define MULLO_BASECASE_THRESHOLD_LIMIT 200
#define GET_STR_THRESHOLD_LIMIT 150
@@ -4478,7 +4491,7 @@
#define mpn_toom4_sqr_itch(an) \
(3 * (an) + GMP_NUMB_BITS)
-#define mpn_toom6_sqr_itch(n) \
+#define mpn_toom6_sqr_itch(n) \
( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
diff -r 4689c4513d05 -r 7443ba3eacc7 mpn/Makefile.am
--- a/mpn/Makefile.am Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/Makefile.am Thu Dec 24 23:42:19 2009 +0100
@@ -36,7 +36,11 @@
addmul_7.c addmul_8.c \
and_n.c andn_n.c \
cmp.c com_n.c copyd.c copyi.c \
- dc_divrem_n.c dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c \
+ dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c \
+ sbpi1_bdiv_qr.c sbpi1_bdiv_q.c \
+ sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c \
+ dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
+ dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c \
dump.c fib2_ui.c gcd.c \
gcd_1.c gcdext.c get_d.c get_str.c \
hamdist.c hgcd2.c hgcd.c invert_limb.c \
@@ -50,7 +54,7 @@
toom44_mul.c \
toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c \
toom_couple_handling.c \
- sqr_toom2.c sqr_toom3.c sqr_toom4.c \
+ toom2_sqr.c toom3_sqr.c toom4_sqr.c \
toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c \
toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c \
toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c \
@@ -58,7 +62,7 @@
invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c \
mullo_n.c mullo_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c \
popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
- rootrem.c sb_divrem_mn.c scan0.c scan1.c set_str.c \
+ rootrem.c scan0.c scan1.c set_str.c \
sqr_basecase.c sqr_diagonal.c \
sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c \
tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
diff -r 4689c4513d05 -r 7443ba3eacc7 mpn/generic/mod_1.c
--- a/mpn/generic/mod_1.c Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/generic/mod_1.c Thu Dec 24 23:42:19 2009 +0100
@@ -42,16 +42,20 @@
#define MOD_1_UNNORM_THRESHOLD 0
#endif
-#ifndef MOD_1_1_THRESHOLD
-#define MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif
-#ifndef MOD_1_2_THRESHOLD
-#define MOD_1_2_THRESHOLD 10
+#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif
-#ifndef MOD_1_4_THRESHOLD
-#define MOD_1_4_THRESHOLD 120
+#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
+#endif
+
+#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
#endif
@@ -207,7 +211,7 @@
if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
{
- if (BELOW_THRESHOLD (n, MOD_1_1_THRESHOLD))
+ if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
{
return mpn_mod_1_norm (ap, n, b);
}
@@ -220,17 +224,17 @@
}
else
{
- if (BELOW_THRESHOLD (n, MOD_1_1_THRESHOLD))
+ if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
{
More information about the gmp-commit
mailing list