[Gmp-commit] /home/hgfiles/gmp: Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 2 02:01:19 CET 2009
details: /home/hgfiles/gmp/rev/4c2a39802af2
changeset: 12956:4c2a39802af2
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Dec 02 02:01:15 2009 +0100
description:
Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.
diffstat:
ChangeLog | 13 ++++++
gmp-impl.h | 8 ++++
tune/Makefile.am | 3 +-
tune/common.c | 22 +++++++++++
tune/speed.h | 82 +++++++++++++++++++++++++++++++++++++++++
tune/tuneup.c | 29 +++++++++++++-
6 files changed, 153 insertions(+), 4 deletions(-)
diffs (249 lines):
diff -r 68caff9d2bf8 -r 4c2a39802af2 ChangeLog
--- a/ChangeLog Tue Dec 01 21:17:22 2009 +0100
+++ b/ChangeLog Wed Dec 02 02:01:15 2009 +0100
@@ -1,3 +1,16 @@
+2009-12-02 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/tuneup.c: Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.
+ (tune_dc_bdiv): New function.
+ (tune_dc_div): New name for tune_dc.
+ * tune/speed.h (SPEED_ROUTINE_PI1_BDIV_QR, SPEED_ROUTINE_PI1_BDIV_Q):
+ New macros.
+ * tune/common.c (speed_mpn_sbpi1_bdiv_qr, speed_mpn_dcpi1_bdiv_qr,
+ speed_mpn_sbpi1_bdiv_q, speed_mpn_dcpi1_bdiv_q): New functions.
+ * gmp-impl.h: Provide declarations for corresponding threshold vars.
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add dcpi1_bdiv_qr.c and
+ dcpi1_bdiv_q.c.
+
2009-12-01 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/toom53_mul.c: Removed double computation of vinf.
diff -r 68caff9d2bf8 -r 4c2a39802af2 gmp-impl.h
--- a/gmp-impl.h Tue Dec 01 21:17:22 2009 +0100
+++ b/gmp-impl.h Wed Dec 02 02:01:15 2009 +0100
@@ -4146,6 +4146,14 @@
#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
extern mp_size_t dc_div_qr_threshold;
+#undef DC_BDIV_Q_THRESHOLD
+#define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
+extern mp_size_t dc_bdiv_q_threshold;
+
+#undef DC_BDIV_QR_THRESHOLD
+#define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
+extern mp_size_t dc_bdiv_qr_threshold;
+
#undef REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
extern mp_size_t redc_1_to_redc_2_threshold;
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/Makefile.am
--- a/tune/Makefile.am Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/Makefile.am Wed Dec 02 02:01:15 2009 +0100
@@ -122,7 +122,8 @@
# recompiled object will be rebuilt if that file changes.
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c divrem_2.c gcd.c gcdext.c get_str.c \
+TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
+ divrem_2.c gcd.c gcdext.c get_str.c \
set_str.c matrix22_mul.c hgcd.c mul_n.c sqr_n.c \
mullow_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c \
toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/common.c
--- a/tune/common.c Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/common.c Wed Dec 02 02:01:15 2009 +0100
@@ -736,6 +736,28 @@
{
SPEED_ROUTINE_MPZ_MOD (mpz_mod);
}
+
+double
+speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
+}
+double
+speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
+}
+double
+speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
+}
+double
+speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
+}
+
double
speed_mpn_redc_1 (struct speed_params *s)
{
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/speed.h
--- a/tune/speed.h Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/speed.h Wed Dec 02 02:01:15 2009 +0100
@@ -235,6 +235,10 @@
double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_redc_2 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_redc_n __GMP_PROTO ((struct speed_params *s));
@@ -1298,6 +1302,84 @@
return speed_endtime (); \
}
+#define SPEED_ROUTINE_PI1_BDIV_QR(function) \
+ { \
+ unsigned i; \
+ mp_ptr dp, tp, ap, qp; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
+ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ binvert_limb (inv, dp[0]); \
+ \
+ speed_operand_src (s, ap, 2*s->size); \
+ speed_operand_dst (s, tp, 2*s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, qp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); \
+ function (qp, tp, 2*s->size, dp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_PI1_BDIV_Q(function) \
+ { \
+ unsigned i; \
+ mp_ptr dp, tp, qp; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ binvert_limb (inv, dp[0]); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, tp, s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, qp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, s->xp, s->size); \
+ function (qp, tp, s->size, dp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
#define SPEED_ROUTINE_REDC_1(function) \
{ \
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/tuneup.c
--- a/tune/tuneup.c Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/tuneup.c Wed Dec 02 02:01:15 2009 +0100
@@ -165,6 +165,8 @@
mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
mp_size_t div_sb_preinv_threshold = MP_SIZE_T_MAX;
mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_bdiv_q_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX;
mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX;
mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX;
mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX;
@@ -993,7 +995,7 @@
void
-tune_dc (void)
+tune_dc_div (void)
{
static struct param_t param;
param.name = "DC_DIV_QR_THRESHOLD";
@@ -1009,6 +1011,27 @@
#endif
void
+tune_dc_bdiv (void)
+{
+ {
+ static struct param_t param;
+ param.name = "DC_BDIV_QR_THRESHOLD";
+ param.function = speed_mpn_sbpi1_bdiv_qr;
+ param.function2 = speed_mpn_dcpi1_bdiv_qr;
+ param.min_size = 2;
+ one (&dc_bdiv_qr_threshold, &param);
+ }
+ {
+ static struct param_t param;
+ param.name = "DC_BDIV_Q_THRESHOLD";
+ param.function = speed_mpn_sbpi1_bdiv_q;
+ param.function2 = speed_mpn_dcpi1_bdiv_q;
+ param.min_size = 4;
+ one (&dc_bdiv_q_threshold, &param);
+ }
+}
+
+void
tune_redc (void)
{
#if WANT_REDC_2
@@ -1051,7 +1074,6 @@
one (&redc_1_to_redc_n_threshold, &param);
}
#endif
-
}
void
@@ -1860,7 +1882,8 @@
tune_mulmod_bnm1 ();
printf("\n");
- tune_dc ();
+ tune_dc_div ();
+ tune_dc_bdiv ();
tune_redc ();
printf("\n");
More information about the gmp-commit
mailing list