[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Dec 3 15:00:09 CET 2009
details: /home/hgfiles/gmp/rev/b66af9e72b19
changeset: 12963:b66af9e72b19
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 03 14:59:26 2009 +0100
description:
Tune DC_DIVAPPR_Q_THRESHOLD. Rewrite DC_DIV_QR_THRESHOLD tuning code.
details: /home/hgfiles/gmp/rev/5f710b3ff845
changeset: 12964:5f710b3ff845
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Dec 03 15:00:04 2009 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 11 ++++++++++-
doc/gmp.texi | 4 ++--
gmp-impl.h | 8 ++++++++
tune/Makefile.am | 2 +-
tune/common.c | 22 ++++++++++++++++++++++
tune/speed.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
tune/tuneup.c | 35 +++++++++++++++++++++++------------
7 files changed, 114 insertions(+), 17 deletions(-)
diffs (246 lines):
diff -r e31f9b520313 -r 5f710b3ff845 ChangeLog
--- a/ChangeLog Thu Dec 03 11:19:55 2009 +0100
+++ b/ChangeLog Thu Dec 03 15:00:04 2009 +0100
@@ -1,5 +1,14 @@
2009-12-03 Torbjorn Granlund <tege at gmplib.org>
+ * tune/tuneup.c: Tune DC_DIVAPPR_Q_THRESHOLD. Rewrite
+ DC_DIV_QR_THRESHOLD tuning code.
+ (tune_dc_div): Rewrite.
+ * tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV): New macro.
+ * tune/common.c (speed_mpn_sbpi1_div_qr, speed_mpn_dcpi1_div_qr,
+ speed_mpn_sbpi1_divappr_q, speed_mpn_sbpi1_bdiv_qr): New functions.
+ * gmp-impl.h: Provide declarations for corresponding threshold vars.
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add dcpi1_divappr_q.c.
+
* tune/tuneup.c (tune_binvert): Up max_size.
2009-12-02 Marco Bodrato <bodrato at mail.dm.unipi.it>
@@ -17,7 +26,7 @@
* tune/tuneup.c: Tune BINV_NEWTON_THRESHOLD.
(tune_binvert): New function.
- * tune/speed.h (SPEED_ROUTINE_MPN_BINVERT): New macros.
+ * tune/speed.h (SPEED_ROUTINE_MPN_BINVERT): New macro.
* tune/common.c (speed_mpn_binvert): New function.
* gmp-impl.h: Provide declarations for corresponding threshold vars.
* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add binvert.c.
diff -r e31f9b520313 -r 5f710b3ff845 doc/gmp.texi
--- a/doc/gmp.texi Thu Dec 03 11:19:55 2009 +0100
+++ b/doc/gmp.texi Thu Dec 03 15:00:04 2009 +0100
@@ -5177,9 +5177,9 @@
Compute the square of @{@var{s1p}, @var{n}@} and write the 2*@var{n}-limb
result to @var{rp}.
-The destination has to have space for 2*@var{n} limbs, even if the product's
+The destination has to have space for 2*@var{n} limbs, even if the result's
most significant limb is zero. No overlap is permitted between the
-destination and either source.
+destination and the source.
@end deftypefun
@deftypefun mp_limb_t mpn_mul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
diff -r e31f9b520313 -r 5f710b3ff845 gmp-impl.h
--- a/gmp-impl.h Thu Dec 03 11:19:55 2009 +0100
+++ b/gmp-impl.h Thu Dec 03 15:00:04 2009 +0100
@@ -4146,6 +4146,14 @@
#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
extern mp_size_t dc_div_qr_threshold;
+#undef DC_DIVAPPR_Q_THRESHOLD
+#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
+extern mp_size_t dc_divappr_q_threshold;
+
+#undef DC_DIV_Q_THRESHOLD
+#define DC_DIV_Q_THRESHOLD dc_div_q_threshold
+extern mp_size_t dc_div_q_threshold;
+
#undef DC_BDIV_Q_THRESHOLD
#define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
extern mp_size_t dc_bdiv_q_threshold;
diff -r e31f9b520313 -r 5f710b3ff845 tune/Makefile.am
--- a/tune/Makefile.am Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/Makefile.am Thu Dec 03 15:00:04 2009 +0100
@@ -122,7 +122,7 @@
# recompiled object will be rebuilt if that file changes.
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
+TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
binvert.c divrem_2.c gcd.c gcdext.c get_str.c \
set_str.c matrix22_mul.c hgcd.c mul_n.c sqr_n.c \
mullow_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c \
diff -r e31f9b520313 -r 5f710b3ff845 tune/common.c
--- a/tune/common.c Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/common.c Thu Dec 03 15:00:04 2009 +0100
@@ -738,6 +738,27 @@
}
double
+speed_mpn_sbpi1_div_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32);
+}
+double
+speed_mpn_dcpi1_div_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv);
+}
+double
+speed_mpn_sbpi1_divappr_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32);
+}
+double
+speed_mpn_dcpi1_divappr_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv);
+}
+
+double
speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
{
SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
@@ -757,6 +778,7 @@
{
SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
}
+
double
speed_mpn_binvert (struct speed_params *s)
{
diff -r e31f9b520313 -r 5f710b3ff845 tune/speed.h
--- a/tune/speed.h Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/speed.h Thu Dec 03 15:00:04 2009 +0100
@@ -235,6 +235,10 @@
double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
@@ -1077,7 +1081,7 @@
#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
- mpn_toom42_mul_itch (s->size, s->size/2), \
+ mpn_toom42_mul_itch (s->size, s->size/2), \
MPN_TOOM42_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
@@ -1303,6 +1307,49 @@
return speed_endtime (); \
}
+#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV) \
+ { \
+ unsigned i; \
+ mp_ptr dp, tp, ap, qp; \
+ gmp_pi1_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
+ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* normalize the data */ \
+ dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ ap[2*s->size-1] = dp[s->size-1] - 1; \
+ \
+ invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \
+ \
+ speed_operand_src (s, ap, 2*s->size); \
+ speed_operand_dst (s, tp, 2*s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, qp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); \
+ function (qp, tp, 2*s->size, dp, s->size, INV); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function) \
{ \
unsigned i; \
diff -r e31f9b520313 -r 5f710b3ff845 tune/tuneup.c
--- a/tune/tuneup.c Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/tuneup.c Thu Dec 03 15:00:04 2009 +0100
@@ -165,8 +165,9 @@
mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
mp_size_t div_sb_preinv_threshold = MP_SIZE_T_MAX;
mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_divappr_q_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX;
mp_size_t dc_bdiv_q_threshold = MP_SIZE_T_MAX;
-mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX;
mp_size_t binv_newton_threshold = MP_SIZE_T_MAX;
mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX;
mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX;
@@ -998,19 +999,24 @@
void
tune_dc_div (void)
{
- static struct param_t param;
- param.name = "DC_DIV_QR_THRESHOLD";
- param.function = speed_mpn_dc_tdiv_qr;
- param.step_factor = 0.02;
- one (&dc_div_qr_threshold, &param);
+ {
+ static struct param_t param;
+ param.name = "DC_DIV_QR_THRESHOLD";
+ param.function = speed_mpn_sbpi1_div_qr;
+ param.function2 = speed_mpn_dcpi1_div_qr;
+ param.min_size = 4;
+ one (&dc_div_qr_threshold, &param);
+ }
+ {
+ static struct param_t param;
+ param.name = "DC_DIVAPPR_Q_THRESHOLD";
+ param.function = speed_mpn_sbpi1_divappr_q;
+ param.function2 = speed_mpn_dcpi1_divappr_q;
+ param.min_size = 4;
+ one (&dc_divappr_q_threshold, &param);
+ }
}
-
-#define TUNE_REDC_2_MAX 100
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
-
void
tune_dc_bdiv (void)
{
@@ -1048,6 +1054,11 @@
void
tune_redc (void)
{
+#define TUNE_REDC_2_MAX 100
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
#if WANT_REDC_2
{
static struct param_t param;
More information about the gmp-commit
mailing list