[Gmp-commit] /home/hgfiles/gmp: Tune MUL_TOOM32_TO_TOOM43, MUL_TOOM32_TO_TOOM...
mercurial at gmplib.org
mercurial at gmplib.org
Sun Dec 20 03:22:30 CET 2009
details: /home/hgfiles/gmp/rev/83af94ff428b
changeset: 13136:83af94ff428b
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 20 03:22:24 2009 +0100
description:
Tune MUL_TOOM32_TO_TOOM43, MUL_TOOM32_TO_TOOM53, MUL_TOOM42_TO_TOOM53, MUL_TOOM42_TO_TOOM63.
diffstat:
ChangeLog | 20 ++++++++++++++++++++
gmp-impl.h | 35 +++++++++++++++++++++++++++++++++++
tune/common.c | 35 +++++++++++++++++++++++++++++++++++
tune/speed.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
tune/tuneup.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
5 files changed, 184 insertions(+), 4 deletions(-)
diffs (truncated from 303 to 300 lines):
diff -r 1a1bf750fe9f -r 83af94ff428b ChangeLog
--- a/ChangeLog Sat Dec 19 15:27:30 2009 +0100
+++ b/ChangeLog Sun Dec 20 03:22:24 2009 +0100
@@ -1,3 +1,23 @@
+2009-12-20 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL): New macro.
+ (SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL): New macro.
+ (SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL): New macro.
+ (SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL): New macro.
+ (SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL): New macro.
+ (SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL): New macro.
+ * tune/common.c (speed_mpn_toom63_mul): New function.
+ (speed_mpn_toom32_for_toom43_mul): New function.
+ (speed_mpn_toom43_for_toom32_mul): New function.
+ (speed_mpn_toom32_for_toom53_mul): New function.
+ (speed_mpn_toom53_for_toom32_mul): New function.
+ (speed_mpn_toom42_for_toom53_mul): New function.
+ (speed_mpn_toom53_for_toom42_mul): New function.
+ * tune/tuneup.c (tune_mul_n): New name for old tune_mul.
+ (tune_sqr_n): New name for old tune_sqr.
+ (tune_mul): New function, for unbalanced multiplication.
+ * gmp-impl.h: Provide declarations for corresponding threshold vars.
+
2009-12-19 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/toom_interpolate_8pts.c: Nailify.
diff -r 1a1bf750fe9f -r 83af94ff428b gmp-impl.h
--- a/gmp-impl.h Sat Dec 19 15:27:30 2009 +0100
+++ b/gmp-impl.h Sun Dec 20 03:22:24 2009 +0100
@@ -1050,6 +1050,9 @@
#define MPN_TOOM32_MUL_MINSIZE 10
#define MPN_TOOM42_MUL_MINSIZE 10
+#define MPN_TOOM43_MUL_MINSIZE 49 /* ??? */
+#define MPN_TOOM53_MUL_MINSIZE 49 /* ??? */
+#define MPN_TOOM63_MUL_MINSIZE 49
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
@@ -1651,6 +1654,22 @@
#define MUL_TOOM44_THRESHOLD 300
#endif
+#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 140
+#endif
+
+#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 170
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 190
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 230
+#endif
+
/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
@@ -4134,6 +4153,22 @@
#define MUL_TOOM44_THRESHOLD mul_toom44_threshold
extern mp_size_t mul_toom44_threshold;
+#undef MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
+extern mp_size_t mul_toom32_to_toom43_threshold;
+
+#undef MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
+extern mp_size_t mul_toom32_to_toom53_threshold;
+
+#undef MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
+extern mp_size_t mul_toom42_to_toom53_threshold;
+
+#undef MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
+extern mp_size_t mul_toom42_to_toom63_threshold;
+
#undef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD mul_fft_threshold
extern mp_size_t mul_fft_threshold;
diff -r 1a1bf750fe9f -r 83af94ff428b tune/common.c
--- a/tune/common.c Sat Dec 19 15:27:30 2009 +0100
+++ b/tune/common.c Sun Dec 20 03:22:24 2009 +0100
@@ -1034,6 +1034,41 @@
{
SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
}
+double
+speed_mpn_toom63_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
+}
+double
+speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
+}
+double
+speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
+}
+double
+speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
+}
double
speed_mpn_nussbaumer_mul (struct speed_params *s)
diff -r 1a1bf750fe9f -r 83af94ff428b tune/speed.h
--- a/tune/speed.h Sat Dec 19 15:27:30 2009 +0100
+++ b/tune/speed.h Sun Dec 20 03:22:24 2009 +0100
@@ -279,6 +279,13 @@
double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_for_toom43_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom43_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
@@ -1125,6 +1132,47 @@
mpn_toom42_mul_itch (s->size, s->size/2), \
MPN_TOOM42_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
+ mpn_toom63_mul_itch (s->size, s->size/2), \
+ MPN_TOOM63_MUL_MINSIZE)
+
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
+ mpn_toom32_mul_itch (s->size, 17*s->size/24), \
+ MPN_TOOM32_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
+ mpn_toom43_mul_itch (s->size, 17*s->size/24), \
+ MPN_TOOM43_MUL_MINSIZE)
+
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
+ mpn_toom32_mul_itch (s->size, 19*s->size/30), \
+ MPN_TOOM32_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
+ mpn_toom53_mul_itch (s->size, 19*s->size/30), \
+ MPN_TOOM53_MUL_MINSIZE)
+
+#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
+ mpn_toom42_mul_itch (s->size, 11*s->size/20), \
+ MPN_TOOM42_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
+ mpn_toom53_mul_itch (s->size, 11*s->size/20), \
+ MPN_TOOM53_MUL_MINSIZE)
+
+
+
#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
{ \
mp_ptr wp; \
diff -r 1a1bf750fe9f -r 83af94ff428b tune/tuneup.c
--- a/tune/tuneup.c Sat Dec 19 15:27:30 2009 +0100
+++ b/tune/tuneup.c Sun Dec 20 03:22:24 2009 +0100
@@ -150,6 +150,10 @@
mp_size_t mul_toom22_threshold = MP_SIZE_T_MAX;
mp_size_t mul_toom33_threshold = MUL_TOOM33_THRESHOLD_LIMIT;
mp_size_t mul_toom44_threshold = MUL_TOOM44_THRESHOLD_LIMIT;
+mp_size_t mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
mp_size_t mul_fft_threshold = MP_SIZE_T_MAX;
mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX;
mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX;
@@ -404,7 +408,7 @@
}
-#define PRINT_WIDTH 28
+#define PRINT_WIDTH 30
void
print_define_start (const char *name)
@@ -831,7 +835,7 @@
/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
giving wrong results. */
void
-tune_mul (void)
+tune_mul_n (void)
{
static struct param_t param;
@@ -856,6 +860,37 @@
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
}
+void
+tune_mul (void)
+{
+ static struct param_t param;
+
+ param.function = speed_mpn_toom32_for_toom43_mul;
+ param.function2 = speed_mpn_toom43_for_toom32_mul;
+ param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
+ param.min_size = MPN_TOOM43_MUL_MINSIZE;
+ one (&mul_toom32_to_toom43_threshold, &param);
+
+ param.function = speed_mpn_toom32_for_toom53_mul;
+ param.function2 = speed_mpn_toom53_for_toom32_mul;
+ param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
+ param.min_size = MPN_TOOM53_MUL_MINSIZE;
+ one (&mul_toom32_to_toom53_threshold, &param);
+
+ param.function = speed_mpn_toom42_for_toom53_mul;
+ param.function2 = speed_mpn_toom53_for_toom42_mul;
+ param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
+ param.min_size = MPN_TOOM53_MUL_MINSIZE;
+ one (&mul_toom42_to_toom53_threshold, &param);
+
+ param.function = speed_mpn_toom42_mul;
+ param.function2 = speed_mpn_toom63_mul;
+ param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
+ param.min_size = MPN_TOOM63_MUL_MINSIZE;
+ one (&mul_toom42_to_toom63_threshold, &param);
+
+}
+
void
tune_mullo (void)
@@ -932,7 +967,7 @@
just for that. Start karatsuba from 4 same as MUL above. */
void
-tune_sqr (void)
+tune_sqr_n (void)
{
/* disabled until tuned */
SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
@@ -1917,10 +1952,13 @@
}
printf ("\n");
+ tune_mul_n ();
+ printf("\n");
+
tune_mul ();
printf("\n");
- tune_sqr ();
+ tune_sqr_n ();
printf("\n");
tune_fft_mul ();
@@ -1938,8 +1976,12 @@
tune_dc_div ();
tune_dc_bdiv ();
+
+ printf("\n");
tune_invertappr ();
tune_invert ();
+ printf("\n");
+
More information about the gmp-commit
mailing list