[Gmp-commit] /home/hgfiles/gmp: Toom-6half and Toom-8half activation.
mercurial at gmplib.org
mercurial at gmplib.org
Thu Dec 24 18:11:33 CET 2009
details: /home/hgfiles/gmp/rev/0b1971e1a987
changeset: 13211:0b1971e1a987
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Dec 24 18:11:24 2009 +0100
description:
Toom-6half and Toom-8half activation.
diffstat:
ChangeLog | 7 +++++++
gmp-impl.h | 26 +++++++++++++++++++++-----
mpn/generic/mul_n.c | 36 +++++++++++++++++++-----------------
mpn/generic/sqr_n.c | 36 +++++++++++++++++++-----------------
tune/tuneup.c | 24 ++++++++++++++++++++++++
5 files changed, 90 insertions(+), 39 deletions(-)
diffs (247 lines):
diff -r 4689c4513d05 -r 0b1971e1a987 ChangeLog
--- a/ChangeLog Thu Dec 24 11:09:20 2009 +0100
+++ b/ChangeLog Thu Dec 24 18:11:24 2009 +0100
@@ -1,3 +1,10 @@
+2009-12-24 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/generic/mul_n.c: Use also toom6h and toom8h.
+ * mpn/generic/sqr_n.c: Use also toom6 and toom8.
+ * gmp-impl.h: Initial support for tuning of Toom-6half and Toom-8half.
+ * tune/tuneup.c: Tune Toom-6half and Toom-8half thresholds.
+
2009-12-24 Torbjorn Granlund <tege at gmplib.org>
* mpn/generic/powm_sec.c: Use SQR_TOOM2_THRESHOLD as limit for a native
diff -r 4689c4513d05 -r 0b1971e1a987 gmp-impl.h
--- a/gmp-impl.h Thu Dec 24 11:09:20 2009 +0100
+++ b/gmp-impl.h Thu Dec 24 18:11:24 2009 +0100
@@ -1035,10 +1035,6 @@
&& (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
-#if WANT_FFT
-#define MPN_TOOM44_MAX_N 285405
-#endif /* WANT_FFT */
-
#define MPN_TOOM22_MUL_MINSIZE 4
#define MPN_TOOM2_SQR_MINSIZE 4
@@ -4207,6 +4203,14 @@
#define MUL_TOOM44_THRESHOLD mul_toom44_threshold
extern mp_size_t mul_toom44_threshold;
+#undef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
+extern mp_size_t mul_toom6h_threshold;
+
+#undef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
+extern mp_size_t mul_toom8h_threshold;
+
#undef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
extern mp_size_t mul_toom32_to_toom43_threshold;
@@ -4259,6 +4263,14 @@
#define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
extern mp_size_t sqr_toom4_threshold;
+#undef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
+extern mp_size_t sqr_toom6_threshold;
+
+#undef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
+extern mp_size_t sqr_toom8_threshold;
+
#undef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD sqr_fft_threshold
extern mp_size_t sqr_fft_threshold;
@@ -4442,6 +4454,10 @@
#define SQR_TOOM3_THRESHOLD_LIMIT 400
#define MUL_TOOM44_THRESHOLD_LIMIT 1000
#define SQR_TOOM4_THRESHOLD_LIMIT 1000
+#define MUL_TOOM6H_THRESHOLD_LIMIT 1100
+#define SQR_TOOM6_THRESHOLD_LIMIT 1100
+#define MUL_TOOM8H_THRESHOLD_LIMIT 1200
+#define SQR_TOOM8_THRESHOLD_LIMIT 1200
#define MULLO_BASECASE_THRESHOLD_LIMIT 200
#define GET_STR_THRESHOLD_LIMIT 150
@@ -4478,7 +4494,7 @@
#define mpn_toom4_sqr_itch(an) \
(3 * (an) + GMP_NUMB_BITS)
-#define mpn_toom6_sqr_itch(n) \
+#define mpn_toom6_sqr_itch(n) \
( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
diff -r 4689c4513d05 -r 0b1971e1a987 mpn/generic/mul_n.c
--- a/mpn/generic/mul_n.c Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/generic/mul_n.c Thu Dec 24 18:11:24 2009 +0100
@@ -50,11 +50,7 @@
mpn_toom33_mul (p, a, n, b, n, ws);
TMP_SFREE;
}
-#if WANT_FFT || TUNE_PROGRAM_BUILD
- else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
-#else
- else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
-#endif
+ else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
{
mp_ptr ws;
TMP_SDECL;
@@ -63,22 +59,28 @@
mpn_toom44_mul (p, a, n, b, n, ws);
TMP_SFREE;
}
+ else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+ mpn_toom6h_mul (p, a, n, b, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_DECL;
+ TMP_MARK;
+ ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+ mpn_toom8h_mul (p, a, n, b, n, ws);
+ TMP_FREE;
+ }
else
-#if WANT_FFT || TUNE_PROGRAM_BUILD
{
/* The current FFT code allocates its own space. That should probably
change. */
mpn_fft_mul (p, a, n, b, n);
}
-#else
- {
- /* Toom4 for large operands. */
- mp_ptr ws;
- TMP_DECL;
- TMP_MARK;
- ws = TMP_BALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
- mpn_toom44_mul (p, a, n, b, n, ws);
- TMP_FREE;
- }
-#endif
}
diff -r 4689c4513d05 -r 0b1971e1a987 mpn/generic/sqr_n.c
--- a/mpn/generic/sqr_n.c Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/generic/sqr_n.c Thu Dec 24 18:11:24 2009 +0100
@@ -52,11 +52,7 @@
mpn_toom3_sqr (p, a, n, ws);
TMP_SFREE;
}
-#if WANT_FFT || TUNE_PROGRAM_BUILD
- else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
-#else
- else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
-#endif
+ else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
{
mp_ptr ws;
TMP_SDECL;
@@ -65,22 +61,28 @@
mpn_toom4_sqr (p, a, n, ws);
TMP_SFREE;
}
+ else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+ mpn_toom6_sqr (p, a, n, ws);
+ TMP_SFREE;
+ }
+ else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+ {
+ mp_ptr ws;
+ TMP_DECL;
+ TMP_MARK;
+ ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+ mpn_toom8_sqr (p, a, n, ws);
+ TMP_FREE;
+ }
else
-#if WANT_FFT || TUNE_PROGRAM_BUILD
{
/* The current FFT code allocates its own space. That should probably
change. */
mpn_fft_mul (p, a, n, a, n);
}
-#else
- {
- /* Toom4 for large operands. */
- mp_ptr ws;
- TMP_DECL;
- TMP_MARK;
- ws = TMP_BALLOC_LIMBS (mpn_toom4_sqr_itch (n));
- mpn_toom4_sqr (p, a, n, ws);
- TMP_FREE;
- }
-#endif
}
diff -r 4689c4513d05 -r 0b1971e1a987 tune/tuneup.c
--- a/tune/tuneup.c Thu Dec 24 11:09:20 2009 +0100
+++ b/tune/tuneup.c Thu Dec 24 18:11:24 2009 +0100
@@ -150,6 +150,8 @@
mp_size_t mul_toom22_threshold = MP_SIZE_T_MAX;
mp_size_t mul_toom33_threshold = MUL_TOOM33_THRESHOLD_LIMIT;
mp_size_t mul_toom44_threshold = MUL_TOOM44_THRESHOLD_LIMIT;
+mp_size_t mul_toom6h_threshold = MUL_TOOM6H_THRESHOLD_LIMIT;
+mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT;
mp_size_t mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
@@ -161,6 +163,8 @@
= (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
+mp_size_t sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
+mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX;
mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
@@ -856,6 +860,16 @@
param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
one (&mul_toom44_threshold, &param);
+ param.name = "MUL_TOOM6H_THRESHOLD";
+ param.min_size = MAX (mul_toom44_threshold, MPN_TOOM6H_MUL_MINSIZE);
+ param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
+ one (&mul_toom6h_threshold, &param);
+
+ param.name = "MUL_TOOM8H_THRESHOLD";
+ param.min_size = MAX (mul_toom6h_threshold, MPN_TOOM8H_MUL_MINSIZE);
+ param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
+ one (&mul_toom8h_threshold, &param);
+
/* disabled until tuned */
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
}
@@ -1044,6 +1058,16 @@
param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_MINSIZE);
param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
one (&sqr_toom4_threshold, &param);
+
+ param.name = "SQR_TOOM6_THRESHOLD";
+ param.min_size = MAX (sqr_toom4_threshold, MPN_TOOM6_SQR_MINSIZE);
+ param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
+ one (&sqr_toom6_threshold, &param);
+
+ param.name = "SQR_TOOM8_THRESHOLD";
+ param.min_size = MAX (sqr_toom6_threshold, MPN_TOOM8_SQR_MINSIZE);
+ param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
+ one (&sqr_toom8_threshold, &param);
}
}
More information about the gmp-commit
mailing list