[Gmp-commit] /home/hgfiles/gmp: Speed support for both mpn_toom6h_mul and mpn_toom6_sqr.
mercurial at gmplib.org
mercurial at gmplib.org
Mon Dec 21 17:31:44 CET 2009
details: /home/hgfiles/gmp/rev/506e2ca52488
changeset: 13162:506e2ca52488
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Dec 21 17:31:39 2009 +0100
description:
Speed support for both mpn_toom6h_mul and mpn_toom6_sqr.
diffstat:
ChangeLog | 7 +++++++
gmp-impl.h | 27 +++++++++++++++++++++++++--
mpn/generic/toom6h_mul.c | 22 ++++------------------
tune/common.c | 10 ++++++++++
tune/speed.c | 2 ++
tune/speed.h | 13 +++++++++++++
6 files changed, 61 insertions(+), 20 deletions(-)
diffs (202 lines):
diff -r ca8735ffba47 -r 506e2ca52488 ChangeLog
--- a/ChangeLog Mon Dec 21 16:52:23 2009 +0100
+++ b/ChangeLog Mon Dec 21 17:31:39 2009 +0100
@@ -21,6 +21,13 @@
2009-12-21 Marco Bodrato <bodrato at mail.dm.unipi.it>
+ * gmp-impl.h (mpn_toom6h_mul_itch): New inline function.
+ (MUL_TOOM6H_THRESHOLD): Default value.
+ (SQR_TOOM6_THRESHOLD): Default value.
+ * mpn/generic/toom6h_mul.c: Remove definitions moved to gmp-impl.h.
+ * tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+ mpn_toom6h_mul and mpn_toom6_sqr speed.
+
* mpn/generic/toom63_mul.c: Remove unused TMP_*.
* mpn/generic/toom_eval_pm2rexp.c: New file.
diff -r ca8735ffba47 -r 506e2ca52488 gmp-impl.h
--- a/gmp-impl.h Mon Dec 21 16:52:23 2009 +0100
+++ b/gmp-impl.h Mon Dec 21 17:31:39 2009 +0100
@@ -1048,6 +1048,9 @@
#define MPN_TOOM44_MUL_MINSIZE 30
#define MPN_TOOM4_SQR_MINSIZE 30
+#define MPN_TOOM6H_MUL_MINSIZE 46
+#define MPN_TOOM6_SQR_MINSIZE 46
+
#define MPN_TOOM32_MUL_MINSIZE 10
#define MPN_TOOM42_MUL_MINSIZE 10
#define MPN_TOOM43_MUL_MINSIZE 49 /* ??? */
@@ -1137,8 +1140,8 @@
#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
-#define mpn_toom6h_sqr __MPN(toom6h_sqr)
-__GMP_DECLSPEC void mpn_toom6h_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
@@ -1669,6 +1672,14 @@
#define MUL_TOOM44_THRESHOLD 300
#endif
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD 350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
#endif
@@ -4439,6 +4450,18 @@
#define mpn_toom4_sqr_itch(an) \
(3 * (an) + GMP_NUMB_BITS)
+#define mpn_toom6_sqr_itch(n) \
+( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
+ MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
+ mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
+ mp_size_t estimatedN;
+ estimatedN = (an + bn) / (size_t) 10 + 1;
+ return mpn_toom6_sqr_itch( estimatedN * 6 );
+}
+
static inline mp_size_t
mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
{
diff -r ca8735ffba47 -r 506e2ca52488 mpn/generic/toom6h_mul.c
--- a/mpn/generic/toom6h_mul.c Mon Dec 21 16:52:23 2009 +0100
+++ b/mpn/generic/toom6h_mul.c Mon Dec 21 17:31:39 2009 +0100
@@ -76,27 +76,16 @@
do { mpn_mul (p, a, na, b, nb); \
} while (0)
-/* S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
- since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
- */
-#define mpn_toom6h_mul_n_itch(n) \
-( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
- MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
- mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
-
-mp_size_t
-mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
- mp_size_t estimatedN;
- estimatedN = (an + bn) / (size_t) 10 + 1;
- return mpn_toom6h_mul_n_itch( estimatedN * 6 );
-}
-
/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
With: an >= bn >= 46, an*6 < bn * 17.
It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
*/
+/* Estimate on needed scratch:
+ S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+ since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
void
mpn_toom6h_mul (mp_ptr pp,
@@ -250,9 +239,6 @@
-#ifndef SQR_TOOM6_THRESHOLD
-#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
-#endif
#ifdef SQR_TOOM8_THRESHOLD
#define SQR_TOOM6_MAX (SQR_TOOM8_THRESHOLD+6*2-1)
diff -r ca8735ffba47 -r 506e2ca52488 tune/common.c
--- a/tune/common.c Mon Dec 21 16:52:23 2009 +0100
+++ b/tune/common.c Mon Dec 21 17:31:39 2009 +0100
@@ -1009,6 +1009,11 @@
SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
}
double
+speed_mpn_toom6_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
+}
+double
speed_mpn_toom22_mul (struct speed_params *s)
{
SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
@@ -1023,6 +1028,11 @@
{
SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
}
+double
+speed_mpn_toom6h_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
+}
double
speed_mpn_toom32_mul (struct speed_params *s)
diff -r ca8735ffba47 -r 506e2ca52488 tune/speed.c
--- a/tune/speed.c Mon Dec 21 16:52:23 2009 +0100
+++ b/tune/speed.c Mon Dec 21 17:31:39 2009 +0100
@@ -294,9 +294,11 @@
{ "mpn_toom2_sqr", speed_mpn_toom2_sqr },
{ "mpn_toom3_sqr", speed_mpn_toom3_sqr },
{ "mpn_toom4_sqr", speed_mpn_toom4_sqr },
+ { "mpn_toom6_sqr", speed_mpn_toom6_sqr },
{ "mpn_toom22_mul", speed_mpn_toom22_mul },
{ "mpn_toom33_mul", speed_mpn_toom33_mul },
{ "mpn_toom44_mul", speed_mpn_toom44_mul },
+ { "mpn_toom6h_mul", speed_mpn_toom6h_mul },
{ "mpn_toom32_mul", speed_mpn_toom32_mul },
{ "mpn_toom42_mul", speed_mpn_toom42_mul },
{ "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul },
diff -r ca8735ffba47 -r 506e2ca52488 tune/speed.h
--- a/tune/speed.h Mon Dec 21 16:52:23 2009 +0100
+++ b/tune/speed.h Mon Dec 21 17:31:39 2009 +0100
@@ -276,9 +276,11 @@
double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
@@ -1122,6 +1124,12 @@
mpn_toom44_mul_itch (s->size, s->size), \
MPN_TOOM44_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+ mpn_toom6h_mul_itch (s->size, s->size), \
+ MPN_TOOM6H_MUL_MINSIZE)
+
#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \
@@ -1254,6 +1262,11 @@
mpn_toom4_sqr_itch (s->size), \
MPN_TOOM4_SQR_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
+ mpn_toom6_sqr_itch (s->size), \
+ MPN_TOOM6_SQR_MINSIZE)
+
#define SPEED_ROUTINE_MPN_MOD_CALL(call) \
{ \
unsigned i; \
More information about the gmp-commit
mailing list