[Gmp-commit] /home/hgfiles/gmp: Speed support for both mpn_toom6h_mul and mpn_toom6_sqr.

mercurial at gmplib.org mercurial at gmplib.org
Mon Dec 21 17:31:44 CET 2009


details:   /home/hgfiles/gmp/rev/506e2ca52488
changeset: 13162:506e2ca52488
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Dec 21 17:31:39 2009 +0100
description:
Speed support for both mpn_toom6h_mul and mpn_toom6_sqr.

diffstat:

 ChangeLog                |   7 +++++++
 gmp-impl.h               |  27 +++++++++++++++++++++++++--
 mpn/generic/toom6h_mul.c |  22 ++++------------------
 tune/common.c            |  10 ++++++++++
 tune/speed.c             |   2 ++
 tune/speed.h             |  13 +++++++++++++
 6 files changed, 61 insertions(+), 20 deletions(-)

diffs (202 lines):

diff -r ca8735ffba47 -r 506e2ca52488 ChangeLog
--- a/ChangeLog	Mon Dec 21 16:52:23 2009 +0100
+++ b/ChangeLog	Mon Dec 21 17:31:39 2009 +0100
@@ -21,6 +21,13 @@
 
 2009-12-21  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
+	* gmp-impl.h (mpn_toom6h_mul_itch): New inline function.
+	(MUL_TOOM6H_THRESHOLD): Default value.
+	(SQR_TOOM6_THRESHOLD): Default value.
+	* mpn/generic/toom6h_mul.c: Remove definitions moved to gmp-impl.h.
+	* tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+	mpn_toom6h_mul and mpn_toom6_sqr speed.
+
 	* mpn/generic/toom63_mul.c: Remove unused TMP_*.
 
 	* mpn/generic/toom_eval_pm2rexp.c: New file.
diff -r ca8735ffba47 -r 506e2ca52488 gmp-impl.h
--- a/gmp-impl.h	Mon Dec 21 16:52:23 2009 +0100
+++ b/gmp-impl.h	Mon Dec 21 17:31:39 2009 +0100
@@ -1048,6 +1048,9 @@
 #define MPN_TOOM44_MUL_MINSIZE   30
 #define MPN_TOOM4_SQR_MINSIZE    30
 
+#define MPN_TOOM6H_MUL_MINSIZE   46
+#define MPN_TOOM6_SQR_MINSIZE    46
+
 #define MPN_TOOM32_MUL_MINSIZE   10
 #define MPN_TOOM42_MUL_MINSIZE   10
 #define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
@@ -1137,8 +1140,8 @@
 #define   mpn_toom6h_mul __MPN(toom6h_mul)
 __GMP_DECLSPEC void      mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
 
-#define   mpn_toom6h_sqr __MPN(toom6h_sqr)
-__GMP_DECLSPEC void      mpn_toom6h_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void      mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 
 #define   mpn_fft_best_k __MPN(fft_best_k)
 __GMP_DECLSPEC int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
@@ -1669,6 +1672,14 @@
 #define MUL_TOOM44_THRESHOLD            300
 #endif
 
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD            350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
 #endif
@@ -4439,6 +4450,18 @@
 #define mpn_toom4_sqr_itch(an) \
   (3 * (an) + GMP_NUMB_BITS)
 
+#define mpn_toom6_sqr_itch(n)					\
+( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
+   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
+       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
+  mp_size_t estimatedN;
+  estimatedN = (an + bn) / (size_t) 10 + 1;
+  return mpn_toom6_sqr_itch( estimatedN * 6 );
+}
+
 static inline mp_size_t
 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
 {
diff -r ca8735ffba47 -r 506e2ca52488 mpn/generic/toom6h_mul.c
--- a/mpn/generic/toom6h_mul.c	Mon Dec 21 16:52:23 2009 +0100
+++ b/mpn/generic/toom6h_mul.c	Mon Dec 21 17:31:39 2009 +0100
@@ -76,27 +76,16 @@
   do {	mpn_mul (p, a, na, b, nb);			\
   } while (0)
 
-/* S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
-   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
- */
-#define mpn_toom6h_mul_n_itch(n)					\
-( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
-   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
-       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
-
-mp_size_t
-mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
-  mp_size_t estimatedN;
-  estimatedN = (an + bn) / (size_t) 10 + 1;
-  return mpn_toom6h_mul_n_itch( estimatedN * 6 );
-}
-
 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
    With: an >= bn >= 46, an*6 <  bn * 17.
    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
 
    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
 */
+/* Estimate on needed scratch:
+   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
 
 void
 mpn_toom6h_mul   (mp_ptr pp,
@@ -250,9 +239,6 @@
 
 
 
-#ifndef SQR_TOOM6_THRESHOLD
-#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
-#endif
 
 #ifdef  SQR_TOOM8_THRESHOLD
 #define SQR_TOOM6_MAX (SQR_TOOM8_THRESHOLD+6*2-1)
diff -r ca8735ffba47 -r 506e2ca52488 tune/common.c
--- a/tune/common.c	Mon Dec 21 16:52:23 2009 +0100
+++ b/tune/common.c	Mon Dec 21 17:31:39 2009 +0100
@@ -1009,6 +1009,11 @@
   SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
 }
 double
+speed_mpn_toom6_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
+}
+double
 speed_mpn_toom22_mul (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
@@ -1023,6 +1028,11 @@
 {
   SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
 }
+double
+speed_mpn_toom6h_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
+}
 
 double
 speed_mpn_toom32_mul (struct speed_params *s)
diff -r ca8735ffba47 -r 506e2ca52488 tune/speed.c
--- a/tune/speed.c	Mon Dec 21 16:52:23 2009 +0100
+++ b/tune/speed.c	Mon Dec 21 17:31:39 2009 +0100
@@ -294,9 +294,11 @@
   { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
   { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
   { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
+  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
   { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
   { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
   { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
+  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
   { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
   { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
   { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
diff -r ca8735ffba47 -r 506e2ca52488 tune/speed.h
--- a/tune/speed.h	Mon Dec 21 16:52:23 2009 +0100
+++ b/tune/speed.h	Mon Dec 21 17:31:39 2009 +0100
@@ -276,9 +276,11 @@
 double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
@@ -1122,6 +1124,12 @@
      mpn_toom44_mul_itch (s->size, s->size),				\
      MPN_TOOM44_MUL_MINSIZE)
 
+#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function)			\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
+     mpn_toom6h_mul_itch (s->size, s->size),				\
+     MPN_TOOM6H_MUL_MINSIZE)
+
 #define SPEED_ROUTINE_MPN_TOOM32_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
     (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace),		\
@@ -1254,6 +1262,11 @@
 				mpn_toom4_sqr_itch (s->size),		\
 				MPN_TOOM4_SQR_MINSIZE)
 
+#define SPEED_ROUTINE_MPN_TOOM6_SQR(function)				\
+  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
+				mpn_toom6_sqr_itch (s->size),		\
+				MPN_TOOM6_SQR_MINSIZE)
+
 #define SPEED_ROUTINE_MPN_MOD_CALL(call)				\
   {									\
     unsigned   i;							\


More information about the gmp-commit mailing list