[Gmp-commit] /home/hgfiles/gmp: Tune MUL_TOOM32_TO_TOOM43, MUL_TOOM32_TO_TOOM53, MUL_TOOM42_TO_TOOM53, MUL_TOOM42_TO_TOOM63.

mercurial at gmplib.org mercurial at gmplib.org
Sun Dec 20 03:22:30 CET 2009


details:   /home/hgfiles/gmp/rev/83af94ff428b
changeset: 13136:83af94ff428b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Dec 20 03:22:24 2009 +0100
description:
Tune MUL_TOOM32_TO_TOOM43, MUL_TOOM32_TO_TOOM53, MUL_TOOM42_TO_TOOM53, MUL_TOOM42_TO_TOOM63.

diffstat:

 ChangeLog     |  20 ++++++++++++++++++++
 gmp-impl.h    |  35 +++++++++++++++++++++++++++++++++++
 tune/common.c |  35 +++++++++++++++++++++++++++++++++++
 tune/speed.h  |  48 ++++++++++++++++++++++++++++++++++++++++++++++++
 tune/tuneup.c |  50 ++++++++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 184 insertions(+), 4 deletions(-)

diffs (truncated from 303 to 300 lines):

diff -r 1a1bf750fe9f -r 83af94ff428b ChangeLog
--- a/ChangeLog	Sat Dec 19 15:27:30 2009 +0100
+++ b/ChangeLog	Sun Dec 20 03:22:24 2009 +0100
@@ -1,3 +1,23 @@
+2009-12-20  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/speed.h (SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL): New macro.
+	(SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL): New macro.
+	(SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL): New macro.
+	(SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL): New macro.
+	(SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL): New macro.
+	(SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL): New macro.
+	* tune/common.c (speed_mpn_toom63_mul): New function.
+	(speed_mpn_toom32_for_toom43_mul): New function.
+	(speed_mpn_toom43_for_toom32_mul): New function.
+	(speed_mpn_toom32_for_toom53_mul): New function.
+	(speed_mpn_toom53_for_toom32_mul): New function.
+	(speed_mpn_toom42_for_toom53_mul): New function.
+	(speed_mpn_toom53_for_toom42_mul): New function.
+	* tune/tuneup.c (tune_mul_n): New name for old tune_mul.
+	(tune_sqr_n): New name for old tune_sqr.
+	(tune_mul): New function, for unbalanced multiplication.
+	* gmp-impl.h: Provide declarations for corresponding threshold vars.
+
 2009-12-19  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/toom_interpolate_8pts.c: Nailify.
diff -r 1a1bf750fe9f -r 83af94ff428b gmp-impl.h
--- a/gmp-impl.h	Sat Dec 19 15:27:30 2009 +0100
+++ b/gmp-impl.h	Sun Dec 20 03:22:24 2009 +0100
@@ -1050,6 +1050,9 @@
 
 #define MPN_TOOM32_MUL_MINSIZE   10
 #define MPN_TOOM42_MUL_MINSIZE   10
+#define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
+#define MPN_TOOM53_MUL_MINSIZE   49 /* ??? */
+#define MPN_TOOM63_MUL_MINSIZE   49
 
 #define   mpn_sqr_diagonal __MPN(sqr_diagonal)
 __GMP_DECLSPEC void      mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
@@ -1651,6 +1654,22 @@
 #define MUL_TOOM44_THRESHOLD            300
 #endif
 
+#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD  140
+#endif
+
+#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD  170
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD  190
+#endif
+
+#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD  230
+#endif
+
 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
    normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
    binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
@@ -4134,6 +4153,22 @@
 #define MUL_TOOM44_THRESHOLD         mul_toom44_threshold
 extern mp_size_t                     mul_toom44_threshold;
 
+#undef  MUL_TOOM32_TO_TOOM43_THRESHOLD
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
+extern mp_size_t                       mul_toom32_to_toom43_threshold;
+
+#undef  MUL_TOOM32_TO_TOOM53_THRESHOLD
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
+extern mp_size_t                       mul_toom32_to_toom53_threshold;
+
+#undef  MUL_TOOM42_TO_TOOM53_THRESHOLD
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
+extern mp_size_t                       mul_toom42_to_toom53_threshold;
+
+#undef  MUL_TOOM42_TO_TOOM63_THRESHOLD
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
+extern mp_size_t                       mul_toom42_to_toom63_threshold;
+
 #undef  MUL_FFT_THRESHOLD
 #define MUL_FFT_THRESHOLD            mul_fft_threshold
 extern mp_size_t                     mul_fft_threshold;
diff -r 1a1bf750fe9f -r 83af94ff428b tune/common.c
--- a/tune/common.c	Sat Dec 19 15:27:30 2009 +0100
+++ b/tune/common.c	Sun Dec 20 03:22:24 2009 +0100
@@ -1034,6 +1034,41 @@
 {
   SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
 }
+double
+speed_mpn_toom63_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
+}
+double
+speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
+}
+double
+speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
+}
+double
+speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
+}
 
 double
 speed_mpn_nussbaumer_mul (struct speed_params *s)
diff -r 1a1bf750fe9f -r 83af94ff428b tune/speed.h
--- a/tune/speed.h	Sat Dec 19 15:27:30 2009 +0100
+++ b/tune/speed.h	Sun Dec 20 03:22:24 2009 +0100
@@ -279,6 +279,13 @@
 double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_for_toom43_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom43_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
@@ -1125,6 +1132,47 @@
      mpn_toom42_mul_itch (s->size, s->size/2),				\
      MPN_TOOM42_MUL_MINSIZE)
 
+#define SPEED_ROUTINE_MPN_TOOM63_MUL(function)				\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),		\
+     mpn_toom63_mul_itch (s->size, s->size/2),				\
+     MPN_TOOM63_MUL_MINSIZE)
+
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function)		\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),		\
+     mpn_toom32_mul_itch (s->size, 17*s->size/24),				\
+     MPN_TOOM32_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function)		\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),		\
+     mpn_toom43_mul_itch (s->size, 17*s->size/24),				\
+     MPN_TOOM43_MUL_MINSIZE)
+
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function)		\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),		\
+     mpn_toom32_mul_itch (s->size, 19*s->size/30),				\
+     MPN_TOOM32_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function)		\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),		\
+     mpn_toom53_mul_itch (s->size, 19*s->size/30),				\
+     MPN_TOOM53_MUL_MINSIZE)
+
+#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function)		\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace),		\
+     mpn_toom42_mul_itch (s->size, 11*s->size/20),				\
+     MPN_TOOM42_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function)		\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace),		\
+     mpn_toom53_mul_itch (s->size, 11*s->size/20),				\
+     MPN_TOOM53_MUL_MINSIZE)
+
+
+
 #define SPEED_ROUTINE_MPN_SQR_CALL(call)				\
   {									\
     mp_ptr    wp;							\
diff -r 1a1bf750fe9f -r 83af94ff428b tune/tuneup.c
--- a/tune/tuneup.c	Sat Dec 19 15:27:30 2009 +0100
+++ b/tune/tuneup.c	Sun Dec 20 03:22:24 2009 +0100
@@ -150,6 +150,10 @@
 mp_size_t  mul_toom22_threshold         = MP_SIZE_T_MAX;
 mp_size_t  mul_toom33_threshold         = MUL_TOOM33_THRESHOLD_LIMIT;
 mp_size_t  mul_toom44_threshold         = MUL_TOOM44_THRESHOLD_LIMIT;
+mp_size_t  mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
 mp_size_t  mul_fft_threshold            = MP_SIZE_T_MAX;
 mp_size_t  mul_fft_modf_threshold       = MP_SIZE_T_MAX;
 mp_size_t  sqr_basecase_threshold       = MP_SIZE_T_MAX;
@@ -404,7 +408,7 @@
 }
 
 
-#define PRINT_WIDTH  28
+#define PRINT_WIDTH  30
 
 void
 print_define_start (const char *name)
@@ -831,7 +835,7 @@
 /* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
    giving wrong results.  */
 void
-tune_mul (void)
+tune_mul_n (void)
 {
   static struct param_t  param;
 
@@ -856,6 +860,37 @@
   MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
 }
 
+void
+tune_mul (void)
+{
+  static struct param_t  param;
+
+  param.function = speed_mpn_toom32_for_toom43_mul;
+  param.function2 = speed_mpn_toom43_for_toom32_mul;
+  param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
+  param.min_size = MPN_TOOM43_MUL_MINSIZE;
+  one (&mul_toom32_to_toom43_threshold, &param);
+
+  param.function = speed_mpn_toom32_for_toom53_mul;
+  param.function2 = speed_mpn_toom53_for_toom32_mul;
+  param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
+  param.min_size = MPN_TOOM53_MUL_MINSIZE;
+  one (&mul_toom32_to_toom53_threshold, &param);
+
+  param.function = speed_mpn_toom42_for_toom53_mul;
+  param.function2 = speed_mpn_toom53_for_toom42_mul;
+  param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
+  param.min_size = MPN_TOOM53_MUL_MINSIZE;
+  one (&mul_toom42_to_toom53_threshold, &param);
+
+  param.function = speed_mpn_toom42_mul;
+  param.function2 = speed_mpn_toom63_mul;
+  param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
+  param.min_size = MPN_TOOM63_MUL_MINSIZE;
+  one (&mul_toom42_to_toom63_threshold, &param);
+
+}
+
 
 void
 tune_mullo (void)
@@ -932,7 +967,7 @@
    just for that.  Start karatsuba from 4 same as MUL above.  */
 
 void
-tune_sqr (void)
+tune_sqr_n (void)
 {
   /* disabled until tuned */
   SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
@@ -1917,10 +1952,13 @@
   }
   printf ("\n");
 
+  tune_mul_n ();
+  printf("\n");
+
   tune_mul ();
   printf("\n");
 
-  tune_sqr ();
+  tune_sqr_n ();
   printf("\n");
 
   tune_fft_mul ();
@@ -1938,8 +1976,12 @@
 
   tune_dc_div ();
   tune_dc_bdiv ();
+
+  printf("\n");
   tune_invertappr ();
   tune_invert ();
+  printf("\n");
+


More information about the gmp-commit mailing list