[Gmp-commit] /home/hgfiles/gmp: Toom-6half and Toom-8half activation.

mercurial at gmplib.org mercurial at gmplib.org
Thu Dec 24 18:11:33 CET 2009


details:   /home/hgfiles/gmp/rev/0b1971e1a987
changeset: 13211:0b1971e1a987
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Dec 24 18:11:24 2009 +0100
description:
Toom-6half and Toom-8half activation.

diffstat:

 ChangeLog           |   7 +++++++
 gmp-impl.h          |  26 +++++++++++++++++++++-----
 mpn/generic/mul_n.c |  36 +++++++++++++++++++-----------------
 mpn/generic/sqr_n.c |  36 +++++++++++++++++++-----------------
 tune/tuneup.c       |  24 ++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 39 deletions(-)

diffs (247 lines):

diff -r 4689c4513d05 -r 0b1971e1a987 ChangeLog
--- a/ChangeLog	Thu Dec 24 11:09:20 2009 +0100
+++ b/ChangeLog	Thu Dec 24 18:11:24 2009 +0100
@@ -1,3 +1,10 @@
+2009-12-24  Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/generic/mul_n.c: Use also toom6h and toom8h.
+	* mpn/generic/sqr_n.c: Use also toom6 and toom8.
+	* gmp-impl.h: Initial support for tuning of Toom-6half and Toom-8half.
+	* tune/tuneup.c: Tune Toom-6half and Toom-8half thresholds.
+
 2009-12-24  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/powm_sec.c: Use SQR_TOOM2_THRESHOLD as limit for a native
diff -r 4689c4513d05 -r 0b1971e1a987 gmp-impl.h
--- a/gmp-impl.h	Thu Dec 24 11:09:20 2009 +0100
+++ b/gmp-impl.h	Thu Dec 24 18:11:24 2009 +0100
@@ -1035,10 +1035,6 @@
        && (size) >= (thresh)))
 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
 
-#if WANT_FFT
-#define MPN_TOOM44_MAX_N 285405
-#endif /* WANT_FFT */
-
 #define MPN_TOOM22_MUL_MINSIZE    4
 #define MPN_TOOM2_SQR_MINSIZE     4
 
@@ -4207,6 +4203,14 @@
 #define MUL_TOOM44_THRESHOLD         mul_toom44_threshold
 extern mp_size_t                     mul_toom44_threshold;
 
+#undef  MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD         mul_toom6h_threshold
+extern mp_size_t                     mul_toom6h_threshold;
+
+#undef  MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD         mul_toom8h_threshold
+extern mp_size_t                     mul_toom8h_threshold;
+
 #undef  MUL_TOOM32_TO_TOOM43_THRESHOLD
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
 extern mp_size_t                       mul_toom32_to_toom43_threshold;
@@ -4259,6 +4263,14 @@
 #define SQR_TOOM4_THRESHOLD          sqr_toom4_threshold
 extern mp_size_t                     sqr_toom4_threshold;
 
+#undef  SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD          sqr_toom6_threshold
+extern mp_size_t                     sqr_toom6_threshold;
+
+#undef  SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD          sqr_toom8_threshold
+extern mp_size_t                     sqr_toom8_threshold;
+
 #undef SQR_FFT_THRESHOLD
 #define SQR_FFT_THRESHOLD            sqr_fft_threshold
 extern mp_size_t                     sqr_fft_threshold;
@@ -4442,6 +4454,10 @@
 #define SQR_TOOM3_THRESHOLD_LIMIT       400
 #define MUL_TOOM44_THRESHOLD_LIMIT     1000
 #define SQR_TOOM4_THRESHOLD_LIMIT      1000
+#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
+#define SQR_TOOM6_THRESHOLD_LIMIT      1100
+#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
+#define SQR_TOOM8_THRESHOLD_LIMIT      1200
 #define MULLO_BASECASE_THRESHOLD_LIMIT  200
 #define GET_STR_THRESHOLD_LIMIT         150
 
@@ -4478,7 +4494,7 @@
 #define mpn_toom4_sqr_itch(an) \
   (3 * (an) + GMP_NUMB_BITS)
 
-#define mpn_toom6_sqr_itch(n)					\
+#define mpn_toom6_sqr_itch(n)						\
 ( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
    MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
        mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
diff -r 4689c4513d05 -r 0b1971e1a987 mpn/generic/mul_n.c
--- a/mpn/generic/mul_n.c	Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/generic/mul_n.c	Thu Dec 24 18:11:24 2009 +0100
@@ -50,11 +50,7 @@
       mpn_toom33_mul (p, a, n, b, n, ws);
       TMP_SFREE;
     }
-#if WANT_FFT || TUNE_PROGRAM_BUILD
-  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
-#else
-  else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
-#endif
+  else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
     {
       mp_ptr ws;
       TMP_SDECL;
@@ -63,22 +59,28 @@
       mpn_toom44_mul (p, a, n, b, n, ws);
       TMP_SFREE;
     }
+  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+      mpn_toom6h_mul (p, a, n, b, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+      mpn_toom8h_mul (p, a, n, b, n, ws);
+      TMP_FREE;
+    }
   else
-#if WANT_FFT || TUNE_PROGRAM_BUILD
     {
       /* The current FFT code allocates its own space.  That should probably
 	 change.  */
       mpn_fft_mul (p, a, n, b, n);
     }
-#else
-    {
-      /* Toom4 for large operands.  */
-      mp_ptr ws;
-      TMP_DECL;
-      TMP_MARK;
-      ws = TMP_BALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
-      mpn_toom44_mul (p, a, n, b, n, ws);
-      TMP_FREE;
-    }
-#endif
 }
diff -r 4689c4513d05 -r 0b1971e1a987 mpn/generic/sqr_n.c
--- a/mpn/generic/sqr_n.c	Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/generic/sqr_n.c	Thu Dec 24 18:11:24 2009 +0100
@@ -52,11 +52,7 @@
       mpn_toom3_sqr (p, a, n, ws);
       TMP_SFREE;
     }
-#if WANT_FFT || TUNE_PROGRAM_BUILD
-  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
-#else
-  else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
-#endif
+  else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
     {
       mp_ptr ws;
       TMP_SDECL;
@@ -65,22 +61,28 @@
       mpn_toom4_sqr (p, a, n, ws);
       TMP_SFREE;
     }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
+      mpn_toom6_sqr (p, a, n, ws);
+      TMP_SFREE;
+    }
+  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+    {
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
+      mpn_toom8_sqr (p, a, n, ws);
+      TMP_FREE;
+    }
   else
-#if WANT_FFT || TUNE_PROGRAM_BUILD
     {
       /* The current FFT code allocates its own space.  That should probably
 	 change.  */
       mpn_fft_mul (p, a, n, a, n);
     }
-#else
-    {
-      /* Toom4 for large operands.  */
-      mp_ptr ws;
-      TMP_DECL;
-      TMP_MARK;
-      ws = TMP_BALLOC_LIMBS (mpn_toom4_sqr_itch (n));
-      mpn_toom4_sqr (p, a, n, ws);
-      TMP_FREE;
-    }
-#endif
 }
diff -r 4689c4513d05 -r 0b1971e1a987 tune/tuneup.c
--- a/tune/tuneup.c	Thu Dec 24 11:09:20 2009 +0100
+++ b/tune/tuneup.c	Thu Dec 24 18:11:24 2009 +0100
@@ -150,6 +150,8 @@
 mp_size_t  mul_toom22_threshold         = MP_SIZE_T_MAX;
 mp_size_t  mul_toom33_threshold         = MUL_TOOM33_THRESHOLD_LIMIT;
 mp_size_t  mul_toom44_threshold         = MUL_TOOM44_THRESHOLD_LIMIT;
+mp_size_t  mul_toom6h_threshold         = MUL_TOOM6H_THRESHOLD_LIMIT;
+mp_size_t  mul_toom8h_threshold         = MUL_TOOM8H_THRESHOLD_LIMIT;
 mp_size_t  mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
 mp_size_t  mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
 mp_size_t  mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
@@ -161,6 +163,8 @@
   = (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
 mp_size_t  sqr_toom3_threshold          = SQR_TOOM3_THRESHOLD_LIMIT;
 mp_size_t  sqr_toom4_threshold          = SQR_TOOM4_THRESHOLD_LIMIT;
+mp_size_t  sqr_toom6_threshold          = SQR_TOOM6_THRESHOLD_LIMIT;
+mp_size_t  sqr_toom8_threshold          = SQR_TOOM8_THRESHOLD_LIMIT;
 mp_size_t  sqr_fft_threshold            = MP_SIZE_T_MAX;
 mp_size_t  sqr_fft_modf_threshold       = MP_SIZE_T_MAX;
 mp_size_t  mullo_basecase_threshold     = MP_SIZE_T_MAX;
@@ -856,6 +860,16 @@
   param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
   one (&mul_toom44_threshold, &param);
 
+  param.name = "MUL_TOOM6H_THRESHOLD";
+  param.min_size = MAX (mul_toom44_threshold, MPN_TOOM6H_MUL_MINSIZE);
+  param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
+  one (&mul_toom6h_threshold, &param);
+
+  param.name = "MUL_TOOM8H_THRESHOLD";
+  param.min_size = MAX (mul_toom6h_threshold, MPN_TOOM8H_MUL_MINSIZE);
+  param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
+  one (&mul_toom8h_threshold, &param);
+
   /* disabled until tuned */
   MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
 }
@@ -1044,6 +1058,16 @@
     param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_MINSIZE);
     param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
     one (&sqr_toom4_threshold, &param);
+
+    param.name = "SQR_TOOM6_THRESHOLD";
+    param.min_size = MAX (sqr_toom4_threshold, MPN_TOOM6_SQR_MINSIZE);
+    param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
+    one (&sqr_toom6_threshold, &param);
+
+    param.name = "SQR_TOOM8_THRESHOLD";
+    param.min_size = MAX (sqr_toom6_threshold, MPN_TOOM8_SQR_MINSIZE);
+    param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
+    one (&sqr_toom8_threshold, &param);
   }
 }
 


More information about the gmp-commit mailing list