[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org
Mon Jan 25 14:31:32 CET 2010


details:   /home/hgfiles/gmp/rev/2cc3831b3f86
changeset: 13397:2cc3831b3f86
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Jan 25 14:30:48 2010 +0100
description:
Update FFT tuneup code to avoid requests for not-yet-measured sizes.

details:   /home/hgfiles/gmp/rev/77f525d9039a
changeset: 13398:77f525d9039a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Jan 25 14:31:26 2010 +0100
description:
Add FFT_TABLE3 tables for more machines.

diffstat:

 ChangeLog                            |   7 +++
 mpn/powerpc64/mode64/p3/gmp-mparam.h |  72 +++++++++++++++++++++++++++++++++--
 tune/tuneup.c                        |  33 +++++++++++-----
 3 files changed, 96 insertions(+), 16 deletions(-)

diffs (201 lines):

diff -r d0498fa3feb5 -r 77f525d9039a ChangeLog
--- a/ChangeLog	Mon Jan 25 00:43:54 2010 +0100
+++ b/ChangeLog	Mon Jan 25 14:31:26 2010 +0100
@@ -1,3 +1,10 @@
+2010-01-25  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c (INSERT_FFTTAB): New macro, like old insertion code but
+	also inserting a sentinel.
+	(fftmes): Use INSERT_FFTTAB for inserting new measurements.
+	Limit k range to best_k - 4 ... best_k + 4.
+
 2010-01-23  Torbjorn Granlund  <tege at gmplib.org>
 
 	* gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
diff -r d0498fa3feb5 -r 77f525d9039a mpn/powerpc64/mode64/p3/gmp-mparam.h
--- a/mpn/powerpc64/mode64/p3/gmp-mparam.h	Mon Jan 25 00:43:54 2010 +0100
+++ b/mpn/powerpc64/mode64/p3/gmp-mparam.h	Mon Jan 25 14:31:26 2010 +0100
@@ -53,13 +53,75 @@
 #define MULMOD_BNM1_THRESHOLD                8
 #define SQRMOD_BNM1_THRESHOLD                9
 
-#define MUL_FFT_TABLE  { 208, 416, 1088, 1792, 5120, 12288, 81920, 196608, 0 }
-#define MUL_FFT_MODF_THRESHOLD             224
+#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    220, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
+    {      7, 7}, {     15, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     23,10}, {     15, 9}, \
+    {     35, 8}, {     71,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     79,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79,11}, {     47,10}, {     95, 9}, {    191,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287,11}, {     79,10}, \
+    {    159, 9}, {    319, 8}, {    639,10}, {    175, 9}, \
+    {    351,11}, {     95,10}, {    191, 9}, {    383,11}, \
+    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    223,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
+    {    575, 9}, {   1151,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,12}, {    191,11}, {    383,10}, \
+    {    767,12}, {    223,11}, {    447,10}, {    895,13}, \
+    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
+    {    575,10}, {   1151,12}, {    319,11}, {    639,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447,11}, {    895,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 120
 #define MUL_FFT_THRESHOLD                 2688
 
-#define SQR_FFT_TABLE  { 208, 416, 960, 1792, 5120, 12288, 49152, 196608, 0 }
-#define SQR_FFT_MODF_THRESHOLD             224
-#define SQR_FFT_THRESHOLD                 2176
+#define SQR_FFT_MODF_THRESHOLD             188  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    188, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
+    {     13, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
+    {      9, 7}, {     19, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
+    {     15, 8}, {     31, 9}, {     19, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79, 8}, {    159,10}, {     47, 9}, {     95, 8}, \
+    {    191,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79, 9}, {    159,11}, {     47,10}, {     95, 9}, \
+    {    191,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    143, 9}, {    287,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175,11}, {     95,10}, {    191, 9}, {    383,11}, \
+    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,12}, \
+    {    191,11}, {    383,10}, {    767,12}, {    223,11}, \
+    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
+    {    511,12}, {    287,11}, {    575,10}, {   1151,12}, \
+    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    447,11}, {    895,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 118
+#define SQR_FFT_THRESHOLD                 1728
 
 #define MULLO_BASECASE_THRESHOLD             3
 #define MULLO_DC_THRESHOLD                  27
diff -r d0498fa3feb5 -r 77f525d9039a tune/tuneup.c
--- a/tune/tuneup.c	Mon Jan 25 00:43:54 2010 +0100
+++ b/tune/tuneup.c	Mon Jan 25 14:31:26 2010 +0100
@@ -820,17 +820,28 @@
   return t;
 }
 
+#define INSERT_FFTTAB(idx, nval, kval)					\
+  do {									\
+    fft_tab[idx].n = nval;						\
+    fft_tab[idx].k = kval;						\
+    fft_tab[idx+1].n = -1;	/* sentinel */				\
+    fft_tab[idx+1].k = -1;						\
+  } while (0)
+
 int
 fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
 {
   mp_size_t n, n1, prev_n1;
-  int k, best_k, last_best_k, start_k, kmax;
+  int k, best_k, last_best_k, kmax;
   int eff, prev_eff;
   double t0, t1;
   int n_measurements;
   mp_limb_t *ap, *bp, *rp;
   mp_size_t alloc;
   char *linepref;
+  struct fft_table_nk *fft_tab;
+
+  fft_tab = mpn_fft_table3[p->sqr];
 
   for (k = 0; k < FFT_CACHE_SIZE; k++)
     fft_cache[k].n = 0;
@@ -845,13 +856,12 @@
 
   if (idx == 0)
     {
-      mpn_fft_table3[p->sqr][0].n = nmin;
-      mpn_fft_table3[p->sqr][0].k = initial_k;
+      INSERT_FFTTAB (0, nmin, initial_k);
 
       if (print)
 	{
 	  printf ("\\\n  { ");
-	  printf ("{%7u,%2u}", mpn_fft_table3[p->sqr][0].n, mpn_fft_table3[p->sqr][0].k);
+	  printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
 	  linepref = "    ";
 	}
 
@@ -878,13 +888,16 @@
 
   while (n < nmax)
     {
+      int start_k, end_k;
+
       /* Assume the current best k is best until we hit its next FFT step.  */
       t0 = 99999;
 
       prev_n1 = n + 1;
 
       start_k = MAX (4, best_k - 4);
-      for (k = start_k; k <= 24; k++)
+      end_k = MIN (24, best_k + 4);
+      for (k = start_k; k <= end_k; k++)
 	{
           n1 = mpn_fft_next_size (prev_n1, k);
 
@@ -941,15 +954,14 @@
 	      printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
 	      abort ();
 	    }
-	  mpn_fft_table3[p->sqr][idx].n = prev_n1 >> last_best_k;
-	  mpn_fft_table3[p->sqr][idx].k = best_k;
+	  INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
 
 	  if (print)
 	    {
 	      printf (", ");
 	      if (idx % 4 == 0)
 		printf ("\\\n    ");
-	      printf ("{%7u,%2u}", mpn_fft_table3[p->sqr][idx].n, mpn_fft_table3[p->sqr][idx].k);
+	      printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
 	    }
 
 	  if (option_trace >= 2)
@@ -985,15 +997,14 @@
 	  printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
 	  abort ();
 	}
-      mpn_fft_table3[p->sqr][idx].n = ((1ul << (2*k-2)) + 1) >> (k-1);
-      mpn_fft_table3[p->sqr][idx].k = k;
+      INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
 
       if (print)
 	{
 	  printf (", ");
 	  if (idx % 4 == 0)
 	    printf ("\\\n    ");
-	  printf ("{%7u,%2u}", mpn_fft_table3[p->sqr][idx].n, mpn_fft_table3[p->sqr][idx].k);
+	  printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
 	}
 
       idx++;


More information about the gmp-commit mailing list