[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Jan 25 14:31:32 CET 2010
details: /home/hgfiles/gmp/rev/2cc3831b3f86
changeset: 13397:2cc3831b3f86
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Jan 25 14:30:48 2010 +0100
description:
Update FFT tuneup code to avoid requests for not-yet-measured sizes.
details: /home/hgfiles/gmp/rev/77f525d9039a
changeset: 13398:77f525d9039a
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Jan 25 14:31:26 2010 +0100
description:
Add FFT_TABLE3 tables for more machines.
diffstat:
ChangeLog | 7 +++
mpn/powerpc64/mode64/p3/gmp-mparam.h | 72 +++++++++++++++++++++++++++++++++--
tune/tuneup.c | 33 +++++++++++-----
3 files changed, 96 insertions(+), 16 deletions(-)
diffs (201 lines):
diff -r d0498fa3feb5 -r 77f525d9039a ChangeLog
--- a/ChangeLog Mon Jan 25 00:43:54 2010 +0100
+++ b/ChangeLog Mon Jan 25 14:31:26 2010 +0100
@@ -1,3 +1,10 @@
+2010-01-25 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/tuneup.c (INSERT_FFTTAB): New macro, like old insertion code but
+ also inserting a sentinel.
+ (fftmes): Use INSERT_FFTTAB for inserting new measurements.
+ Limit k range to best_k - 4 ... best_k + 4.
+
2010-01-23 Torbjorn Granlund <tege at gmplib.org>
* gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
diff -r d0498fa3feb5 -r 77f525d9039a mpn/powerpc64/mode64/p3/gmp-mparam.h
--- a/mpn/powerpc64/mode64/p3/gmp-mparam.h Mon Jan 25 00:43:54 2010 +0100
+++ b/mpn/powerpc64/mode64/p3/gmp-mparam.h Mon Jan 25 14:31:26 2010 +0100
@@ -53,13 +53,75 @@
#define MULMOD_BNM1_THRESHOLD 8
#define SQRMOD_BNM1_THRESHOLD 9
-#define MUL_FFT_TABLE { 208, 416, 1088, 1792, 5120, 12288, 81920, 196608, 0 }
-#define MUL_FFT_MODF_THRESHOLD 224
+#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 220, 5}, { 9, 6}, { 5, 5}, { 11, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 13, 8}, \
+ { 7, 7}, { 15, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 23,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 23,10}, { 15, 9}, \
+ { 35, 8}, { 71,10}, { 23, 9}, { 47,11}, \
+ { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 79,10}, { 55,11}, { 31,10}, { 63, 9}, \
+ { 127,10}, { 71, 9}, { 143, 8}, { 287,10}, \
+ { 79,11}, { 47,10}, { 95, 9}, { 191,12}, \
+ { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
+ { 511,10}, { 143, 9}, { 287,11}, { 79,10}, \
+ { 159, 9}, { 319, 8}, { 639,10}, { 175, 9}, \
+ { 351,11}, { 95,10}, { 191, 9}, { 383,11}, \
+ { 111,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,10}, { 351,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 223,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 287,10}, \
+ { 575, 9}, { 1151,12}, { 159,11}, { 319,10}, \
+ { 639,11}, { 351,12}, { 191,11}, { 383,10}, \
+ { 767,12}, { 223,11}, { 447,10}, { 895,13}, \
+ { 127,12}, { 255,11}, { 511,12}, { 287,11}, \
+ { 575,10}, { 1151,12}, { 319,11}, { 639,12}, \
+ { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
+ { 767,12}, { 415,11}, { 831,10}, { 1663,12}, \
+ { 447,11}, { 895,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 120
#define MUL_FFT_THRESHOLD 2688
-#define SQR_FFT_TABLE { 208, 416, 960, 1792, 5120, 12288, 49152, 196608, 0 }
-#define SQR_FFT_MODF_THRESHOLD 224
-#define SQR_FFT_THRESHOLD 2176
+#define SQR_FFT_MODF_THRESHOLD 188 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 188, 5}, { 9, 6}, { 5, 5}, { 11, 6}, \
+ { 13, 7}, { 13, 8}, { 7, 7}, { 16, 8}, \
+ { 9, 7}, { 19, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 23,10}, { 7, 9}, \
+ { 15, 8}, { 31, 9}, { 19, 8}, { 39, 9}, \
+ { 23,10}, { 15, 9}, { 39,10}, { 23,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79, 8}, { 159,10}, { 47, 9}, { 95, 8}, \
+ { 191,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255,10}, { 71, 9}, { 143, 8}, { 287,10}, \
+ { 79, 9}, { 159,11}, { 47,10}, { 95, 9}, \
+ { 191,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511,10}, { 143, 9}, { 287,11}, \
+ { 79,10}, { 159, 9}, { 319, 8}, { 639,10}, \
+ { 175,11}, { 95,10}, { 191, 9}, { 383,11}, \
+ { 111,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,12}, { 95,11}, { 191,10}, { 383, 9}, \
+ { 767,11}, { 223,13}, { 63,12}, { 127,11}, \
+ { 255,10}, { 511,11}, { 287,10}, { 575,12}, \
+ { 159,11}, { 319,10}, { 639,11}, { 351,12}, \
+ { 191,11}, { 383,10}, { 767,12}, { 223,11}, \
+ { 447,10}, { 895,13}, { 127,12}, { 255,11}, \
+ { 511,12}, { 287,11}, { 575,10}, { 1151,12}, \
+ { 319,11}, { 639,12}, { 351,13}, { 191,12}, \
+ { 383,11}, { 767,12}, { 447,11}, { 895,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 118
+#define SQR_FFT_THRESHOLD 1728
#define MULLO_BASECASE_THRESHOLD 3
#define MULLO_DC_THRESHOLD 27
diff -r d0498fa3feb5 -r 77f525d9039a tune/tuneup.c
--- a/tune/tuneup.c Mon Jan 25 00:43:54 2010 +0100
+++ b/tune/tuneup.c Mon Jan 25 14:31:26 2010 +0100
@@ -820,17 +820,28 @@
return t;
}
+#define INSERT_FFTTAB(idx, nval, kval) \
+ do { \
+ fft_tab[idx].n = nval; \
+ fft_tab[idx].k = kval; \
+ fft_tab[idx+1].n = -1; /* sentinel */ \
+ fft_tab[idx+1].k = -1; \
+ } while (0)
+
int
fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
{
mp_size_t n, n1, prev_n1;
- int k, best_k, last_best_k, start_k, kmax;
+ int k, best_k, last_best_k, kmax;
int eff, prev_eff;
double t0, t1;
int n_measurements;
mp_limb_t *ap, *bp, *rp;
mp_size_t alloc;
char *linepref;
+ struct fft_table_nk *fft_tab;
+
+ fft_tab = mpn_fft_table3[p->sqr];
for (k = 0; k < FFT_CACHE_SIZE; k++)
fft_cache[k].n = 0;
@@ -845,13 +856,12 @@
if (idx == 0)
{
- mpn_fft_table3[p->sqr][0].n = nmin;
- mpn_fft_table3[p->sqr][0].k = initial_k;
+ INSERT_FFTTAB (0, nmin, initial_k);
if (print)
{
printf ("\\\n { ");
- printf ("{%7u,%2u}", mpn_fft_table3[p->sqr][0].n, mpn_fft_table3[p->sqr][0].k);
+ printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
linepref = " ";
}
@@ -878,13 +888,16 @@
while (n < nmax)
{
+ int start_k, end_k;
+
/* Assume the current best k is best until we hit its next FFT step. */
t0 = 99999;
prev_n1 = n + 1;
start_k = MAX (4, best_k - 4);
- for (k = start_k; k <= 24; k++)
+ end_k = MIN (24, best_k + 4);
+ for (k = start_k; k <= end_k; k++)
{
n1 = mpn_fft_next_size (prev_n1, k);
@@ -941,15 +954,14 @@
printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
abort ();
}
- mpn_fft_table3[p->sqr][idx].n = prev_n1 >> last_best_k;
- mpn_fft_table3[p->sqr][idx].k = best_k;
+ INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
if (print)
{
printf (", ");
if (idx % 4 == 0)
printf ("\\\n ");
- printf ("{%7u,%2u}", mpn_fft_table3[p->sqr][idx].n, mpn_fft_table3[p->sqr][idx].k);
+ printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
}
if (option_trace >= 2)
@@ -985,15 +997,14 @@
printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
abort ();
}
- mpn_fft_table3[p->sqr][idx].n = ((1ul << (2*k-2)) + 1) >> (k-1);
- mpn_fft_table3[p->sqr][idx].k = k;
+ INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
if (print)
{
printf (", ");
if (idx % 4 == 0)
printf ("\\\n ");
- printf ("{%7u,%2u}", mpn_fft_table3[p->sqr][idx].n, mpn_fft_table3[p->sqr][idx].k);
+ printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
}
idx++;
More information about the gmp-commit mailing list