[Gmp-commit] /var/hg/gmp-5.0: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Jan 24 18:55:50 CET 2012


details:   /var/hg/gmp-5.0/rev/18ab312f00fe
changeset: 13518:18ab312f00fe
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jan 24 12:02:27 2012 +0100
description:
Merge tune/tuneup.c mod_1 changes from mainline repo.

details:   /var/hg/gmp-5.0/rev/c84a9f37b310
changeset: 13519:c84a9f37b310
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jan 24 18:46:04 2012 +0100
description:
Tuneup.

diffstat:

 ChangeLog                            |   17 ++++
 mpn/powerpc64/mode64/p7/gmp-mparam.h |  131 +++++++++++++++++++++++--------
 mpn/x86/p6/sse2/gmp-mparam.h         |   10 +-
 mpn/x86_64/atom/gmp-mparam.h         |   77 ++++++++++--------
 mpn/x86_64/bobcat/gmp-mparam.h       |  113 ++++++++++++++++++---------
 mpn/x86_64/core2/gmp-mparam.h        |  142 +++++++++++++++++-----------------
 mpn/x86_64/coreisbr/gmp-mparam.h     |  119 ++++++++++++++++++++--------
 tune/tuneup.c                        |   78 ++++++++++--------
 8 files changed, 435 insertions(+), 252 deletions(-)

diffs (truncated from 895 to 300 lines):

diff -r 3b7cf3fa616f -r c84a9f37b310 ChangeLog
--- a/ChangeLog	Mon Jan 23 20:40:07 2012 +0100
+++ b/ChangeLog	Tue Jan 24 18:46:04 2012 +0100
@@ -355,10 +355,27 @@
 
 	* mpn/generic/redc_2.c: Use asm code just for GNU C.
 
+2010-05-06  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c (tune_mod_1): Fix typo.
+
 2010-05-05  Torbjorn Granlund  <tege at gmplib.org>
 
 	* longlong.h: Undo 2009-03-01 change for powerpc64, it gives poor code.
 
+	* tune/tuneup.c (tune_mod_1): Use more typical divisor, for the benefit
+	of machines with early-out multipliers.
+
+2010-05-04  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c (tune_mod_1): Fix typo.
+
+2010-05-03  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c (tune_mod_1): Measure MOD_1_1_TO_MOD_1_2_THRESHOLD and
+	MOD_1_2_TO_MOD_1_4_THRESHOLD before MOD_1U_TO_MOD_1_1_THRESHOLD for
+	correctness.
+
 2010-04-10  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86/divrem_2.asm: Use "orb" instead of "or" to work around
diff -r 3b7cf3fa616f -r c84a9f37b310 mpn/powerpc64/mode64/p7/gmp-mparam.h
--- a/mpn/powerpc64/mode64/p7/gmp-mparam.h	Mon Jan 23 20:40:07 2012 +0100
+++ b/mpn/powerpc64/mode64/p7/gmp-mparam.h	Tue Jan 24 18:46:04 2012 +0100
@@ -28,57 +28,87 @@
 #define MOD_1N_TO_MOD_1_1_THRESHOLD         11
 #define MOD_1U_TO_MOD_1_1_THRESHOLD          6
 #define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        13
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
 #define USE_PREINV_DIVREM_1                  1
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           30
+#define BMOD_1_TO_MOD_1_THRESHOLD           28
 
 #define MUL_TOOM22_THRESHOLD                22
-#define MUL_TOOM33_THRESHOLD                73
-#define MUL_TOOM44_THRESHOLD               202
+#define MUL_TOOM33_THRESHOLD                64
+#define MUL_TOOM44_THRESHOLD               296
 #define MUL_TOOM6H_THRESHOLD               393
 #define MUL_TOOM8H_THRESHOLD               592
 
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD     137
 #define MUL_TOOM32_TO_TOOM53_THRESHOLD     149
 #define MUL_TOOM42_TO_TOOM53_THRESHOLD     137
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     151
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     149
 
 #define SQR_BASECASE_THRESHOLD              18
 #define SQR_TOOM2_THRESHOLD                 64
 #define SQR_TOOM3_THRESHOLD                 89
-#define SQR_TOOM4_THRESHOLD                175
-#define SQR_TOOM6_THRESHOLD                294
-#define SQR_TOOM8_THRESHOLD                406
+#define SQR_TOOM4_THRESHOLD                184
+#define SQR_TOOM6_THRESHOLD                270
+#define SQR_TOOM8_THRESHOLD                357
 
 #define MULMOD_BNM1_THRESHOLD               17
 #define SQRMOD_BNM1_THRESHOLD               13
 
-#define MUL_FFT_MODF_THRESHOLD             404  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             408  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    404, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+  { {    408, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
     {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
     {     23, 7}, {     12, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
-    {     13, 7}, {     28, 8}, {     15, 7}, {     33, 8}, \
-    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     27, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
     {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
     {     43, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
     {     15, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
     {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    135,11}, {     79,10}, {    159,11}, {     95,10}, \
-    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319,12}, {     95, 9}, \
-    {    767,11}, {    207,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 81
+    {     67,10}, {     39, 9}, {     79,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
+    {    143, 7}, {   2303,10}, {    303,11}, {    159,10}, \
+    {    319, 9}, {    639,12}, {     95,11}, {    191,10}, \
+    {    383,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511,11}, {    271,10}, {    543,11}, {    287,10}, \
+    {    575,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671, 9}, {   1343,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    799,11}, \
+    {    415,10}, {    831,12}, {    223,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,10}, {   1087,12}, {    287,11}, {    575,10}, \
+    {   1151,11}, {    607,10}, {   1215,12}, {    319,11}, \
+    {    639,10}, {   1279,11}, {    671,10}, {   1343,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    799,10}, {   1599,12}, {    415,11}, {    831,10}, \
+    {   1663,12}, {    447,11}, {    895,14}, {    127,13}, \
+    {    255,12}, {    543,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1407,12}, {    735,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {    255,12}, \
+    {   1087,13}, {    575,12}, {   1215,13}, {    639,12}, \
+    {   1343,11}, {   2687,12}, {   1471,14}, {    383,13}, \
+    {    767,12}, {   1599,13}, {    831,10}, {   6655,12}, \
+    {   1727,13}, {    959,12}, {   1919,11}, {   3839,14}, \
+    {    511,11}, {   4095,13}, {   1087,12}, {   2303,13}, \
+    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1471,12}, {   2943,14}, {    767,13}, \
+    {   1599,12}, {   3199,13}, {   1663,14}, {    895,13}, \
+    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
+    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
+    {   1407,13}, {   2943,15}, {    767,14}, {   1663,13}, \
+    {   3327,12}, {   6655,14}, {   1919,13}, {   3839,16}, \
+    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
+    {   2303,12}, {   9215,13}, {   4863,15}, {   1279,13}, \
+    {   5119,14}, {   2815,13}, {   5887,15}, {   1535,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 202
 #define MUL_FFT_THRESHOLD                 3712
 
 #define SQR_FFT_MODF_THRESHOLD             332  /* k = 5 */
@@ -91,19 +121,52 @@
     {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
     {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
     {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     79,11}, \
+    {     79,10}, {     55,11}, {     31,10}, {     79,11}, \
     {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127,11}, {     79, 9}, {    319, 8}, {    639,10}, \
-    {    175,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {    127, 9}, {    255,11}, {     79,10}, {    159, 9}, \
+    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
     {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
     {    271, 9}, {    543,11}, {    143,10}, {    287, 9}, \
     {    575,10}, {    303, 9}, {    607,10}, {    319, 9}, \
-    {    639,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 76
-#define SQR_FFT_THRESHOLD                 2880
+    {    639,12}, {     95,11}, {    191,10}, {    383,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
+    {    271,10}, {    543, 9}, {   1087,11}, {    287,10}, \
+    {    575,11}, {    303,10}, {    607, 9}, {   1215,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
+    {    447,10}, {    895,11}, {    479,10}, {    959,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,10}, {   1535,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,12}, \
+    {    479,11}, {    959,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    639,12}, {   1343,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    959,12}, \
+    {   1919,15}, {    255,13}, {   1151,12}, {   2303,13}, \
+    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,11}, \
+    {   5887,13}, {   1663,14}, {    895,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,13}, {   2687,14}, {   1407,13}, \
+    {   2815,15}, {    767,14}, {   1663,13}, {   3455,14}, \
+    {   1791,13}, {   3583,14}, {   1919,13}, {   3839,16}, \
+    {    511,15}, {   1023,14}, {   2175,13}, {   4351,15}, \
+    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 206
+#define SQR_FFT_THRESHOLD                 2752
 
 #define MULLO_BASECASE_THRESHOLD            10
 #define MULLO_DC_THRESHOLD                  23
diff -r 3b7cf3fa616f -r c84a9f37b310 mpn/x86/p6/sse2/gmp-mparam.h
--- a/mpn/x86/p6/sse2/gmp-mparam.h	Mon Jan 23 20:40:07 2012 +0100
+++ b/mpn/x86/p6/sse2/gmp-mparam.h	Tue Jan 24 18:46:04 2012 +0100
@@ -34,8 +34,8 @@
 #define MOD_1_UNNORM_THRESHOLD               5
 #define MOD_1N_TO_MOD_1_1_THRESHOLD         10
 #define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        10
 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD     23
 #define USE_PREINV_DIVREM_1                  1  /* native */
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
@@ -151,9 +151,9 @@
 #define DC_BDIV_QR_THRESHOLD                60
 #define DC_BDIV_Q_THRESHOLD                132
 
-#define INV_MULMOD_BNM1_THRESHOLD          117
-#define INV_NEWTON_THRESHOLD                81
-#define INV_APPR_THRESHOLD                  61
+#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_NEWTON_THRESHOLD                69
+#define INV_APPR_THRESHOLD                  65
 
 #define BINV_NEWTON_THRESHOLD              276
 #define REDC_1_TO_REDC_N_THRESHOLD          63
diff -r 3b7cf3fa616f -r c84a9f37b310 mpn/x86_64/atom/gmp-mparam.h
--- a/mpn/x86_64/atom/gmp-mparam.h	Mon Jan 23 20:40:07 2012 +0100
+++ b/mpn/x86_64/atom/gmp-mparam.h	Tue Jan 24 18:46:04 2012 +0100
@@ -53,49 +53,58 @@
 #define MULMOD_BNM1_THRESHOLD                9
 #define SQRMOD_BNM1_THRESHOLD                9
 
-#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             208  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    212, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
+  { {    208, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
     {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
     {     13, 7}, {      7, 6}, {     15, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+    {     19, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
     {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     17, 9}, {     11,10}, \
-    {      7, 9}, {     15, 8}, {     31, 9}, {     19, 8}, \
-    {     39, 9}, {     23,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95, 8}, {    191,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47, 9}, {    191,12}, \
+    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     47,11}, {     15,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     67,10}, {     39, 9}, {     79, 8}, \
+    {    159,10}, {     47,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 7}, {    511,10}, {     71, 9}, \
+    {    143, 8}, {    287, 7}, {    575,10}, {     79, 9}, \
+    {    159, 8}, {    319,11}, {     47, 9}, {    191,12}, \
     {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
     {    511,10}, {    143, 9}, {    287, 8}, {    575,11}, \
     {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351, 8}, {    703,10}, {    191, 9}, {    383,10}, \
-    {    207, 9}, {    415,11}, {    111,10}, {    223,12}, \
+    {    351, 8}, {    703, 7}, {   1407,10}, {    191, 9}, \
+    {    415,11}, {    111,10}, {    223, 9}, {    447,12}, \
     {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319,11}, {    175,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
-    {    223,10}, {    447,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    351,10}, {    703,12}, {    191,11}, \
-    {    383,12}, {    223,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    511,12}, {    287,11}, {    575,12}, \
-    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    575,11}, {   1151,13}, {    319,12}, \
-    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
+    {    143,10}, {    287, 9}, {    575, 8}, {   1151,10}, \
+    {    319,11}, {    175,10}, {    351, 9}, {    703, 8}, \
+    {   1407,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    415,11}, {    223,10}, {    447, 9}, {    895,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
+    {    287,10}, {    575, 9}, {   1151,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    383,10}, {    767,11}, \


More information about the gmp-commit mailing list