[Gmp-commit] /var/hg/gmp: Retune (mainly for DIV_1_VS_MUL_1_PERCENT).

mercurial at gmplib.org mercurial at gmplib.org
Tue Jan 3 19:41:07 UTC 2017


details:   /var/hg/gmp/rev/f7e1104aaaa5
changeset: 17201:f7e1104aaaa5
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jan 03 20:41:02 2017 +0100
description:
Retune (mainly for DIV_1_VS_MUL_1_PERCENT).

diffstat:

 mpn/arm/v7a/cora5/gmp-mparam.h   |    2 +
 mpn/powerpc32/p7/gmp-mparam.h    |    2 +
 mpn/x86_64/coreibwl/gmp-mparam.h |  228 +++++++++++++++-----------------------
 mpn/x86_64/skylake/gmp-mparam.h  |    2 +
 4 files changed, 94 insertions(+), 140 deletions(-)

diffs (truncated from 310 to 300 lines):

diff -r 5825c659d76a -r f7e1104aaaa5 mpn/arm/v7a/cora5/gmp-mparam.h
--- a/mpn/arm/v7a/cora5/gmp-mparam.h	Tue Jan 03 20:05:28 2017 +0100
+++ b/mpn/arm/v7a/cora5/gmp-mparam.h	Tue Jan 03 20:41:02 2017 +0100
@@ -50,6 +50,8 @@
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
 #define BMOD_1_TO_MOD_1_THRESHOLD           51
 
+#define DIV_1_VS_MUL_1_PERCENT             178
+
 #define MUL_TOOM22_THRESHOLD                45
 #define MUL_TOOM33_THRESHOLD               149
 #define MUL_TOOM44_THRESHOLD               242
diff -r 5825c659d76a -r f7e1104aaaa5 mpn/powerpc32/p7/gmp-mparam.h
--- a/mpn/powerpc32/p7/gmp-mparam.h	Tue Jan 03 20:05:28 2017 +0100
+++ b/mpn/powerpc32/p7/gmp-mparam.h	Tue Jan 03 20:41:02 2017 +0100
@@ -48,6 +48,8 @@
 #define DIVEXACT_1_THRESHOLD                 0  /* always */
 #define BMOD_1_TO_MOD_1_THRESHOLD           34
 
+#define DIV_1_VS_MUL_1_PERCENT             318
+
 #define MUL_TOOM22_THRESHOLD                20
 #define MUL_TOOM33_THRESHOLD                89
 #define MUL_TOOM44_THRESHOLD               130
diff -r 5825c659d76a -r f7e1104aaaa5 mpn/x86_64/coreibwl/gmp-mparam.h
--- a/mpn/x86_64/coreibwl/gmp-mparam.h	Tue Jan 03 20:05:28 2017 +0100
+++ b/mpn/x86_64/coreibwl/gmp-mparam.h	Tue Jan 03 20:41:02 2017 +0100
@@ -35,9 +35,9 @@
 #undef HAVE_NATIVE_mpn_mul_2
 #undef HAVE_NATIVE_mpn_addmul_2
 
-/* 2100 MHz Intel i3-5100 */
-/* FFT tuning limit = 200 M */
-/* Generated by tuneup.c, 2015-10-12, gcc 4.8 */
+/* 3400 MHz Intel Xeon Broadwell E3-1285Lv4 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2017-01-03, gcc 6.2 */
 
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
@@ -53,172 +53,120 @@
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
 #define BMOD_1_TO_MOD_1_THRESHOLD           20
 
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                75
-#define MUL_TOOM44_THRESHOLD               218
-#define MUL_TOOM6H_THRESHOLD               354
-#define MUL_TOOM8H_THRESHOLD               434
+#define DIV_1_VS_MUL_1_PERCENT             485
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      75
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     163
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     147
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     149
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     215
+#define MUL_TOOM22_THRESHOLD                29
+#define MUL_TOOM33_THRESHOLD                38
+#define MUL_TOOM44_THRESHOLD               324
+#define MUL_TOOM6H_THRESHOLD               430
+#define MUL_TOOM8H_THRESHOLD               644
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      77
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     207
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      75
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      84
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     109
 
 #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
 #define SQR_TOOM2_THRESHOLD                 38
-#define SQR_TOOM3_THRESHOLD                113
+#define SQR_TOOM3_THRESHOLD                117
 #define SQR_TOOM4_THRESHOLD                442
 #define SQR_TOOM6_THRESHOLD                517
-#define SQR_TOOM8_THRESHOLD                572
+#define SQR_TOOM8_THRESHOLD                597
 
-#define MULMID_TOOM42_THRESHOLD             46
+#define MULMID_TOOM42_THRESHOLD             48
 
 #define MULMOD_BNM1_THRESHOLD               16
-#define SQRMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               18
 
-#define MUL_FFT_MODF_THRESHOLD             428  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             424  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    428, 5}, {     23, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     14, 6}, {     29, 7}, {     24, 8}, \
-    {     13, 7}, {     29, 8}, {     15, 7}, {     31, 8}, \
-    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+  { {    424, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     23, 7}, {     12, 6}, \
+    {     25, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     17, 7}, {     36, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 9}, {     11, 8}, {     23, 7}, {     47, 8}, \
+    {     25, 7}, {     51, 8}, {     27, 9}, {     15, 8}, \
     {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     49, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     51, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     79, 9}, {     43,10}, \
     {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     99,10}, {     55,11}, {     31,10}, {     71, 6}, \
-    {   1215, 7}, {    639, 8}, {    335, 9}, {    179,12}, \
-    {     31,11}, {     63,10}, {    143,11}, {     79,10}, \
-    {    167,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543,10}, \
-    {    287, 9}, {    575,10}, {    303,11}, {    159,10}, \
-    {    319,12}, {     95,11}, {    191,10}, {    383,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    335,10}, \
-    {    671,11}, {    351,10}, {    703,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    415,10}, {    831,12}, \
-    {    223,11}, {    447,13}, {    127,11}, {    543,12}, \
-    {    287,11}, {    607,10}, {   1215,12}, {    319,11}, \
-    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
-    {    383,11}, {    799,12}, {    415,11}, {    831,12}, \
-    {    447,14}, {    127,13}, {    255,12}, {    543,11}, \
-    {   1087,12}, {    607,13}, {    319,12}, {    671,11}, \
-    {   1343,12}, {    735,13}, {    383,12}, {    831,13}, \
-    {    447,12}, {    959,13}, {    511,12}, {   1023,13}, \
-    {    575,12}, {   1151,13}, {    639,12}, {   1343,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    831,12}, \
-    {   1663,13}, {    895,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1151,14}, {    639,13}, {   1343,12}, \
-    {   2687,13}, {   1407,14}, {    767,13}, {   1535,12}, \
-    {   3199,13}, {   1663,14}, {    895,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2943,15}, {    767,14}, {   1535,13}, {   3199,14}, \
-    {   1663,13}, {   3455,14}, {   1791,16}, {    511,15}, \
-    {   1023,14}, {   2175,13}, {   4479,14}, {   2431,13}, \
-    {   4863,15}, {   1279,14}, {   2943,13}, {   5887,15}, \
-    {   1535,14}, {   3455,15}, {   1791,14}, {   3839,16}, \
-    {   1023,15}, {   2047,14}, {   4479,15}, {   2303,14}, \
-    {   4863,15}, {   2559,14}, {   5247,15}, {   2815,14}, \
-    {   5887,16}, {   1535,15}, {   3327,14}, {   6911,15}, \
-    {   3839,17}, {   1023,16}, {   2047,15}, {   4863,16}, \
-    {   2559,15}, {   5887,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 194
-#define MUL_FFT_THRESHOLD                 4736
+    {     71,10}, {     39, 9}, {     87,10}, {     47, 9}, \
+    {     99,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     87,11}, {     47,10}, {    103,12}, \
+    {     31,11}, {     63,10}, {    135, 9}, {    271,10}, \
+    {    143, 8}, {    575, 9}, {    303,11}, {     79,10}, \
+    {    159, 8}, {    639,10}, {    175, 8}, {    703, 9}, \
+    {    367,11}, {     95, 9}, {    383, 8}, {    767, 9}, \
+    {    399, 8}, {    799, 9}, {    415,11}, {    111,12}, \
+    {     63,11}, {    127, 9}, {    511,10}, {    271, 9}, \
+    {    543,11}, {    143,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319, 9}, {    639,11}, {    175,12}, \
+    {     95,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 104
+#define MUL_FFT_THRESHOLD                 5760
 
 #define SQR_FFT_MODF_THRESHOLD             396  /* k = 5 */
 #define SQR_FFT_TABLE3                                      \
-  { {    396, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     14, 6}, \
-    {     29, 7}, {     15, 6}, {     31, 7}, {     25, 8}, \
-    {     13, 7}, {     28, 8}, {     15, 7}, {     32, 8}, \
+  { {    396, 5}, {     23, 6}, {     29, 7}, {     15, 6}, \
+    {     31, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
     {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
     {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
     {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
     {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
     {     67,10}, {     39, 9}, {     79,10}, {     55,11}, \
     {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31, 9}, {    255,10}, {    135,11}, {     79,10}, \
-    {    159,11}, {     95,10}, {    191, 9}, {    383,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    303, 9}, {    607,11}, {    159,10}, {    319, 9}, \
-    {    639,12}, {     95,10}, {    383, 9}, {    767,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    607,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    335,10}, \
-    {    671,11}, {    351,10}, {    703,11}, {    367,10}, \
-    {    735,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,12}, {    223,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    735,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
-    {    447,11}, {    895,12}, {    479,14}, {    127,13}, \
-    {    255,12}, {    735,13}, {    383,12}, {    831,13}, \
-    {    447,12}, {    927,14}, {    255,13}, {    511,12}, \
-    {   1023,13}, {    575,12}, {   1151,13}, {    639,12}, \
-    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
-    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
-    {    895,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1151,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1407,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1663,14}, {    895,13}, {   1791,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,15}, \
-    {    767,14}, {   1535,13}, {   3199,14}, {   1663,13}, \
-    {   3455,14}, {   1791,16}, {    511,15}, {   1023,14}, \
-    {   2431,13}, {   4863,15}, {   1279,14}, {   2943,13}, \
-    {   5887,15}, {   1535,14}, {   3455,15}, {   1791,14}, \
-    {   3839,16}, {   1023,15}, {   2047,14}, {   4223,15}, \
-    {   2303,14}, {   4863,15}, {   2559,14}, {   5119,15}, \
-    {   2815,14}, {   5887,16}, {   1535,15}, {   3327,14}, \
-    {   6911,15}, {   3839,17}, {   1023,16}, {   2047,15}, \
-    {   4863,16}, {   2559,15}, {   5887,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 191
-#define SQR_FFT_THRESHOLD                 3776
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    159,11}, {     95,12}, {     63,11}, {    127, 9}, \
+    {    511, 8}, {   1023, 9}, {    543,10}, {    287, 9}, \
+    {    607,11}, {    159,10}, {    319, 9}, {    671,12}, \
+    {     95,10}, {    383, 9}, {    767,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 66
+#define SQR_FFT_THRESHOLD                 3712
 
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  27
-#define MULLO_MUL_N_THRESHOLD             8648
-#define SQRLO_BASECASE_THRESHOLD             9
-#define SQRLO_DC_THRESHOLD                 172
-#define SQRLO_SQR_THRESHOLD               7487
-
-#define DC_DIV_QR_THRESHOLD                 56
-#define DC_DIVAPPR_Q_THRESHOLD             191
-#define DC_BDIV_QR_THRESHOLD                86
-#define DC_BDIV_Q_THRESHOLD                188
-
-#define INV_MULMOD_BNM1_THRESHOLD           70
-#define INV_NEWTON_THRESHOLD               179
-#define INV_APPR_THRESHOLD                 181
+#define MULLO_DC_THRESHOLD                  29
+#define MULLO_MUL_N_THRESHOLD            10950
+#define SQRLO_BASECASE_THRESHOLD             8
+#define SQRLO_DC_THRESHOLD                 177
+#define SQRLO_SQR_THRESHOLD               7269
 
-#define BINV_NEWTON_THRESHOLD              309
-#define REDC_1_TO_REDC_2_THRESHOLD          33
-#define REDC_2_TO_REDC_N_THRESHOLD          78
+#define DC_DIV_QR_THRESHOLD                 58
+#define DC_DIVAPPR_Q_THRESHOLD             195
+#define DC_BDIV_QR_THRESHOLD               101
+#define DC_BDIV_Q_THRESHOLD                204
 
-#define MU_DIV_QR_THRESHOLD               1718
-#define MU_DIVAPPR_Q_THRESHOLD            1718
-#define MUPI_DIV_QR_THRESHOLD               79
-#define MU_BDIV_QR_THRESHOLD              1652
-#define MU_BDIV_Q_THRESHOLD               1718
+#define INV_MULMOD_BNM1_THRESHOLD           58
+#define INV_NEWTON_THRESHOLD               183
+#define INV_APPR_THRESHOLD                 179
 
-#define POWM_SEC_TABLE  1,9,191,779,839
+#define BINV_NEWTON_THRESHOLD              375
+#define REDC_1_TO_REDC_2_THRESHOLD          32
+#define REDC_2_TO_REDC_N_THRESHOLD          92
 
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        20
-#define SET_STR_DC_THRESHOLD               711
-#define SET_STR_PRECOMPUTE_THRESHOLD      1664
+#define MU_DIV_QR_THRESHOLD               1589
+#define MU_DIVAPPR_Q_THRESHOLD            1589
+#define MUPI_DIV_QR_THRESHOLD               82
+#define MU_BDIV_QR_THRESHOLD              1597
+#define MU_BDIV_Q_THRESHOLD               1747
 
-#define FAC_DSC_THRESHOLD                  810
-#define FAC_ODD_THRESHOLD                   23
+#define POWM_SEC_TABLE  1,16,194,579,779,2387
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        19
+#define SET_STR_DC_THRESHOLD              1055
+#define SET_STR_PRECOMPUTE_THRESHOLD      2170
+
+#define FAC_DSC_THRESHOLD                  668
+#define FAC_ODD_THRESHOLD                   44
 
 #define MATRIX22_STRASSEN_THRESHOLD         19
 #define HGCD_THRESHOLD                      63
diff -r 5825c659d76a -r f7e1104aaaa5 mpn/x86_64/skylake/gmp-mparam.h
--- a/mpn/x86_64/skylake/gmp-mparam.h	Tue Jan 03 20:05:28 2017 +0100


More information about the gmp-commit mailing list