[Gmp-commit] /var/hg/gmp: New tuneup files.

mercurial at gmplib.org mercurial at gmplib.org
Tue Oct 1 22:10:51 UTC 2019


details:   /var/hg/gmp/rev/9ba25d46609f
changeset: 17935:9ba25d46609f
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Oct 02 00:10:49 2019 +0200
description:
New tuneup files.

diffstat:

 mpn/ia64/gmp-mparam.h              |  138 +++++++++--------
 mpn/x86/bd1/gmp-mparam.h           |  230 ++++++++++++------------------
 mpn/x86/bd2/gmp-mparam.h           |  223 +++++++++++------------------
 mpn/x86/bt1/gmp-mparam.h           |  232 +++++++++++++-----------------
 mpn/x86/coreinhm/gmp-mparam.h      |  259 +++++++++++++---------------------
 mpn/x86/coreisbr/gmp-mparam.h      |  230 ++++++++++++-----------------
 mpn/x86/k10/gmp-mparam.h           |  232 ++++++++++++------------------
 mpn/x86/k7/gmp-mparam.h            |  276 +++++++++++++-----------------------
 mpn/x86/k8/gmp-mparam.h            |  234 +++++++++++++-----------------
 mpn/x86/pentium4/sse2/gmp-mparam.h |  253 ++++++++++++++-------------------
 10 files changed, 945 insertions(+), 1362 deletions(-)

diffs (truncated from 2831 to 300 lines):

diff -r 2ced5e1c4a4e -r 9ba25d46609f mpn/ia64/gmp-mparam.h
--- a/mpn/ia64/gmp-mparam.h	Tue Oct 01 23:05:40 2019 +0200
+++ b/mpn/ia64/gmp-mparam.h	Wed Oct 02 00:10:49 2019 +0200
@@ -1,6 +1,6 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2017 Free Software Foundation, Inc.
+Copyright 2019 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -33,9 +33,9 @@
 
 /* 900MHz Itanium2 (olympic.gmplib.org) */
 /* FFT tuning limit = 0.5 M */
-/* Generated by tuneup.c, 2017-02-19, gcc 4.2 */
+/* Generated by tuneup.c, 2019-09-30, gcc 4.2 */
 
-#define MOD_1_1P_METHOD                      2
+#define MOD_1_1P_METHOD                      2  /* 17.08% faster than 1 */
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
 #define MOD_1N_TO_MOD_1_1_THRESHOLD          8
@@ -44,95 +44,98 @@
 #define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
 #define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
+#define DIV_QR_1N_PI1_METHOD                 1  /* 1.35% faster than 2 */
 #define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
 #define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD              12
+#define DIV_QR_2_PI2_THRESHOLD              10
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
 #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
 
 #define DIV_1_VS_MUL_1_PERCENT             316
 
-#define MUL_TOOM22_THRESHOLD                48
+#define MUL_TOOM22_THRESHOLD                47
 #define MUL_TOOM33_THRESHOLD                90
 #define MUL_TOOM44_THRESHOLD               216
 #define MUL_TOOM6H_THRESHOLD               327
 #define MUL_TOOM8H_THRESHOLD               454
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
 #define MUL_TOOM32_TO_TOOM53_THRESHOLD     153
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     148
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     137
 #define MUL_TOOM42_TO_TOOM63_THRESHOLD     153
 #define MUL_TOOM43_TO_TOOM54_THRESHOLD     226
 
 #define SQR_BASECASE_THRESHOLD              11
 #define SQR_TOOM2_THRESHOLD                 98
-#define SQR_TOOM3_THRESHOLD                140
+#define SQR_TOOM3_THRESHOLD                135
 #define SQR_TOOM4_THRESHOLD                260
 #define SQR_TOOM6_THRESHOLD                354
 #define SQR_TOOM8_THRESHOLD                502
 
 #define MULMID_TOOM42_THRESHOLD             99
 
-#define MULMOD_BNM1_THRESHOLD               25
-#define SQRMOD_BNM1_THRESHOLD               33
+#define MULMOD_BNM1_THRESHOLD               23
+#define SQRMOD_BNM1_THRESHOLD               31
 
 #define MUL_FFT_MODF_THRESHOLD             840  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    840, 5}, {     31, 6}, {     16, 5}, {     33, 6}, \
-    {     17, 5}, {     35, 6}, {     37, 7}, {     19, 6}, \
-    {     43, 7}, {     44, 8}, {     23, 7}, {     47, 8}, \
-    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
-    {     57, 9}, {     31, 8}, {     65, 9}, {     35, 8}, \
-    {     71, 9}, {     43,10}, {     23, 9}, {     59,10}, \
-    {     31, 9}, {     71,10}, {     39, 9}, {     83,10}, \
-    {     47, 9}, {     99,10}, {     55, 9}, {    111,11}, \
-    {     31,10}, {     63, 9}, {    127,10}, {     87,11}, \
-    {     47,10}, {    111,11}, {     63,10}, {    143,11}, \
-    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
-    {    111,12}, {     63,11}, {    175,12}, {     95,11}, \
-    {    207,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 60
-#define MUL_FFT_THRESHOLD                 8832
-
-#define SQR_FFT_MODF_THRESHOLD             765  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    765, 5}, {     35, 6}, {     43, 7}, {     43, 8}, \
-    {     43, 9}, {     23, 8}, {     49, 9}, {     27, 8}, \
-    {     57, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
-    {     23, 9}, {     55,10}, {     31, 9}, {     71,10}, \
+  { {    840, 5}, {     30, 6}, {     16, 5}, {     33, 6}, \
+    {     17, 5}, {     35, 6}, {     35, 7}, {     18, 6}, \
+    {     37, 7}, {     19, 6}, {     41, 7}, {     37, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     47, 8}, \
+    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     57, 9}, {     31, 8}, \
+    {     65, 9}, {     35, 8}, {     71, 9}, {     43,10}, \
+    {     23, 9}, {     59,10}, {     31, 9}, {     71,10}, \
     {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
-    {     55,11}, {     31,10}, {     87,11}, {     47,10}, \
-    {    111,12}, {     31,11}, {     63,10}, {    143,11}, \
-    {     79,10}, {    167,11}, {    111,12}, {     63,11}, \
+    {     55,11}, {     31,10}, {     71, 9}, {    143,10}, \
+    {     87,11}, {     47,10}, {    103,12}, {     31,11}, \
+    {     63,10}, {    151,11}, {     79,10}, {    175,11}, \
+    {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
     {    159,12}, {     95,11}, {    207,13}, {   8192,14}, \
     {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
     { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
     {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 46
-#define SQR_FFT_THRESHOLD                 6784
-
-#define MULLO_BASECASE_THRESHOLD            47
-#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
-#define MULLO_MUL_N_THRESHOLD            17561
-#define SQRLO_BASECASE_THRESHOLD             0  /* always */
-#define SQRLO_DC_THRESHOLD                 152
-#define SQRLO_SQR_THRESHOLD              13555
+#define MUL_FFT_TABLE3_SIZE 66
+#define MUL_FFT_THRESHOLD                 8576
 
-#define DC_DIV_QR_THRESHOLD                 76
-#define DC_DIVAPPR_Q_THRESHOLD             260
-#define DC_BDIV_QR_THRESHOLD               116
-#define DC_BDIV_Q_THRESHOLD                315
+#define SQR_FFT_MODF_THRESHOLD             758  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    758, 5}, {     36, 6}, {     37, 7}, {     19, 6}, \
+    {     42, 7}, {     43, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     49, 9}, {     27, 8}, \
+    {     57, 9}, {     43,10}, {     23, 9}, {     55,10}, \
+    {     31, 9}, {     71,10}, {     39, 9}, {     83,10}, \
+    {     47, 9}, {     99,10}, {     55,11}, {     31,10}, \
+    {     87,11}, {     47,10}, {    111,12}, {     31,11}, \
+    {     63,10}, {    135,11}, {     79,10}, {    167,11}, \
+    {     95,10}, {    199,11}, {    111,12}, {     63,11}, \
+    {    159,12}, {     95,11}, {    191,10}, {    383,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 51
+#define SQR_FFT_THRESHOLD                 6272
 
-#define INV_MULMOD_BNM1_THRESHOLD           94
-#define INV_NEWTON_THRESHOLD                17
-#define INV_APPR_THRESHOLD                  21
+#define MULLO_BASECASE_THRESHOLD             9
+#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
+#define MULLO_MUL_N_THRESHOLD            17050
+#define SQRLO_BASECASE_THRESHOLD             0  /* always */
+#define SQRLO_DC_THRESHOLD                 134
+#define SQRLO_SQR_THRESHOLD              12322
 
-#define BINV_NEWTON_THRESHOLD              296
+#define DC_DIV_QR_THRESHOLD                 75
+#define DC_DIVAPPR_Q_THRESHOLD             264
+#define DC_BDIV_QR_THRESHOLD               116
+#define DC_BDIV_Q_THRESHOLD                309
+
+#define INV_MULMOD_BNM1_THRESHOLD           92
+#define INV_NEWTON_THRESHOLD                15
+#define INV_APPR_THRESHOLD                  17
+
+#define BINV_NEWTON_THRESHOLD              280
 #define REDC_1_TO_REDC_2_THRESHOLD           0  /* always */
-#define REDC_2_TO_REDC_N_THRESHOLD         174
+#define REDC_2_TO_REDC_N_THRESHOLD         173
 
 #define MU_DIV_QR_THRESHOLD               1470
 #define MU_DIVAPPR_Q_THRESHOLD            1164
@@ -140,20 +143,21 @@
 #define MU_BDIV_QR_THRESHOLD              1685
 #define MU_BDIV_Q_THRESHOLD               1787
 
-#define POWM_SEC_TABLE  1,22,175,1896
+#define POWM_SEC_TABLE  3,22,129,1925
 
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        33
-#define SET_STR_DC_THRESHOLD              1418
-#define SET_STR_PRECOMPUTE_THRESHOLD      4091
+#define GET_STR_DC_THRESHOLD                 8
+#define GET_STR_PRECOMPUTE_THRESHOLD        37
+#define SET_STR_DC_THRESHOLD              1391
+#define SET_STR_PRECOMPUTE_THRESHOLD      3933
 
-#define FAC_DSC_THRESHOLD                 1547
+#define FAC_DSC_THRESHOLD                 1035
 #define FAC_ODD_THRESHOLD                   23
 
 #define MATRIX22_STRASSEN_THRESHOLD         23
-#define HGCD_THRESHOLD                     141
-#define HGCD_APPR_THRESHOLD                147
+#define HGCD2_DIV1_METHOD                    3  /* 13.72% faster than 1 */
+#define HGCD_THRESHOLD                     144
+#define HGCD_APPR_THRESHOLD                175
 #define HGCD_REDUCE_THRESHOLD             4455
-#define GCD_DC_THRESHOLD                   710
-#define GCDEXT_DC_THRESHOLD                505
-#define JACOBI_BASE_METHOD                   2
+#define GCD_DC_THRESHOLD                   501
+#define GCDEXT_DC_THRESHOLD                440
+#define JACOBI_BASE_METHOD                   2  /* 0.66% faster than 4 */
diff -r 2ced5e1c4a4e -r 9ba25d46609f mpn/x86/bd1/gmp-mparam.h
--- a/mpn/x86/bd1/gmp-mparam.h	Tue Oct 01 23:05:40 2019 +0200
+++ b/mpn/x86/bd1/gmp-mparam.h	Wed Oct 02 00:10:49 2019 +0200
@@ -1,7 +1,6 @@
-/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD bd1 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 2019 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -33,178 +32,135 @@
 #define GMP_LIMB_BYTES 4
 
 /* 3600 MHz Bulldozer Zambezi */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-09-30, gcc 8.3 */
 
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
 #define MOD_1N_TO_MOD_1_1_THRESHOLD          7
 #define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        15
 #define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
 #define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              3
+#define DIV_QR_1N_PI1_METHOD                 1  /* 60.28% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD              5
 #define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
 #define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
 #define BMOD_1_TO_MOD_1_THRESHOLD           27
 
-#define DIV_1_VS_MUL_1_PERCENT             256
+#define DIV_1_VS_MUL_1_PERCENT             244
 
 #define MUL_TOOM22_THRESHOLD                32
-#define MUL_TOOM33_THRESHOLD                65
+#define MUL_TOOM33_THRESHOLD                89
 #define MUL_TOOM44_THRESHOLD               154
 #define MUL_TOOM6H_THRESHOLD               230
-#define MUL_TOOM8H_THRESHOLD               354
+#define MUL_TOOM8H_THRESHOLD               357
 
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     110
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     101
 #define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     130
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     136
 
 #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 48
-#define SQR_TOOM3_THRESHOLD                 87
-#define SQR_TOOM4_THRESHOLD                204
-#define SQR_TOOM6_THRESHOLD                315
-#define SQR_TOOM8_THRESHOLD                430
+#define SQR_TOOM2_THRESHOLD                 46
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                214
+#define SQR_TOOM6_THRESHOLD                300
+#define SQR_TOOM8_THRESHOLD                454
 
-#define MULMID_TOOM42_THRESHOLD             48
+#define MULMID_TOOM42_THRESHOLD             50
 
-#define MULMOD_BNM1_THRESHOLD               21
+#define MULMOD_BNM1_THRESHOLD               22
 #define SQRMOD_BNM1_THRESHOLD               23
 
-#define MUL_FFT_MODF_THRESHOLD             840  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             636  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    840, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
-    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     31, 6}, \
-    {     63, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     51, 8}, {     27, 7}, {     55, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 7}, {     79, 9}, \
+  { {    636, 5}, {     25, 6}, {     13, 5}, {     29, 6}, \


More information about the gmp-commit mailing list