[Gmp-commit] /var/hg/gmp: New tuneup files.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Oct 1 22:10:51 UTC 2019
details: /var/hg/gmp/rev/9ba25d46609f
changeset: 17935:9ba25d46609f
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Oct 02 00:10:49 2019 +0200
description:
New tuneup files.
diffstat:
mpn/ia64/gmp-mparam.h | 138 +++++++++--------
mpn/x86/bd1/gmp-mparam.h | 230 ++++++++++++------------------
mpn/x86/bd2/gmp-mparam.h | 223 +++++++++++------------------
mpn/x86/bt1/gmp-mparam.h | 232 +++++++++++++-----------------
mpn/x86/coreinhm/gmp-mparam.h | 259 +++++++++++++---------------------
mpn/x86/coreisbr/gmp-mparam.h | 230 ++++++++++++-----------------
mpn/x86/k10/gmp-mparam.h | 232 ++++++++++++------------------
mpn/x86/k7/gmp-mparam.h | 276 +++++++++++++-----------------------
mpn/x86/k8/gmp-mparam.h | 234 +++++++++++++-----------------
mpn/x86/pentium4/sse2/gmp-mparam.h | 253 ++++++++++++++-------------------
10 files changed, 945 insertions(+), 1362 deletions(-)
diffs (truncated from 2831 to 300 lines):
diff -r 2ced5e1c4a4e -r 9ba25d46609f mpn/ia64/gmp-mparam.h
--- a/mpn/ia64/gmp-mparam.h Tue Oct 01 23:05:40 2019 +0200
+++ b/mpn/ia64/gmp-mparam.h Wed Oct 02 00:10:49 2019 +0200
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 2017 Free Software Foundation, Inc.
+Copyright 2019 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -33,9 +33,9 @@
/* 900MHz Itanium2 (olympic.gmplib.org) */
/* FFT tuning limit = 0.5 M */
-/* Generated by tuneup.c, 2017-02-19, gcc 4.2 */
+/* Generated by tuneup.c, 2019-09-30, gcc 4.2 */
-#define MOD_1_1P_METHOD 2
+#define MOD_1_1P_METHOD 2 /* 17.08% faster than 1 */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
@@ -44,95 +44,98 @@
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1N_PI1_METHOD 1 /* 1.35% faster than 2 */
#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD 12
+#define DIV_QR_2_PI2_THRESHOLD 10
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIV_1_VS_MUL_1_PERCENT 316
-#define MUL_TOOM22_THRESHOLD 48
+#define MUL_TOOM22_THRESHOLD 47
#define MUL_TOOM33_THRESHOLD 90
#define MUL_TOOM44_THRESHOLD 216
#define MUL_TOOM6H_THRESHOLD 327
#define MUL_TOOM8H_THRESHOLD 454
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 153
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 148
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 137
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 153
#define MUL_TOOM43_TO_TOOM54_THRESHOLD 226
#define SQR_BASECASE_THRESHOLD 11
#define SQR_TOOM2_THRESHOLD 98
-#define SQR_TOOM3_THRESHOLD 140
+#define SQR_TOOM3_THRESHOLD 135
#define SQR_TOOM4_THRESHOLD 260
#define SQR_TOOM6_THRESHOLD 354
#define SQR_TOOM8_THRESHOLD 502
#define MULMID_TOOM42_THRESHOLD 99
-#define MULMOD_BNM1_THRESHOLD 25
-#define SQRMOD_BNM1_THRESHOLD 33
+#define MULMOD_BNM1_THRESHOLD 23
+#define SQRMOD_BNM1_THRESHOLD 31
#define MUL_FFT_MODF_THRESHOLD 840 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 840, 5}, { 31, 6}, { 16, 5}, { 33, 6}, \
- { 17, 5}, { 35, 6}, { 37, 7}, { 19, 6}, \
- { 43, 7}, { 44, 8}, { 23, 7}, { 47, 8}, \
- { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
- { 57, 9}, { 31, 8}, { 65, 9}, { 35, 8}, \
- { 71, 9}, { 43,10}, { 23, 9}, { 59,10}, \
- { 31, 9}, { 71,10}, { 39, 9}, { 83,10}, \
- { 47, 9}, { 99,10}, { 55, 9}, { 111,11}, \
- { 31,10}, { 63, 9}, { 127,10}, { 87,11}, \
- { 47,10}, { 111,11}, { 63,10}, { 143,11}, \
- { 79,10}, { 167,11}, { 95,10}, { 191,11}, \
- { 111,12}, { 63,11}, { 175,12}, { 95,11}, \
- { 207,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 60
-#define MUL_FFT_THRESHOLD 8832
-
-#define SQR_FFT_MODF_THRESHOLD 765 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 765, 5}, { 35, 6}, { 43, 7}, { 43, 8}, \
- { 43, 9}, { 23, 8}, { 49, 9}, { 27, 8}, \
- { 57, 9}, { 31, 8}, { 63, 9}, { 43,10}, \
- { 23, 9}, { 55,10}, { 31, 9}, { 71,10}, \
+ { { 840, 5}, { 30, 6}, { 16, 5}, { 33, 6}, \
+ { 17, 5}, { 35, 6}, { 35, 7}, { 18, 6}, \
+ { 37, 7}, { 19, 6}, { 41, 7}, { 37, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \
+ { 37, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
+ { 51, 9}, { 27, 8}, { 57, 9}, { 31, 8}, \
+ { 65, 9}, { 35, 8}, { 71, 9}, { 43,10}, \
+ { 23, 9}, { 59,10}, { 31, 9}, { 71,10}, \
{ 39, 9}, { 83,10}, { 47, 9}, { 99,10}, \
- { 55,11}, { 31,10}, { 87,11}, { 47,10}, \
- { 111,12}, { 31,11}, { 63,10}, { 143,11}, \
- { 79,10}, { 167,11}, { 111,12}, { 63,11}, \
+ { 55,11}, { 31,10}, { 71, 9}, { 143,10}, \
+ { 87,11}, { 47,10}, { 103,12}, { 31,11}, \
+ { 63,10}, { 151,11}, { 79,10}, { 175,11}, \
+ { 95,10}, { 191,11}, { 111,12}, { 63,11}, \
{ 159,12}, { 95,11}, { 207,13}, { 8192,14}, \
{ 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 46
-#define SQR_FFT_THRESHOLD 6784
-
-#define MULLO_BASECASE_THRESHOLD 47
-#define MULLO_DC_THRESHOLD 0 /* never mpn_mullo_basecase */
-#define MULLO_MUL_N_THRESHOLD 17561
-#define SQRLO_BASECASE_THRESHOLD 0 /* always */
-#define SQRLO_DC_THRESHOLD 152
-#define SQRLO_SQR_THRESHOLD 13555
+#define MUL_FFT_TABLE3_SIZE 66
+#define MUL_FFT_THRESHOLD 8576
-#define DC_DIV_QR_THRESHOLD 76
-#define DC_DIVAPPR_Q_THRESHOLD 260
-#define DC_BDIV_QR_THRESHOLD 116
-#define DC_BDIV_Q_THRESHOLD 315
+#define SQR_FFT_MODF_THRESHOLD 758 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 758, 5}, { 36, 6}, { 37, 7}, { 19, 6}, \
+ { 42, 7}, { 43, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 49, 9}, { 27, 8}, \
+ { 57, 9}, { 43,10}, { 23, 9}, { 55,10}, \
+ { 31, 9}, { 71,10}, { 39, 9}, { 83,10}, \
+ { 47, 9}, { 99,10}, { 55,11}, { 31,10}, \
+ { 87,11}, { 47,10}, { 111,12}, { 31,11}, \
+ { 63,10}, { 135,11}, { 79,10}, { 167,11}, \
+ { 95,10}, { 199,11}, { 111,12}, { 63,11}, \
+ { 159,12}, { 95,11}, { 191,10}, { 383,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 51
+#define SQR_FFT_THRESHOLD 6272
-#define INV_MULMOD_BNM1_THRESHOLD 94
-#define INV_NEWTON_THRESHOLD 17
-#define INV_APPR_THRESHOLD 21
+#define MULLO_BASECASE_THRESHOLD 9
+#define MULLO_DC_THRESHOLD 0 /* never mpn_mullo_basecase */
+#define MULLO_MUL_N_THRESHOLD 17050
+#define SQRLO_BASECASE_THRESHOLD 0 /* always */
+#define SQRLO_DC_THRESHOLD 134
+#define SQRLO_SQR_THRESHOLD 12322
-#define BINV_NEWTON_THRESHOLD 296
+#define DC_DIV_QR_THRESHOLD 75
+#define DC_DIVAPPR_Q_THRESHOLD 264
+#define DC_BDIV_QR_THRESHOLD 116
+#define DC_BDIV_Q_THRESHOLD 309
+
+#define INV_MULMOD_BNM1_THRESHOLD 92
+#define INV_NEWTON_THRESHOLD 15
+#define INV_APPR_THRESHOLD 17
+
+#define BINV_NEWTON_THRESHOLD 280
#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */
-#define REDC_2_TO_REDC_N_THRESHOLD 174
+#define REDC_2_TO_REDC_N_THRESHOLD 173
#define MU_DIV_QR_THRESHOLD 1470
#define MU_DIVAPPR_Q_THRESHOLD 1164
@@ -140,20 +143,21 @@
#define MU_BDIV_QR_THRESHOLD 1685
#define MU_BDIV_Q_THRESHOLD 1787
-#define POWM_SEC_TABLE 1,22,175,1896
+#define POWM_SEC_TABLE 3,22,129,1925
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 33
-#define SET_STR_DC_THRESHOLD 1418
-#define SET_STR_PRECOMPUTE_THRESHOLD 4091
+#define GET_STR_DC_THRESHOLD 8
+#define GET_STR_PRECOMPUTE_THRESHOLD 37
+#define SET_STR_DC_THRESHOLD 1391
+#define SET_STR_PRECOMPUTE_THRESHOLD 3933
-#define FAC_DSC_THRESHOLD 1547
+#define FAC_DSC_THRESHOLD 1035
#define FAC_ODD_THRESHOLD 23
#define MATRIX22_STRASSEN_THRESHOLD 23
-#define HGCD_THRESHOLD 141
-#define HGCD_APPR_THRESHOLD 147
+#define HGCD2_DIV1_METHOD 3 /* 13.72% faster than 1 */
+#define HGCD_THRESHOLD 144
+#define HGCD_APPR_THRESHOLD 175
#define HGCD_REDUCE_THRESHOLD 4455
-#define GCD_DC_THRESHOLD 710
-#define GCDEXT_DC_THRESHOLD 505
-#define JACOBI_BASE_METHOD 2
+#define GCD_DC_THRESHOLD 501
+#define GCDEXT_DC_THRESHOLD 440
+#define JACOBI_BASE_METHOD 2 /* 0.66% faster than 4 */
diff -r 2ced5e1c4a4e -r 9ba25d46609f mpn/x86/bd1/gmp-mparam.h
--- a/mpn/x86/bd1/gmp-mparam.h Tue Oct 01 23:05:40 2019 +0200
+++ b/mpn/x86/bd1/gmp-mparam.h Wed Oct 02 00:10:49 2019 +0200
@@ -1,7 +1,6 @@
-/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD bd1 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 2019 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -33,178 +32,135 @@
#define GMP_LIMB_BYTES 4
/* 3600 MHz Bulldozer Zambezi */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-09-30, gcc 8.3 */
#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 15
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 14
#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 3
+#define DIV_QR_1N_PI1_METHOD 1 /* 60.28% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 5
#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD 27
-#define DIV_1_VS_MUL_1_PERCENT 256
+#define DIV_1_VS_MUL_1_PERCENT 244
#define MUL_TOOM22_THRESHOLD 32
-#define MUL_TOOM33_THRESHOLD 65
+#define MUL_TOOM33_THRESHOLD 89
#define MUL_TOOM44_THRESHOLD 154
#define MUL_TOOM6H_THRESHOLD 230
-#define MUL_TOOM8H_THRESHOLD 354
+#define MUL_TOOM8H_THRESHOLD 357
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 101
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 136
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 48
-#define SQR_TOOM3_THRESHOLD 87
-#define SQR_TOOM4_THRESHOLD 204
-#define SQR_TOOM6_THRESHOLD 315
-#define SQR_TOOM8_THRESHOLD 430
+#define SQR_TOOM2_THRESHOLD 46
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 214
+#define SQR_TOOM6_THRESHOLD 300
+#define SQR_TOOM8_THRESHOLD 454
-#define MULMID_TOOM42_THRESHOLD 48
+#define MULMID_TOOM42_THRESHOLD 50
-#define MULMOD_BNM1_THRESHOLD 21
+#define MULMOD_BNM1_THRESHOLD 22
#define SQRMOD_BNM1_THRESHOLD 23
-#define MUL_FFT_MODF_THRESHOLD 840 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 636 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 840, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \
- { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 31, 6}, \
- { 63, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \
- { 31, 7}, { 63, 8}, { 39, 7}, { 79, 9}, \
+ { { 636, 5}, { 25, 6}, { 13, 5}, { 29, 6}, \
More information about the gmp-commit mailing list