[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Oct 2 20:02:28 UTC 2019
details: /var/hg/gmp/rev/102925fbb3d4
changeset: 17938:102925fbb3d4
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Oct 02 21:06:48 2019 +0200
description:
New tuneup files.
details: /var/hg/gmp/rev/586ac2b14673
changeset: 17939:586ac2b14673
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Oct 02 21:09:56 2019 +0200
description:
Make more path distinctions for the benefit of gmp-mparam.h.
details: /var/hg/gmp/rev/4a44884aaa33
changeset: 17940:4a44884aaa33
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Oct 02 21:10:17 2019 +0200
description:
ChangeLog
details: /var/hg/gmp/rev/95074fea4d54
changeset: 17941:95074fea4d54
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Oct 02 22:02:21 2019 +0200
description:
Trivial merge.
diffstat:
ChangeLog | 5 +
configure.ac | 14 +-
mpn/arm64/cora57/gmp-mparam.h | 232 ++++++++++++++++---------------
mpn/powerpc64/mode64/p7/gmp-mparam.h | 163 +++++++++++----------
mpn/powerpc64/mode64/p8/gmp-mparam.h | 161 ++++++++++----------
mpn/powerpc64/mode64/p9/gmp-mparam.h | 172 +++++++++++-----------
mpn/sparc64/ultrasparct45/gmp-mparam.h | 173 +++++++++++++++++++++++
mpn/x86/atom/gmp-mparam.h | 184 ++++++++++--------------
mpn/x86/bt1/gmp-mparam.h | 6 +-
mpn/x86/bt2/gmp-mparam.h | 171 +++++++++++++++++++++++
mpn/x86/core2/gmp-mparam.h | 194 ++++++++++++-------------
mpn/x86/coreihwl/gmp-mparam.h | 244 +++++++++++++-------------------
mpn/x86/goldmont/gmp-mparam.h | 174 +++++++++++++++++++++++
mpn/x86/silvermont/gmp-mparam.h | 175 +++++++++++++++++++++++
mpn/x86_64/core2/gmp-mparam.h | 2 +-
tests/mpn/t-gcd_11.c | 2 +-
tests/mpn/t-gcd_22.c | 2 +-
tests/mpn/t-gcdext_1.c | 6 +-
18 files changed, 1358 insertions(+), 722 deletions(-)
diffs (truncated from 2618 to 300 lines):
diff -r 9ba25d46609f -r 95074fea4d54 ChangeLog
--- a/ChangeLog Wed Oct 02 00:10:49 2019 +0200
+++ b/ChangeLog Wed Oct 02 22:02:21 2019 +0200
@@ -1,3 +1,8 @@
+2019-10-02 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac: Make more path distinctions for the benefit of
+ gmp-mparam.h.
+
2019-10-01 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]: Fix
diff -r 9ba25d46609f -r 95074fea4d54 configure.ac
--- a/configure.ac Wed Oct 02 00:10:49 2019 +0200
+++ b/configure.ac Wed Oct 02 22:02:21 2019 +0200
@@ -1593,8 +1593,10 @@
path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
[ultrasparct[12]])
path_64="sparc64/ultrasparct1 sparc64" ;;
- [ultrasparct[345]])
+ [ultrasparct3])
path_64="sparc64/ultrasparct3 sparc64" ;;
+ [ultrasparct[45]])
+ path_64="sparc64/ultrasparct45 sparc64/ultrasparct3 sparc64" ;;
*)
path_64="sparc64"
esac
@@ -1909,14 +1911,14 @@
coreihwl | coreihwlnoavx | haswell | haswellnoavx)
gcc_cflags_cpu="-mtune=haswell -mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=haswell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+ path="x86/coreihwl x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
path_64="x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
x86_have_mulx=yes
;;
coreibwl | coreibwlnoavx | broadwell | broadwellnoavx)
gcc_cflags_cpu="-mtune=broadwell -mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=broadwell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+ path="x86/coreihwl x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
path_64="x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
# extra_functions_64="missing" # enable for bmi2/adx simulation
x86_have_mulx=yes
@@ -1925,7 +1927,7 @@
gcc_cflags_cpu="-mtune=skylake -mtune=broadwell -mtune=corei7 -mtune=core2 -mtune=k8"
# Don't pass -march=skylake for now as then some compilers emit AVX512.
gcc_cflags_arch="-march=broadwell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+ path="x86/coreihwl x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
path_64="x86_64/skylake x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
# extra_functions_64="missing" # enable for bmi2/adx simulation
x86_have_mulx=yes
@@ -1939,13 +1941,13 @@
silvermont) # out-of-order pipeline atom
gcc_cflags_cpu="-mtune=slm -mtune=atom -mtune=pentium3"
gcc_cflags_arch="-march=slm -march=atom -march=pentium3"
- path="x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
+ path="x86/silvermont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
path_64="x86_64/silvermont x86_64/atom x86_64"
;;
goldmont) # out-of-order pipeline atom
gcc_cflags_cpu="-mtune=slm -mtune=atom -mtune=pentium3"
gcc_cflags_arch="-march=slm -march=atom -march=pentium3"
- path="x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
+ path="x86/goldmont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
path_64="x86_64/goldmont x86_64/silvermont x86_64/atom x86_64"
;;
nano)
diff -r 9ba25d46609f -r 95074fea4d54 mpn/arm64/cora57/gmp-mparam.h
--- a/mpn/arm64/cora57/gmp-mparam.h Wed Oct 02 00:10:49 2019 +0200
+++ b/mpn/arm64/cora57/gmp-mparam.h Wed Oct 02 22:02:21 2019 +0200
@@ -1,6 +1,6 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* gmp-mparam.h -- Compiler/machine parameter header file for a57, a72-a75.
-Copyright 2017 Free Software Foundation, Inc.
+Copyright 2019 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -31,147 +31,157 @@
#define GMP_LIMB_BITS 64
#define GMP_LIMB_BYTES 8
-/* 2000 MHz Cortex-A57 */
+/* 1800 MHz Cortex-A72 */
/* FFT tuning limit = 0.5 M */
-/* Generated by tuneup.c, 2017-02-23, gcc 4.8 */
+/* Generated by tuneup.c, 2019-10-02, gcc 7.4 */
-#define DIVREM_1_NORM_THRESHOLD 0 /* always */
-#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1_1P_METHOD 2
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 26
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 1 /* 2.21% faster than 2 */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 42
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
#define USE_PREINV_DIVREM_1 1
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 2
-#define DIV_QR_1_UNNORM_THRESHOLD 2
-#define DIV_QR_2_PI2_THRESHOLD 13
+#define DIV_QR_1N_PI1_METHOD 1 /* 34.95% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 5
+#define DIV_QR_1_UNNORM_THRESHOLD 5
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 37
+#define BMOD_1_TO_MOD_1_THRESHOLD 33
#define DIV_1_VS_MUL_1_PERCENT 168
-#define MUL_TOOM22_THRESHOLD 12
-#define MUL_TOOM33_THRESHOLD 49
-#define MUL_TOOM44_THRESHOLD 118
-#define MUL_TOOM6H_THRESHOLD 157
-#define MUL_TOOM8H_THRESHOLD 236
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 41
+#define MUL_TOOM44_THRESHOLD 99
+#define MUL_TOOM6H_THRESHOLD 142
+#define MUL_TOOM8H_THRESHOLD 199
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 77
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 77
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 64
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 63
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 55
#define SQR_BASECASE_THRESHOLD 0 /* always */
-#define SQR_TOOM2_THRESHOLD 16
-#define SQR_TOOM3_THRESHOLD 57
-#define SQR_TOOM4_THRESHOLD 160
-#define SQR_TOOM6_THRESHOLD 206
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 65
+#define SQR_TOOM4_THRESHOLD 166
+#define SQR_TOOM6_THRESHOLD 222
#define SQR_TOOM8_THRESHOLD 309
#define MULMID_TOOM42_THRESHOLD 22
-#define MULMOD_BNM1_THRESHOLD 9
-#define SQRMOD_BNM1_THRESHOLD 11
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 12
-#define MUL_FFT_MODF_THRESHOLD 284 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 276 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 284, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { { 276, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \
{ 17, 7}, { 9, 6}, { 19, 7}, { 13, 8}, \
- { 7, 7}, { 17, 8}, { 9, 7}, { 19, 8}, \
+ { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
{ 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
- { 19, 9}, { 11, 8}, { 25, 9}, { 15, 8}, \
- { 31, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \
- { 47,10}, { 15, 9}, { 39,10}, { 23, 9}, \
- { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
- { 39, 9}, { 79,10}, { 47, 9}, { 95,10}, \
- { 55,11}, { 31,10}, { 71, 9}, { 143, 8}, \
- { 287,10}, { 79,11}, { 47,10}, { 95,12}, \
+ { 21, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 23, 8}, { 49, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 51,11}, { 15,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 99,10}, { 55,11}, { 31,10}, \
+ { 63, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
+ { 287,10}, { 79, 9}, { 159, 8}, { 319,11}, \
+ { 47,10}, { 95, 9}, { 191,10}, { 103,12}, \
{ 31,11}, { 63, 9}, { 255, 8}, { 511,10}, \
- { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
- { 319, 8}, { 639,10}, { 175, 9}, { 351,11}, \
+ { 143, 8}, { 575,11}, { 79,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351, 8}, { 703,11}, \
{ 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \
- { 415,12}, { 63,10}, { 255, 9}, { 511,11}, \
- { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \
- { 319, 9}, { 639,11}, { 175,10}, { 351, 9}, \
- { 703,12}, { 95,11}, { 191,10}, { 383,11}, \
- { 207,10}, { 415, 9}, { 831,11}, { 223,13}, \
- { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
- { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
- {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 91
-#define MUL_FFT_THRESHOLD 2688
+ { 415,10}, { 223, 9}, { 447, 8}, { 895,12}, \
+ { 63,10}, { 255, 9}, { 511, 8}, { 1023, 9}, \
+ { 543,11}, { 143,10}, { 287, 9}, { 575, 8}, \
+ { 1151,10}, { 319, 9}, { 639,11}, { 175,10}, \
+ { 351, 9}, { 703,12}, { 95,10}, { 383, 9}, \
+ { 767,11}, { 207, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 109
+#define MUL_FFT_THRESHOLD 3200
-#define SQR_FFT_MODF_THRESHOLD 272 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 244 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 272, 5}, { 15, 6}, { 15, 7}, { 8, 6}, \
- { 17, 7}, { 9, 6}, { 19, 7}, { 17, 8}, \
- { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
- { 19, 9}, { 11, 8}, { 25, 9}, { 15, 8}, \
- { 31, 9}, { 23,10}, { 15, 9}, { 35,10}, \
- { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
- { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 71, 9}, { 143, 8}, \
- { 287,10}, { 79,11}, { 47,10}, { 95,12}, \
- { 31,11}, { 63, 9}, { 255, 8}, { 511,10}, \
- { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
- { 319, 8}, { 639,10}, { 175, 9}, { 351, 8}, \
- { 703,11}, { 95,10}, { 191, 9}, { 383,10}, \
- { 207, 9}, { 415,12}, { 63,10}, { 255, 9}, \
- { 511,11}, { 143,10}, { 287, 9}, { 575,11}, \
- { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \
- { 351, 9}, { 703,12}, { 95,11}, { 191,10}, \
- { 383,11}, { 207,10}, { 415, 9}, { 831,13}, \
- { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
- { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
- {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 83
-#define SQR_FFT_THRESHOLD 2240
+ { { 244, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
+ { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 79,10}, { 47, 9}, { 99,10}, \
+ { 55,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255,10}, { 71, 8}, { 287, 7}, { 575, 9}, \
+ { 159, 8}, { 319,11}, { 47,10}, { 95, 9}, \
+ { 191, 8}, { 383,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 143, 9}, { 287, 8}, \
+ { 575,11}, { 79,10}, { 159, 9}, { 319, 8}, \
+ { 639, 9}, { 351,10}, { 191, 9}, { 383,10}, \
+ { 207, 9}, { 415,10}, { 239,12}, { 63,10}, \
+ { 255, 9}, { 511,10}, { 271,11}, { 143,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 351, 9}, { 703,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 97
+#define SQR_FFT_THRESHOLD 2496
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 37
-#define MULLO_MUL_N_THRESHOLD 5240
+#define MULLO_DC_THRESHOLD 39
+#define MULLO_MUL_N_THRESHOLD 6253
#define SQRLO_BASECASE_THRESHOLD 4
-#define SQRLO_DC_THRESHOLD 45
-#define SQRLO_SQR_THRESHOLD 4392
+#define SQRLO_DC_THRESHOLD 56
+#define SQRLO_SQR_THRESHOLD 4940
-#define DC_DIV_QR_THRESHOLD 31
-#define DC_DIVAPPR_Q_THRESHOLD 125
-#define DC_BDIV_QR_THRESHOLD 32
-#define DC_BDIV_Q_THRESHOLD 52
+#define DC_DIV_QR_THRESHOLD 41
+#define DC_DIVAPPR_Q_THRESHOLD 136
+#define DC_BDIV_QR_THRESHOLD 39
+#define DC_BDIV_Q_THRESHOLD 89
-#define INV_MULMOD_BNM1_THRESHOLD 23
-#define INV_NEWTON_THRESHOLD 153
-#define INV_APPR_THRESHOLD 133
+#define INV_MULMOD_BNM1_THRESHOLD 22
+#define INV_NEWTON_THRESHOLD 154
More information about the gmp-commit mailing list