[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org
Wed Oct 2 20:02:28 UTC 2019


details:   /var/hg/gmp/rev/102925fbb3d4
changeset: 17938:102925fbb3d4
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Oct 02 21:06:48 2019 +0200
description:
New tuneup files.

details:   /var/hg/gmp/rev/586ac2b14673
changeset: 17939:586ac2b14673
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Oct 02 21:09:56 2019 +0200
description:
Make more path distinctions for the benefit of gmp-mparam.h.

details:   /var/hg/gmp/rev/4a44884aaa33
changeset: 17940:4a44884aaa33
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Oct 02 21:10:17 2019 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/95074fea4d54
changeset: 17941:95074fea4d54
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Oct 02 22:02:21 2019 +0200
description:
Trivial merge.

diffstat:

 ChangeLog                              |    5 +
 configure.ac                           |   14 +-
 mpn/arm64/cora57/gmp-mparam.h          |  232 ++++++++++++++++---------------
 mpn/powerpc64/mode64/p7/gmp-mparam.h   |  163 +++++++++++----------
 mpn/powerpc64/mode64/p8/gmp-mparam.h   |  161 ++++++++++----------
 mpn/powerpc64/mode64/p9/gmp-mparam.h   |  172 +++++++++++-----------
 mpn/sparc64/ultrasparct45/gmp-mparam.h |  173 +++++++++++++++++++++++
 mpn/x86/atom/gmp-mparam.h              |  184 ++++++++++--------------
 mpn/x86/bt1/gmp-mparam.h               |    6 +-
 mpn/x86/bt2/gmp-mparam.h               |  171 +++++++++++++++++++++++
 mpn/x86/core2/gmp-mparam.h             |  194 ++++++++++++-------------
 mpn/x86/coreihwl/gmp-mparam.h          |  244 +++++++++++++-------------------
 mpn/x86/goldmont/gmp-mparam.h          |  174 +++++++++++++++++++++++
 mpn/x86/silvermont/gmp-mparam.h        |  175 +++++++++++++++++++++++
 mpn/x86_64/core2/gmp-mparam.h          |    2 +-
 tests/mpn/t-gcd_11.c                   |    2 +-
 tests/mpn/t-gcd_22.c                   |    2 +-
 tests/mpn/t-gcdext_1.c                 |    6 +-
 18 files changed, 1358 insertions(+), 722 deletions(-)

diffs (truncated from 2618 to 300 lines):

diff -r 9ba25d46609f -r 95074fea4d54 ChangeLog
--- a/ChangeLog	Wed Oct 02 00:10:49 2019 +0200
+++ b/ChangeLog	Wed Oct 02 22:02:21 2019 +0200
@@ -1,3 +1,8 @@
+2019-10-02  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Make more path distinctions for the benefit of
+	gmp-mparam.h.
+
 2019-10-01  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]: Fix
diff -r 9ba25d46609f -r 95074fea4d54 configure.ac
--- a/configure.ac	Wed Oct 02 00:10:49 2019 +0200
+++ b/configure.ac	Wed Oct 02 22:02:21 2019 +0200
@@ -1593,8 +1593,10 @@
 	    path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
 	  [ultrasparct[12]])
 	    path_64="sparc64/ultrasparct1 sparc64" ;;
-	  [ultrasparct[345]])
+	  [ultrasparct3])
 	    path_64="sparc64/ultrasparct3 sparc64" ;;
+	  [ultrasparct[45]])
+	    path_64="sparc64/ultrasparct45 sparc64/ultrasparct3 sparc64" ;;
 	  *)
 	    path_64="sparc64"
 	esac
@@ -1909,14 +1911,14 @@
       coreihwl | coreihwlnoavx | haswell | haswellnoavx)
 	gcc_cflags_cpu="-mtune=haswell -mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=haswell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+	path="x86/coreihwl x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	x86_have_mulx=yes
 	;;
       coreibwl | coreibwlnoavx | broadwell | broadwellnoavx)
 	gcc_cflags_cpu="-mtune=broadwell -mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=broadwell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+	path="x86/coreihwl x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	# extra_functions_64="missing"	 # enable for bmi2/adx simulation
 	x86_have_mulx=yes
@@ -1925,7 +1927,7 @@
 	gcc_cflags_cpu="-mtune=skylake -mtune=broadwell -mtune=corei7 -mtune=core2 -mtune=k8"
 	# Don't pass -march=skylake for now as then some compilers emit AVX512.
 	gcc_cflags_arch="-march=broadwell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+	path="x86/coreihwl x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/skylake x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	# extra_functions_64="missing"	 # enable for bmi2/adx simulation
 	x86_have_mulx=yes
@@ -1939,13 +1941,13 @@
       silvermont)		# out-of-order pipeline atom
 	gcc_cflags_cpu="-mtune=slm -mtune=atom -mtune=pentium3"
 	gcc_cflags_arch="-march=slm -march=atom -march=pentium3"
-	path="x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
+	path="x86/silvermont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
 	path_64="x86_64/silvermont x86_64/atom x86_64"
 	;;
       goldmont)			# out-of-order pipeline atom
 	gcc_cflags_cpu="-mtune=slm -mtune=atom -mtune=pentium3"
 	gcc_cflags_arch="-march=slm -march=atom -march=pentium3"
-	path="x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
+	path="x86/goldmont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
 	path_64="x86_64/goldmont x86_64/silvermont x86_64/atom x86_64"
 	;;
       nano)
diff -r 9ba25d46609f -r 95074fea4d54 mpn/arm64/cora57/gmp-mparam.h
--- a/mpn/arm64/cora57/gmp-mparam.h	Wed Oct 02 00:10:49 2019 +0200
+++ b/mpn/arm64/cora57/gmp-mparam.h	Wed Oct 02 22:02:21 2019 +0200
@@ -1,6 +1,6 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* gmp-mparam.h -- Compiler/machine parameter header file for a57, a72-a75.
 
-Copyright 2017 Free Software Foundation, Inc.
+Copyright 2019 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -31,147 +31,157 @@
 #define GMP_LIMB_BITS 64
 #define GMP_LIMB_BYTES 8
 
-/* 2000 MHz Cortex-A57 */
+/* 1800 MHz Cortex-A72 */
 /* FFT tuning limit = 0.5 M */
-/* Generated by tuneup.c, 2017-02-23, gcc 4.8 */
+/* Generated by tuneup.c, 2019-10-02, gcc 7.4 */
 
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        26
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
+#define DIVREM_1_NORM_THRESHOLD              3
+#define DIVREM_1_UNNORM_THRESHOLD            4
+#define MOD_1_1P_METHOD                      1  /* 2.21% faster than 2 */
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        42
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
 #define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD            2
-#define DIV_QR_2_PI2_THRESHOLD              13
+#define DIV_QR_1N_PI1_METHOD                 1  /* 34.95% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD              5
+#define DIV_QR_1_UNNORM_THRESHOLD            5
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
 #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           37
+#define BMOD_1_TO_MOD_1_THRESHOLD           33
 
 #define DIV_1_VS_MUL_1_PERCENT             168
 
-#define MUL_TOOM22_THRESHOLD                12
-#define MUL_TOOM33_THRESHOLD                49
-#define MUL_TOOM44_THRESHOLD               118
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               236
+#define MUL_TOOM22_THRESHOLD                10
+#define MUL_TOOM33_THRESHOLD                41
+#define MUL_TOOM44_THRESHOLD                99
+#define MUL_TOOM6H_THRESHOLD               142
+#define MUL_TOOM8H_THRESHOLD               199
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      77
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      77
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      64
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      63
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      55
 
 #define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 16
-#define SQR_TOOM3_THRESHOLD                 57
-#define SQR_TOOM4_THRESHOLD                160
-#define SQR_TOOM6_THRESHOLD                206
+#define SQR_TOOM2_THRESHOLD                 18
+#define SQR_TOOM3_THRESHOLD                 65
+#define SQR_TOOM4_THRESHOLD                166
+#define SQR_TOOM6_THRESHOLD                222
 #define SQR_TOOM8_THRESHOLD                309
 
 #define MULMID_TOOM42_THRESHOLD             22
 
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD               11
+#define MULMOD_BNM1_THRESHOLD                7
+#define SQRMOD_BNM1_THRESHOLD               12
 
-#define MUL_FFT_MODF_THRESHOLD             284  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             276  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    284, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+  { {    276, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
     {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
-    {      7, 7}, {     17, 8}, {      9, 7}, {     19, 8}, \
+    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
     {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
-    {     31, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
-    {     47,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     47,11}, {     15,10}, {     31, 9}, {     67,10}, \
-    {     39, 9}, {     79,10}, {     47, 9}, {     95,10}, \
-    {     55,11}, {     31,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     21, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23, 8}, {     49, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     51,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47, 9}, {     99,10}, {     55,11}, {     31,10}, \
+    {     63, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
+    {    287,10}, {     79, 9}, {    159, 8}, {    319,11}, \
+    {     47,10}, {     95, 9}, {    191,10}, {    103,12}, \
     {     31,11}, {     63, 9}, {    255, 8}, {    511,10}, \
-    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
-    {    319, 8}, {    639,10}, {    175, 9}, {    351,11}, \
+    {    143, 8}, {    575,11}, {     79,10}, {    159, 9}, \
+    {    319,10}, {    175, 9}, {    351, 8}, {    703,11}, \
     {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415,12}, {     63,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319, 9}, {    639,11}, {    175,10}, {    351, 9}, \
-    {    703,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,10}, {    415, 9}, {    831,11}, {    223,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 91
-#define MUL_FFT_THRESHOLD                 2688
+    {    415,10}, {    223, 9}, {    447, 8}, {    895,12}, \
+    {     63,10}, {    255, 9}, {    511, 8}, {   1023, 9}, \
+    {    543,11}, {    143,10}, {    287, 9}, {    575, 8}, \
+    {   1151,10}, {    319, 9}, {    639,11}, {    175,10}, \
+    {    351, 9}, {    703,12}, {     95,10}, {    383, 9}, \
+    {    767,11}, {    207, 9}, {    831,11}, {    223,10}, \
+    {    447, 9}, {    895,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 109
+#define MUL_FFT_THRESHOLD                 3200
 
-#define SQR_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             244  /* k = 5 */
 #define SQR_FFT_TABLE3                                      \
-  { {    272, 5}, {     15, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     17, 8}, \
-    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     19, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
-    {     31, 9}, {     23,10}, {     15, 9}, {     35,10}, \
-    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63, 9}, {    255, 8}, {    511,10}, \
-    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
-    {    319, 8}, {    639,10}, {    175, 9}, {    351, 8}, \
-    {    703,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207, 9}, {    415,12}, {     63,10}, {    255, 9}, \
-    {    511,11}, {    143,10}, {    287, 9}, {    575,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
-    {    351, 9}, {    703,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,10}, {    415, 9}, {    831,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 83
-#define SQR_FFT_THRESHOLD                 2240
+  { {    244, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     27,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     47,11}, {     15,10}, {     31, 9}, {     67,10}, \
+    {     39, 9}, {     79,10}, {     47, 9}, {     99,10}, \
+    {     55,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     71, 8}, {    287, 7}, {    575, 9}, \
+    {    159, 8}, {    319,11}, {     47,10}, {     95, 9}, \
+    {    191, 8}, {    383,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287, 8}, \
+    {    575,11}, {     79,10}, {    159, 9}, {    319, 8}, \
+    {    639, 9}, {    351,10}, {    191, 9}, {    383,10}, \
+    {    207, 9}, {    415,10}, {    239,12}, {     63,10}, \
+    {    255, 9}, {    511,10}, {    271,11}, {    143,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351, 9}, {    703,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
+    {    831,11}, {    223,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 97
+#define SQR_FFT_THRESHOLD                 2496
 
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  37
-#define MULLO_MUL_N_THRESHOLD             5240
+#define MULLO_DC_THRESHOLD                  39
+#define MULLO_MUL_N_THRESHOLD             6253
 #define SQRLO_BASECASE_THRESHOLD             4
-#define SQRLO_DC_THRESHOLD                  45
-#define SQRLO_SQR_THRESHOLD               4392
+#define SQRLO_DC_THRESHOLD                  56
+#define SQRLO_SQR_THRESHOLD               4940
 
-#define DC_DIV_QR_THRESHOLD                 31
-#define DC_DIVAPPR_Q_THRESHOLD             125
-#define DC_BDIV_QR_THRESHOLD                32
-#define DC_BDIV_Q_THRESHOLD                 52
+#define DC_DIV_QR_THRESHOLD                 41
+#define DC_DIVAPPR_Q_THRESHOLD             136
+#define DC_BDIV_QR_THRESHOLD                39
+#define DC_BDIV_Q_THRESHOLD                 89
 
-#define INV_MULMOD_BNM1_THRESHOLD           23
-#define INV_NEWTON_THRESHOLD               153
-#define INV_APPR_THRESHOLD                 133
+#define INV_MULMOD_BNM1_THRESHOLD           22
+#define INV_NEWTON_THRESHOLD               154


More information about the gmp-commit mailing list