[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Mar 30 23:17:29 CEST 2022
details: /var/hg/gmp/rev/93cb25d52eea
changeset: 18338:93cb25d52eea
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Mar 30 23:09:55 2022 +0200
description:
ChangeLog
details: /var/hg/gmp/rev/32dc4af70f95
changeset: 18339:32dc4af70f95
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Mar 30 23:11:05 2022 +0200
description:
(loongarch64 umul_ppmm): New #define.
details: /var/hg/gmp/rev/46ea9c505ba9
changeset: 18340:46ea9c505ba9
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Mar 30 23:13:11 2022 +0200
description:
Handle Intel tremont separately.
details: /var/hg/gmp/rev/558eab468146
changeset: 18341:558eab468146
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Mar 30 23:14:46 2022 +0200
description:
Handle Intel tremont separately.
details: /var/hg/gmp/rev/d45103d658ca
changeset: 18342:d45103d658ca
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Mar 30 23:16:18 2022 +0200
description:
Cosmetic layout fixes.
diffstat:
ChangeLog | 42 ++++++++++++++++++++++++++++++++++++++
acinclude.m4 | 2 +-
config.guess | 6 ++--
config.sub | 2 +-
configure.ac | 7 ++++++
longlong.h | 11 +++++++++
mpn/arm64/applem1/aorsmul_1.asm | 2 +-
mpn/arm64/applem1/sqr_basecase.asm | 2 +-
8 files changed, 67 insertions(+), 7 deletions(-)
diffs (188 lines):
diff -r 27c014307e66 -r d45103d658ca ChangeLog
--- a/ChangeLog Sun Mar 20 20:15:49 2022 +0100
+++ b/ChangeLog Wed Mar 30 23:16:18 2022 +0200
@@ -8,6 +8,10 @@
* mpn/generic/mul_fft.c: Use _bknp1. Not when tuning, to avoid "noise".
+2022-03-09 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/alderlake/submul_1.asm: New file.
+
2022-03-08 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/mulmod_bnm1.c: Use mulmod_bknp1, on the bnp1 side.
@@ -15,6 +19,15 @@
* mpn/generic/mul_fft.c: Remove a branch and improve carry propagation.
+2022-03-03 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/alderlake/mul_basecase.asm: New file.
+ * mpn/x86_64/alderlake/addmul_1.asm: New file.
+
+2022-02-21 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac: Handle alderlake specifically, as it lacks avx512.
+
2022-02-15 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/strongfibo.c: Correct condition in #if.
@@ -31,6 +44,10 @@
* tune/speed.h: Declare new speed_ functions to measure them.
* tune/common.c: Implement speed_ functions.
+2022-02-14 Torbjörn Granlund <tg at gmplib.org>
+
+ * config.guess: Recognize Tremont CPU variants.
+
2022-02-01 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpz/aorsmul_i.c: Use MPZ_NEWALLOC when the operand is overwritten.
@@ -65,6 +82,31 @@
* mpz/tdiv_r.c: Special code for divisor with low zero limbs.
* mpz/tdiv_qr.c: Likewise.
+2021-11-14 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac: Adapt to acinclude.m4 change.
+
+ * mpn/riscv/64/aors_n.asm: Accept 4th CMPCY operand, allowing for
+ 2 c/l mpn_sub_n (mpn_add_n cannot beat 3 c/l with RISC V's ISA).
+
+ * configure.ac (S390_PATTERN): Rewrite to handle path inheritance.
+
+ * acinclude.m4 (GMP_ASM_SPARC_GDOP): Renamed GMP_ASM_SPARC_GOTDATA.
+
+ * mpn/sparc32/sparc-defs.m4 (LEA64): Remove misguided assert.
+ * mpn/sparc32/sparc-defs.m4 (LEA64): Allow PIC without gdop_*.
+
+ * mpn/sparc64/gcd_11.asm: Optimise out annulled shift insn in loop.
+
+2021-11-07 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/generic/sec_tabselect.c: Make recently added code actually work.
+ (Thanks Marco!)
+
+ * mpn/s390_32/sec_tabselect.asm: New file.
+
+ * mpn/s390_64/sec_tabselect.asm: Avoid "slfi" for portability.
+
2021-11-02 Torbjörn Granlund <tg at gmplib.org>
* mpn/s390_64/sec_tabselect.asm: Rewrite.
diff -r 27c014307e66 -r d45103d658ca acinclude.m4
--- a/acinclude.m4 Sun Mar 20 20:15:49 2022 +0100
+++ b/acinclude.m4 Wed Mar 30 23:16:18 2022 +0200
@@ -63,7 +63,7 @@
[[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | icelake*-*-* | tigerlake*-*-* | rocketlake*-*-* | alderlake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | tremont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | icelake*-*-* | tigerlake*-*-* | rocketlake*-*-* | alderlake*-*-*]])
dnl GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
dnl ---------------------------------
diff -r 27c014307e66 -r d45103d658ca config.guess
--- a/config.guess Sun Mar 20 20:15:49 2022 +0100
+++ b/config.guess Wed Mar 30 23:16:18 2022 +0200
@@ -924,15 +924,15 @@
else if (model == 0x7a) cpu_64bit = 1, modelstr = "goldmont"; /* Goldmont Plus */
else if (model == 0x7d) cpu_64bit = 1, cpu_avx=1, modelstr = "icelake"; /* Ice Lake Y */
else if (model == 0x7e) cpu_64bit = 1, cpu_avx=1, modelstr = "icelake"; /* Ice Lake U */
- else if (model == 0x8a) cpu_64bit = 1, modelstr = "goldmont"; /* Tremont */
+ else if (model == 0x8a) cpu_64bit = 1, modelstr = "tremont"; /* Tremont */
else if (model == 0x8c) cpu_64bit = 1, cpu_avx=1, modelstr = "tigerlake"; /* Tiger Lake U */
else if (model == 0x8d) cpu_64bit = 1, cpu_avx=1, modelstr = "tigerlake"; /* Tiger Lake H */
else if (model == 0x8e) cpu_64bit = 1, cpu_avx=1, modelstr = "kabylake"; /* Kaby Lake Y/U */
else if (model == 0x8f) cpu_64bit = 1, cpu_avx=1, modelstr = "alderlake"; /* Sapphire Rapids */
- else if (model == 0x96) cpu_64bit = 1, modelstr = "goldmont"; /* Tremont */
+ else if (model == 0x96) cpu_64bit = 1, modelstr = "tremont"; /* Tremont */
else if (model == 0x97) cpu_64bit = 1, cpu_avx=1, modelstr = "alderlake"; /* Alder Lake S */
else if (model == 0x9a) cpu_64bit = 1, cpu_avx=1, modelstr = "alderlake"; /* Alder Lake P */
- else if (model == 0x9c) cpu_64bit = 1, modelstr = "goldmont"; /* Tremont */
+ else if (model == 0x9c) cpu_64bit = 1, modelstr = "tremont"; /* Tremont */
else if (model == 0x9e) cpu_64bit = 1, cpu_avx=1, modelstr = "kabylake"; /* Kaby Lake desktop */
else if (model == 0xa7) cpu_64bit = 1, cpu_avx=1, modelstr = "rocketlake"; /* Rocket Lake S */
else cpu_64bit = 1, modelstr = "nehalem"; /* default */
diff -r 27c014307e66 -r d45103d658ca config.sub
--- a/config.sub Sun Mar 20 20:15:49 2022 +0100
+++ b/config.sub Wed Mar 30 23:16:18 2022 +0200
@@ -102,7 +102,7 @@
test_cpu=ia64 ;;
pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | icelake* | rocketlake* | tigerlake* | alderlake*| knightslanding)
+athlon64 | atom | silvermont | goldmont | tremont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | icelake* | rocketlake* | tigerlake* | alderlake*| knightslanding)
test_cpu=x86_64 ;;
power[2-9] | power2sc)
test_cpu=power ;;
diff -r 27c014307e66 -r d45103d658ca configure.ac
--- a/configure.ac Sun Mar 20 20:15:49 2022 +0100
+++ b/configure.ac Wed Mar 30 23:16:18 2022 +0200
@@ -333,6 +333,7 @@
#undef HAVE_HOST_CPU_skylake
#undef HAVE_HOST_CPU_silvermont
#undef HAVE_HOST_CPU_goldmont
+#undef HAVE_HOST_CPU_tremont
#undef HAVE_HOST_CPU_k8
#undef HAVE_HOST_CPU_k10
#undef HAVE_HOST_CPU_bulldozer
@@ -1997,6 +1998,12 @@
path="x86/goldmont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
path_64="x86_64/goldmont x86_64/silvermont x86_64/atom x86_64"
;;
+ tremont) # out-of-order pipeline atom
+ gcc_cflags_cpu="-mtune=slm -mtune=atom -mtune=pentium3"
+ gcc_cflags_arch="-march=slm -march=atom -march=pentium3"
+ path="x86/goldmont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
+ path_64="x86_64/tremont x86_64/goldmont x86_64/silvermont x86_64/atom x86_64"
+ ;;
nano)
gcc_cflags_cpu="-mtune=nano"
gcc_cflags_arch="-march=nano"
diff -r 27c014307e66 -r d45103d658ca longlong.h
--- a/longlong.h Sun Mar 20 20:15:49 2022 +0100
+++ b/longlong.h Wed Mar 30 23:16:18 2022 +0200
@@ -1156,6 +1156,17 @@
#endif /* i960mx */
#endif /* i960 */
+
+#if defined (__loongarch64) && W_TYPE_SIZE == 64
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UDItype __u = (u), __v = (v); \
+ (w0) = __u * __v; \
+ (w1) = (unsigned __int128__) __u * __v >> 64; \
+ } while (0)
+#endif
+
+
#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
|| defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
|| defined (__mc5307__)) && W_TYPE_SIZE == 32
diff -r 27c014307e66 -r d45103d658ca mpn/arm64/applem1/aorsmul_1.asm
--- a/mpn/arm64/applem1/aorsmul_1.asm Sun Mar 20 20:15:49 2022 +0100
+++ b/mpn/arm64/applem1/aorsmul_1.asm Wed Mar 30 23:16:18 2022 +0200
@@ -47,7 +47,7 @@
define(`up', x1)
define(`n', x2)
define(`v0', x3)
-define(`cin',`x4')
+define(`cin',x4)
define(`CY',x17)
diff -r 27c014307e66 -r d45103d658ca mpn/arm64/applem1/sqr_basecase.asm
--- a/mpn/arm64/applem1/sqr_basecase.asm Sun Mar 20 20:15:49 2022 +0100
+++ b/mpn/arm64/applem1/sqr_basecase.asm Wed Mar 30 23:16:18 2022 +0200
@@ -245,7 +245,7 @@
adc CY, x4, xzr
adds xzr, x7, x7
adc v0, x10, x10
-L(cor): mul x8, x11, v0
+ mul x8, x11, v0
umulh x4, x11, v0
adds x8, x12, x8
adc x4, x4, xzr
More information about the gmp-commit mailing list