[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Mar 30 23:17:29 CEST 2022


details:   /var/hg/gmp/rev/93cb25d52eea
changeset: 18338:93cb25d52eea
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Mar 30 23:09:55 2022 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/32dc4af70f95
changeset: 18339:32dc4af70f95
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Mar 30 23:11:05 2022 +0200
description:
(loongarch64 umul_ppmm): New #define.

details:   /var/hg/gmp/rev/46ea9c505ba9
changeset: 18340:46ea9c505ba9
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Mar 30 23:13:11 2022 +0200
description:
Handle Intel tremont separately.

details:   /var/hg/gmp/rev/558eab468146
changeset: 18341:558eab468146
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Mar 30 23:14:46 2022 +0200
description:
Handle Intel tremont separately.

details:   /var/hg/gmp/rev/d45103d658ca
changeset: 18342:d45103d658ca
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Mar 30 23:16:18 2022 +0200
description:
Cosmetic layout fixes.

diffstat:

 ChangeLog                          |  42 ++++++++++++++++++++++++++++++++++++++
 acinclude.m4                       |   2 +-
 config.guess                       |   6 ++--
 config.sub                         |   2 +-
 configure.ac                       |   7 ++++++
 longlong.h                         |  11 +++++++++
 mpn/arm64/applem1/aorsmul_1.asm    |   2 +-
 mpn/arm64/applem1/sqr_basecase.asm |   2 +-
 8 files changed, 67 insertions(+), 7 deletions(-)

diffs (188 lines):

diff -r 27c014307e66 -r d45103d658ca ChangeLog
--- a/ChangeLog	Sun Mar 20 20:15:49 2022 +0100
+++ b/ChangeLog	Wed Mar 30 23:16:18 2022 +0200
@@ -8,6 +8,10 @@
 
 	* mpn/generic/mul_fft.c: Use _bknp1. Not when tuning, to avoid "noise".
 
+2022-03-09  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/alderlake/submul_1.asm: New file.
+
 2022-03-08 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/mulmod_bnm1.c: Use mulmod_bknp1, on the bnp1 side.
@@ -15,6 +19,15 @@
 
 	* mpn/generic/mul_fft.c: Remove a branch and improve carry propagation.
 
+2022-03-03  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/alderlake/mul_basecase.asm: New file.
+	* mpn/x86_64/alderlake/addmul_1.asm: New file.
+
+2022-02-21  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Handle alderlake specifically, as it lacks avx512.
+
 2022-02-15 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/strongfibo.c: Correct condition in #if.
@@ -31,6 +44,10 @@
 	* tune/speed.h: Declare new speed_ functions to measure them.
 	* tune/common.c: Implement speed_ functions.
 
+2022-02-14  Torbjörn Granlund  <tg at gmplib.org>
+
+	* config.guess: Recognize Tremont CPU variants.
+
 2022-02-01 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpz/aorsmul_i.c: Use MPZ_NEWALLOC when the operand is overwritten.
@@ -65,6 +82,31 @@
 	* mpz/tdiv_r.c: Special code for divisor with low zero limbs.
 	* mpz/tdiv_qr.c: Likewise.
 
+2021-11-14  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Adapt to acinclude.m4 change.
+
+	* mpn/riscv/64/aors_n.asm: Accept 4th CMPCY operand, allowing for
+	2 c/l mpn_sub_n (mpn_add_n cannot beat 3 c/l with RISC V's ISA).
+
+	* configure.ac (S390_PATTERN): Rewrite to handle path inheritance.
+
+	* acinclude.m4 (GMP_ASM_SPARC_GDOP): Renamed GMP_ASM_SPARC_GOTDATA.
+
+	* mpn/sparc32/sparc-defs.m4 (LEA64): Remove misguided assert.
+	* mpn/sparc32/sparc-defs.m4 (LEA64): Allow PIC without gdop_*.
+
+	* mpn/sparc64/gcd_11.asm: Optimise out annulled shift insn in loop.
+
+2021-11-07  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/generic/sec_tabselect.c: Make recently added code actually work.
+	(Thanks Marco!)
+
+	* mpn/s390_32/sec_tabselect.asm: New file.
+
+	* mpn/s390_64/sec_tabselect.asm: Avoid "slfi" for portability.
+
 2021-11-02  Torbjörn Granlund  <tg at gmplib.org>
 
 	* mpn/s390_64/sec_tabselect.asm: Rewrite.
diff -r 27c014307e66 -r d45103d658ca acinclude.m4
--- a/acinclude.m4	Sun Mar 20 20:15:49 2022 +0100
+++ b/acinclude.m4	Wed Mar 30 23:16:18 2022 +0200
@@ -63,7 +63,7 @@
 [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
 
 define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | icelake*-*-* | tigerlake*-*-* | rocketlake*-*-* | alderlake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | tremont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | icelake*-*-* | tigerlake*-*-* | rocketlake*-*-* | alderlake*-*-*]])
 
 dnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
 dnl  ---------------------------------
diff -r 27c014307e66 -r d45103d658ca config.guess
--- a/config.guess	Sun Mar 20 20:15:49 2022 +0100
+++ b/config.guess	Wed Mar 30 23:16:18 2022 +0200
@@ -924,15 +924,15 @@
           else if (model == 0x7a) cpu_64bit = 1,            modelstr = "goldmont";   /* Goldmont Plus */
           else if (model == 0x7d) cpu_64bit = 1, cpu_avx=1, modelstr = "icelake";    /* Ice Lake Y */
           else if (model == 0x7e) cpu_64bit = 1, cpu_avx=1, modelstr = "icelake";    /* Ice Lake U */
-          else if (model == 0x8a) cpu_64bit = 1,            modelstr = "goldmont";   /* Tremont */
+          else if (model == 0x8a) cpu_64bit = 1,            modelstr = "tremont";    /* Tremont */
           else if (model == 0x8c) cpu_64bit = 1, cpu_avx=1, modelstr = "tigerlake";  /* Tiger Lake U */
           else if (model == 0x8d) cpu_64bit = 1, cpu_avx=1, modelstr = "tigerlake";  /* Tiger Lake H */
           else if (model == 0x8e) cpu_64bit = 1, cpu_avx=1, modelstr = "kabylake";   /* Kaby Lake Y/U */
           else if (model == 0x8f) cpu_64bit = 1, cpu_avx=1, modelstr = "alderlake";  /* Sapphire Rapids */
-          else if (model == 0x96) cpu_64bit = 1,            modelstr = "goldmont";   /* Tremont */
+          else if (model == 0x96) cpu_64bit = 1,            modelstr = "tremont";    /* Tremont */
           else if (model == 0x97) cpu_64bit = 1, cpu_avx=1, modelstr = "alderlake";  /* Alder Lake S */
           else if (model == 0x9a) cpu_64bit = 1, cpu_avx=1, modelstr = "alderlake";  /* Alder Lake P */
-          else if (model == 0x9c) cpu_64bit = 1,            modelstr = "goldmont";   /* Tremont */
+          else if (model == 0x9c) cpu_64bit = 1,            modelstr = "tremont";    /* Tremont */
           else if (model == 0x9e) cpu_64bit = 1, cpu_avx=1, modelstr = "kabylake";   /* Kaby Lake desktop */
           else if (model == 0xa7) cpu_64bit = 1, cpu_avx=1, modelstr = "rocketlake"; /* Rocket Lake S */
           else                    cpu_64bit = 1,            modelstr = "nehalem";    /* default */
diff -r 27c014307e66 -r d45103d658ca config.sub
--- a/config.sub	Sun Mar 20 20:15:49 2022 +0100
+++ b/config.sub	Wed Mar 30 23:16:18 2022 +0200
@@ -102,7 +102,7 @@
   test_cpu=ia64 ;;
 pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
   test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | icelake* | rocketlake* | tigerlake* | alderlake*| knightslanding)
+athlon64 | atom | silvermont | goldmont | tremont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | icelake* | rocketlake* | tigerlake* | alderlake*| knightslanding)
   test_cpu=x86_64 ;;
 power[2-9] | power2sc)
   test_cpu=power ;;
diff -r 27c014307e66 -r d45103d658ca configure.ac
--- a/configure.ac	Sun Mar 20 20:15:49 2022 +0100
+++ b/configure.ac	Wed Mar 30 23:16:18 2022 +0200
@@ -333,6 +333,7 @@
 #undef HAVE_HOST_CPU_skylake
 #undef HAVE_HOST_CPU_silvermont
 #undef HAVE_HOST_CPU_goldmont
+#undef HAVE_HOST_CPU_tremont
 #undef HAVE_HOST_CPU_k8
 #undef HAVE_HOST_CPU_k10
 #undef HAVE_HOST_CPU_bulldozer
@@ -1997,6 +1998,12 @@
 	path="x86/goldmont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
 	path_64="x86_64/goldmont x86_64/silvermont x86_64/atom x86_64"
 	;;
+      tremont)			# out-of-order pipeline atom
+	gcc_cflags_cpu="-mtune=slm -mtune=atom -mtune=pentium3"
+	gcc_cflags_arch="-march=slm -march=atom -march=pentium3"
+	path="x86/goldmont x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
+	path_64="x86_64/tremont x86_64/goldmont x86_64/silvermont x86_64/atom x86_64"
+	;;
       nano)
 	gcc_cflags_cpu="-mtune=nano"
 	gcc_cflags_arch="-march=nano"
diff -r 27c014307e66 -r d45103d658ca longlong.h
--- a/longlong.h	Sun Mar 20 20:15:49 2022 +0100
+++ b/longlong.h	Wed Mar 30 23:16:18 2022 +0200
@@ -1156,6 +1156,17 @@
 #endif /* i960mx */
 #endif /* i960 */
 
+
+#if defined (__loongarch64) && W_TYPE_SIZE == 64
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    UDItype __u = (u), __v = (v);					\
+    (w0) = __u * __v;							\
+    (w1) = (unsigned __int128__) __u * __v >> 64;			\
+  } while (0)
+#endif
+
+
 #if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
      || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
      || defined (__mc5307__)) && W_TYPE_SIZE == 32
diff -r 27c014307e66 -r d45103d658ca mpn/arm64/applem1/aorsmul_1.asm
--- a/mpn/arm64/applem1/aorsmul_1.asm	Sun Mar 20 20:15:49 2022 +0100
+++ b/mpn/arm64/applem1/aorsmul_1.asm	Wed Mar 30 23:16:18 2022 +0200
@@ -47,7 +47,7 @@
 define(`up', x1)
 define(`n',  x2)
 define(`v0', x3)
-define(`cin',`x4')
+define(`cin',x4)
 
 define(`CY',x17)
 
diff -r 27c014307e66 -r d45103d658ca mpn/arm64/applem1/sqr_basecase.asm
--- a/mpn/arm64/applem1/sqr_basecase.asm	Sun Mar 20 20:15:49 2022 +0100
+++ b/mpn/arm64/applem1/sqr_basecase.asm	Wed Mar 30 23:16:18 2022 +0200
@@ -245,7 +245,7 @@
 	adc	CY, x4, xzr
 	adds	xzr, x7, x7
 	adc	v0, x10, x10
-L(cor):	mul	x8, x11, v0
+	mul	x8, x11, v0
 	umulh	x4, x11, v0
 	adds	x8, x12, x8
 	adc	x4, x4, xzr


More information about the gmp-commit mailing list