[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Sep 13 12:32:56 UTC 2019
details: /var/hg/gmp/rev/13bb06992f71
changeset: 17891:13bb06992f71
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Sep 13 14:32:04 2019 +0200
description:
(HAVE_HOST_CPU_1): Add many x86_64 CPU types.
details: /var/hg/gmp/rev/edfbbc5d4e48
changeset: 17892:edfbbc5d4e48
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Sep 13 14:32:41 2019 +0200
description:
(umul_ppmm): Fix criterion for when to use mulx.
(count_leading_zeros): Use lzcnt for appropriate CPUs.
(count_trailing_zeros): Use tzcnt for appropriate CPUs.
diffstat:
configure.ac | 22 +++++++++++++++++++++-
longlong.h | 40 ++++++++++++++++++++++++++++++++--------
2 files changed, 53 insertions(+), 9 deletions(-)
diffs (111 lines):
diff -r f4e0b64cf6c6 -r edfbbc5d4e48 configure.ac
--- a/configure.ac Fri Sep 13 11:39:17 2019 +0200
+++ b/configure.ac Fri Sep 13 14:32:41 2019 +0200
@@ -3,7 +3,7 @@
define(GMP_COPYRIGHT,[[
-Copyright 1996-2018 Free Software Foundation, Inc.
+Copyright 1996-2019 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -322,6 +322,26 @@
#undef HAVE_HOST_CPU_pentiumpro
#undef HAVE_HOST_CPU_pentium2
#undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_pentium4
+#undef HAVE_HOST_CPU_core2
+#undef HAVE_HOST_CPU_nehalem
+#undef HAVE_HOST_CPU_westmere
+#undef HAVE_HOST_CPU_sandybridge
+#undef HAVE_HOST_CPU_ivybridge
+#undef HAVE_HOST_CPU_haswell
+#undef HAVE_HOST_CPU_broadwell
+#undef HAVE_HOST_CPU_skylake
+#undef HAVE_HOST_CPU_silvermont
+#undef HAVE_HOST_CPU_goldmont
+#undef HAVE_HOST_CPU_k8
+#undef HAVE_HOST_CPU_k10
+#undef HAVE_HOST_CPU_bulldozer
+#undef HAVE_HOST_CPU_piledriver
+#undef HAVE_HOST_CPU_steamroller
+#undef HAVE_HOST_CPU_excavator
+#undef HAVE_HOST_CPU_zen
+#undef HAVE_HOST_CPU_bobcat
+#undef HAVE_HOST_CPU_jaguar
#undef HAVE_HOST_CPU_s390_z900
#undef HAVE_HOST_CPU_s390_z990
#undef HAVE_HOST_CPU_s390_z9
diff -r f4e0b64cf6c6 -r edfbbc5d4e48 longlong.h
--- a/longlong.h Fri Sep 13 11:39:17 2019 +0200
+++ b/longlong.h Fri Sep 13 14:32:41 2019 +0200
@@ -1058,14 +1058,15 @@
: "=r" (sh), "=&r" (sl) \
: "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \
"1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
-#if defined (HAVE_MULX)
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+ || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulx %3, %0, %1" \
+ __asm__ ("mulx\t%3, %0, %1" \
: "=r" (w0), "=r" (w1) \
: "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
#else
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulq %3" \
+ __asm__ ("mulq\t%3" \
: "=a" (w0), "=d" (w1) \
: "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#endif
@@ -1073,21 +1074,44 @@
__asm__ ("divq %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
-/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+ || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2 \
+ || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen \
+ || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x) \
+ do { \
+ /* This is lzcnt, spelled for older assemblers. Destination and */ \
+ /* source must be a 64-bit registers, hence cast and %q. */ \
+ __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
#define count_leading_zeros(count, x) \
do { \
UDItype __cbtmp; \
ASSERT ((x) != 0); \
- __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
(count) = __cbtmp ^ 63; \
} while (0)
-/* bsfq destination must be a 64-bit register, "%q0" forces this in case
- count is only an int. */
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+ || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x) \
+ do { \
+ /* This is tzcnt, spelled for older assemblers. Destination and */ \
+ /* source must be a 64-bit registers, hence cast and %q. */ \
+ __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
#define count_trailing_zeros(count, x) \
do { \
ASSERT ((x) != 0); \
- __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
} while (0)
+#endif
#endif /* __amd64__ */
#if defined (__i860__) && W_TYPE_SIZE == 32
More information about the gmp-commit
mailing list