[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Sep 13 12:32:56 UTC 2019


details:   /var/hg/gmp/rev/13bb06992f71
changeset: 17891:13bb06992f71
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Sep 13 14:32:04 2019 +0200
description:
(HAVE_HOST_CPU_1): Add many x86_64 CPU types.

details:   /var/hg/gmp/rev/edfbbc5d4e48
changeset: 17892:edfbbc5d4e48
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Sep 13 14:32:41 2019 +0200
description:
(umul_ppmm): Fix criterion for when to use mulx.
	(count_leading_zeros): Use lzcnt for appropriate CPUs.
	(count_trailing_zeros): Use tzcnt for appropriate CPUs.

diffstat:

 configure.ac |  22 +++++++++++++++++++++-
 longlong.h   |  40 ++++++++++++++++++++++++++++++++--------
 2 files changed, 53 insertions(+), 9 deletions(-)

diffs (111 lines):

diff -r f4e0b64cf6c6 -r edfbbc5d4e48 configure.ac
--- a/configure.ac	Fri Sep 13 11:39:17 2019 +0200
+++ b/configure.ac	Fri Sep 13 14:32:41 2019 +0200
@@ -3,7 +3,7 @@
 
 define(GMP_COPYRIGHT,[[
 
-Copyright 1996-2018 Free Software Foundation, Inc.
+Copyright 1996-2019 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -322,6 +322,26 @@
 #undef HAVE_HOST_CPU_pentiumpro
 #undef HAVE_HOST_CPU_pentium2
 #undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_pentium4
+#undef HAVE_HOST_CPU_core2
+#undef HAVE_HOST_CPU_nehalem
+#undef HAVE_HOST_CPU_westmere
+#undef HAVE_HOST_CPU_sandybridge
+#undef HAVE_HOST_CPU_ivybridge
+#undef HAVE_HOST_CPU_haswell
+#undef HAVE_HOST_CPU_broadwell
+#undef HAVE_HOST_CPU_skylake
+#undef HAVE_HOST_CPU_silvermont
+#undef HAVE_HOST_CPU_goldmont
+#undef HAVE_HOST_CPU_k8
+#undef HAVE_HOST_CPU_k10
+#undef HAVE_HOST_CPU_bulldozer
+#undef HAVE_HOST_CPU_piledriver
+#undef HAVE_HOST_CPU_steamroller
+#undef HAVE_HOST_CPU_excavator
+#undef HAVE_HOST_CPU_zen
+#undef HAVE_HOST_CPU_bobcat
+#undef HAVE_HOST_CPU_jaguar
 #undef HAVE_HOST_CPU_s390_z900
 #undef HAVE_HOST_CPU_s390_z990
 #undef HAVE_HOST_CPU_s390_z9
diff -r f4e0b64cf6c6 -r edfbbc5d4e48 longlong.h
--- a/longlong.h	Fri Sep 13 11:39:17 2019 +0200
+++ b/longlong.h	Fri Sep 13 14:32:41 2019 +0200
@@ -1058,14 +1058,15 @@
 	   : "=r" (sh), "=&r" (sl)					\
 	   : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)),		\
 	     "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
-#if defined (HAVE_MULX)
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+  || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulx	%3, %0, %1"					\
+  __asm__ ("mulx\t%3, %0, %1"						\
 	   : "=r" (w0), "=r" (w1)					\
 	   : "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #else
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulq	%3"						\
+  __asm__ ("mulq\t%3"							\
 	   : "=a" (w0), "=d" (w1)					\
 	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #endif
@@ -1073,21 +1074,44 @@
   __asm__ ("divq %4"		     /* stringification in K&R C */	\
 	   : "=a" (q), "=d" (r)						\
 	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
-/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+  || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2	\
+  || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen	\
+  || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x)					\
+  do {									\
+    /* This is lzcnt, spelled for older assemblers.  Destination and */	\
+    /* source must be 64-bit registers, hence cast and %q.           */	\
+    __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
 #define count_leading_zeros(count, x)					\
   do {									\
     UDItype __cbtmp;							\
     ASSERT ((x) != 0);							\
-    __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));	\
+    __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x)));	\
     (count) = __cbtmp ^ 63;						\
   } while (0)
-/* bsfq destination must be a 64-bit register, "%q0" forces this in case
-   count is only an int. */
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+  || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    /* This is tzcnt, spelled for older assemblers.  Destination and */	\
+    /* source must be 64-bit registers, hence cast and %q.           */	\
+    __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+  } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
 #define count_trailing_zeros(count, x)					\
   do {									\
     ASSERT ((x) != 0);							\
-    __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
+    __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x)));	\
   } while (0)
+#endif
 #endif /* __amd64__ */
 
 #if defined (__i860__) && W_TYPE_SIZE == 32


More information about the gmp-commit mailing list