[Gmp-commit] /var/hg/gmp: (x86 umul_ppmm): Fix typo.

mercurial at gmplib.org mercurial at gmplib.org
Tue Sep 22 09:30:22 UTC 2020


details:   /var/hg/gmp/rev/e524e8c9a6fb
changeset: 18090:e524e8c9a6fb
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Sep 22 11:30:02 2020 +0200
description:
(x86 umul_ppmm): Fix typo.

diffstat:

 longlong.h |  55 +++++++++++++++++++++++++------------------------------
 1 files changed, 25 insertions(+), 30 deletions(-)

diffs (151 lines):

diff -r b851f9336a86 -r e524e8c9a6fb longlong.h
--- a/longlong.h	Thu Sep 17 12:00:00 2020 +0200
+++ b/longlong.h	Tue Sep 22 11:30:02 2020 +0200
@@ -436,7 +436,7 @@
     && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
-    if (__builtin_constant_p (bl) && -(USItype)(bl) < 0x100)		\
+    if (__builtin_constant_p (bl) && -(USItype)(bl) < (USItype)(bl))	\
       __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
 	   : "=r" (sh), "=&r" (sl)					\
 	       : "r" (ah), "rI" (bh),					\
@@ -447,8 +447,8 @@
 	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC);	\
   } while (0)
 /* FIXME: Extend the immediate range for the low word by using both ADDS and
-   SUBS, since they set carry in the same way.  Note: We need separate
-   definitions for thumb and non-thumb to to th absense of RSC under thumb.  */
+   SUBS, since they set carry in the same way.  We need separate definitions
+   for thumb and non-thumb since thumb lacks RSC.  */
 #if defined (__thumb__)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
@@ -461,10 +461,6 @@
       __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
-    else if (__builtin_constant_p (bl))					\
-      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
-	       : "=r" (sh), "=&r" (sl)					\
-	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
     else								\
       __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
 	       : "=r" (sh), "=&r" (sl)					\
@@ -500,11 +496,7 @@
 		   : "=r" (sh), "=&r" (sl)				\
 		   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
       }									\
-    else if (__builtin_constant_p (bl))					\
-      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
-	       : "=r" (sh), "=&r" (sl)					\
-	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
-    else /* only bh might be a constant */				\
+    else								\
       __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
@@ -560,11 +552,9 @@
 #endif /* __arm__ */
 
 #if defined (__aarch64__) && W_TYPE_SIZE == 64
-/* FIXME: Extend the immediate range for the low word by using both
-   ADDS and SUBS, since they set carry in the same way.  */
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
-    if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000)		\
+    if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl))	\
       __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
@@ -577,7 +567,7 @@
   } while (0)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
-    if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000)		\
+    if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl))	\
       __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
 	       : "=r,r" (sh), "=&r,&r" (sl)				\
 	       : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),	\
@@ -1056,7 +1046,7 @@
    && (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \
        || HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen)
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulx\t%3, %0, %1"						\
+  __asm__ ("mulx\t%3, %q0, %q1"						\
 	   : "=r" (w0), "=r" (w1)					\
 	   : "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #else
@@ -1494,36 +1484,37 @@
    This might seem strange, but gcc folds away the dead code late.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
-    if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) {	\
+    if (__builtin_constant_p (bl)					\
+	&& (bl) > -0x8000 && (bl) <= 0x8000 && (bl) != 0) {		\
 	if (__builtin_constant_p (ah) && (ah) == 0)			\
 	  __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   :                       "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
 		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
 	  __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
 		   :                       "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
 		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (bh) && (bh) == 0)		\
 	  __asm__ ("addic %1,%3,%4\n\taddme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
-		   : "r"  ((UDItype)(ah)),				\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		   : "r" ((UDItype)(ah)),				\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
 		     __CLOBBER_CC);					\
 	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
 	  __asm__ ("addic %1,%3,%4\n\taddze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
-		   : "r"  ((UDItype)(ah)),				\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		   : "r" ((UDItype)(ah)),				\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
 		     __CLOBBER_CC);					\
 	else								\
 	  __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"			\
 		   : "=r" (sh), "=&r" (sl)				\
-		   : "r"  ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
-		     "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
+		   : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)),		\
+		     "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))	\
 		     __CLOBBER_CC);					\
     } else {								\
 	if (__builtin_constant_p (ah) && (ah) == 0)			\
@@ -2056,8 +2047,10 @@
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     UWtype __x;								\
-    __x = (al) + (bl);							\
-    (sh) = (ah) + (bh) + (__x < (al));					\
+    UWtype __al = (al);							\
+    UWtype __bl = (bl);							\
+    __x = __al + __bl;							\
+    (sh) = (ah) + (bh) + (__x < __al);					\
     (sl) = __x;								\
   } while (0)
 #endif
@@ -2066,8 +2059,10 @@
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     UWtype __x;								\
-    __x = (al) - (bl);							\
-    (sh) = (ah) - (bh) - ((al) < (bl));					\
+    UWtype __al = (al);							\
+    UWtype __bl = (bl);							\
+    __x = __al - __bl;							\
+    (sh) = (ah) - (bh) - (__al < __bl);					\
     (sl) = __x;								\
   } while (0)
 #endif



More information about the gmp-commit mailing list