[Gmp-commit] /var/hg/gmp: (x86 umul_ppmm): Fix typo.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Sep 22 09:30:22 UTC 2020
details: /var/hg/gmp/rev/e524e8c9a6fb
changeset: 18090:e524e8c9a6fb
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Sep 22 11:30:02 2020 +0200
description:
(x86 umul_ppmm): Fix typo.
diffstat:
longlong.h | 55 +++++++++++++++++++++++++------------------------------
1 files changed, 25 insertions(+), 30 deletions(-)
diffs (151 lines):
diff -r b851f9336a86 -r e524e8c9a6fb longlong.h
--- a/longlong.h Thu Sep 17 12:00:00 2020 +0200
+++ b/longlong.h Tue Sep 22 11:30:02 2020 +0200
@@ -436,7 +436,7 @@
&& W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (bl) && -(USItype)(bl) < 0x100) \
+ if (__builtin_constant_p (bl) && -(USItype)(bl) < (USItype)(bl)) \
__asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), \
@@ -447,8 +447,8 @@
: "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC); \
} while (0)
/* FIXME: Extend the immediate range for the low word by using both ADDS and
- SUBS, since they set carry in the same way. Note: We need separate
- definitions for thumb and non-thumb to to th absense of RSC under thumb. */
+ SUBS, since they set carry in the same way. We need separate definitions
+ for thumb and non-thumb since thumb lacks RSC. */
#if defined (__thumb__)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
@@ -461,10 +461,6 @@
__asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
- else if (__builtin_constant_p (bl)) \
- __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
else \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
@@ -500,11 +496,7 @@
: "=r" (sh), "=&r" (sl) \
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
} \
- else if (__builtin_constant_p (bl)) \
- __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
- else /* only bh might be a constant */ \
+ else \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
@@ -560,11 +552,9 @@
#endif /* __arm__ */
#if defined (__aarch64__) && W_TYPE_SIZE == 64
-/* FIXME: Extend the immediate range for the low word by using both
- ADDS and SUBS, since they set carry in the same way. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000) \
+ if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl)) \
__asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
: "=r" (sh), "=&r" (sl) \
: "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
@@ -577,7 +567,7 @@
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000) \
+ if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl)) \
__asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
: "=r,r" (sh), "=&r,&r" (sl) \
: "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
@@ -1056,7 +1046,7 @@
&& (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \
|| HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen)
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulx\t%3, %0, %1" \
+ __asm__ ("mulx\t%3, %q0, %q1" \
: "=r" (w0), "=r" (w1) \
: "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
#else
@@ -1494,36 +1484,37 @@
This might seem strange, but gcc folds away the dead code late. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) { \
+ if (__builtin_constant_p (bl) \
+ && (bl) > -0x8000 && (bl) <= 0x8000 && (bl) != 0) { \
if (__builtin_constant_p (ah) && (ah) == 0) \
__asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
__CLOBBER_CC); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
__asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
__CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
- : "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ : "r" ((UDItype)(ah)), \
+ "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
__CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
- : "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ : "r" ((UDItype)(ah)), \
+ "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
__CLOBBER_CC); \
else \
__asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
- : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
+ "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
__CLOBBER_CC); \
} else { \
if (__builtin_constant_p (ah) && (ah) == 0) \
@@ -2056,8 +2047,10 @@
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
UWtype __x; \
- __x = (al) + (bl); \
- (sh) = (ah) + (bh) + (__x < (al)); \
+ UWtype __al = (al); \
+ UWtype __bl = (bl); \
+ __x = __al + __bl; \
+ (sh) = (ah) + (bh) + (__x < __al); \
(sl) = __x; \
} while (0)
#endif
@@ -2066,8 +2059,10 @@
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
UWtype __x; \
- __x = (al) - (bl); \
- (sh) = (ah) - (bh) - ((al) < (bl)); \
+ UWtype __al = (al); \
+ UWtype __bl = (bl); \
+ __x = __al - __bl; \
+ (sh) = (ah) - (bh) - (__al < __bl); \
(sl) = __x; \
} while (0)
#endif
More information about the gmp-commit
mailing list