bug in longlong.h for aarch64 sub_ddmmss
Torbjörn Granlund
tg at gmplib.org
Tue Jun 16 16:37:43 UTC 2020
Vincent Lefevre <vincent at vinc17.net> writes:
> Note: in the tests, it is important to test the macro on constants
> in order to test the "if" case.
Indeed, and use one macro expansion per trivial function, or else gcc might
get the idea to CSE the constants.
This bug comes at a bad time for me. I am considering a major purge along the
lines of the patch below.
If you, Vincent, have the time to go through these cases to see what can
safely remain, that would be useful.
*** /tmp/extdiff.sqiwiR/gmp-main.89ed9376523e/longlong.h Thu Jun 11 15:53:20 2020
--- /home/tege/prec/gmp-main/longlong.h Tue Jun 16 15:59:10 2020
***************
*** 436,453 ****
&& W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! do { \
! if (__builtin_constant_p (bl) && -(USItype)(bl) < 0x100) \
! __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
! : "r" (ah), "rI" (bh), \
! "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC); \
! else \
! __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
! : "=r" (sh), "=&r" (sl) \
! : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC); \
! } while (0)
/* FIXME: Extend the immediate range for the low word by using both ADDS and
! SUBS, since they set carry in the same way. Note: We need separate
! definitions for thumb and non-thumb to to th absense of RSC under thumb. */
#if defined (__thumb__)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
--- 436,445 ----
&& W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
! : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
/* FIXME: Extend the immediate range for the low word by using both ADDS and
! SUBS, since they set carry in the same way. We need separate definitions
! for thumb and non-thumb since thumb lacks RSC. */
#if defined (__thumb__)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
***************
*** 564,592 ****
ADDS and SUBS, since they set carry in the same way. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! do { \
! if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000) \
! __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
! : "=r" (sh), "=&r" (sl) \
! : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
! "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\
! else \
! __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
! : "=r" (sh), "=&r" (sl) \
! : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
! "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\
! } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! do { \
! if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000) \
! __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
! : "=r,r" (sh), "=&r,&r" (sl) \
! : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
! "r,Z" ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\
! else \
! __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
! : "=r,r" (sh), "=&r,&r" (sl) \
! : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
! "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC);\
! } while(0);
#if __GMP_GNUC_PREREQ (4,9)
#define umul_ppmm(w1, w0, u, v) \
--- 556,568 ----
ADDS and SUBS, since they set carry in the same way. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
! __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
! : "=r" (sh), "=&r" (sl) \
! : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
! "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
! __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
! : "=r,r" (sh), "=&r,&r" (sl) \
! : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
! "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC)
#if __GMP_GNUC_PREREQ (4,9)
#define umul_ppmm(w1, w0, u, v) \
***************
*** 1491,1561 ****
__CLOBBER_CC); \
} while (0)
- /* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
- This might seem strange, but gcc folds away the dead code late. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
! if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) { \
! if (__builtin_constant_p (ah) && (ah) == 0) \
! __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
! __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (bh) && (bh) == 0) \
! __asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), \
! "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
! __asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), \
! "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
! __CLOBBER_CC); \
! else \
! __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
! __CLOBBER_CC); \
! } else { \
! if (__builtin_constant_p (ah) && (ah) == 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (bh) && (bh) == 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else \
! __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! } \
} while (0)
#endif /* ! _LONG_LONG_LIMB */
--- 1467,1502 ----
__CLOBBER_CC); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
! if (__builtin_constant_p (ah) && (ah) == 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (bh) && (bh) == 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
! __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
! else \
! __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
! : "=r" (sh), "=&r" (sl) \
! : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
! "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
! __CLOBBER_CC); \
} while (0)
#endif /* ! _LONG_LONG_LIMB */
--
Torbjörn
Please encrypt, key id 0xC8601622
More information about the gmp-bugs
mailing list