[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Oct 11 13:18:59 CEST 2011
details: /var/hg/gmp/rev/f7c5e095aa09
changeset: 14309:f7c5e095aa09
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Oct 11 12:10:38 2011 +0200
description:
Back-port to z900 at the expense of 0.25 c/l.
details: /var/hg/gmp/rev/9dc642ebdbcb
changeset: 14310:9dc642ebdbcb
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Oct 11 13:14:48 2011 +0200
description:
(s390x): Add __CLOBBER_CC for relevant asm patterns.
details: /var/hg/gmp/rev/b6d739466cf1
changeset: 14311:b6d739466cf1
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Oct 11 13:15:32 2011 +0200
description:
(s390x): Add __CLOBBER_CC for relevant asm patterns.
diffstat:
longlong.h | 16 +++++++++-------
mpn/generic/mod_1_1.c | 2 +-
mpn/s390_64/copyd.asm | 44 +++++++++++++++++++++++---------------------
3 files changed, 33 insertions(+), 29 deletions(-)
diffs (181 lines):
diff -r c0e93ad9485e -r b6d739466cf1 longlong.h
--- a/longlong.h Tue Oct 11 00:09:49 2011 +0200
+++ b/longlong.h Tue Oct 11 13:15:32 2011 +0200
@@ -660,22 +660,22 @@
if (__builtin_constant_p (bl)) \
__asm__ ("alfi\t%1,%o5\n\talcr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
- : "0" (ah), "r" (bh), "%1" (al), "n" (bl)); \
+ : "0" (ah), "r" (bh), "%1" (al), "n" (bl) __CLOBBER_CC);\
else \
__asm__ ("alr\t%1,%5\n\talcr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
- : "0" (ah), "r" (bh), "%1" (al), "r" (bl)); \
+ : "0" (ah), "r" (bh), "%1" (al), "r" (bl)__CLOBBER_CC); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bl)) \
__asm__ ("slfi\t%1,%o5\n\tslbr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
- : "0" (ah), "r" (bh), "1" (al), "n" (bl)); \
+ : "0" (ah), "r" (bh), "1" (al), "n" (bl) __CLOBBER_CC); \
else \
__asm__ ("slr\t%1,%5\n\tslbr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
- : "0" (ah), "r" (bh), "1" (al), "r" (bl)); \
+ : "0" (ah), "r" (bh), "1" (al), "r" (bl) __CLOBBER_CC); \
} while (0)
#define umul_ppmm(xh, xl, m0, m1) \
do { \
@@ -733,14 +733,14 @@
__asm__ ("algr\t%1,%5\n\talcgr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
: "0" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "%1" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "%1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
__asm__ ("slgr\t%1,%5\n\tslbgr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
: "0" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "1" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \
} while (0)
#define umul_ppmm(xh, xl, m0, m1) \
do { \
@@ -769,7 +769,9 @@
union {unsigned int __attribute__ ((mode(TI))) __ll; \
struct {UDItype __h, __l;} __i; \
} __clr_cnt; \
- __asm__ ("flogr\t%0, %1" : "=r" (__clr_cnt.__ll) : "r" (x)); \
+ __asm__ ("flogr\t%0,%1" \
+ : "=r" (__clr_cnt.__ll) \
+ : "r" (x) __CLOBBER_CC); \
(cnt) = __clr_cnt.__i.__h; \
} while (0)
#endif
diff -r c0e93ad9485e -r b6d739466cf1 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c Tue Oct 11 00:09:49 2011 +0200
+++ b/mpn/generic/mod_1_1.c Tue Oct 11 13:15:32 2011 +0200
@@ -104,7 +104,7 @@
"lcgr %0, %0" \
: "=r" (m), "=r" (s1), "=&r" (s0) \
: "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)))
+ "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
#endif
#if defined (__arm__) && W_TYPE_SIZE == 32
diff -r c0e93ad9485e -r b6d739466cf1 mpn/s390_64/copyd.asm
--- a/mpn/s390_64/copyd.asm Tue Oct 11 00:09:49 2011 +0200
+++ b/mpn/s390_64/copyd.asm Tue Oct 11 13:15:32 2011 +0200
@@ -21,7 +21,7 @@
include(`../config.m4')
C cycles/limb
-C z990 1.5
+C z990 1.75 See comment in loop about how to reach 1.5 c/l
C FIXME:
C * Avoid saving/restoring callee-saves registers for n < 3. This could be
@@ -41,11 +41,12 @@
stmg %r6, %r10, 48(%r15)
sllg %r1, n, 3
- la %r10, 7(n)
+ la %r10, 8(n)
+ aghi %r1, -64
srlg %r10, %r10, 3
- lay rp, -64(%r1,rp_param)
- lay up, -64(%r1,up_param)
+ la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later
+ la up, 0(%r1,up_param) C FIXME use lay on z990 and later
lghi %r7, 7
ngr %r7, n C n mod 8
@@ -55,19 +56,20 @@
je L(b1)
jh L(b2)
-L(b0): j L(top)
+L(b0): brctg %r10, L(top)
+ j L(end)
L(b1): lg %r0, 56(up)
- lay up, -8(up)
+ aghi up, -8
stg %r0, 56(rp)
- lay rp, -8(rp)
+ aghi rp, -8
brctg %r10, L(top)
j L(end)
L(b2): lmg %r0, %r1, 48(up)
- lay up, -16(up)
+ aghi up, -16
stmg %r0, %r1, 48(rp)
- lay rp, -16(rp)
+ aghi rp, -16
brctg %r10, L(top)
j L(end)
@@ -80,44 +82,44 @@
jh L(b7)
L(b5): lmg %r0, %r4, 24(up)
- lay up, -40(up)
+ aghi up, -40
stmg %r0, %r4, 24(rp)
- lay rp, -40(rp)
+ aghi rp, -40
brctg %r10, L(top)
j L(end)
L(b3): lmg %r0, %r2, 40(up)
- lay up, -24(up)
+ aghi up, -24
stmg %r0, %r2, 40(rp)
- lay rp, -24(rp)
+ aghi rp, -24
brctg %r10, L(top)
j L(end)
L(b4): lmg %r0, %r3, 32(up)
- lay up, -32(up)
+ aghi up, -32
stmg %r0, %r3, 32(rp)
- lay rp, -32(rp)
+ aghi rp, -32
brctg %r10, L(top)
j L(end)
L(b6): lmg %r0, %r5, 16(up)
- lay up, -48(up)
+ aghi up, -48
stmg %r0, %r5, 16(rp)
- lay rp, -48(rp)
+ aghi rp, -48
brctg %r10, L(top)
j L(end)
L(b7): lmg %r0, %r6, 8(up)
- lay up, -56(up)
+ aghi up, -56
stmg %r0, %r6, 8(rp)
- lay rp, -56(rp)
+ aghi rp, -56
brctg %r10, L(top)
j L(end)
L(top): lmg %r0, %r7, 0(up)
- lay up, -64(up)
+ aghi up, -64 C FIXME using lay here saves 0.25 c/l
stmg %r0, %r7, 0(rp)
- lay rp, -64(rp)
+ aghi rp, -64
brctg %r10, L(top)
L(end): lmg %r6, %r10, 48(%r15)
More information about the gmp-commit mailing list