[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Oct 12 22:31:22 CEST 2011
details: /var/hg/gmp/rev/0ece91594745
changeset: 14315:0ece91594745
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 12 22:29:25 2011 +0200
description:
(s390): Improve umul_ppmm and udiv_qrnnd support.
details: /var/hg/gmp/rev/582c63908d36
changeset: 14316:582c63908d36
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 12 22:29:30 2011 +0200
description:
*** empty log message ***
details: /var/hg/gmp/rev/dccaab90c2b0
changeset: 14317:dccaab90c2b0
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 12 22:30:00 2011 +0200
description:
Reduce register usage.
details: /var/hg/gmp/rev/69bb61b2a451
changeset: 14318:69bb61b2a451
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 12 22:30:37 2011 +0200
description:
Reduce register usage.
details: /var/hg/gmp/rev/22f8970a0aad
changeset: 14319:22f8970a0aad
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 12 22:31:18 2011 +0200
description:
*** empty log message ***
diffstat:
ChangeLog | 12 ++++++++++
longlong.h | 57 ++++++++++++++++++++++++++++++++++++++++++++-----
mpn/s390_64/lshift.asm | 36 +++++++++++++++---------------
mpn/s390_64/rshift.asm | 36 +++++++++++++++---------------
4 files changed, 99 insertions(+), 42 deletions(-)
diffs (289 lines):
diff -r 295da88bea21 -r 22f8970a0aad ChangeLog
--- a/ChangeLog Tue Oct 11 15:28:37 2011 +0200
+++ b/ChangeLog Wed Oct 12 22:31:18 2011 +0200
@@ -1,3 +1,15 @@
+2011-10-12 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/s390_64/lshift.asm: Reduce register usage.
+ * mpn/s390_64/rshift.asm: Likewise.
+
+ * longlong.h (s390 umul_ppmm): With new-enough gcc, avoid asm.
+
+ From Andreas Krebbel:
+ * longlong.h (s390 umul_ppmm): Support 32-bit limbs with gcc using
+ 64-bit registers.
+ (s390 udiv_qrnnd): Likewise.
+
2011-10-11 Torbjorn Granlund <tege at gmplib.org>
* configure.in (s390x): Pass -mzarch to gcc in 32-bit mode.
diff -r 295da88bea21 -r 22f8970a0aad longlong.h
--- a/longlong.h Tue Oct 11 15:28:37 2011 +0200
+++ b/longlong.h Wed Oct 12 22:31:18 2011 +0200
@@ -657,26 +657,39 @@
#if defined (__zarch__)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (bl)) \
+/* if (__builtin_constant_p (bl)) \
__asm__ ("alfi\t%1,%o5\n\talcr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
: "0" (ah), "r" (bh), "%1" (al), "n" (bl) __CLOBBER_CC);\
else \
- __asm__ ("alr\t%1,%5\n\talcr\t%0,%3" \
+*/ __asm__ ("alr\t%1,%5\n\talcr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
: "0" (ah), "r" (bh), "%1" (al), "r" (bl)__CLOBBER_CC); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (bl)) \
+/* if (__builtin_constant_p (bl)) \
__asm__ ("slfi\t%1,%o5\n\tslbr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
: "0" (ah), "r" (bh), "1" (al), "n" (bl) __CLOBBER_CC); \
else \
- __asm__ ("slr\t%1,%5\n\tslbr\t%0,%3" \
+*/ __asm__ ("slr\t%1,%5\n\tslbr\t%0,%3" \
: "=r" (sh), "=&r" (sl) \
: "0" (ah), "r" (bh), "1" (al), "r" (bl) __CLOBBER_CC); \
} while (0)
+#if __GMP_GNUC_PREREQ (4,5)
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __x.__ll = (UDItype) m0 * (UDItype) m1; \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ } while (0)
+#else
+#if 0
+/* FIXME: this fails if gcc knows about the 64-bit registers. Use only
+ with a new enough processor pretending we have 32-bit registers. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
union {UDItype __ll; \
@@ -687,6 +700,25 @@
: "%0" (m0), "r" (m1)); \
(xh) = __x.__i.__h; (xl) = __x.__i.__l; \
} while (0)
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ /* When we have 64-bit regs and gcc is aware of that, we cannot simply use
+ DImode for the product, since that would be allocated to a single 64-bit
+ register, whereas mlr uses the low 32-bits of an even-odd register pair.
+ */ \
+ register USItype __r0 __asm__ ("0"); \
+ register USItype __r1 __asm__ ("1") = (m0); \
+ __asm__ ("mlr\t%0,%3" \
+ : "=r" (__r0), "=r" (__r1) \
+ : "r" (__r1), "r" (m1)); \
+ (xh) = __r0; (xl) = __r1; \
+ } while (0)
+#endif /* if 0 */
+#endif
+#if 0
+/* FIXME: this fails if gcc knows about the 64-bit registers. Use only
+ with a new enough processor pretending we have 32-bit registers. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
union {UDItype __ll; \
@@ -699,7 +731,18 @@
(q) = __x.__i.__l; (r) = __x.__i.__h; \
} while (0)
#else
-#define smul_ppmm(xh, xl, m0, m1) \
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ register USItype __r0 __asm__ ("0") = (n1); \
+ register USItype __r1 __asm__ ("1") = (n0); \
+ __asm__ ("dlr\t%0,%4" \
+ : "=r" (__r0), "=r" (__r1) \
+ : "r" (__r0), "r" (__r1), "r" (d)); \
+ (q) = __r1; (r) = __r0; \
+ } while (0)
+#endif /* if 0 */
+/* FIXME: this fails if gcc knows about the 64-bit registers. */
+#define smul_ppmm(xh, xl, m0, m1) \
do { \
union {DItype __ll; \
struct {USItype __h, __l;} __i; \
@@ -709,7 +752,8 @@
: "%0" (m0), "r" (m1)); \
(xh) = __x.__i.__h; (xl) = __x.__i.__l; \
} while (0)
-#define sdiv_qrnnd(q, r, n1, n0, d) \
+/* FIXME: this fails if gcc knows about the 64-bit registers. */
+#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
union {DItype __ll; \
struct {USItype __h, __l;} __i; \
@@ -1757,6 +1801,7 @@
#endif /* NO_ASM */
+/* FIXME: "sidi" here is highly doubtful, should sometimes be "diti". */
#if !defined (umul_ppmm) && defined (__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
{ \
diff -r 295da88bea21 -r 22f8970a0aad mpn/s390_64/lshift.asm
--- a/mpn/s390_64/lshift.asm Tue Oct 11 15:28:37 2011 +0200
+++ b/mpn/s390_64/lshift.asm Wed Oct 12 22:31:18 2011 +0200
@@ -44,14 +44,14 @@
srlg %r2, %r1, 0(%r4)
br %r14
-L(gt1): stmg %r6, %r10, 48(%r15)
+L(gt1): stmg %r6, %r9, 48(%r15)
sllg %r1, n, 3
aghi up, -8
lg %r9, 0(%r1,up)
- srlg %r10, n, 2 C loop count
+ srlg %r8, n, 2 C loop count
lcgr tnc, cnt
lghi %r7, 3
@@ -59,7 +59,7 @@
ngr %r7, n C n mod 4
je L(b0)
- aghi %r10, 1
+ aghi %r8, 1
cghi %r7, 2
jl L(b1)
je L(b2)
@@ -69,36 +69,36 @@
L(b0): aghi %r1, -24
j L(m0)
L(b1): aghi %r1, -32
- aghi %r10, -1
+ aghi %r8, -1
j L(top)
L(b2): aghi %r1, -8
j L(m2)
L(top): lg %r7, 24(%r1,up)
- srlg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ srlg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
sllg %r0, %r7, 0(cnt)
- stg %r8, 24(%r1,rp)
+ stg %r4, 24(%r1,rp)
L(m0): lg %r7, 16(%r1,up)
- srlg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ srlg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
sllg %r0, %r7, 0(cnt)
- stg %r8, 16(%r1,rp)
+ stg %r4, 16(%r1,rp)
L(m3): lg %r7, 8(%r1,up)
- srlg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ srlg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
sllg %r0, %r7, 0(cnt)
- stg %r8, 8(%r1,rp)
+ stg %r4, 8(%r1,rp)
L(m2): lg %r7, 0(%r1,up)
- srlg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ srlg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
sllg %r0, %r7, 0(cnt)
- stg %r8, 0(%r1,rp)
+ stg %r4, 0(%r1,rp)
aghi %r1, -32
- brctg %r10, L(top)
+ brctg %r8, L(top)
L(end): stg %r0, 24(%r1,rp)
srlg %r2, %r9, 0(tnc)
- lmg %r6, %r10, 48(%r15)
+ lmg %r6, %r9, 48(%r15)
br %r14
EPILOGUE()
diff -r 295da88bea21 -r 22f8970a0aad mpn/s390_64/rshift.asm
--- a/mpn/s390_64/rshift.asm Tue Oct 11 15:28:37 2011 +0200
+++ b/mpn/s390_64/rshift.asm Wed Oct 12 22:31:18 2011 +0200
@@ -35,11 +35,11 @@
ASM_START()
PROLOGUE(mpn_rshift)
- stmg %r6, %r10, 48(%r15)
+ stmg %r6, %r9, 48(%r15)
lg %r9, 0(up)
- srlg %r10, n, 2 C loop count
+ srlg %r8, n, 2 C loop count
lcgr tnc, cnt
lghi %r7, 3
@@ -47,7 +47,7 @@
ngr %r7, n C n mod 4
je L(b0)
- aghi %r10, 1
+ aghi %r8, 1
cghi %r7, 2
jl L(b1)
je L(b2)
@@ -57,36 +57,36 @@
L(b0): lghi %r1, -8
j L(m0)
L(b1): lghi %r1, 0
- brctg %r10, L(top)
+ brctg %r8, L(top)
j L(end)
L(b2): lghi %r1, -24
j L(m2)
L(top): lg %r7, 8(%r1,up)
- sllg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ sllg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
srlg %r0, %r7, 0(cnt)
- stg %r8, 0(%r1,rp)
+ stg %r4, 0(%r1,rp)
L(m0): lg %r7, 16(%r1,up)
- sllg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ sllg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
srlg %r0, %r7, 0(cnt)
- stg %r8, 8(%r1,rp)
+ stg %r4, 8(%r1,rp)
L(m3): lg %r7, 24(%r1,up)
- sllg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ sllg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
srlg %r0, %r7, 0(cnt)
- stg %r8, 16(%r1,rp)
+ stg %r4, 16(%r1,rp)
L(m2): lg %r7, 32(%r1,up)
- sllg %r8, %r7, 0(tnc)
- ogr %r8, %r0
+ sllg %r4, %r7, 0(tnc)
+ ogr %r4, %r0
srlg %r0, %r7, 0(cnt)
- stg %r8, 24(%r1,rp)
+ stg %r4, 24(%r1,rp)
aghi %r1, 32
- brctg %r10, L(top)
+ brctg %r8, L(top)
L(end): stg %r0, 0(%r1,rp)
sllg %r2, %r9, 0(tnc)
- lmg %r6, %r10, 48(%r15)
+ lmg %r6, %r9, 48(%r15)
br %r14
EPILOGUE()
More information about the gmp-commit mailing list