[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
Wed Oct 19 23:26:32 CEST 2011
details: /var/hg/gmp/rev/1156dcf1087d
changeset: 14357:1156dcf1087d
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 19 09:04:47 2011 +0200
description:
Rewrite, similar to s390_64 code.
details: /var/hg/gmp/rev/375960c6fd7c
changeset: 14358:375960c6fd7c
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 19 09:05:07 2011 +0200
description:
Add cycle table.
details: /var/hg/gmp/rev/8aec7a5a2be7
changeset: 14359:8aec7a5a2be7
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Oct 19 09:06:58 2011 +0200
description:
Add cycle table.
diffstat:
mpn/s390_32/copyd.asm | 5 ++
mpn/s390_32/copyi.asm | 4 +
mpn/s390_32/esame/add_n.asm | 81 +++++++++++++++++++++++++++-----
mpn/s390_32/esame/addmul_1.asm | 7 ++
mpn/s390_32/esame/aorslsh1_n.asm | 6 ++-
mpn/s390_32/esame/bdiv_dbm1c.asm | 6 ++
mpn/s390_32/esame/mul_1.asm | 7 ++
mpn/s390_32/esame/sqr_diag_addlsh1.asm | 6 ++-
mpn/s390_32/esame/sub_n.asm | 83 +++++++++++++++++++++++++++++----
mpn/s390_32/esame/submul_1.asm | 7 ++
10 files changed, 185 insertions(+), 27 deletions(-)
diffs (truncated from 344 to 300 lines):
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/copyd.asm
--- a/mpn/s390_32/copyd.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/copyd.asm Wed Oct 19 09:06:58 2011 +0200
@@ -21,7 +21,12 @@
include(`../config.m4')
C cycles/limb
+C cycles/limb
+C z900 1.65
C z990 1.125
+C z9 ?
+C z10 ?
+C z196 ?
C FIXME:
C * Avoid saving/restoring callee-saves registers for n < 3. This could be
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/copyi.asm
--- a/mpn/s390_32/copyi.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/copyi.asm Wed Oct 19 09:06:58 2011 +0200
@@ -21,7 +21,11 @@
include(`../config.m4')
C cycles/limb
+C z900 0.75
C z990 0.375
+C z9 ?
+C z10 ?
+C z196 ?
C NOTE
C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/add_n.asm
--- a/mpn/s390_32/esame/add_n.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/add_n.asm Wed Oct 19 09:06:58 2011 +0200
@@ -1,4 +1,4 @@
-dnl S/390-32 mpn_add_n for systems with unsigned add/subtract instructions.
+dnl S/390-32 mpn_add_n
dnl Copyright 2011 Free Software Foundation, Inc.
@@ -17,8 +17,18 @@
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+include(`../config.m4')
-include(`../config.m4')
+C cycles/limb
+C z900 6.5
+C z990 3.5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Use r0 and save/restore one less register
C INPUT PARAMETERS
define(`rp', `%r2')
@@ -28,20 +38,65 @@
ASM_START()
PROLOGUE(mpn_add_n)
- st %r12, 48(%r15)
- lhi %r12, 0 C zero index register
- ahi %r12, 0 C clear C flag
+ stm %r6, %r12, 24(%r15)
-L(top): l %r0, 0(%r12,up)
- l %r1, 0(%r12,vp)
- alcr %r0, %r1
- st %r0, 0(%r12,rp)
- la %r12, 4(%r12)
- brct n, L(top)
+ la %r1, 3(n)
+ lhi %r7, 3
+ srl %r1, 2
+ nr %r7, n C n mod 4
+ je L(top) C The C flag is clear
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
- lhi %r2, 0
+L(b3): lm %r5, %r7, 0(up)
+ la up, 12(up)
+ lm %r9, %r11, 0(vp)
+ la vp, 12(vp)
+ alr %r5, %r9
+ alcr %r6, %r10
+ alcr %r7, %r11
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b1): l %r5, 0(up)
+ la up, 4(up)
+ l %r9, 0(vp)
+ la vp, 4(vp)
+ alr %r5, %r9
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up)
+ la up, 8(up)
+ lm %r9, %r10, 0(vp)
+ la vp, 8(vp)
+ alr %r5, %r9
+ alcr %r6, %r10
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(top): lm %r5, %r8, 0(up)
+ la up, 16(up)
+ lm %r9, %r12, 0(vp)
+ la vp, 16(vp)
+ alcr %r5, %r9
+ alcr %r6, %r10
+ alcr %r7, %r11
+ alcr %r8, %r12
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ brct %r1, L(top)
+
+L(end): lhi %r2, 0
alcr %r2, %r2
- l %r12, 48(%r15)
+ lm %r6, %r12, 24(%r15)
br %r14
EPILOGUE()
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/addmul_1.asm
--- a/mpn/s390_32/esame/addmul_1.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/addmul_1.asm Wed Oct 19 09:06:58 2011 +0200
@@ -19,6 +19,13 @@
include(`../config.m4')
+C cycles/limb
+C z900 18.5
+C z990 10
+C z9 ?
+C z10 ?
+C z196 ?
+
C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/aorslsh1_n.asm
--- a/mpn/s390_32/esame/aorslsh1_n.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/aorslsh1_n.asm Wed Oct 19 09:06:58 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 5
+C z900 9.25
+C z990 5
+C z9 ?
+C z10 ?
+C z196 ?
C TODO
C * Optimise for small n
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/bdiv_dbm1c.asm
--- a/mpn/s390_32/esame/bdiv_dbm1c.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/bdiv_dbm1c.asm Wed Oct 19 09:06:58 2011 +0200
@@ -19,6 +19,12 @@
include(`../config.m4')
+C cycles/limb
+C z900 14
+C z990 10
+C z9 ?
+C z10 ?
+C z196 ?
C INPUT PARAMETERS
define(`qp', `%r2')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/mul_1.asm
--- a/mpn/s390_32/esame/mul_1.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/mul_1.asm Wed Oct 19 09:06:58 2011 +0200
@@ -19,6 +19,13 @@
include(`../config.m4')
+C cycles/limb
+C z900 14
+C z990 9
+C z9 ?
+C z10 ?
+C z196 ?
+
C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/sqr_diag_addlsh1.asm
--- a/mpn/s390_32/esame/sqr_diag_addlsh1.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/sqr_diag_addlsh1.asm Wed Oct 19 09:06:58 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 8
+C z900 ?
+C z990 8
+C z9 ?
+C z10 ?
+C z196 ?
C INPUT PARAMETERS
define(`rp', `%r2')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/sub_n.asm
--- a/mpn/s390_32/esame/sub_n.asm Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/sub_n.asm Wed Oct 19 09:06:58 2011 +0200
@@ -1,4 +1,4 @@
-dnl S/390-32 mpn_sub_n for systems with unsigned add/subtract instructions.
+dnl S/390-32 mpn_sub_n
dnl Copyright 2011 Free Software Foundation, Inc.
@@ -17,8 +17,18 @@
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+include(`../config.m4')
-include(`../config.m4')
+C cycles/limb
+C z900 6.5
+C z990 3.5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Use r0 and save/restore one less register
C INPUT PARAMETERS
define(`rp', `%r2')
@@ -28,19 +38,68 @@
ASM_START()
PROLOGUE(mpn_sub_n)
- st %r12, 48(%r15)
- slr %r12, %r12 C zero index register and set C flag
+ stm %r6, %r12, 24(%r15)
-L(top): l %r0, 0(%r12,up)
- l %r1, 0(%r12,vp)
- slbr %r0, %r1
- st %r0, 0(%r12,rp)
- la %r12, 4(%r12)
- brct n, L(top)
+ la %r1, 3(n)
+ lhi %r7, 3
+ srl %r1, 2
+ nr %r7, n C n mod 4
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
- slbr %r2, %r2
+L(b3): lm %r5, %r7, 0(up)
+ la up, 12(up)
+ lm %r9, %r11, 0(vp)
+ la vp, 12(vp)
+ slr %r5, %r9
+ slbr %r6, %r10
+ slbr %r7, %r11
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b0): slr %r5, %r5 C set C flag
+ j L(top)
+
+L(b1): l %r5, 0(up)
+ la up, 4(up)
+ l %r9, 0(vp)
+ la vp, 4(vp)
+ slr %r5, %r9
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up)
+ la up, 8(up)
+ lm %r9, %r10, 0(vp)
More information about the gmp-commit mailing list