[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Oct 28 00:14:16 CEST 2011
details: /var/hg/gmp/rev/5eeab2fe5ed3
changeset: 14392:5eeab2fe5ed3
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Oct 28 00:13:14 2011 +0200
description:
Rewrite s390_32/esame add/sub code, move result to aors_n.asm.
details: /var/hg/gmp/rev/b31a653eb776
changeset: 14393:b31a653eb776
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Oct 28 00:13:42 2011 +0200
description:
*** empty log message ***
diffstat:
ChangeLog | 4 +
mpn/s390_32/esame/add_n.asm | 102 ----------------------------------
mpn/s390_32/esame/aors_n.asm | 126 +++++++++++++++++++++++++++++++++++++++++++
mpn/s390_32/esame/sub_n.asm | 105 -----------------------------------
4 files changed, 130 insertions(+), 207 deletions(-)
diffs (truncated from 356 to 300 lines):
diff -r e198ae588b47 -r b31a653eb776 ChangeLog
--- a/ChangeLog Thu Oct 27 15:58:31 2011 +0200
+++ b/ChangeLog Fri Oct 28 00:13:42 2011 +0200
@@ -1,3 +1,7 @@
+2011-10-28 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/s390_32/esame/aors_n.asm: New file, with rewritten add/sub code.
+
2011-10-27 Torbjorn Granlund <tege at gmplib.org>
From Per Olofsson:
diff -r e198ae588b47 -r b31a653eb776 mpn/s390_32/esame/add_n.asm
--- a/mpn/s390_32/esame/add_n.asm Thu Oct 27 15:58:31 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-dnl S/390-32 mpn_add_n
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C z900 6.5
-C z990 3.5
-C z9 ?
-C z10 ?
-C z196 ?
-
-C TODO
-C * Optimise for small n
-C * Use r0 and save/restore one less register
-
-C INPUT PARAMETERS
-define(`rp', `%r2')
-define(`up', `%r3')
-define(`vp', `%r4')
-define(`n', `%r5')
-
-ASM_START()
-PROLOGUE(mpn_add_n)
- stm %r6, %r12, 24(%r15)
-
- la %r1, 3(n)
- lhi %r7, 3
- srl %r1, 2
- nr %r7, n C n mod 4
- je L(top) C The C flag is clear
- chi %r7, 2
- jl L(b1)
- je L(b2)
-
-L(b3): lm %r5, %r7, 0(up)
- la up, 12(up)
- lm %r9, %r11, 0(vp)
- la vp, 12(vp)
- alr %r5, %r9
- alcr %r6, %r10
- alcr %r7, %r11
- stm %r5, %r7, 0(rp)
- la rp, 12(rp)
- brct %r1, L(top)
- j L(end)
-
-L(b1): l %r5, 0(up)
- la up, 4(up)
- l %r9, 0(vp)
- la vp, 4(vp)
- alr %r5, %r9
- st %r5, 0(rp)
- la rp, 4(rp)
- brct %r1, L(top)
- j L(end)
-
-L(b2): lm %r5, %r6, 0(up)
- la up, 8(up)
- lm %r9, %r10, 0(vp)
- la vp, 8(vp)
- alr %r5, %r9
- alcr %r6, %r10
- stm %r5, %r6, 0(rp)
- la rp, 8(rp)
- brct %r1, L(top)
- j L(end)
-
-L(top): lm %r5, %r8, 0(up)
- la up, 16(up)
- lm %r9, %r12, 0(vp)
- la vp, 16(vp)
- alcr %r5, %r9
- alcr %r6, %r10
- alcr %r7, %r11
- alcr %r8, %r12
- stm %r5, %r8, 0(rp)
- la rp, 16(rp)
- brct %r1, L(top)
-
-L(end): lhi %r2, 0
- alcr %r2, %r2
-
- lm %r6, %r12, 24(%r15)
- br %r14
-EPILOGUE()
diff -r e198ae588b47 -r b31a653eb776 mpn/s390_32/esame/aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_32/esame/aors_n.asm Fri Oct 28 00:13:42 2011 +0200
@@ -0,0 +1,126 @@
+dnl S/390-32 mpn_add_n and mpn_sub_n.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 2.75-3 (fast for even n, slow for odd n)
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Use r0 and save/restore one less register
+C * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+C Pick the mnemonics and the RETVAL sequence for the add_n or sub_n build.
+ifdef(`OPERATION_add_n', `
+ define(ADSB, al)
+ define(ADSBCR, alcr)
+ define(ADSBC, alc)
+ define(RETVAL,`dnl
+ lhi %r2, 0
+ alcr %r2, %r2')
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADSB, sl)
+ define(ADSBCR, slbr)
+ define(ADSBC, slb)
+ define(RETVAL,`dnl
+ slbr %r2, %r2
+ lcr %r2, %r2')
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ stm %r6, %r8, 24(%r15) C save r6-r8, used below as limb registers
+C Split the work into one lead-in block plus 4-limb passes of L(top).
+ ahi n, 3 C n += 3; n (r5) is reused for limbs afterwards
+ lhi %r7, 3
+ lr %r1, n
+ srl %r1, 2 C r1 = (n + 3) / 4, block count for brct
+ nr %r7, n C r7 = (n + 3) mod 4 (n was bumped above)
+ je L(b1) C r7 = 0: n = 1 (mod 4)
+ chi %r7, 2
+ jl L(b2) C r7 = 1: n = 2 (mod 4)
+ jne L(b0) C r7 = 3: n = 0 (mod 4); r7 = 2 falls into b3
+
+L(b3): lm %r5, %r7, 0(up) C lead-in block of 3 limbs
+ la up, 12(up)
+ ADSB %r5, 0(vp) C first limb: no carry/borrow input
+ ADSBC %r6, 4(vp)
+ ADSBC %r7, 8(vp)
+ la vp, 12(vp)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r1, L(top) C loop if further 4-limb blocks remain
+ j L(end)
+
+L(b0): lm %r5, %r8, 0(up) C These redundant insns are no mistake,
+ la up, 16(up) C they are needed to make the main loop run
+ ADSB %r5, 0(vp) C fast for n = 0 (mod 4).
+ ADSBC %r6, 4(vp)
+ j L(m0) C join the loop body after its first two limbs
+
+L(b1): l %r5, 0(up) C lead-in block of 1 limb
+ la up, 4(up)
+ ADSB %r5, 0(vp)
+ la vp, 4(vp)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up) C lead-in block of 2 limbs
+ la up, 8(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+ la vp, 8(vp)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(top): lm %r5, %r8, 0(up) C main loop: 4 limbs per pass
+ la up, 16(up)
+ ADSBC %r5, 0(vp) C continues the carry/borrow chain
+ ADSBC %r6, 4(vp)
+L(m0): ADSBC %r7, 8(vp) C entry point used by L(b0)
+ ADSBC %r8, 12(vp)
+ la vp, 16(vp)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ brct %r1, L(top)
+C RETVAL leaves the final carry/borrow in r2 (presumably the return value).
+L(end): RETVAL
+ lm %r6, %r8, 24(%r15) C restore r6-r8
+ br %r14
+EPILOGUE()
diff -r e198ae588b47 -r b31a653eb776 mpn/s390_32/esame/sub_n.asm
--- a/mpn/s390_32/esame/sub_n.asm Thu Oct 27 15:58:31 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-dnl S/390-32 mpn_sub_n
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C z900 6.5
-C z990 3.5
-C z9 ?
-C z10 ?
-C z196 ?
-
-C TODO
-C * Optimise for small n
-C * Use r0 and save/restore one less register
-
-C INPUT PARAMETERS
-define(`rp', `%r2')
-define(`up', `%r3')
-define(`vp', `%r4')
-define(`n', `%r5')
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
- stm %r6, %r12, 24(%r15)
-
- la %r1, 3(n)
- lhi %r7, 3
- srl %r1, 2
- nr %r7, n C n mod 4
- je L(b0)
- chi %r7, 2
- jl L(b1)
More information about the gmp-commit
mailing list