[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Oct 23 02:24:01 CEST 2011
details: /var/hg/gmp/rev/777063a077fd
changeset: 14369:777063a077fd
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 23 02:17:23 2011 +0200
description:
Save/restore only used registers.
details: /var/hg/gmp/rev/6ab67d99bfd3
changeset: 14370:6ab67d99bfd3
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 23 02:20:52 2011 +0200
description:
*** empty log message ***
details: /var/hg/gmp/rev/856307c460f1
changeset: 14371:856307c460f1
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 23 02:22:20 2011 +0200
description:
Reorg s390_64 {add,sub}lsh1_n code, add rsblsh1_n.
details: /var/hg/gmp/rev/8a0fc314ae69
changeset: 14372:8a0fc314ae69
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 23 02:23:23 2011 +0200
description:
Shave off 1 c/l.
details: /var/hg/gmp/rev/1d60286bb006
changeset: 14373:1d60286bb006
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 23 02:23:26 2011 +0200
description:
*** empty log message ***
diffstat:
ChangeLog | 10 ++
mpn/s390_64/aorrlsh1_n.asm | 157 ++++++++++++++++++++++++++++++++++++++++++++
mpn/s390_64/aors_n.asm | 11 +-
mpn/s390_64/aorslsh1_n.asm | 160 ---------------------------------------------
mpn/s390_64/bdiv_dbm1c.asm | 6 +-
mpn/s390_64/logops_n.asm | 136 +++++++++++++++++++-------------------
mpn/s390_64/sublsh1_n.asm | 158 ++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 402 insertions(+), 236 deletions(-)
diffs (truncated from 825 to 300 lines):
diff -r f67fd2d23588 -r 1d60286bb006 ChangeLog
--- a/ChangeLog Sat Oct 22 19:29:24 2011 +0200
+++ b/ChangeLog Sun Oct 23 02:23:26 2011 +0200
@@ -1,5 +1,15 @@
+2011-10-23 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/s390_64/bdiv_dbm1c.asm: Shave off 1 c/l.
+
+ * mpn/s390_64/aorrlsh1_n.asm: New file, developed from aorslsh1_n.asm.
+ * mpn/s390_64/sublsh1_n.asm: New file.
+ * mpn/s390_64/aorslsh1_n.asm: Remove file.
+
2011-10-22 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/s390_64/logops_n.asm: New file.
+
* mpn/s390_64/aors_n.asm: New file, with rewritten add/sub code.
2011-10-20 Torbjorn Granlund <tege at gmplib.org>
diff -r f67fd2d23588 -r 1d60286bb006 mpn/s390_64/aorrlsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/aorrlsh1_n.asm Sun Oct 23 02:23:26 2011 +0200
@@ -0,0 +1,157 @@
+dnl S/390-64 mpn_addlsh1_n and mpn_rsblsh1_n.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 10
+C z990 5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n, avoid 'la' like in aors_n.asm.
+C * Tune to reach 4 c/l. For addlsh1, we could let the main alcgr propagate
+C carry to the lsh1 alcgr. But even for sublsh1_n 5 c/l cannot be optimal.
+C * Compute RETVAL for sublsh1_n less stupidly.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADSB, alg)
+ define(ADSBC, alcg)
+ define(INITCY, `lghi %r9, -1')
+ define(RETVAL, `la %r2, 2(%r1,%r9)')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_rsblsh1_n',`
+ define(ADSB, slg)
+ define(ADSBC, slbg)
+ define(INITCY, `lghi %r9, 0')
+ define(RETVAL,`dnl
+ algr %r1, %r9
+ lghi %r2, 1
+ algr %r2, %r1')
+ define(func, mpn_rsblsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+ stmg %r6, %r9, 48(%r15)
+
+ aghi n, 3
+ lghi %r7, 3
+ srlg %r0, n, 2
+ ngr %r7, n C n mod 4
+ je L(b1)
+ cghi %r7, 2
+ jl L(b2)
+ jne L(b0)
+
+L(b3): lmg %r5, %r7, 0(vp)
+ la vp, 24(vp)
+
+ algr %r5, %r5
+ alcgr %r6, %r6
+ alcgr %r7, %r7
+ slbgr %r1, %r1
+
+ ADSB %r5, 0(up)
+ ADSBC %r6, 8(up)
+ ADSBC %r7, 16(up)
+ la up, 24(up)
+ slbgr %r9, %r9
+
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+L(b0): lghi %r1, -1
+ INITCY
+ j L(top)
+
+L(b1): lg %r5, 0(vp)
+ la vp, 8(vp)
+
+ algr %r5, %r5
+ slbgr %r1, %r1
+ ADSB %r5, 0(up)
+ la up, 8(up)
+ slbgr %r9, %r9
+
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+L(b2): lmg %r5, %r6, 0(vp)
+ la vp, 16(vp)
+
+ algr %r5, %r5
+ alcgr %r6, %r6
+ slbgr %r1, %r1
+
+ ADSB %r5, 0(up)
+ ADSBC %r6, 8(up)
+ la up, 16(up)
+ slbgr %r9, %r9
+
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+L(top): lmg %r5, %r8, 0(vp)
+ la vp, 32(vp)
+
+ aghi %r1, 1 C restore carry
+
+ alcgr %r5, %r5
+ alcgr %r6, %r6
+ alcgr %r7, %r7
+ alcgr %r8, %r8
+
+ slbgr %r1, %r1 C save carry
+
+ aghi %r9, 1 C restore carry
+
+ ADSBC %r5, 0(up)
+ ADSBC %r6, 8(up)
+ ADSBC %r7, 16(up)
+ ADSBC %r8, 24(up)
+ la up, 32(up)
+
+ slbgr %r9, %r9 C save carry
+
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+ brctg %r0, L(top)
+
+L(end): RETVAL
+ lmg %r6, %r9, 48(%r15)
+ br %r14
+EPILOGUE()
diff -r f67fd2d23588 -r 1d60286bb006 mpn/s390_64/aors_n.asm
--- a/mpn/s390_64/aors_n.asm Sat Oct 22 19:29:24 2011 +0200
+++ b/mpn/s390_64/aors_n.asm Sun Oct 23 02:23:26 2011 +0200
@@ -29,6 +29,8 @@
C TODO
C * Optimise for small n
C * Use r0 and save/restore one less register
+C * Using logops_n's v1 inner loop operand order make the loop about 20%
+C faster, at the expense of highly alignment-dependent performance.
C INPUT PARAMETERS
define(`rp', `%r2')
@@ -40,7 +42,7 @@
define(ADSB, alg)
define(ADSBCR, alcgr)
define(ADSBC, alcg)
- define(RETVAL,`
+ define(RETVAL,`dnl
lghi %r2, 0
alcgr %r2, %r2')
define(func, mpn_add_n)
@@ -49,7 +51,7 @@
define(ADSB, slg)
define(ADSBCR, slbgr)
define(ADSBC, slbg)
- define(RETVAL,`
+ define(RETVAL,`dnl
slbgr %r2, %r2
lcgr %r2, %r2')
define(func, mpn_sub_n)
@@ -59,7 +61,7 @@
ASM_START()
PROLOGUE(func)
- stmg %r6, %r12, 48(%r15)
+ stmg %r6, %r8, 48(%r15)
aghi n, 3
lghi %r7, 3
@@ -118,7 +120,6 @@
brctg %r1, L(top)
L(end): RETVAL
-
- lmg %r6, %r12, 48(%r15)
+ lmg %r6, %r8, 48(%r15)
br %r14
EPILOGUE()
diff -r f67fd2d23588 -r 1d60286bb006 mpn/s390_64/aorslsh1_n.asm
--- a/mpn/s390_64/aorslsh1_n.asm Sat Oct 22 19:29:24 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-dnl S/390-64 mpn_addlsh1_n
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C z900 10
-C z990 5
-C z9 ?
-C z10 ?
-C z196 ?
-
-C TODO
-C * Optimise for small n
-C * Compute RETVAL for sublsh1_n less stupidly
-
-C INPUT PARAMETERS
-define(`rp', `%r2')
-define(`up', `%r3')
-define(`vp', `%r4')
-define(`n', `%r5')
-
-ifdef(`OPERATION_addlsh1_n',`
- define(ADDSUBC, algr)
- define(ADDSUBE, alcgr)
- define(INITCY, `lghi %r13, -1')
- define(RETVAL, `la %r2, 2(%r1,%r13)')
- define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_sublsh1_n',`
- define(ADDSUBC, slgr)
- define(ADDSUBE, slbgr)
- define(INITCY, `lghi %r13, 0')
- define(RETVAL, `slgr %r1, %r13
- lghi %r2, 1
- algr %r2, %r1')
- define(func, mpn_sublsh1_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-PROLOGUE(func)
- stmg %r6, %r13, 48(%r15)
-
- la %r0, 3(n)
- lghi %r7, 3
- srlg %r0, %r0, 2
- ngr %r7, n C n mod 4
- je L(b0)
- cghi %r7, 2
- jl L(b1)
More information about the gmp-commit mailing list