[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Oct 16 16:20:52 CEST 2011
details: /var/hg/gmp/rev/2d423e7d9ac2
changeset: 14340:2d423e7d9ac2
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 15:39:21 2011 +0200
description:
New file.
details: /var/hg/gmp/rev/08b11b58bf3c
changeset: 14341:08b11b58bf3c
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 15:41:16 2011 +0200
description:
Rewrite along the lines of glibc memcpy.
details: /var/hg/gmp/rev/cb65b3da11cc
changeset: 14342:cb65b3da11cc
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 15:43:25 2011 +0200
description:
Optimise.
details: /var/hg/gmp/rev/fa155bc8116f
changeset: 14343:fa155bc8116f
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 15:44:03 2011 +0200
description:
Add TODO item.
details: /var/hg/gmp/rev/d2b94b78f045
changeset: 14344:d2b94b78f045
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 15:44:15 2011 +0200
description:
*** empty log message ***
details: /var/hg/gmp/rev/a01f95e7ccc3
changeset: 14345:a01f95e7ccc3
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 15:50:49 2011 +0200
description:
Overhaul low-level mpn test code.
diffstat:
ChangeLog | 6 +
mpn/s390_64/add_n.asm | 1 +
mpn/s390_64/aorslsh1_n.asm | 153 +++++++++++++++++++++++++++++++++++++++++++++
mpn/s390_64/copyd.asm | 11 +-
mpn/s390_64/copyi.asm | 27 +++++--
mpn/s390_64/sub_n.asm | 3 +-
tests/devel/addmul_N.c | 34 +++++++--
tests/devel/addsub_n.c | 25 +++++--
tests/devel/anymul_1.c | 14 ++-
tests/devel/aors_n.c | 48 ++++++++++++-
tests/devel/copy.c | 44 +++++++++++-
tests/devel/mul_N.c | 34 +++++++--
tests/devel/shift.c | 47 ++++++++++++-
13 files changed, 386 insertions(+), 61 deletions(-)
diffs (truncated from 870 to 300 lines):
diff -r 92d555288849 -r a01f95e7ccc3 ChangeLog
--- a/ChangeLog Sun Oct 16 00:30:53 2011 +0200
+++ b/ChangeLog Sun Oct 16 15:50:49 2011 +0200
@@ -1,5 +1,11 @@
2011-10-16 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/s390_64/copyd.asm: Optimise.
+
+ * mpn/s390_64/copyi.asm: Rewrite along the lines of glibc memcpy.
+
+ * mpn/s390_64/aorslsh1_n.asm: New file.
+
* mpn/s390_64/mod_34lsub1.asm: New file.
* mpn/s390_64/sqr_diag_addlsh1.asm: New file.
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/add_n.asm
--- a/mpn/s390_64/add_n.asm Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/add_n.asm Sun Oct 16 15:50:49 2011 +0200
@@ -24,6 +24,7 @@
C TODO
C * Optimise for small n
+C * Use r0 and save/restore one less register
C INPUT PARAMETERS
define(`rp', `%r2')
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/aorslsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/aorslsh1_n.asm Sun Oct 16 15:50:49 2011 +0200
@@ -0,0 +1,153 @@
+dnl S/390-64 mpn_addlsh1_n
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z990 5
+
+C TODO
+C * Optimise for small n
+C * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADDSUBC, algr)
+ define(ADDSUBE, alcgr)
+ define(INITCY, `lghi %r13, -1')
+ define(RETVAL, `lghi %r2, 1
+ alcgr %r2, %r13')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+ define(ADDSUBC, slgr)
+ define(ADDSUBE, slbgr)
+ define(INITCY, `lghi %r13, 0')
+ define(RETVAL, `slbgr %r2, %r2
+ lcgr %r2, %r2
+ algr %r2, %r13
+ aghi %r2, 1')
+ define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+ stmg %r6, %r13, 48(%r15)
+
+ INITCY
+
+ la %r0, 3(n)
+ lghi %r7, 3
+ srlg %r0, %r0, 2
+ ngr %r7, n C n mod 4
+ je L(top) C The C flag is clear
+ cghi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+L(b3): lmg %r5, %r7, 0(up)
+ la up, 24(up)
+ lmg %r9, %r11, 0(vp)
+ la vp, 24(vp)
+
+ algr %r9, %r9
+ alcgr %r10, %r10
+ alcgr %r11, %r11
+
+ slbgr %r13, %r13
+
+ ADDSUBC %r5, %r9
+ ADDSUBE %r6, %r10
+ ADDSUBE %r7, %r11
+
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+L(b1): lg %r5, 0(up)
+ la up, 8(up)
+ lg %r9, 0(vp)
+ la vp, 8(vp)
+
+ algr %r9, %r9
+ slbgr %r13, %r13
+ ADDSUBC %r5, %r9
+
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+L(b2): lmg %r5, %r6, 0(up)
+ la up, 16(up)
+ lmg %r9, %r10, 0(vp)
+ la vp, 16(vp)
+
+ algr %r9, %r9
+ alcgr %r10, %r10
+
+ slbgr %r13, %r13
+
+ ADDSUBC %r5, %r9
+ ADDSUBE %r6, %r10
+
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+L(top): lmg %r9, %r12, 0(vp)
+ la vp, 32(vp)
+
+ alcgr %r9, %r9
+ alcgr %r10, %r10
+ alcgr %r11, %r11
+ alcgr %r12, %r12
+
+ slbgr %r1, %r1 C save carry
+
+ lmg %r5, %r8, 0(up)
+ la up, 32(up)
+
+ aghi %r13, 1 C restore carry
+
+ ADDSUBE %r5, %r9
+ ADDSUBE %r6, %r10
+ ADDSUBE %r7, %r11
+ ADDSUBE %r8, %r12
+
+ lgr %r13, %r1
+
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+ brctg %r0, L(top)
+
+L(end):
+ RETVAL
+ lmg %r6, %r13, 48(%r15)
+ br %r14
+EPILOGUE()
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/copyd.asm
--- a/mpn/s390_64/copyd.asm Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/copyd.asm Sun Oct 16 15:50:49 2011 +0200
@@ -21,7 +21,7 @@
include(`../config.m4')
C cycles/limb
-C z990 1.75 See comment in loop about how to reach 1.5 c/l
+C z990 1.5
C FIXME:
C * Avoid saving/restoring callee-saves registers for n < 3. This could be
@@ -38,12 +38,13 @@
ASM_START()
PROLOGUE(mpn_copyd)
- stmg %r6, %r10, 48(%r15)
+ stmg %r6, %r11, 48(%r15)
sllg %r1, n, 3
la %r10, 8(n)
aghi %r1, -64
srlg %r10, %r10, 3
+ lghi %r11, -64
la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later
la up, 0(%r1,up_param) C FIXME use lay on z990 and later
@@ -117,11 +118,11 @@
j L(end)
L(top): lmg %r0, %r7, 0(up)
- aghi up, -64 C FIXME using lay here saves 0.25 c/l
+ la up, 0(%r11,up)
stmg %r0, %r7, 0(rp)
- aghi rp, -64
+ la rp, 0(%r11,rp)
brctg %r10, L(top)
-L(end): lmg %r6, %r10, 48(%r15)
+L(end): lmg %r6, %r11, 48(%r15)
br %r14
EPILOGUE()
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/copyi.asm
--- a/mpn/s390_64/copyi.asm Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/copyi.asm Sun Oct 16 15:50:49 2011 +0200
@@ -21,10 +21,10 @@
include(`../config.m4')
C cycles/limb
-C z990 1 fluctuates somewhat
+C z990 0.75
-C FIXME
-C * Optimise. GNU memcpy beats us at 0.75 c/l.
+C NOTE
+C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
C INPUT PARAMETERS
define(`rp', `%r2')
@@ -33,12 +33,21 @@
ASM_START()
PROLOGUE(mpn_copyi)
- lgr %r0, %r3
- sllg %r3, %r4, 3
- sllg %r1, %r4, 3
+ ltgr %r4, %r4
+ sllg %r4, %r4, 3
+ je L(rtn)
+ aghi %r4, -1
+ srlg %r5, %r4, 8
+ ltgr %r5, %r5 C < 256 bytes to copy?
+ je L(1)
-L(top): mvcle %r2, %r0, 0(0)
- jne L(top)
+L(top): mvc 0(256, rp), 0(up)
+ la rp, 256(rp)
+ la up, 256(up)
+ brctg %r5, L(top)
- br %r14
+L(1): bras %r5, L(2) C make r5 point to mvc insn
+ mvc 0(1, rp), 0(up)
+L(2): ex %r4, 0(%r5) C execute mvc with length ((n-1) mod 256)+1
+L(rtn): br %r14
EPILOGUE()
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/sub_n.asm
--- a/mpn/s390_64/sub_n.asm Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/sub_n.asm Sun Oct 16 15:50:49 2011 +0200
@@ -24,6 +24,7 @@
C TODO
C * Optimise for small n
+C * Use r0 and save/restore one less register
C INPUT PARAMETERS
define(`rp', `%r2')
@@ -56,7 +57,7 @@
brctg %r1, L(top)
j L(end)
-L(b0): slgr %r0, %r0 C set C flag
+L(b0): slgr %r5, %r5 C set C flag
j L(top)
L(b1): lg %r5, 0(up)
diff -r 92d555288849 -r a01f95e7ccc3 tests/devel/addmul_N.c
--- a/tests/devel/addmul_N.c Sun Oct 16 00:30:53 2011 +0200
+++ b/tests/devel/addmul_N.c Sun Oct 16 15:50:49 2011 +0200
@@ -1,6 +1,6 @@
/*
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2007 Free
-Software Foundation, Inc.
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2007 Free Software
+Foundation, Inc.
More information about the gmp-commit mailing list