[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Oct 16 16:20:52 CEST 2011


details:   /var/hg/gmp/rev/2d423e7d9ac2
changeset: 14340:2d423e7d9ac2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 16 15:39:21 2011 +0200
description:
New file.

details:   /var/hg/gmp/rev/08b11b58bf3c
changeset: 14341:08b11b58bf3c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 16 15:41:16 2011 +0200
description:
Rewrite along the lines of glibc memcpy.

details:   /var/hg/gmp/rev/cb65b3da11cc
changeset: 14342:cb65b3da11cc
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 16 15:43:25 2011 +0200
description:
Optimise.

details:   /var/hg/gmp/rev/fa155bc8116f
changeset: 14343:fa155bc8116f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 16 15:44:03 2011 +0200
description:
Add TODO item.

details:   /var/hg/gmp/rev/d2b94b78f045
changeset: 14344:d2b94b78f045
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 16 15:44:15 2011 +0200
description:
*** empty log message ***

details:   /var/hg/gmp/rev/a01f95e7ccc3
changeset: 14345:a01f95e7ccc3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 16 15:50:49 2011 +0200
description:
Overhaul low-level mpn test code.

diffstat:

 ChangeLog                  |    6 +
 mpn/s390_64/add_n.asm      |    1 +
 mpn/s390_64/aorslsh1_n.asm |  153 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/s390_64/copyd.asm      |   11 +-
 mpn/s390_64/copyi.asm      |   27 +++++--
 mpn/s390_64/sub_n.asm      |    3 +-
 tests/devel/addmul_N.c     |   34 +++++++--
 tests/devel/addsub_n.c     |   25 +++++--
 tests/devel/anymul_1.c     |   14 ++-
 tests/devel/aors_n.c       |   48 ++++++++++++-
 tests/devel/copy.c         |   44 +++++++++++-
 tests/devel/mul_N.c        |   34 +++++++--
 tests/devel/shift.c        |   47 ++++++++++++-
 13 files changed, 386 insertions(+), 61 deletions(-)

diffs (truncated from 870 to 300 lines):

diff -r 92d555288849 -r a01f95e7ccc3 ChangeLog
--- a/ChangeLog	Sun Oct 16 00:30:53 2011 +0200
+++ b/ChangeLog	Sun Oct 16 15:50:49 2011 +0200
@@ -1,5 +1,11 @@
 2011-10-16  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/s390_64/copyd.asm: Optimise.
+
+	* mpn/s390_64/copyi.asm: Rewrite along the lines of glibc memcpy.
+
+	* mpn/s390_64/aorslsh1_n.asm: New file.
+
 	* mpn/s390_64/mod_34lsub1.asm: New file.
 
 	* mpn/s390_64/sqr_diag_addlsh1.asm: New file.
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/add_n.asm
--- a/mpn/s390_64/add_n.asm	Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/add_n.asm	Sun Oct 16 15:50:49 2011 +0200
@@ -24,6 +24,7 @@
 
 C TODO
 C  * Optimise for small n
+C  * Use r0 and save/restore one less register
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/aorslsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/aorslsh1_n.asm	Sun Oct 16 15:50:49 2011 +0200
@@ -0,0 +1,153 @@
+dnl  S/390-64 mpn_addlsh1_n and mpn_sublsh1_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z990           5
+
+C TODO
+C  * Optimise for small n
+C  * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp',	`%r2')
+define(`up',	`%r3')
+define(`vp',	`%r4')
+define(`n',	`%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADDSUBC,       algr)
+  define(ADDSUBE,       alcgr)
+  define(INITCY,        `lghi	%r13, -1')
+  define(RETVAL,        `lghi	%r2, 1
+			alcgr	%r2, %r13')
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADDSUBC,       slgr)
+  define(ADDSUBE,       slbgr)
+  define(INITCY,        `lghi	%r13, 0')
+  define(RETVAL,        `slbgr	%r2, %r2
+			lcgr	%r2, %r2
+			algr	%r2, %r13
+			aghi	%r2, 1')
+  define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+	stmg	%r6, %r13, 48(%r15)
+
+	INITCY
+
+	la	%r0, 3(n)
+	lghi	%r7, 3
+	srlg	%r0, %r0, 2
+	ngr	%r7, n			C n mod 4
+	je	L(top)			C The C flag is clear
+	cghi	%r7, 2
+	jl	L(b1)
+	je	L(b2)
+
+L(b3):	lmg	%r5, %r7, 0(up)
+	la	up, 24(up)
+	lmg	%r9, %r11, 0(vp)
+	la	vp, 24(vp)
+
+	algr	%r9, %r9
+	alcgr	%r10, %r10
+	alcgr	%r11, %r11
+
+	slbgr	%r13, %r13
+
+	ADDSUBC	%r5, %r9
+	ADDSUBE	%r6, %r10
+	ADDSUBE	%r7, %r11
+
+	stmg	%r5, %r7, 0(rp)
+	la	rp, 24(rp)
+	brctg	%r0, L(top)
+	j	L(end)
+
+L(b1):	lg	%r5, 0(up)
+	la	up, 8(up)
+	lg	%r9, 0(vp)
+	la	vp, 8(vp)
+
+	algr	%r9, %r9
+	slbgr	%r13, %r13
+	ADDSUBC	%r5, %r9
+
+	stg	%r5, 0(rp)
+	la	rp, 8(rp)
+	brctg	%r0, L(top)
+	j	L(end)
+
+L(b2):	lmg	%r5, %r6, 0(up)
+	la	up, 16(up)
+	lmg	%r9, %r10, 0(vp)
+	la	vp, 16(vp)
+
+	algr	%r9, %r9
+	alcgr	%r10, %r10
+
+	slbgr	%r13, %r13
+
+	ADDSUBC	%r5, %r9
+	ADDSUBE	%r6, %r10
+
+	stmg	%r5, %r6, 0(rp)
+	la	rp, 16(rp)
+	brctg	%r0, L(top)
+	j	L(end)
+
+L(top):	lmg	%r9, %r12, 0(vp)
+	la	vp, 32(vp)
+
+	alcgr	%r9, %r9
+	alcgr	%r10, %r10
+	alcgr	%r11, %r11
+	alcgr	%r12, %r12
+
+	slbgr	%r1, %r1		C save carry
+
+	lmg	%r5, %r8, 0(up)
+	la	up, 32(up)
+
+	aghi	%r13, 1			C restore carry
+
+	ADDSUBE	%r5, %r9
+	ADDSUBE	%r6, %r10
+	ADDSUBE	%r7, %r11
+	ADDSUBE	%r8, %r12
+
+	lgr	%r13, %r1
+
+	stmg	%r5, %r8, 0(rp)
+	la	rp, 32(rp)
+	brctg	%r0, L(top)
+
+L(end):
+	RETVAL
+	lmg	%r6, %r13, 48(%r15)
+	br	%r14
+EPILOGUE()
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/copyd.asm
--- a/mpn/s390_64/copyd.asm	Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/copyd.asm	Sun Oct 16 15:50:49 2011 +0200
@@ -21,7 +21,7 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           1.75		See comment in loop about how to reach 1.5 c/l
+C z990           1.5
 
 C FIXME:
 C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
@@ -38,12 +38,13 @@
 
 ASM_START()
 PROLOGUE(mpn_copyd)
-	stmg	%r6, %r10, 48(%r15)
+	stmg	%r6, %r11, 48(%r15)
 
 	sllg	%r1, n, 3
 	la	%r10, 8(n)
 	aghi	%r1, -64
 	srlg	%r10, %r10, 3
+	lghi	%r11, -64
 
 	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
 	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later
@@ -117,11 +118,11 @@
 	j	L(end)
 
 L(top):	lmg	%r0, %r7, 0(up)
-	aghi	up, -64			C FIXME using lay here saves 0.25 c/l
+	la	up, 0(%r11,up)
 	stmg	%r0, %r7, 0(rp)
-	aghi	rp, -64
+	la	rp, 0(%r11,rp)
 	brctg	%r10, L(top)
 
-L(end):	lmg	%r6, %r10, 48(%r15)
+L(end):	lmg	%r6, %r11, 48(%r15)
 	br	%r14
 EPILOGUE()
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/copyi.asm
--- a/mpn/s390_64/copyi.asm	Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/copyi.asm	Sun Oct 16 15:50:49 2011 +0200
@@ -21,10 +21,10 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           1		fluctuates somewhat
+C z990           0.75
 
-C FIXME
-C  * Optimise.  GNU memcpy beats us at 0.75 c/l.
+C NOTE
+C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
@@ -33,12 +33,21 @@
 
 ASM_START()
 PROLOGUE(mpn_copyi)
-	lgr	%r0, %r3
-	sllg	%r3, %r4, 3
-	sllg	%r1, %r4, 3
+	ltgr	%r4, %r4
+	sllg	%r4, %r4, 3
+	je	L(rtn)
+	aghi	%r4, -1
+	srlg	%r5, %r4, 8
+	ltgr	%r5, %r5		C < 256 bytes to copy?
+	je	L(1)
 
-L(top):	mvcle	%r2, %r0, 0(0)
-	jne	L(top)
+L(top):	mvc	0(256, rp), 0(up)
+	la	rp, 256(rp)
+	la	up, 256(up)
+	brctg	%r5, L(top)
 
-	br	%r14
+L(1):	bras	%r5, L(2)		C make r5 point to mvc insn
+	mvc	0(1, rp), 0(up)
+L(2):	ex	%r4, 0(%r5)		C execute mvc with length ((8n-1) mod 256)+1 bytes
+L(rtn):	br	%r14
 EPILOGUE()
diff -r 92d555288849 -r a01f95e7ccc3 mpn/s390_64/sub_n.asm
--- a/mpn/s390_64/sub_n.asm	Sun Oct 16 00:30:53 2011 +0200
+++ b/mpn/s390_64/sub_n.asm	Sun Oct 16 15:50:49 2011 +0200
@@ -24,6 +24,7 @@
 
 C TODO
 C  * Optimise for small n
+C  * Use r0 and save/restore one less register
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
@@ -56,7 +57,7 @@
 	brctg	%r1, L(top)
 	j	L(end)
 
-L(b0):	slgr	%r0, %r0		C set C flag
+L(b0):	slgr	%r5, %r5		C set C flag
 	j	L(top)
 	
 L(b1):	lg	%r5, 0(up)
diff -r 92d555288849 -r a01f95e7ccc3 tests/devel/addmul_N.c
--- a/tests/devel/addmul_N.c	Sun Oct 16 00:30:53 2011 +0200
+++ b/tests/devel/addmul_N.c	Sun Oct 16 15:50:49 2011 +0200
@@ -1,6 +1,6 @@
 /*
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2007 Free
-Software Foundation, Inc.
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2007 Free Software
+Foundation, Inc.
 


More information about the gmp-commit mailing list