[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Oct 23 02:24:01 CEST 2011


details:   /var/hg/gmp/rev/777063a077fd
changeset: 14369:777063a077fd
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 23 02:17:23 2011 +0200
description:
Save/restore only used registers.

details:   /var/hg/gmp/rev/6ab67d99bfd3
changeset: 14370:6ab67d99bfd3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 23 02:20:52 2011 +0200
description:
*** empty log message ***

details:   /var/hg/gmp/rev/856307c460f1
changeset: 14371:856307c460f1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 23 02:22:20 2011 +0200
description:
Reorg s390_64 {add,sub}lsh1_n code, add rsblsh1_n.

details:   /var/hg/gmp/rev/8a0fc314ae69
changeset: 14372:8a0fc314ae69
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 23 02:23:23 2011 +0200
description:
Shave off 1 c/l.

details:   /var/hg/gmp/rev/1d60286bb006
changeset: 14373:1d60286bb006
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Oct 23 02:23:26 2011 +0200
description:
*** empty log message ***

diffstat:

 ChangeLog                  |   10 ++
 mpn/s390_64/aorrlsh1_n.asm |  157 ++++++++++++++++++++++++++++++++++++++++++++
 mpn/s390_64/aors_n.asm     |   11 +-
 mpn/s390_64/aorslsh1_n.asm |  160 ---------------------------------------------
 mpn/s390_64/bdiv_dbm1c.asm |    6 +-
 mpn/s390_64/logops_n.asm   |  136 +++++++++++++++++++-------------------
 mpn/s390_64/sublsh1_n.asm  |  158 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 402 insertions(+), 236 deletions(-)

diffs (truncated from 825 to 300 lines):

diff -r f67fd2d23588 -r 1d60286bb006 ChangeLog
--- a/ChangeLog	Sat Oct 22 19:29:24 2011 +0200
+++ b/ChangeLog	Sun Oct 23 02:23:26 2011 +0200
@@ -1,5 +1,15 @@
+2011-10-23  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/s390_64/bdiv_dbm1c.asm: Shave off 1 c/l.
+
+	* mpn/s390_64/aorrlsh1_n.asm: New file, developed from aorslsh1_n.asm.
+	* mpn/s390_64/sublsh1_n.asm: New file.
+	* mpn/s390_64/aorslsh1_n.asm: Remove file.
+
 2011-10-22  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/s390_64/logops_n.asm: New file.
+
 	* mpn/s390_64/aors_n.asm: New file, with rewritten add/sub code.
 
 2011-10-20  Torbjorn Granlund  <tege at gmplib.org>
diff -r f67fd2d23588 -r 1d60286bb006 mpn/s390_64/aorrlsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/aorrlsh1_n.asm	Sun Oct 23 02:23:26 2011 +0200
@@ -0,0 +1,157 @@
+dnl  S/390-64 mpn_addlsh1_n and mpn_rsblsh1_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900		10
+C z990		 5
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
+C TODO
+C  * Optimise for small n, avoid 'la' like in aors_n.asm.
+C  * Tune to reach 4 c/l.  For addlsh1, we could let the main alcgr propagate
+C    carry to the lsh1 alcgr.  But even for sublsh1_n 5 c/l cannot be optimal.
+C  * Compute RETVAL for rsblsh1_n less stupidly.
+
+C INPUT PARAMETERS
+define(`rp',	`%r2')
+define(`up',	`%r3')
+define(`vp',	`%r4')
+define(`n',	`%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADSB,		alg)
+  define(ADSBC,		alcg)
+  define(INITCY,	`lghi	%r9, -1')
+  define(RETVAL,	`la	%r2, 2(%r1,%r9)')
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_rsblsh1_n',`
+  define(ADSB,		slg)
+  define(ADSBC,		slbg)
+  define(INITCY,	`lghi	%r9, 0')
+  define(RETVAL,`dnl
+	algr	%r1, %r9
+	lghi	%r2, 1
+	algr	%r2, %r1')
+  define(func, mpn_rsblsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+	stmg	%r6, %r9, 48(%r15)
+
+	aghi	n, 3
+	lghi	%r7, 3
+	srlg	%r0, n, 2
+	ngr	%r7, n			C (n + 3) mod 4
+	je	L(b1)
+	cghi	%r7, 2
+	jl	L(b2)
+	jne	L(b0)
+
+L(b3):	lmg	%r5, %r7, 0(vp)
+	la	vp, 24(vp)
+
+	algr	%r5, %r5
+	alcgr	%r6, %r6
+	alcgr	%r7, %r7
+	slbgr	%r1, %r1
+
+	ADSB	%r5, 0(up)
+	ADSBC	%r6, 8(up)
+	ADSBC	%r7, 16(up)
+	la	up, 24(up)
+	slbgr	%r9, %r9
+
+	stmg	%r5, %r7, 0(rp)
+	la	rp, 24(rp)
+	brctg	%r0, L(top)
+	j	L(end)
+
+L(b0):	lghi	%r1, -1
+	INITCY
+	j	L(top)
+
+L(b1):	lg	%r5, 0(vp)
+	la	vp, 8(vp)
+
+	algr	%r5, %r5
+	slbgr	%r1, %r1
+	ADSB	%r5, 0(up)
+	la	up, 8(up)
+	slbgr	%r9, %r9
+
+	stg	%r5, 0(rp)
+	la	rp, 8(rp)
+	brctg	%r0, L(top)
+	j	L(end)
+
+L(b2):	lmg	%r5, %r6, 0(vp)
+	la	vp, 16(vp)
+
+	algr	%r5, %r5
+	alcgr	%r6, %r6
+	slbgr	%r1, %r1
+
+	ADSB	%r5, 0(up)
+	ADSBC	%r6, 8(up)
+	la	up, 16(up)
+	slbgr	%r9, %r9
+
+	stmg	%r5, %r6, 0(rp)
+	la	rp, 16(rp)
+	brctg	%r0, L(top)
+	j	L(end)
+
+L(top):	lmg	%r5, %r8, 0(vp)
+	la	vp, 32(vp)
+
+	aghi	%r1, 1			C restore carry
+
+	alcgr	%r5, %r5
+	alcgr	%r6, %r6
+	alcgr	%r7, %r7
+	alcgr	%r8, %r8
+
+	slbgr	%r1, %r1		C save carry
+
+	aghi	%r9, 1			C restore carry
+
+	ADSBC	%r5, 0(up)
+	ADSBC	%r6, 8(up)
+	ADSBC	%r7, 16(up)
+	ADSBC	%r8, 24(up)
+	la	up, 32(up)
+
+	slbgr	%r9, %r9		C save carry
+
+	stmg	%r5, %r8, 0(rp)
+	la	rp, 32(rp)
+	brctg	%r0, L(top)
+
+L(end):	RETVAL
+	lmg	%r6, %r9, 48(%r15)
+	br	%r14
+EPILOGUE()
diff -r f67fd2d23588 -r 1d60286bb006 mpn/s390_64/aors_n.asm
--- a/mpn/s390_64/aors_n.asm	Sat Oct 22 19:29:24 2011 +0200
+++ b/mpn/s390_64/aors_n.asm	Sun Oct 23 02:23:26 2011 +0200
@@ -29,6 +29,8 @@
 C TODO
 C  * Optimise for small n
 C  * Use r0 and save/restore one less register
+C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C    faster, at the expense of highly alignment-dependent performance.
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
@@ -40,7 +42,7 @@
   define(ADSB,		alg)
   define(ADSBCR,	alcgr)
   define(ADSBC,		alcg)
-  define(RETVAL,`
+  define(RETVAL,`dnl
 	lghi	%r2, 0
 	alcgr	%r2, %r2')
   define(func,		mpn_add_n)
@@ -49,7 +51,7 @@
   define(ADSB,		slg)
   define(ADSBCR,	slbgr)
   define(ADSBC,		slbg)
-  define(RETVAL,`
+  define(RETVAL,`dnl
 	slbgr	%r2, %r2
 	lcgr	%r2, %r2')
   define(func,		mpn_sub_n)
@@ -59,7 +61,7 @@
 
 ASM_START()
 PROLOGUE(func)
-	stmg	%r6, %r12, 48(%r15)
+	stmg	%r6, %r8, 48(%r15)
 
 	aghi	n, 3
 	lghi	%r7, 3
@@ -118,7 +120,6 @@
 	brctg	%r1, L(top)
 
 L(end):	RETVAL
-
-	lmg	%r6, %r12, 48(%r15)
+	lmg	%r6, %r8, 48(%r15)
 	br	%r14
 EPILOGUE()
diff -r f67fd2d23588 -r 1d60286bb006 mpn/s390_64/aorslsh1_n.asm
--- a/mpn/s390_64/aorslsh1_n.asm	Sat Oct 22 19:29:24 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-dnl  S/390-64 mpn_addlsh1_n
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		10
-C z990		 5
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Compute RETVAL for sublsh1_n less stupidly
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_addlsh1_n',`
-  define(ADDSUBC,       algr)
-  define(ADDSUBE,       alcgr)
-  define(INITCY,        `lghi	%r13, -1')
-  define(RETVAL,        `la	%r2, 2(%r1,%r13)')
-  define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_sublsh1_n',`
-  define(ADDSUBC,       slgr)
-  define(ADDSUBE,       slbgr)
-  define(INITCY,        `lghi	%r13, 0')
-  define(RETVAL,        `slgr	%r1, %r13
-			lghi	%r2, 1
-			algr	%r2, %r1')
-  define(func, mpn_sublsh1_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-PROLOGUE(func)
-	stmg	%r6, %r13, 48(%r15)
-
-	la	%r0, 3(n)
-	lghi	%r7, 3
-	srlg	%r0, %r0, 2
-	ngr	%r7, n			C n mod 4
-	je	L(b0)
-	cghi	%r7, 2
-	jl	L(b1)


More information about the gmp-commit mailing list