[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Oct 28 00:14:16 CEST 2011


details:   /var/hg/gmp/rev/5eeab2fe5ed3
changeset: 14392:5eeab2fe5ed3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Oct 28 00:13:14 2011 +0200
description:
Rewrite s390_32/esame add/sub code, move result to aors_n.asm.

details:   /var/hg/gmp/rev/b31a653eb776
changeset: 14393:b31a653eb776
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Oct 28 00:13:42 2011 +0200
description:
*** empty log message ***

diffstat:

 ChangeLog                    |    4 +
 mpn/s390_32/esame/add_n.asm  |  102 ----------------------------------
 mpn/s390_32/esame/aors_n.asm |  126 +++++++++++++++++++++++++++++++++++++++++++
 mpn/s390_32/esame/sub_n.asm  |  105 -----------------------------------
 4 files changed, 130 insertions(+), 207 deletions(-)

diffs (truncated from 356 to 300 lines):

diff -r e198ae588b47 -r b31a653eb776 ChangeLog
--- a/ChangeLog	Thu Oct 27 15:58:31 2011 +0200
+++ b/ChangeLog	Fri Oct 28 00:13:42 2011 +0200
@@ -1,3 +1,7 @@
+2011-10-28  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/s390_32/esame/aors_n.asm: New file, with rewritten add/sub code.
+
 2011-10-27  Torbjorn Granlund  <tege at gmplib.org>
 
 	From Per Olofsson:
diff -r e198ae588b47 -r b31a653eb776 mpn/s390_32/esame/add_n.asm
--- a/mpn/s390_32/esame/add_n.asm	Thu Oct 27 15:58:31 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-dnl  S/390-32 mpn_add_n
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 6.5
-C z990		 3.5
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Use r0 and save/restore one less register
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_add_n)
-	stm	%r6, %r12, 24(%r15)
-
-	la	%r1, 3(n)
-	lhi	%r7, 3
-	srl	%r1, 2
-	nr	%r7, n			C n mod 4
-	je	L(top)			C The C flag is clear
-	chi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	lm	%r5, %r7, 0(up)
-	la	up, 12(up)
-	lm	%r9, %r11, 0(vp)
-	la	vp, 12(vp)
-	alr	%r5, %r9
-	alcr	%r6, %r10
-	alcr	%r7, %r11
-	stm	%r5, %r7, 0(rp)
-	la	rp, 12(rp)
-	brct	%r1, L(top)
-	j	L(end)
-
-L(b1):	l	%r5, 0(up)
-	la	up, 4(up)
-	l	%r9, 0(vp)
-	la	vp, 4(vp)
-	alr	%r5, %r9
-	st	%r5, 0(rp)
-	la	rp, 4(rp)
-	brct	%r1, L(top)
-	j	L(end)
-
-L(b2):	lm	%r5, %r6, 0(up)
-	la	up, 8(up)
-	lm	%r9, %r10, 0(vp)
-	la	vp, 8(vp)
-	alr	%r5, %r9
-	alcr	%r6, %r10
-	stm	%r5, %r6, 0(rp)
-	la	rp, 8(rp)
-	brct	%r1, L(top)
-	j	L(end)
-
-L(top):	lm	%r5, %r8, 0(up)
-	la	up, 16(up)
-	lm	%r9, %r12, 0(vp)
-	la	vp, 16(vp)
-	alcr	%r5, %r9
-	alcr	%r6, %r10
-	alcr	%r7, %r11
-	alcr	%r8, %r12
-	stm	%r5, %r8, 0(rp)
-	la	rp, 16(rp)
-	brct	%r1, L(top)
-
-L(end):	lhi	%r2, 0
-	alcr	%r2, %r2
-
-	lm	%r6, %r12, 24(%r15)
-	br	%r14
-EPILOGUE()
diff -r e198ae588b47 -r b31a653eb776 mpn/s390_32/esame/aors_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_32/esame/aors_n.asm	Fri Oct 28 00:13:42 2011 +0200
@@ -0,0 +1,126 @@
+dnl  S/390-32 mpn_add_n and mpn_sub_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900		 ?
+C z990	      2.75-3		(fast for even n, slow for odd n)
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
+C TODO
+C  * Optimise for small n
+C  * Use r0 and save/restore one less register
+C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C    faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp',	`%r2')
+define(`up',	`%r3')
+define(`vp',	`%r4')
+define(`n',	`%r5')
+
+ifdef(`OPERATION_add_n', `
+  define(ADSB,		al)		C used on the lowest limb of a lead-in block
+  define(ADSBCR,	alcr)		C defined here but not referenced by the code below
+  define(ADSBC,		alc)		C used on every later limb to chain the carry
+  define(RETVAL,`dnl
+	lhi	%r2, 0
+	alcr	%r2, %r2')
+  define(func,		mpn_add_n)	C name given to PROLOGUE
+  define(func_nc,	mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(ADSB,		sl)		C used on the lowest limb of a lead-in block
+  define(ADSBCR,	slbr)		C defined here but not referenced by the code below
+  define(ADSBC,		slb)		C used on every later limb to chain the borrow
+  define(RETVAL,`dnl
+	slbr	%r2, %r2
+	lcr	%r2, %r2')
+  define(func,		mpn_sub_n)	C name given to PROLOGUE
+  define(func_nc,	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	stm	%r6, %r8, 24(%r15)	C save r6-r8, reloaded from the same slot at L(end)
+
+	ahi	n, 3			C bias n by 3 so the shift below rounds up
+	lhi	%r7, 3
+	lr	%r1, n
+	srl	%r1, 2			C r1 = (n + 3) / 4, the brct counter for all blocks
+	nr	%r7, n			C (n + 3) mod 4, as n was biased by the ahi above
+	je	L(b1)			C 0: n = 1 (mod 4)
+	chi	%r7, 2
+	jl	L(b2)			C 1: n = 2 (mod 4)
+	jne	L(b0)			C 3: n = 0 (mod 4), while 2 falls through to b3
+
+L(b3):	lm	%r5, %r7, 0(up)		C n = 3 (mod 4): load 3 limbs, overwriting n in r5
+	la	up, 12(up)
+	ADSB	%r5, 0(vp)		C lowest limb, no carry or borrow coming in
+	ADSBC	%r6, 4(vp)
+	ADSBC	%r7, 8(vp)
+	la	vp, 12(vp)
+	stm	%r5, %r7, 0(rp)
+	la	rp, 12(rp)
+	brct	%r1, L(top)		C continue with 4-limb groups while r1 is nonzero
+	j	L(end)
+
+L(b0):	lm	%r5, %r8, 0(up)		C These redundant insns are no mistake,
+	la	up, 16(up)		C they are needed to make main loop run
+	ADSB	%r5, 0(vp)		C fast for n = 0 (mod 4).
+	ADSBC	%r6, 4(vp)
+	j	L(m0)			C join the main loop after its first two limbs
+
+L(b1):	l	%r5, 0(up)		C n = 1 (mod 4): handle one limb
+	la	up, 4(up)
+	ADSB	%r5, 0(vp)
+	la	vp, 4(vp)
+	st	%r5, 0(rp)
+	la	rp, 4(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(b2):	lm	%r5, %r6, 0(up)		C n = 2 (mod 4): handle two limbs
+	la	up, 8(up)
+	ADSB	%r5, 0(vp)
+	ADSBC	%r6, 4(vp)
+	la	vp, 8(vp)
+	stm	%r5, %r6, 0(rp)
+	la	rp, 8(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(top):	lm	%r5, %r8, 0(up)		C main loop: four limbs per iteration
+	la	up, 16(up)		C la, lm, stm and brct leave the condition code alone
+	ADSBC	%r5, 0(vp)		C carry or borrow comes from the preceding block
+	ADSBC	%r6, 4(vp)
+L(m0):	ADSBC	%r7, 8(vp)		C entry point used by L(b0)
+	ADSBC	%r8, 12(vp)
+	la	vp, 16(vp)
+	stm	%r5, %r8, 0(rp)
+	la	rp, 16(rp)
+	brct	%r1, L(top)
+
+L(end):	RETVAL
+	lm	%r6, %r8, 24(%r15)	C restore r6-r8, RETVAL has set r2 from the final carry or borrow
+	br	%r14
+EPILOGUE()
diff -r e198ae588b47 -r b31a653eb776 mpn/s390_32/esame/sub_n.asm
--- a/mpn/s390_32/esame/sub_n.asm	Thu Oct 27 15:58:31 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-dnl  S/390-32 mpn_sub_n
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 6.5
-C z990		 3.5
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Use r0 and save/restore one less register
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
-	stm	%r6, %r12, 24(%r15)
-
-	la	%r1, 3(n)
-	lhi	%r7, 3
-	srl	%r1, 2
-	nr	%r7, n			C n mod 4
-	je	L(b0)
-	chi	%r7, 2
-	jl	L(b1)


More information about the gmp-commit mailing list