[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Oct 19 23:26:32 CEST 2011


details:   /var/hg/gmp/rev/1156dcf1087d
changeset: 14357:1156dcf1087d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Oct 19 09:04:47 2011 +0200
description:
Rewrite, similar to s390_64 code.

details:   /var/hg/gmp/rev/375960c6fd7c
changeset: 14358:375960c6fd7c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Oct 19 09:05:07 2011 +0200
description:
Add cycle table.

details:   /var/hg/gmp/rev/8aec7a5a2be7
changeset: 14359:8aec7a5a2be7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Oct 19 09:06:58 2011 +0200
description:
Add cycle table.

diffstat:

 mpn/s390_32/copyd.asm                  |   5 ++
 mpn/s390_32/copyi.asm                  |   4 +
 mpn/s390_32/esame/add_n.asm            |  81 +++++++++++++++++++++++++++-----
 mpn/s390_32/esame/addmul_1.asm         |   7 ++
 mpn/s390_32/esame/aorslsh1_n.asm       |   6 ++-
 mpn/s390_32/esame/bdiv_dbm1c.asm       |   6 ++
 mpn/s390_32/esame/mul_1.asm            |   7 ++
 mpn/s390_32/esame/sqr_diag_addlsh1.asm |   6 ++-
 mpn/s390_32/esame/sub_n.asm            |  83 +++++++++++++++++++++++++++++----
 mpn/s390_32/esame/submul_1.asm         |   7 ++
 10 files changed, 185 insertions(+), 27 deletions(-)

diffs (truncated from 344 to 300 lines):

diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/copyd.asm
--- a/mpn/s390_32/copyd.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/copyd.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -21,7 +21,12 @@
 include(`../config.m4')
 
 C            cycles/limb
+C            cycles/limb
+C z900		 1.65
 C z990           1.125
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C FIXME:
 C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/copyi.asm
--- a/mpn/s390_32/copyi.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/copyi.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -21,7 +21,11 @@
 include(`../config.m4')
 
 C            cycles/limb
+C z900		 0.75
 C z990           0.375
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C NOTE
 C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/add_n.asm
--- a/mpn/s390_32/esame/add_n.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/add_n.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -1,4 +1,4 @@
-dnl  S/390-32 mpn_add_n for systems with unsigned add/subtract instructions.
+dnl  S/390-32 mpn_add_n
 
 dnl  Copyright 2011 Free Software Foundation, Inc.
 
@@ -17,8 +17,18 @@
 dnl  You should have received a copy of the GNU Lesser General Public License
 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
+include(`../config.m4')
 
-include(`../config.m4')
+C            cycles/limb
+C z900		 6.5
+C z990		 3.5
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
+C TODO
+C  * Optimise for small n
+C  * Use r0 and save/restore one less register
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
@@ -28,20 +38,65 @@
 
 ASM_START()
 PROLOGUE(mpn_add_n)
-	st	%r12, 48(%r15)
-	lhi	%r12, 0			C zero index register
-	ahi	%r12, 0			C clear C flag
+	stm	%r6, %r12, 24(%r15)
 
-L(top):	l	%r0, 0(%r12,up)
-	l	%r1, 0(%r12,vp)
-	alcr	%r0, %r1
-	st	%r0, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	n, L(top)
+	la	%r1, 3(n)
+	lhi	%r7, 3
+	srl	%r1, 2
+	nr	%r7, n			C n mod 4
+	je	L(top)			C n mod 4 = 0: nr left CC=0, so carry is clear
+	chi	%r7, 2
+	jl	L(b1)
+	je	L(b2)
 
-	lhi	%r2, 0
+L(b3):	lm	%r5, %r7, 0(up)
+	la	up, 12(up)
+	lm	%r9, %r11, 0(vp)
+	la	vp, 12(vp)
+	alr	%r5, %r9
+	alcr	%r6, %r10
+	alcr	%r7, %r11
+	stm	%r5, %r7, 0(rp)
+	la	rp, 12(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(b1):	l	%r5, 0(up)
+	la	up, 4(up)
+	l	%r9, 0(vp)
+	la	vp, 4(vp)
+	alr	%r5, %r9
+	st	%r5, 0(rp)
+	la	rp, 4(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(b2):	lm	%r5, %r6, 0(up)
+	la	up, 8(up)
+	lm	%r9, %r10, 0(vp)
+	la	vp, 8(vp)
+	alr	%r5, %r9
+	alcr	%r6, %r10
+	stm	%r5, %r6, 0(rp)
+	la	rp, 8(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(top):	lm	%r5, %r8, 0(up)
+	la	up, 16(up)
+	lm	%r9, %r12, 0(vp)
+	la	vp, 16(vp)
+	alcr	%r5, %r9
+	alcr	%r6, %r10
+	alcr	%r7, %r11
+	alcr	%r8, %r12
+	stm	%r5, %r8, 0(rp)
+	la	rp, 16(rp)
+	brct	%r1, L(top)
+
+L(end):	lhi	%r2, 0
 	alcr	%r2, %r2
 
-	l	%r12, 48(%r15)
+	lm	%r6, %r12, 24(%r15)
 	br	%r14
 EPILOGUE()
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/addmul_1.asm
--- a/mpn/s390_32/esame/addmul_1.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/addmul_1.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -19,6 +19,13 @@
 
 include(`../config.m4')
 
+C            cycles/limb
+C z900		18.5
+C z990		10
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
 C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/aorslsh1_n.asm
--- a/mpn/s390_32/esame/aorslsh1_n.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/aorslsh1_n.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           5
+C z900		 9.25
+C z990		 5
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C TODO
 C  * Optimise for small n
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/bdiv_dbm1c.asm
--- a/mpn/s390_32/esame/bdiv_dbm1c.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/bdiv_dbm1c.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -19,6 +19,12 @@
 
 include(`../config.m4')
 
+C            cycles/limb
+C z900		14
+C z990		10
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C INPUT PARAMETERS
 define(`qp',	  `%r2')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/mul_1.asm
--- a/mpn/s390_32/esame/mul_1.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/mul_1.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -19,6 +19,13 @@
 
 include(`../config.m4')
 
+C            cycles/limb
+C z900		14
+C z990		 9
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
 C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/sqr_diag_addlsh1.asm
--- a/mpn/s390_32/esame/sqr_diag_addlsh1.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/sqr_diag_addlsh1.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           8
+C z900		 ?
+C z990		 8
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
diff -r 082efa54e608 -r 8aec7a5a2be7 mpn/s390_32/esame/sub_n.asm
--- a/mpn/s390_32/esame/sub_n.asm	Tue Oct 18 00:14:11 2011 +0200
+++ b/mpn/s390_32/esame/sub_n.asm	Wed Oct 19 09:06:58 2011 +0200
@@ -1,4 +1,4 @@
-dnl  S/390-32 mpn_sub_n for systems with unsigned add/subtract instructions.
+dnl  S/390-32 mpn_sub_n
 
 dnl  Copyright 2011 Free Software Foundation, Inc.
 
@@ -17,8 +17,18 @@
 dnl  You should have received a copy of the GNU Lesser General Public License
 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
+include(`../config.m4')
 
-include(`../config.m4')
+C            cycles/limb
+C z900		 6.5
+C z990		 3.5
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
+C TODO
+C  * Optimise for small n
+C  * Use r0 and save/restore one less register
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
@@ -28,19 +38,68 @@
 
 ASM_START()
 PROLOGUE(mpn_sub_n)
-	st	%r12, 48(%r15)
-	slr	%r12, %r12		C zero index register and set C flag
+	stm	%r6, %r12, 24(%r15)
 
-L(top):	l	%r0, 0(%r12,up)
-	l	%r1, 0(%r12,vp)
-	slbr	%r0, %r1
-	st	%r0, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	n, L(top)
+	la	%r1, 3(n)
+	lhi	%r7, 3
+	srl	%r1, 2
+	nr	%r7, n			C n mod 4
+	je	L(b0)
+	chi	%r7, 2
+	jl	L(b1)
+	je	L(b2)
 
-	slbr	%r2, %r2
+L(b3):	lm	%r5, %r7, 0(up)
+	la	up, 12(up)
+	lm	%r9, %r11, 0(vp)
+	la	vp, 12(vp)
+	slr	%r5, %r9
+	slbr	%r6, %r10
+	slbr	%r7, %r11
+	stm	%r5, %r7, 0(rp)
+	la	rp, 12(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(b0):	slr	%r5, %r5		C r5-r5 gives CC=2: carry set, i.e. no borrow
+	j	L(top)
+	
+L(b1):	l	%r5, 0(up)
+	la	up, 4(up)
+	l	%r9, 0(vp)
+	la	vp, 4(vp)
+	slr	%r5, %r9
+	st	%r5, 0(rp)
+	la	rp, 4(rp)
+	brct	%r1, L(top)
+	j	L(end)
+
+L(b2):	lm	%r5, %r6, 0(up)
+	la	up, 8(up)
+	lm	%r9, %r10, 0(vp)


More information about the gmp-commit mailing list