[Gmp-commit] /var/hg/gmp: Rewrite sparc32/ultrasparct1 *mul_1 code.

mercurial at gmplib.org mercurial at gmplib.org
Tue Mar 5 13:47:08 CET 2013


details:   /var/hg/gmp/rev/dff8f0d02b13
changeset: 15527:dff8f0d02b13
user:      davem at davemloft.net
date:      Tue Mar 05 13:47:00 2013 +0100
description:
Rewrite sparc32/ultrasparct1 *mul_1 code.

diffstat:

 ChangeLog                             |   7 +++
 mpn/sparc32/ultrasparct1/addmul_1.asm |  71 +++++++++++++++++++++++----------
 mpn/sparc32/ultrasparct1/mul_1.asm    |  46 ++++++++++++++++-----
 mpn/sparc32/ultrasparct1/submul_1.asm |  72 ++++++++++++++++++++++++----------
 4 files changed, 140 insertions(+), 56 deletions(-)

diffs (275 lines):

diff -r eeae8c24e11e -r dff8f0d02b13 ChangeLog
--- a/ChangeLog	Mon Mar 04 23:20:00 2013 +0100
+++ b/ChangeLog	Tue Mar 05 13:47:00 2013 +0100
@@ -1,3 +1,10 @@
+2013-03-05  David Miller  <davem at davemloft.net>
+
+	* mpn/sparc32/ultrasparct1/mul_1.asm: Unroll main loop one time, add
+	T2/T3/T4 timings.
+	* mpn/sparc32/ultrasparct1/addmul_1.asm: Likewise.	
+	* mpn/sparc32/ultrasparct1/submul_1.asm: Likewise.
+
 2013-03-04  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/arm/neon/lorrshift.asm: New file.
diff -r eeae8c24e11e -r dff8f0d02b13 mpn/sparc32/ultrasparct1/addmul_1.asm
--- a/mpn/sparc32/ultrasparct1/addmul_1.asm	Mon Mar 04 23:20:00 2013 +0100
+++ b/mpn/sparc32/ultrasparct1/addmul_1.asm	Tue Mar 05 13:47:00 2013 +0100
@@ -1,6 +1,8 @@
 dnl  SPARC T1 32-bit mpn_addmul_1.
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,33 +22,58 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T1:       27
+C UltraSPARC T1:       24
+C UltraSPARC T2:       19
+C UltraSPARC T3:       19
+C UltraSPARC T4:       5
 
 C INPUT PARAMETERS
-define(`rp',	`%o0')
-define(`up',	`%o1')
-define(`n',	`%o2')
-define(`v0',	`%o3')
+define(`rp',	`%i0')
+define(`up',	`%i1')
+define(`n',	`%i2')
+define(`v0',	`%i3')
 
 ASM_START()
 PROLOGUE(mpn_addmul_1)
-	mov	0, %g4
-	srl	v0, 0, v0
-	srl	n, 0, n
-	dec	n			C n--
+	save	%sp, -96, %sp		C new register window and 96-byte frame, args now in %i0-%i3
+	srl	n, 0, %o4		C %o4 = n zero-extended to 64 bits
+	srl	v0, 0, %g1		C %g1 = v0 zero-extended to 64 bits
+	subcc	%o4, 1, %o4		C %o4 = n-1, sets condition codes
+	be	L(final_one)		C n-1 == 0, only the single-limb tail runs
+	 clr	%o5			C delay slot, carry limb = 0
 
-L(top):	lduw	[up+0], %g1
-	add	up, 4, up		C up++
-	mulx	%g1, v0, %g3
-	lduw	[rp+0], %g2
-	add	%g2, %g3, %g3
-	add	%g4, %g3, %g3
+L(top):	lduw	[up+0], %l0		C %l0 = up[0], two limbs per iteration
+	lduw	[rp+0], %l2		C %l2 = rp[0]
+	lduw	[up+4], %l1		C %l1 = up[1]
+	lduw	[rp+4], %l3		C %l3 = rp[1]
+	mulx	%l0, %g1, %g3		C %g3 = up[0] * v0, 64-bit product
+	add	up, 8, up		C up += 2 limbs
+	mulx	%l1, %g1, %o3		C %o3 = up[1] * v0, 64-bit product
+	sub	%o4, 2, %o4		C two limbs consumed
+	add	rp, 8, rp		C rp += 2 limbs
+	add	%l2, %g3, %g3		C add rp[0]
+	add	%o5, %g3, %g3		C add incoming carry limb
+	stw	%g3, [rp-8]		C store low 32 bits of first result
+	srlx	%g3, 32, %o5		C carry limb = high 32 bits
+	add	%l3, %o3, %o3		C add rp[1]
+	add	%o5, %o3, %o3		C add carry limb from first result
+	stw	%o3, [rp-4]		C store low 32 bits of second result
+	brgz	%o4, L(top)		C loop while count > 0
+	 srlx	%o3, 32, %o5		C delay slot, carry limb = high 32 bits
+
+	brlz,pt	%o4, L(done)		C count < 0, no limb left
+	 nop
+
+L(final_one):				C reached with count == 0, one limb left
+	lduw	[up+0], %l0		C %l0 = last up limb
+	lduw	[rp+0], %l2		C %l2 = last rp limb
+	mulx	%l0, %g1, %g3		C %g3 = up limb * v0
+	add	%l2, %g3, %g3		C add rp limb
+	add	%o5, %g3, %g3		C add carry limb
 	stw	%g3, [rp+0]
-	add	rp, 4, rp		C rp++
-	srlx	%g3, 32, %g4
-	brnz	n, L(top)
-	dec	n			C n--
+	srlx	%g3, 32, %o5		C carry limb = high 32 bits
 
-	retl
-	mov	%g4, %o0		C return value
+L(done):
+	ret
+	 restore %o5, 0, %o0		C delay slot, pop window and return carry limb in %o0
 EPILOGUE()
diff -r eeae8c24e11e -r dff8f0d02b13 mpn/sparc32/ultrasparct1/mul_1.asm
--- a/mpn/sparc32/ultrasparct1/mul_1.asm	Mon Mar 04 23:20:00 2013 +0100
+++ b/mpn/sparc32/ultrasparct1/mul_1.asm	Tue Mar 05 13:47:00 2013 +0100
@@ -1,6 +1,8 @@
 dnl  SPARC T1 32-bit mpn_mul_1.
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,7 +22,10 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T1:       23
+C UltraSPARC T1:       20
+C UltraSPARC T2:       18
+C UltraSPARC T3:       18
+C UltraSPARC T4:       4
 
 C INPUT PARAMETERS
 define(`rp',	`%o0')
@@ -30,21 +35,38 @@
 
 ASM_START()
 PROLOGUE(mpn_mul_1)
-	mov	0, %g4
+	srl	n, 0, n			C zero-extend n to 64 bits
 	srl	v0, 0, v0
-	srl	n, 0, n
-	dec	n			C n--
+	subcc	n, 1, n			C n = n-1, sets condition codes
+	be	L(final_one)		C n-1 == 0, only the single-limb tail runs
+	 clr	%o5			C delay slot, carry limb = 0
 
 L(top):	lduw	[up+0], %g1
-	add	up, 4, up		C up++
+	lduw	[up+4], %g2		C %g2 = up[1], two limbs per iteration
 	mulx	%g1, v0, %g3
-	add	%g4, %g3, %g3
+	add	up, 8, up		C up += 2 limbs
+	mulx	%g2, v0, %o4		C %o4 = up[1] * v0, 64-bit product
+	sub	n, 2, n			C two limbs consumed
+	add	rp, 8, rp		C rp += 2 limbs
+	add	%o5, %g3, %g3		C add incoming carry limb
+	stw	%g3, [rp-8]		C store low 32 bits of first result
+	srlx	%g3, 32, %o5		C carry limb = high 32 bits
+	add	%o5, %o4, %o4		C add carry limb from first result
+	stw	%o4, [rp-4]		C store low 32 bits of second result
+	brgz	n, L(top)		C loop while count > 0
+	 srlx	%o4, 32, %o5		C delay slot, carry limb = high 32 bits
+
+	brlz,pt	n, L(done)		C count < 0, no limb left
+	 nop
+
+L(final_one):				C reached with count == 0, one limb left
+	lduw	[up+0], %g1		C %g1 = last up limb
+	mulx	%g1, v0, %g3		C %g3 = up limb * v0
+	add	%o5, %g3, %g3		C add carry limb
 	stw	%g3, [rp+0]
-	add	rp, 4, rp		C rp++
-	srlx	%g3, 32, %g4
-	brnz	n, L(top)
-	dec	n			C n--
+	srlx	%g3, 32, %o5		C carry limb = high 32 bits
 
+L(done):
 	retl
-	mov	%g4, %o0		C return value
+	 mov	%o5, %o0		C delay slot, return carry limb in %o0
 EPILOGUE()
diff -r eeae8c24e11e -r dff8f0d02b13 mpn/sparc32/ultrasparct1/submul_1.asm
--- a/mpn/sparc32/ultrasparct1/submul_1.asm	Mon Mar 04 23:20:00 2013 +0100
+++ b/mpn/sparc32/ultrasparct1/submul_1.asm	Tue Mar 05 13:47:00 2013 +0100
@@ -1,6 +1,8 @@
 dnl  SPARC T1 32-bit mpn_submul_1.
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,33 +22,59 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T1:       27
+C UltraSPARC T1:       24
+C UltraSPARC T2:       19
+C UltraSPARC T3:       19
+C UltraSPARC T4:       5
 
 C INPUT PARAMETERS
-define(`rp',	`%o0')
-define(`up',	`%o1')
-define(`n',	`%o2')
-define(`v0',	`%o3')
+define(`rp',	`%i0')
+define(`up',	`%i1')
+define(`n',	`%i2')
+define(`v0',	`%i3')
 
 ASM_START()
 PROLOGUE(mpn_submul_1)
-	subcc	%g0, %g0, %g4		C clear CF and g4
-	srl	v0, 0, v0
-	srl	n, 0, n
-	dec	n			C n--
+	save	%sp, -96, %sp		C new register window and 96-byte frame, args now in %i0-%i3
+	srl	n, 0, %o4		C %o4 = n zero-extended to 64 bits
+	srl	v0, 0, %g1		C %g1 = v0 zero-extended to 64 bits
+	subcc	%o4, 1, %o4		C %o4 = n-1, sets condition codes
+	be	L(final_one)		C n-1 == 0, only the single-limb tail runs
+	 subcc	%g0, 0, %o5		C delay slot, carry limb = 0 and carry flag cleared
 
-L(top):	lduw	[up+0], %g1
-	add	up, 4, up		C up++
-	mulx	%g1, v0, %g3
-	lduw	[rp+0], %g2
-	addx	%g4, %g3, %g3
-	srlx	%g3, 32, %g4
-	subcc	%g2, %g3, %g3
+L(top):	lduw	[up+0], %l0		C %l0 = up[0], two limbs per iteration
+	lduw	[rp+0], %l2		C %l2 = rp[0]
+	lduw	[up+4], %l1		C %l1 = up[1]
+	lduw	[rp+4], %l3		C %l3 = rp[1]
+	mulx	%l0, %g1, %g3		C %g3 = up[0] * v0, 64-bit product
+	add	up, 8, up		C up += 2 limbs
+	mulx	%l1, %g1, %o3		C %o3 = up[1] * v0, 64-bit product
+	sub	%o4, 2, %o4		C two limbs consumed
+	add	rp, 8, rp		C rp += 2 limbs
+	addx	%o5, %g3, %g3		C add carry limb plus pending borrow flag
+	srlx	%g3, 32, %o5		C carry limb = high 32 bits
+	subcc	%l2, %g3, %g3		C rp[0] - product, borrow goes to carry flag
+	stw	%g3, [rp-8]		C store low 32 bits of first result
+	addx	%o5, %o3, %o3		C add carry limb plus borrow from first limb
+	srlx	%o3, 32, %o5		C carry limb = high 32 bits
+	subcc	%l3, %o3, %o3		C rp[1] - product, borrow goes to carry flag
+	brgz	%o4, L(top)		C loop while count > 0, flags untouched
+	 stw	%o3, [rp-4]		C delay slot, store low 32 bits of second result
+
+	brlz,pt	%o4, L(done)		C count < 0, no limb left
+	 nop
+
+L(final_one):				C reached with count == 0, one limb left
+	lduw	[up+0], %l0		C %l0 = last up limb
+	lduw	[rp+0], %l2		C %l2 = last rp limb
+	mulx	%l0, %g1, %g3		C %g3 = up limb * v0
+	addx	%o5, %g3, %g3		C add carry limb plus pending borrow flag
+	srlx	%g3, 32, %o5		C carry limb = high 32 bits
+	subcc	%l2, %g3, %g3		C rp limb - product, borrow goes to carry flag
 	stw	%g3, [rp+0]
-	add	rp, 4, rp		C rp++
-	brnz	n, L(top)
-	dec	n			C n--
 
-	retl
-	addx	%g4, 0, %o0		C return value
+L(done):
+	addx	%o5, 0, %o5		C fold last borrow flag into carry limb
+	ret
+	 restore %o5, 0, %o0		C delay slot, pop window and return carry limb in %o0
 EPILOGUE()


More information about the gmp-commit mailing list