[PATCH] T3/T4 sparc shifts, plus more timings

David Miller davem at davemloft.net
Tue Mar 26 05:50:30 CET 2013


These give a modest speedup compared to the T1 routines.
I also added missing T3 timings to existing code.

Also, I worked on a copyi/copyd for T3/T4 that uses cache-initializing
stores (basically, if you're going to write a full aligned 64-byte
cache line, you tell the chip by using a special ASI in the stores,
and the CPU will simply clear the entire cache line on the write to the
first word of the cache line, eliminating all the memory traffic).

However, the setup for this has a bit of overhead, as we have to align
the destination to 64 bytes, and I'm not so sure that it's a win
overall in common usage.

2013-03-26  David S. Miller  <davem at davemloft.net>

	* mpn/sparc64/ultrasparct3/aormul_2.asm: Add T3 timings.
	* mpn/sparc64/ultrasparct3/hamdist.asm: Likewise.
	* mpn/sparc64/ultrasparct3/popcount.asm: Likewise.
	* mpn/sparc64/ultrasparct3/submul_1.asm: Likewise.
	* mpn/sparc64/ultrasparct3/lshift.asm: New file.
	* mpn/sparc64/ultrasparct3/lshiftc.asm: New file.
	* mpn/sparc64/ultrasparct3/rshift.asm: New file.

diff -r 82870a93a714 mpn/sparc64/ultrasparct3/aormul_2.asm
--- a/mpn/sparc64/ultrasparct3/aormul_2.asm	Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/aormul_2.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
 
 C		    cycles/limb     cycles/limb
 C		       mul_2          addmul_2
-C UltraSPARC T3:	 ?		 ?
+C UltraSPARC T3:	23		24
 C UltraSPARC T4:	~3.5		~4
 
 
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/hamdist.asm
--- a/mpn/sparc64/ultrasparct3/hamdist.asm	Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/hamdist.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	 ?
+C UltraSPARC T3:	18
 C UltraSPARC T4:	 3.5
 
 C INPUT PARAMETERS
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/lshift.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/lshift.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -0,0 +1,89 @@
+dnl  SPARC v9 mpn_lshift for T3/T4.
+
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC T3:	 10
+C UltraSPARC T4:	 3.8
+
+C INPUT PARAMETERS
+define(`rp',     `%o0')	C destination pointer (all stores go through it)
+define(`up',     `%o1')	C source pointer (all loads go through it)
+define(`n',      `%o2')	C limb count; NOTE(review): code assumes n >= 1, confirm with callers
+define(`cnt',    `%o3')	C shift count in bits; NOTE(review): assumed nonzero mod 64, confirm
+
+define(`tcnt',   `%o4')	C -cnt; shifts use only the low 6 bits, so it acts as 64-cnt
+define(`retval', `%o5')	C bits shifted out of the top limb, copied to %o0 on return
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+	sllx	n, 3, %g1	C %g1 = n * 8, operand length in bytes
+	add	up, %g1, up	C up = one past the top source limb
+	sub	%g0, cnt, tcnt	C tcnt = -cnt
+	add	rp, %g1, rp	C rp = one past the top result limb
+	ldx	[up - 8], %g2	C top source limb
+	sub	up, 8, up
+	sub	n, 2, n	C n = number of unread limbs minus one
+	srlx	%g2, tcnt, retval	C high bits shifted out of the top limb
+	brlz,pn	n, L(done)	C single limb: only the final store remains
+	 sllx	%g2, cnt, %g1	C delay slot: %g1 = pending upper part of the next result limb
+	brz,pn	n, L(final_one)	C exactly one unread limb: skip the loop
+	 nop
+L(top):	C two limbs per iteration, walking from high addresses to low
+	ldx	[up - 8], %g2	C next lower source limb
+
+	ldx	[up - 16], %g5	C and the limb below that
+	sub	n, 2, n
+
+	sub	up, 16, up
+	sub	rp, 16, rp
+
+	srlx	%g2, tcnt, %g3	C bits of %g2 that carry into the limb above
+
+	srlx	%g5, tcnt, %g4	C bits of %g5 that carry into the limb above
+	or	%g1, %g3, %g1	C combine with the pending shifted limb
+
+	stx	%g1, [rp + 8]	C upper result limb of this pair
+	sllx	%g2, cnt, %g1
+
+	or	%g1, %g4, %g1
+	stx	%g1, [rp + 0]	C lower result limb of this pair
+
+	brgz	n, L(top)	C n > 0: at least two unread limbs remain
+	 sllx	%g5, cnt, %g1	C delay slot (runs on both paths): new pending value
+
+	brlz,pt	n, L(done)	C n < 0: nothing left to read
+	 nop
+L(final_one):	C n == 0: exactly one unread source limb
+	ldx	[up - 8], %g2
+	srlx	%g2, tcnt, %g3
+	or	%g1, %g3, %g1
+	stx	%g1, [rp - 8]
+	sllx	%g2, cnt, %g1	C pending value for the lowest result limb
+	sub	rp, 8, rp
+L(done):
+	stx	%g1, [rp - 8]	C store the pending value as the lowest result limb
+	retl
+	 mov	retval, %o0	C delay slot: return the shifted-out bits
+EPILOGUE()
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/lshiftc.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/lshiftc.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -0,0 +1,94 @@
+dnl  SPARC v9 mpn_lshiftc for T3/T4.
+
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC T3:	10.5
+C UltraSPARC T4:	 4.5
+
+C INPUT PARAMETERS
+define(`rp',     `%o0')	C destination pointer (all stores go through it)
+define(`up',     `%o1')	C source pointer (all loads go through it)
+define(`n',      `%o2')	C limb count; NOTE(review): code assumes n >= 1, confirm with callers
+define(`cnt',    `%o3')	C shift count in bits; NOTE(review): assumed nonzero mod 64, confirm
+
+define(`tcnt',   `%o4')	C -cnt; shifts use only the low 6 bits, so it acts as 64-cnt
+define(`retval', `%o5')	C bits shifted out of the top limb (not complemented), returned in %o0
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+	sllx	n, 3, %g1	C %g1 = n * 8, operand length in bytes
+	add	up, %g1, up	C up = one past the top source limb
+	sub	%g0, cnt, tcnt	C tcnt = -cnt
+	add	rp, %g1, rp	C rp = one past the top result limb
+	ldx	[up - 8], %g2	C top source limb
+	sub	up, 8, up
+	sub	n, 2, n	C n = number of unread limbs minus one
+	srlx	%g2, tcnt, retval	C high bits shifted out of the top limb
+	brlz,pn	n, L(done)	C single limb: only the final store remains
+	 sllx	%g2, cnt, %g1	C delay slot: %g1 = pending value, still uncomplemented
+	brz,pn	n, L(final_one)	C exactly one unread limb: skip the loop
+	 nop
+L(top):	C two limbs per iteration, walking from high addresses to low
+	ldx	[up - 8], %g2	C next lower source limb
+	xnor	%g1, %g0, %g1	C xnor with %g0 is bitwise NOT: complement the pending value
+
+	ldx	[up - 16], %g5	C and the limb below that
+	sub	n, 2, n
+
+	sub	up, 16, up
+	sub	rp, 16, rp
+
+	srlx	%g2, tcnt, %g3	C bits of %g2 that carry into the limb above
+
+	srlx	%g5, tcnt, %g4	C bits of %g5 that carry into the limb above
+	andn	%g1, %g3, %g1	C NOT(pending) AND NOT(%g3) = complement of (pending OR %g3)
+
+	stx	%g1, [rp + 8]	C upper result limb of this pair
+	sllx	%g2, cnt, %g1
+
+	xnor	%g1, %g0, %g1	C complement before merging, as above
+
+	andn	%g1, %g4, %g1
+	stx	%g1, [rp + 0]	C lower result limb of this pair
+
+	brgz	n, L(top)	C n > 0: at least two unread limbs remain
+	 sllx	%g5, cnt, %g1	C delay slot (runs on both paths): new uncomplemented pending value
+
+	brlz,pt	n, L(done)	C n < 0: nothing left to read
+	 nop
+L(final_one):	C n == 0: exactly one unread source limb
+	ldx	[up - 8], %g2
+	xnor	%g1, %g0, %g1	C complement the pending value
+	srlx	%g2, tcnt, %g3
+	andn	%g1, %g3, %g1	C complement of (pending OR %g3)
+	stx	%g1, [rp - 8]
+	sllx	%g2, cnt, %g1	C uncomplemented pending value for the lowest result limb
+	sub	rp, 8, rp
+L(done):
+	xnor	%g1, %g0, %g1	C complement the lowest result limb
+	stx	%g1, [rp - 8]	C store it
+	retl
+	 mov	retval, %o0	C delay slot: return the shifted-out bits
+EPILOGUE()
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/popcount.asm
--- a/mpn/sparc64/ultrasparct3/popcount.asm	Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/popcount.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	 ?
+C UltraSPARC T3:	15
 C UltraSPARC T4:	 2.5
 
 C INPUT PARAMETERS
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/rshift.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/rshift.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -0,0 +1,86 @@
+dnl  SPARC v9 mpn_rshift for T3/T4.
+
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC T3:	10
+C UltraSPARC T4:	 3.8
+
+C INPUT PARAMETERS
+define(`rp',     `%o0')	C destination pointer (all stores go through it)
+define(`up',     `%o1')	C source pointer (all loads go through it)
+define(`n',      `%o2')	C limb count; NOTE(review): code assumes n >= 1, confirm with callers
+define(`cnt',    `%o3')	C shift count in bits; NOTE(review): assumed nonzero mod 64, confirm
+
+define(`tcnt',   `%o4')	C -cnt; shifts use only the low 6 bits, so it acts as 64-cnt
+define(`retval', `%o5')	C bits shifted out of the lowest limb, left-aligned, returned in %o0
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_rshift)
+	sub	%g0, cnt, tcnt	C tcnt = -cnt
+	ldx	[up + 0], %g2	C lowest source limb
+	add	up, 8, up
+	sub	n, 2, n	C n = number of unread limbs minus one
+	sllx	%g2, tcnt, retval	C low bits shifted out, moved to the top of the word
+	brlz,pn	n, L(done)	C single limb: only the final store remains
+	 srlx	%g2, cnt, %g1	C delay slot: %g1 = pending lower part of the next result limb
+	brz,pn	n, L(final_one)	C exactly one unread limb: skip the loop
+	 nop
+L(top):	C two limbs per iteration, walking from low addresses to high
+	ldx	[up + 0], %g2	C next higher source limb
+
+	ldx	[up + 8], %g5	C and the limb above that
+	sub	n, 2, n
+
+	add	up, 16, up
+	add	rp, 16, rp
+
+	sllx	%g2, tcnt, %g3	C bits of %g2 that carry into the limb below
+
+	sllx	%g5, tcnt, %g4	C bits of %g5 that carry into the limb below
+	or	%g1, %g3, %g1	C combine with the pending shifted limb
+
+	stx	%g1, [rp - 16]	C lower result limb of this pair
+	srlx	%g2, cnt, %g1
+
+	or	%g1, %g4, %g1
+	stx	%g1, [rp - 8]	C upper result limb of this pair
+
+	brgz	n, L(top)	C n > 0: at least two unread limbs remain
+	 srlx	%g5, cnt, %g1	C delay slot (runs on both paths): new pending value
+
+	brlz,pt	n, L(done)	C n < 0: nothing left to read
+	 nop
+L(final_one):	C n == 0: exactly one unread source limb
+	ldx	[up + 0], %g2
+	sllx	%g2, tcnt, %g3
+	or	%g1, %g3, %g1
+	stx	%g1, [rp + 0]
+	srlx	%g2, cnt, %g1	C pending value for the highest result limb
+	add	rp, 8, rp
+L(done):
+	stx	%g1, [rp + 0]	C store the pending value as the highest result limb
+	retl
+	 mov	retval, %o0	C delay slot: return the shifted-out bits
+EPILOGUE()
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/submul_1.asm
--- a/mpn/sparc64/ultrasparct3/submul_1.asm	Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/submul_1.asm	Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	 ?
+C UltraSPARC T3:	29
 C UltraSPARC T4:	 5.8
 
 C INPUT PARAMETERS


More information about the gmp-devel mailing list