[PATCH] T3/T4 sparc shifts, plus more timings
David Miller
davem at davemloft.net
Tue Mar 26 05:50:30 CET 2013
These give a modest speedup compared to the T1 routines.
I also added missing T3 timings to existing code.
Also, I worked on a copyi/copyd for T3/T4 that uses cache-initializing
stores (basically, if you're going to write a full aligned 64-byte
cache line, you tell the chip by using a special ASI in the stores,
and the cpu will simply clear the entire cache line on write to the
first word of the cache line, eliminating all the memory traffic).
However, the setup for this has a bit of overhead, since we have to align
the destination to 64 bytes, and I'm not so sure that it's a win
overall in common usage.
2013-03-26 David S. Miller <davem at davemloft.net>
* mpn/sparc64/ultrasparct3/aormul_2.asm: Add T3 timings.
* mpn/sparc64/ultrasparct3/hamdist.asm: Likewise.
* mpn/sparc64/ultrasparct3/popcount.asm: Likewise.
* mpn/sparc64/ultrasparct3/submul_1.asm: Likewise.
* mpn/sparc64/ultrasparct3/lshift.asm: New file.
* mpn/sparc64/ultrasparct3/lshiftc.asm: New file.
* mpn/sparc64/ultrasparct3/rshift.asm: New file.
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/aormul_2.asm
--- a/mpn/sparc64/ultrasparct3/aormul_2.asm Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/aormul_2.asm Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
C cycles/limb cycles/limb
C mul_2 addmul_2
-C UltraSPARC T3: ? ?
+C UltraSPARC T3: 23 24
C UltraSPARC T4: ~3.5 ~4
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/hamdist.asm
--- a/mpn/sparc64/ultrasparct3/hamdist.asm Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/hamdist.asm Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: ?
+C UltraSPARC T3: 18
C UltraSPARC T4: 3.5
C INPUT PARAMETERS
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/lshift.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/lshift.asm Mon Mar 25 21:39:18 2013 -0700
@@ -0,0 +1,89 @@
+dnl SPARC v9 mpn_lshift for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 10
+C UltraSPARC T4: 3.8
+
+C INPUT PARAMETERS
+define(`rp', `%o0')            C destination limb pointer
+define(`up', `%o1')            C source limb pointer
+define(`n', `%o2')             C limb count; reused below as the loop counter
+define(`cnt', `%o3')           C shift count in bits
+C Working registers; %g1-%g5 are used as temporaries as well.
+define(`tcnt', `%o4')          C 0 - cnt, count for the complementary right shift
+define(`retval', `%o5')        C bits shifted out of the most significant limb
+C Shift {up,n} left by cnt bits into {rp,n}, walking from the top limb down.
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+ sllx n, 3, %g1                C %g1 = n * 8, the operand size in bytes
+ add up, %g1, up               C up = one past the most significant source limb
+ sub %g0, cnt, tcnt            C tcnt = -cnt; 64-bit shifts use only the low 6 bits, so this acts as 64 - cnt
+ add rp, %g1, rp               C rp = one past the most significant result limb
+ ldx [up - 8], %g2             C most significant source limb (loaded unconditionally, so n must be >= 1)
+ sub up, 8, up
+ sub n, 2, n                   C n = (source limbs still to load) - 1
+ srlx %g2, tcnt, retval        C return value: the bits pushed out at the top
+ brlz,pn n, L(done)            C single limb: only the final store remains
+ sllx %g2, cnt, %g1            C (delay slot) %g1 = pending high part of the next result limb
+ brz,pn n, L(final_one)        C exactly two limbs: skip the two-limb loop
+ nop
+L(top):                        C two limbs per iteration; %g1 carries the pending high part
+ ldx [up - 8], %g2             C next lower source limb
+
+ ldx [up - 16], %g5            C and the limb below that one
+ sub n, 2, n
+
+ sub up, 16, up
+ sub rp, 16, rp
+
+ srlx %g2, tcnt, %g3           C low part supplied by the first loaded limb
+
+ srlx %g5, tcnt, %g4           C low part supplied by the second loaded limb
+ or %g1, %g3, %g1
+
+ stx %g1, [rp + 8]             C upper of the two result limbs
+ sllx %g2, cnt, %g1
+
+ or %g1, %g4, %g1
+ stx %g1, [rp + 0]             C lower of the two result limbs
+
+ brgz n, L(top)                C loop while at least two more limbs remain to load
+ sllx %g5, cnt, %g1            C (delay slot) pending high part for the next store
+
+ brlz,pt n, L(done)            C nothing left to load: just flush the pending part
+ nop
+L(final_one):                  C exactly one source limb left to load
+ ldx [up - 8], %g2             C least significant source limb
+ srlx %g2, tcnt, %g3
+ or %g1, %g3, %g1
+ stx %g1, [rp - 8]
+ sllx %g2, cnt, %g1            C least significant result limb; vacated low bits are zero
+ sub rp, 8, rp
+L(done):
+ stx %g1, [rp - 8]             C store the pending (least significant) result limb
+ retl
+ mov retval, %o0               C (delay slot) return the shifted-out bits
+EPILOGUE()
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/lshiftc.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/lshiftc.asm Mon Mar 25 21:39:18 2013 -0700
@@ -0,0 +1,94 @@
+dnl SPARC v9 mpn_lshiftc for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 10.5
+C UltraSPARC T4: 4.5
+
+C INPUT PARAMETERS
+define(`rp', `%o0')            C destination limb pointer
+define(`up', `%o1')            C source limb pointer
+define(`n', `%o2')             C limb count; reused below as the loop counter
+define(`cnt', `%o3')           C shift count in bits
+C Working registers; %g1-%g5 are used as temporaries as well.
+define(`tcnt', `%o4')          C 0 - cnt, count for the complementary right shift
+define(`retval', `%o5')        C bits shifted out of the most significant limb (not complemented)
+C Shift {up,n} left by cnt bits and store the one's complement of each result limb at rp.
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+ sllx n, 3, %g1                C %g1 = n * 8, the operand size in bytes
+ add up, %g1, up               C up = one past the most significant source limb
+ sub %g0, cnt, tcnt            C tcnt = -cnt; 64-bit shifts use only the low 6 bits, so this acts as 64 - cnt
+ add rp, %g1, rp               C rp = one past the most significant result limb
+ ldx [up - 8], %g2             C most significant source limb (loaded unconditionally, so n must be >= 1)
+ sub up, 8, up
+ sub n, 2, n                   C n = (source limbs still to load) - 1
+ srlx %g2, tcnt, retval        C return value: the bits pushed out at the top, as is
+ brlz,pn n, L(done)            C single limb: only the final complement and store remain
+ sllx %g2, cnt, %g1            C (delay slot) %g1 = pending high part, not yet complemented
+ brz,pn n, L(final_one)        C exactly two limbs: skip the two-limb loop
+ nop
+L(top):                        C two limbs per iteration; %g1 carries the pending high part
+ ldx [up - 8], %g2             C next lower source limb
+ xnor %g1, %g0, %g1            C xnor with zero: %g1 = ~%g1
+
+ ldx [up - 16], %g5            C and the limb below that one
+ sub n, 2, n
+
+ sub up, 16, up
+ sub rp, 16, rp
+
+ srlx %g2, tcnt, %g3           C low part supplied by the first loaded limb
+
+ srlx %g5, tcnt, %g4           C low part supplied by the second loaded limb
+ andn %g1, %g3, %g1            C ~high & ~low, i.e. the complement of (high | low)
+
+ stx %g1, [rp + 8]             C upper of the two result limbs
+ sllx %g2, cnt, %g1
+
+ xnor %g1, %g0, %g1            C complement the new high part
+
+ andn %g1, %g4, %g1            C merge in the complemented low part
+ stx %g1, [rp + 0]             C lower of the two result limbs
+
+ brgz n, L(top)                C loop while at least two more limbs remain to load
+ sllx %g5, cnt, %g1            C (delay slot) pending high part for the next store
+
+ brlz,pt n, L(done)            C nothing left to load: just flush the pending part
+ nop
+L(final_one):                  C exactly one source limb left to load
+ ldx [up - 8], %g2             C least significant source limb
+ xnor %g1, %g0, %g1            C complement the pending high part
+ srlx %g2, tcnt, %g3
+ andn %g1, %g3, %g1            C complement of (high | low)
+ stx %g1, [rp - 8]
+ sllx %g2, cnt, %g1            C least significant result limb before complementing
+ sub rp, 8, rp
+L(done):
+ xnor %g1, %g0, %g1            C complement the last limb; its vacated low bits become ones
+ stx %g1, [rp - 8]             C store the least significant result limb
+ retl
+ mov retval, %o0               C (delay slot) return the shifted-out bits
+EPILOGUE()
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/popcount.asm
--- a/mpn/sparc64/ultrasparct3/popcount.asm Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/popcount.asm Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: ?
+C UltraSPARC T3: 15
C UltraSPARC T4: 2.5
C INPUT PARAMETERS
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/rshift.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/rshift.asm Mon Mar 25 21:39:18 2013 -0700
@@ -0,0 +1,86 @@
+dnl SPARC v9 mpn_rshift for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 10
+C UltraSPARC T4: 3.8
+
+C INPUT PARAMETERS
+define(`rp', `%o0')            C destination limb pointer
+define(`up', `%o1')            C source limb pointer
+define(`n', `%o2')             C limb count; reused below as the loop counter
+define(`cnt', `%o3')           C shift count in bits
+C Working registers; %g1-%g5 are used as temporaries as well.
+define(`tcnt', `%o4')          C 0 - cnt, count for the complementary left shift
+define(`retval', `%o5')        C bits shifted out of the least significant limb
+C Shift {up,n} right by cnt bits into {rp,n}, walking from the bottom limb up.
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_rshift)
+ sub %g0, cnt, tcnt            C tcnt = -cnt; 64-bit shifts use only the low 6 bits, so this acts as 64 - cnt
+ ldx [up + 0], %g2             C least significant source limb (loaded unconditionally, so n must be >= 1)
+ add up, 8, up
+ sub n, 2, n                   C n = (source limbs still to load) - 1
+ sllx %g2, tcnt, retval        C return value: the shifted-out bits, moved to the top of the limb
+ brlz,pn n, L(done)            C single limb: only the final store remains
+ srlx %g2, cnt, %g1            C (delay slot) %g1 = pending low part of the next result limb
+ brz,pn n, L(final_one)        C exactly two limbs: skip the two-limb loop
+ nop
+L(top):                        C two limbs per iteration; %g1 carries the pending low part
+ ldx [up + 0], %g2             C next higher source limb
+
+ ldx [up + 8], %g5             C and the limb above that one
+ sub n, 2, n
+
+ add up, 16, up
+ add rp, 16, rp
+
+ sllx %g2, tcnt, %g3           C high part supplied by the first loaded limb
+
+ sllx %g5, tcnt, %g4           C high part supplied by the second loaded limb
+ or %g1, %g3, %g1
+
+ stx %g1, [rp - 16]            C lower of the two result limbs
+ srlx %g2, cnt, %g1
+
+ or %g1, %g4, %g1
+ stx %g1, [rp - 8]             C upper of the two result limbs
+
+ brgz n, L(top)                C loop while at least two more limbs remain to load
+ srlx %g5, cnt, %g1            C (delay slot) pending low part for the next store
+
+ brlz,pt n, L(done)            C nothing left to load: just flush the pending part
+ nop
+L(final_one):                  C exactly one source limb left to load
+ ldx [up + 0], %g2             C most significant source limb
+ sllx %g2, tcnt, %g3
+ or %g1, %g3, %g1
+ stx %g1, [rp + 0]
+ srlx %g2, cnt, %g1            C most significant result limb; vacated high bits are zero
+ add rp, 8, rp
+L(done):
+ stx %g1, [rp + 0]             C store the pending (most significant) result limb
+ retl
+ mov retval, %o0               C (delay slot) return the shifted-out bits
+EPILOGUE()
diff -r 82870a93a714 mpn/sparc64/ultrasparct3/submul_1.asm
--- a/mpn/sparc64/ultrasparct3/submul_1.asm Mon Mar 25 19:51:21 2013 +0100
+++ b/mpn/sparc64/ultrasparct3/submul_1.asm Mon Mar 25 21:39:18 2013 -0700
@@ -22,7 +22,7 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: ?
+C UltraSPARC T3: 29
C UltraSPARC T4: 5.8
C INPUT PARAMETERS
More information about the gmp-devel
mailing list