[Gmp-commit] /var/hg/gmp: Rewrite sparc32/ultrasparct1 *mul_1 code.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Mar 5 13:47:08 CET 2013
details: /var/hg/gmp/rev/dff8f0d02b13
changeset: 15527:dff8f0d02b13
user: davem at davemloft.net
date: Tue Mar 05 13:47:00 2013 +0100
description:
Rewrite sparc32/ultrasparct1 *mul_1 code.
diffstat:
ChangeLog | 7 +++
mpn/sparc32/ultrasparct1/addmul_1.asm | 71 +++++++++++++++++++++++----------
mpn/sparc32/ultrasparct1/mul_1.asm | 46 ++++++++++++++++-----
mpn/sparc32/ultrasparct1/submul_1.asm | 72 ++++++++++++++++++++++++----------
4 files changed, 140 insertions(+), 56 deletions(-)
diffs (275 lines):
diff -r eeae8c24e11e -r dff8f0d02b13 ChangeLog
--- a/ChangeLog Mon Mar 04 23:20:00 2013 +0100
+++ b/ChangeLog Tue Mar 05 13:47:00 2013 +0100
@@ -1,3 +1,10 @@
+2013-03-05 David Miller <davem at davemloft.net>
+
+ * mpn/sparc32/ultrasparct1/mul_1.asm: Unroll main loop one time, add
+ T2/T3/T4 timings.
+ * mpn/sparc32/ultrasparct1/addmul_1.asm: Likewise.
+ * mpn/sparc32/ultrasparct1/submul_1.asm: Likewise.
+
2013-03-04 Torbjorn Granlund <tege at gmplib.org>
* mpn/arm/neon/lorrshift.asm: New file.
diff -r eeae8c24e11e -r dff8f0d02b13 mpn/sparc32/ultrasparct1/addmul_1.asm
--- a/mpn/sparc32/ultrasparct1/addmul_1.asm Mon Mar 04 23:20:00 2013 +0100
+++ b/mpn/sparc32/ultrasparct1/addmul_1.asm Tue Mar 05 13:47:00 2013 +0100
@@ -1,6 +1,8 @@
dnl SPARC T1 32-bit mpn_addmul_1.
-dnl Copyright 2010 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -20,33 +22,58 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T1: 27
+C UltraSPARC T1: 24
+C UltraSPARC T2: 19
+C UltraSPARC T3: 19
+C UltraSPARC T4: 5
C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`n', `%o2')
-define(`v0', `%o3')
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
ASM_START()
PROLOGUE(mpn_addmul_1)
- mov 0, %g4
- srl v0, 0, v0
- srl n, 0, n
- dec n C n--
+ save %sp, -96, %sp
+ srl n, 0, %o4
+ srl v0, 0, %g1
+ subcc %o4, 1, %o4
+ be L(final_one)
+ clr %o5
-L(top): lduw [up+0], %g1
- add up, 4, up C up++
- mulx %g1, v0, %g3
- lduw [rp+0], %g2
- add %g2, %g3, %g3
- add %g4, %g3, %g3
+L(top): lduw [up+0], %l0
+ lduw [rp+0], %l2
+ lduw [up+4], %l1
+ lduw [rp+4], %l3
+ mulx %l0, %g1, %g3
+ add up, 8, up
+ mulx %l1, %g1, %o3
+ sub %o4, 2, %o4
+ add rp, 8, rp
+ add %l2, %g3, %g3
+ add %o5, %g3, %g3
+ stw %g3, [rp-8]
+ srlx %g3, 32, %o5
+ add %l3, %o3, %o3
+ add %o5, %o3, %o3
+ stw %o3, [rp-4]
+ brgz %o4, L(top)
+ srlx %o3, 32, %o5
+
+ brlz,pt %o4, L(done)
+ nop
+
+L(final_one):
+ lduw [up+0], %l0
+ lduw [rp+0], %l2
+ mulx %l0, %g1, %g3
+ add %l2, %g3, %g3
+ add %o5, %g3, %g3
stw %g3, [rp+0]
- add rp, 4, rp C rp++
- srlx %g3, 32, %g4
- brnz n, L(top)
- dec n C n--
+ srlx %g3, 32, %o5
- retl
- mov %g4, %o0 C return value
+L(done):
+ ret
+ restore %o5, 0, %o0
EPILOGUE()
diff -r eeae8c24e11e -r dff8f0d02b13 mpn/sparc32/ultrasparct1/mul_1.asm
--- a/mpn/sparc32/ultrasparct1/mul_1.asm Mon Mar 04 23:20:00 2013 +0100
+++ b/mpn/sparc32/ultrasparct1/mul_1.asm Tue Mar 05 13:47:00 2013 +0100
@@ -1,6 +1,8 @@
dnl SPARC T1 32-bit mpn_mul_1.
-dnl Copyright 2010 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -20,7 +22,10 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T1: 23
+C UltraSPARC T1: 20
+C UltraSPARC T2: 18
+C UltraSPARC T3: 18
+C UltraSPARC T4: 4
C INPUT PARAMETERS
define(`rp', `%o0')
@@ -30,21 +35,38 @@
ASM_START()
PROLOGUE(mpn_mul_1)
- mov 0, %g4
+ srl n, 0, n
srl v0, 0, v0
- srl n, 0, n
- dec n C n--
+ subcc n, 1, n
+ be L(final_one)
+ clr %o5
L(top): lduw [up+0], %g1
- add up, 4, up C up++
+ lduw [up+4], %g2
mulx %g1, v0, %g3
- add %g4, %g3, %g3
+ add up, 8, up
+ mulx %g2, v0, %o4
+ sub n, 2, n
+ add rp, 8, rp
+ add %o5, %g3, %g3
+ stw %g3, [rp-8]
+ srlx %g3, 32, %o5
+ add %o5, %o4, %o4
+ stw %o4, [rp-4]
+ brgz n, L(top)
+ srlx %o4, 32, %o5
+
+ brlz,pt n, L(done)
+ nop
+
+L(final_one):
+ lduw [up+0], %g1
+ mulx %g1, v0, %g3
+ add %o5, %g3, %g3
stw %g3, [rp+0]
- add rp, 4, rp C rp++
- srlx %g3, 32, %g4
- brnz n, L(top)
- dec n C n--
+ srlx %g3, 32, %o5
+L(done):
retl
- mov %g4, %o0 C return value
+ mov %o5, %o0
EPILOGUE()
diff -r eeae8c24e11e -r dff8f0d02b13 mpn/sparc32/ultrasparct1/submul_1.asm
--- a/mpn/sparc32/ultrasparct1/submul_1.asm Mon Mar 04 23:20:00 2013 +0100
+++ b/mpn/sparc32/ultrasparct1/submul_1.asm Tue Mar 05 13:47:00 2013 +0100
@@ -1,6 +1,8 @@
dnl SPARC T1 32-bit mpn_submul_1.
-dnl Copyright 2010 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -20,33 +22,59 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T1: 27
+C UltraSPARC T1: 24
+C UltraSPARC T2: 19
+C UltraSPARC T3: 19
+C UltraSPARC T4: 5
C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`n', `%o2')
-define(`v0', `%o3')
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
ASM_START()
PROLOGUE(mpn_submul_1)
- subcc %g0, %g0, %g4 C clear CF and g4
- srl v0, 0, v0
- srl n, 0, n
- dec n C n--
+ save %sp, -96, %sp
+ srl n, 0, %o4
+ srl v0, 0, %g1
+ subcc %o4, 1, %o4
+ be L(final_one)
+ subcc %g0, 0, %o5
-L(top): lduw [up+0], %g1
- add up, 4, up C up++
- mulx %g1, v0, %g3
- lduw [rp+0], %g2
- addx %g4, %g3, %g3
- srlx %g3, 32, %g4
- subcc %g2, %g3, %g3
+L(top): lduw [up+0], %l0
+ lduw [rp+0], %l2
+ lduw [up+4], %l1
+ lduw [rp+4], %l3
+ mulx %l0, %g1, %g3
+ add up, 8, up
+ mulx %l1, %g1, %o3
+ sub %o4, 2, %o4
+ add rp, 8, rp
+ addx %o5, %g3, %g3
+ srlx %g3, 32, %o5
+ subcc %l2, %g3, %g3
+ stw %g3, [rp-8]
+ addx %o5, %o3, %o3
+ srlx %o3, 32, %o5
+ subcc %l3, %o3, %o3
+ brgz %o4, L(top)
+ stw %o3, [rp-4]
+
+ brlz,pt %o4, L(done)
+ nop
+
+L(final_one):
+ lduw [up+0], %l0
+ lduw [rp+0], %l2
+ mulx %l0, %g1, %g3
+ addx %o5, %g3, %g3
+ srlx %g3, 32, %o5
+ subcc %l2, %g3, %g3
stw %g3, [rp+0]
- add rp, 4, rp C rp++
- brnz n, L(top)
- dec n C n--
- retl
- addx %g4, 0, %o0 C return value
+L(done):
+ addx %o5, 0, %o5
+ ret
+ restore %o5, 0, %o0
EPILOGUE()
More information about the gmp-commit mailing list