[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Apr 4 23:43:37 CEST 2013
details: /var/hg/gmp/rev/865f325f9188
changeset: 15682:865f325f9188
user: David Miller
date: Thu Apr 04 18:40:20 2013 +0200
description:
Align table.
details: /var/hg/gmp/rev/8ae5a88f8f3a
changeset: 15683:8ae5a88f8f3a
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Apr 04 18:45:04 2013 +0200
description:
Add cycle numbers for T3 and T4.
details: /var/hg/gmp/rev/a1d799022b53
changeset: 15684:a1d799022b53
user: David Miller
date: Thu Apr 04 23:42:21 2013 +0200
description:
Rewrite.
details: /var/hg/gmp/rev/f1776e0d115d
changeset: 15685:f1776e0d115d
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Apr 04 23:42:35 2013 +0200
description:
Remove obsolete comment.
details: /var/hg/gmp/rev/4904502d6332
changeset: 15686:4904502d6332
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Apr 04 23:43:28 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 7 ++
mpn/sparc64/copyd.asm | 7 +-
mpn/sparc64/copyi.asm | 7 +-
mpn/sparc64/gcd_1.asm | 4 +-
mpn/sparc64/ultrasparct3/add_n.asm | 87 ++++++++++++++++++------
mpn/sparc64/ultrasparct3/aormul_2.asm | 2 +-
mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm | 4 +-
mpn/sparc64/ultrasparct3/dive_1.asm | 2 +-
mpn/sparc64/ultrasparct3/invert_limb.asm | 1 +
mpn/sparc64/ultrasparct3/mod_1_4.asm | 4 +-
mpn/sparc64/ultrasparct3/mode1o.asm | 4 +-
mpn/sparc64/ultrasparct3/mul_1.asm | 14 +---
mpn/sparc64/ultrasparct3/sub_n.asm | 108 +++++++++++++++++++++++-------
13 files changed, 175 insertions(+), 76 deletions(-)
diffs (truncated from 425 to 300 lines):
diff -r 01170daebe7d -r 4904502d6332 ChangeLog
--- a/ChangeLog Thu Apr 04 03:20:03 2013 +0200
+++ b/ChangeLog Thu Apr 04 23:43:28 2013 +0200
@@ -1,3 +1,10 @@
+2013-04-04 David S. Miller <davem at davemloft.net>
+
+ * mpn/sparc64/ultrasparct3/add_n.asm: Rewrite.
+ * mpn/sparc64/ultrasparct3/sub_n.asm: Rewrite.
+
+ * mpn/sparc64/ultrasparct3/invert_limb.asm: Align table.
+
2013-04-04 Torbjorn Granlund <tege at gmplib.org>
* mpn/sparc32/sparc-defs.m4: Provide dummy lzcnt.
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/copyd.asm
--- a/mpn/sparc64/copyd.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/copyd.asm Thu Apr 04 23:43:28 2013 +0200
@@ -20,8 +20,11 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC 1&2: 2
-C UltraSPARC 3: 2.5
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+C UltraSPARC T1: 17
+C UltraSPARC T3: 6
+C UltraSPARC T4/T5: 2
C INPUT PARAMETERS
C rptr %o0
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/copyi.asm
--- a/mpn/sparc64/copyi.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/copyi.asm Thu Apr 04 23:43:28 2013 +0200
@@ -20,8 +20,11 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC 1&2: 2
-C UltraSPARC 3: 2.5
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+C UltraSPARC T1: 17
+C UltraSPARC T3: 6
+C UltraSPARC T4/T5: 2
C INPUT PARAMETERS
C rptr %o0
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/gcd_1.asm
--- a/mpn/sparc64/gcd_1.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/gcd_1.asm Thu Apr 04 23:43:28 2013 +0200
@@ -28,8 +28,8 @@
C UltraSPARC 1&2: 5.1
C UltraSPARC 3: 5.0
C UltraSPARC T1: 11.4
-C UltraSPARC T3: ?
-C UltraSPARC T4: ?
+C UltraSPARC T3: 10
+C UltraSPARC T4: 6
C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/add_n.asm
--- a/mpn/sparc64/ultrasparct3/add_n.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/add_n.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,46 +22,86 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: 9
-C UltraSPARC T4: 3.5
+C UltraSPARC T3: 8
+C UltraSPARC T4: 3
C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n', `%o3')
-define(`cy', `%o4')
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`vp', `%i2')
+define(`n', `%i3')
+define(`cy', `%i4')
+
+define(`u0_off', `%l2')
+define(`u1_off', `%l3')
+define(`loop_n', `%l6')
+define(`tmp', `%l7')
ASM_START()
REGISTER(%g2,#scratch)
REGISTER(%g3,#scratch)
PROLOGUE(mpn_add_nc)
+ save %sp, -176, %sp
b,a L(ent)
EPILOGUE()
PROLOGUE(mpn_add_n)
+ save %sp, -176, %sp
+
mov 0, cy
L(ent):
subcc n, 1, n
be L(final_one)
cmp %g0, cy
+
+ ldx [up + 0], %o4
+ sllx n, 3, tmp
+
+ ldx [vp + 0], %o5
+ add up, tmp, u0_off
+
+ ldx [up + 8], %g5
+ neg tmp, loop_n
+
+ ldx [vp + 8], %g1
+ add u0_off, 8, u1_off
+
+ sub loop_n, -(2 * 8), loop_n
+
+ brgez,pn loop_n, L(loop_tail)
+ add vp, (2 * 8), vp
+
+ b,a L(top)
+ ALIGN(16)
L(top):
- ldx [up+0], %o4
- add up, 16, up
- ldx [vp+0], %o5
- add vp, 16, vp
- ldx [up-8], %g1
- add rp, 16, rp
- ldx [vp-8], %g2
- sub n, 2, n
+ addxccc(%o4, %o5, tmp)
+ ldx [vp + 0], %o5
+
+ add rp, (2 * 8), rp
+ ldx [loop_n + u0_off], %o4
+
+ add vp, (2 * 8), vp
+ stx tmp, [rp - 16]
+
+ addxccc(%g1, %g5, tmp)
+ ldx [vp - 8], %g1
+
+ ldx [loop_n + u1_off], %g5
+ sub loop_n, -(2 * 8), loop_n
+
+ brlz loop_n, L(top)
+ stx tmp, [rp - 8]
+
+L(loop_tail):
addxccc(%o4, %o5, %g3)
- stx %g3, [rp-16]
- addxccc(%g1, %g2, %g2)
- brgz n, L(top)
- stx %g2, [rp-8]
+ add loop_n, u0_off, up
- brlz,pt n, L(done)
- nop
+ addxccc(%g1, %g5, %g5)
+ stx %g3, [rp + 0]
+ brgz,pt loop_n, L(done)
+ stx %g5, [rp + 8]
+
+ add rp, (2 * 8), rp
L(final_one):
ldx [up+0], %o4
ldx [vp+0], %o5
@@ -69,6 +109,7 @@
stx %g3, [rp+0]
L(done):
- retl
- addxc( %g0, %g0, %o0)
+ addxc(%g0, %g0, %i0)
+ ret
+ restore
EPILOGUE()
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/aormul_2.asm
--- a/mpn/sparc64/ultrasparct3/aormul_2.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/aormul_2.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,7 +22,7 @@
C cycles/limb cycles/limb
C mul_2 addmul_2
-C UltraSPARC T3: ? ?
+C UltraSPARC T3: 22.5 23.5
C UltraSPARC T4: 3.25 3.75
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm
--- a/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,8 +22,8 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: ?
-C UltraSPARC T4/T5: 4 hopefully
+C UltraSPARC T3: 25
+C UltraSPARC T4/T5: 4
C INPUT PARAMETERS
define(`qp', `%i0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/dive_1.asm
--- a/mpn/sparc64/ultrasparct3/dive_1.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/dive_1.asm Thu Apr 04 23:43:28 2013 +0200
@@ -23,7 +23,7 @@
C cycles/limb
C UltraSPARC T3: ?
-C UltraSPARC T4/T5: ?
+C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops
C INPUT PARAMETERS
define(`qp', `%i0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/invert_limb.asm
--- a/mpn/sparc64/ultrasparct3/invert_limb.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/invert_limb.asm Thu Apr 04 23:43:28 2013 +0200
@@ -73,6 +73,7 @@
EPILOGUE()
RODATA
+ ALIGN(2)
TYPE( approx_tab, object)
SIZE( approx_tab, 512)
approx_tab:
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/mod_1_4.asm
--- a/mpn/sparc64/ultrasparct3/mod_1_4.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mod_1_4.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,8 +22,8 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: ?
-C UltraSPARC T4/T5: ?
+C UltraSPARC T3: 30
+C UltraSPARC T4/T5: 4
C INPUT PARAMETERS
define(`ap', `%o0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/mode1o.asm
--- a/mpn/sparc64/ultrasparct3/mode1o.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mode1o.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,8 +22,8 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: ?
-C UltraSPARC T4/T5: ?
+C UltraSPARC T3: 30
+C UltraSPARC T4/T5: 26
C INPUT PARAMETERS
define(`ap', `%o0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/mul_1.asm
--- a/mpn/sparc64/ultrasparct3/mul_1.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mul_1.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,18 +22,8 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: < 25.5 hopefully
-C UltraSPARC T4: 3 hopefully
-
-C If this does not run at 3 c/l, here is what to try:
-C (1) Reorder the bookkeeping insn at loop end, putting the n update at cycle
-C 10 and the rp update in the delay slot. To make the loop take the right
-C # of iterations, add 4 to every delay slot n update in the feed in code,
-C and put a new 'add n, 4, n' in the L(b0) delay slot. Motive: This makes
-C n available to the loop branch from a feed-forward bus instead of the
-C register bank.
-C (1) Swap every the mulx insns in the loop with the addxccc immediately
-C preceding it.
+C UltraSPARC T3: 23
+C UltraSPARC T4: 3
C INPUT PARAMETERS
define(`rp', `%i0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/sub_n.asm
--- a/mpn/sparc64/ultrasparct3/sub_n.asm Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/sub_n.asm Thu Apr 04 23:43:28 2013 +0200
@@ -22,48 +22,101 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: 10
-C UltraSPARC T4: 4
+C UltraSPARC T3: 8
+C UltraSPARC T4: 3
C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
More information about the gmp-commit mailing list