[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Apr 4 23:43:37 CEST 2013


details:   /var/hg/gmp/rev/865f325f9188
changeset: 15682:865f325f9188
user:      David Miller
date:      Thu Apr 04 18:40:20 2013 +0200
description:
Align table.

details:   /var/hg/gmp/rev/8ae5a88f8f3a
changeset: 15683:8ae5a88f8f3a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Apr 04 18:45:04 2013 +0200
description:
Add cycle numbers for T3 and T4.

details:   /var/hg/gmp/rev/a1d799022b53
changeset: 15684:a1d799022b53
user:      David Miller
date:      Thu Apr 04 23:42:21 2013 +0200
description:
Rewrite.

details:   /var/hg/gmp/rev/f1776e0d115d
changeset: 15685:f1776e0d115d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Apr 04 23:42:35 2013 +0200
description:
Remove obsolete comment.

details:   /var/hg/gmp/rev/4904502d6332
changeset: 15686:4904502d6332
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Apr 04 23:43:28 2013 +0200
description:
ChangeLog

diffstat:

 ChangeLog                                |    7 ++
 mpn/sparc64/copyd.asm                    |    7 +-
 mpn/sparc64/copyi.asm                    |    7 +-
 mpn/sparc64/gcd_1.asm                    |    4 +-
 mpn/sparc64/ultrasparct3/add_n.asm       |   87 ++++++++++++++++++------
 mpn/sparc64/ultrasparct3/aormul_2.asm    |    2 +-
 mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm  |    4 +-
 mpn/sparc64/ultrasparct3/dive_1.asm      |    2 +-
 mpn/sparc64/ultrasparct3/invert_limb.asm |    1 +
 mpn/sparc64/ultrasparct3/mod_1_4.asm     |    4 +-
 mpn/sparc64/ultrasparct3/mode1o.asm      |    4 +-
 mpn/sparc64/ultrasparct3/mul_1.asm       |   14 +---
 mpn/sparc64/ultrasparct3/sub_n.asm       |  108 +++++++++++++++++++++++-------
 13 files changed, 175 insertions(+), 76 deletions(-)

diffs (truncated from 425 to 300 lines):

diff -r 01170daebe7d -r 4904502d6332 ChangeLog
--- a/ChangeLog	Thu Apr 04 03:20:03 2013 +0200
+++ b/ChangeLog	Thu Apr 04 23:43:28 2013 +0200
@@ -1,3 +1,10 @@
+2013-04-04  David S. Miller  <davem at davemloft.net>
+
+	* mpn/sparc64/ultrasparct3/add_n.asm: Rewrite.
+	* mpn/sparc64/ultrasparct3/sub_n.asm: Rewrite.
+
+	* mpn/sparc64/ultrasparct3/invert_limb.asm: Align table.
+
 2013-04-04  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/sparc32/sparc-defs.m4: Provide dummy lzcnt.
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/copyd.asm
--- a/mpn/sparc64/copyd.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/copyd.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -20,8 +20,11 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC 1&2:     2
-C UltraSPARC 3:	      2.5
+C UltraSPARC 1&2:	 2
+C UltraSPARC 3:		 2.5
+C UltraSPARC T1:	17
+C UltraSPARC T3:	 6
+C UltraSPARC T4/T5:	 2
 
 C INPUT PARAMETERS
 C rptr	%o0
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/copyi.asm
--- a/mpn/sparc64/copyi.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/copyi.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -20,8 +20,11 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC 1&2:     2
-C UltraSPARC 3:	      2.5
+C UltraSPARC 1&2:	 2
+C UltraSPARC 3:		 2.5
+C UltraSPARC T1:	17
+C UltraSPARC T3:	 6
+C UltraSPARC T4/T5:	 2
 
 C INPUT PARAMETERS
 C rptr	%o0
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/gcd_1.asm
--- a/mpn/sparc64/gcd_1.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/gcd_1.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -28,8 +28,8 @@
 C UltraSPARC 1&2:	 5.1
 C UltraSPARC 3:		 5.0
 C UltraSPARC T1:	11.4
-C UltraSPARC T3:	 ?
-C UltraSPARC T4:	 ?
+C UltraSPARC T3:	10
+C UltraSPARC T4:	 6
 C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
 
 C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/add_n.asm
--- a/mpn/sparc64/ultrasparct3/add_n.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/add_n.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,46 +22,86 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	 9
-C UltraSPARC T4:	 3.5
+C UltraSPARC T3:	 8
+C UltraSPARC T4:	 3
 
 C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n',  `%o3')
-define(`cy', `%o4')
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`vp', `%i2')
+define(`n',  `%i3')
+define(`cy', `%i4')
+
+define(`u0_off', `%l2')
+define(`u1_off', `%l3')
+define(`loop_n', `%l6')
+define(`tmp', `%l7')
 
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
 PROLOGUE(mpn_add_nc)
+	save	%sp, -176, %sp
 	b,a	L(ent)
 EPILOGUE()
 PROLOGUE(mpn_add_n)
+	save	%sp, -176, %sp
+
 	mov	0, cy
 L(ent):
 	subcc	n, 1, n
 	be	L(final_one)
 	 cmp	%g0, cy
+
+	ldx	[up + 0], %o4
+	sllx	n, 3, tmp
+
+	ldx	[vp + 0], %o5
+	add	up, tmp, u0_off
+
+	ldx	[up + 8], %g5
+	neg	tmp, loop_n
+
+	ldx	[vp + 8], %g1
+	add	u0_off, 8, u1_off
+
+	sub	loop_n, -(2 * 8), loop_n
+
+	brgez,pn loop_n, L(loop_tail)
+	 add	vp, (2 * 8), vp
+
+	b,a	L(top)
+	ALIGN(16)
 L(top):
-	ldx	[up+0], %o4
-	add	up, 16, up
-	ldx	[vp+0], %o5
-	add	vp, 16, vp
-	ldx	[up-8], %g1
-	add	rp, 16, rp
-	ldx	[vp-8], %g2
-	sub	n, 2, n
+	addxccc(%o4, %o5, tmp)
+	ldx	[vp + 0], %o5
+
+	add	rp, (2 * 8), rp
+	ldx	[loop_n + u0_off], %o4
+
+	add	vp, (2 * 8), vp
+	stx	tmp, [rp - 16]
+
+	addxccc(%g1, %g5, tmp)
+	ldx	[vp - 8], %g1
+
+	ldx	[loop_n + u1_off], %g5
+	sub	loop_n, -(2 * 8), loop_n
+
+	brlz	loop_n, L(top)
+	 stx	tmp, [rp - 8]
+
+L(loop_tail):
 	addxccc(%o4, %o5, %g3)
-	stx	%g3, [rp-16]
-	addxccc(%g1, %g2, %g2)
-	brgz	n, L(top)
-	 stx	%g2, [rp-8]
+	add	loop_n, u0_off, up
 
-	brlz,pt	n, L(done)
-	 nop
+	addxccc(%g1, %g5, %g5)
+	stx	%g3, [rp + 0]
 
+	brgz,pt	loop_n, L(done)
+	 stx	%g5, [rp + 8]
+
+	add	rp, (2 * 8), rp
 L(final_one):
 	ldx	[up+0], %o4
 	ldx	[vp+0], %o5
@@ -69,6 +109,7 @@
 	stx	%g3, [rp+0]
 
 L(done):
-	retl
-	 addxc(	%g0, %g0, %o0)
+	addxc(%g0, %g0, %i0)
+	ret
+	 restore
 EPILOGUE()
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/aormul_2.asm
--- a/mpn/sparc64/ultrasparct3/aormul_2.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/aormul_2.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,7 +22,7 @@
 
 C		    cycles/limb      cycles/limb
 C		       mul_2           addmul_2
-C UltraSPARC T3:	 ?		 ?
+C UltraSPARC T3:	22.5		 23.5
 C UltraSPARC T4:	 3.25		 3.75
 
 
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm
--- a/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,8 +22,8 @@
 include(`../config.m4')
 
 C                  cycles/limb
-C UltraSPARC T3:	 ?
-C UltraSPARC T4/T5:	4  hopefully
+C UltraSPARC T3:	25
+C UltraSPARC T4/T5:	 4
 
 C INPUT PARAMETERS
 define(`qp',  `%i0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/dive_1.asm
--- a/mpn/sparc64/ultrasparct3/dive_1.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/dive_1.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -23,7 +23,7 @@
 
 C                  cycles/limb
 C UltraSPARC T3:	 ?
-C UltraSPARC T4/T5:	 ?
+C UltraSPARC T4/T5:	20-26  hits 20 early, then sharply drops
 
 C INPUT PARAMETERS
 define(`qp',  `%i0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/invert_limb.asm
--- a/mpn/sparc64/ultrasparct3/invert_limb.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/invert_limb.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -73,6 +73,7 @@
 EPILOGUE()
 
 	RODATA
+	ALIGN(2)
 	TYPE(	approx_tab, object)
 	SIZE(	approx_tab, 512)
 approx_tab:
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/mod_1_4.asm
--- a/mpn/sparc64/ultrasparct3/mod_1_4.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mod_1_4.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,8 +22,8 @@
 include(`../config.m4')
 
 C                  cycles/limb
-C UltraSPARC T3:	 ?
-C UltraSPARC T4/T5:	 ?
+C UltraSPARC T3:	30
+C UltraSPARC T4/T5:	 4
 
 C INPUT PARAMETERS
 define(`ap',  `%o0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/mode1o.asm
--- a/mpn/sparc64/ultrasparct3/mode1o.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mode1o.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,8 +22,8 @@
 include(`../config.m4')
 
 C                  cycles/limb
-C UltraSPARC T3:	 ?
-C UltraSPARC T4/T5:	 ?
+C UltraSPARC T3:	30
+C UltraSPARC T4/T5:	26
 
 C INPUT PARAMETERS
 define(`ap',  `%o0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/mul_1.asm
--- a/mpn/sparc64/ultrasparct3/mul_1.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mul_1.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,18 +22,8 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	< 25.5  hopefully
-C UltraSPARC T4:	 3  hopefully
-
-C If this does not run at 3 c/l, here is what to try:
-C  (1) Reorder the bookkeeping insn at loop end, putting the n update at cycle
-C      10 and the rp update in the delay slot.  To make the loop take the right
-C      # of iterations, add 4 to every delay slot n update in the feed in code,
-C      and put a new 'add n, 4, n' in the L(b0) delay slot.  Motive: This makes
-C      n available to the loop branch from a feed-forward bus instead of the
-C      register bank.
-C  (1) Swap every the mulx insns in the loop with the addxccc immediately
-C      preceding it.
+C UltraSPARC T3:	23
+C UltraSPARC T4:	 3
 
 C INPUT PARAMETERS
 define(`rp', `%i0')
diff -r 01170daebe7d -r 4904502d6332 mpn/sparc64/ultrasparct3/sub_n.asm
--- a/mpn/sparc64/ultrasparct3/sub_n.asm	Thu Apr 04 03:20:03 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/sub_n.asm	Thu Apr 04 23:43:28 2013 +0200
@@ -22,48 +22,101 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	10
-C UltraSPARC T4:	 4
+C UltraSPARC T3:	 8
+C UltraSPARC T4:	 3
 
 C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')


More information about the gmp-commit mailing list