[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Apr 30 00:17:05 CEST 2013


details:   /var/hg/gmp/rev/1c177a3e78aa
changeset: 15769:1c177a3e78aa
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 29 23:56:37 2013 +0200
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/9452afe9d2b2
changeset: 15770:9452afe9d2b2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 29 23:57:14 2013 +0200
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/00dea7960959
changeset: 15771:00dea7960959
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 29 23:57:38 2013 +0200
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/fb637ceca47e
changeset: 15772:fb637ceca47e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Apr 30 00:15:07 2013 +0200
description:
Rewrite SPARC T3 addmul_1.

details:   /var/hg/gmp/rev/db90f82f171c
changeset: 15773:db90f82f171c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Apr 30 00:15:23 2013 +0200
description:
ChangeLog

diffstat:

 ChangeLog                               |    2 +
 mpn/arm/v7a/cora15/com.asm              |    2 +-
 mpn/arm/v7a/cora15/logops_n.asm         |    2 +-
 mpn/sparc64/ultrasparct3/addmul_1.asm   |  168 ++++++++++++++++++++++++-------
 mpn/sparc64/ultrasparct3/cnd_aors_n.asm |   30 ++--
 5 files changed, 148 insertions(+), 56 deletions(-)

diffs (284 lines):

diff -r 4fc35535b362 -r db90f82f171c ChangeLog
--- a/ChangeLog	Mon Apr 29 23:45:12 2013 +0200
+++ b/ChangeLog	Tue Apr 30 00:15:23 2013 +0200
@@ -1,5 +1,7 @@
 2013-04-29  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/sparc64/ultrasparct3/addmul_1.asm: Rewrite.
+
 	* mpn/sparc64/ultrasparct3/submul_1.asm: Rewrite.
 
 	* mpn/sparc64/ultrasparct3/cnd_aors_n.asm: New file.
diff -r 4fc35535b362 -r db90f82f171c mpn/arm/v7a/cora15/com.asm
--- a/mpn/arm/v7a/cora15/com.asm	Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/arm/v7a/cora15/com.asm	Tue Apr 30 00:15:23 2013 +0200
@@ -41,7 +41,7 @@
 C v7a	-
 
 define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
-define(`UNROLL', 4x2)	 	C alternatives: 4 4x2
+define(`UNROLL', 4x2)		C alternatives: 4 4x2
 
 define(`rp', `r0')
 define(`up', `r1')
diff -r 4fc35535b362 -r db90f82f171c mpn/arm/v7a/cora15/logops_n.asm
--- a/mpn/arm/v7a/cora15/logops_n.asm	Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/arm/v7a/cora15/logops_n.asm	Tue Apr 30 00:15:23 2013 +0200
@@ -42,7 +42,7 @@
 C v7a	-
 
 define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
-define(`UNROLL', 4x2)	 	C alternatives: 4 4x2
+define(`UNROLL', 4x2)		C alternatives: 4 4x2
 
 define(`rp', `r0')
 define(`up', `r1')
diff -r 4fc35535b362 -r db90f82f171c mpn/sparc64/ultrasparct3/addmul_1.asm
--- a/mpn/sparc64/ultrasparct3/addmul_1.asm	Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/addmul_1.asm	Tue Apr 30 00:15:23 2013 +0200
@@ -1,6 +1,6 @@
-dnl  SPARC v9 mpn_addmul_1 for T3/T4.
+dnl  SPARC v9 mpn_addmul_1 for T3/T4/T5.
 
-dnl  Contributed to the GNU project by David Miller.
+dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
 
 dnl  Copyright 2013 Free Software Foundation, Inc.
 
@@ -22,8 +22,8 @@
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC T3:	28
-C UltraSPARC T4:	 5.5
+C UltraSPARC T3:	?
+C UltraSPARC T4:	4.25  hopefully
 
 C INPUT PARAMETERS
 define(`rp', `%i0')
@@ -31,51 +31,141 @@
 define(`n',  `%i2')
 define(`v0', `%i3')
 
+define(`u0',  `%l0')
+define(`u1',  `%l1')
+define(`u2',  `%l2')
+define(`u3',  `%l3')
+define(`r0',  `%l4')
+define(`r1',  `%l5')
+define(`r2',  `%l6')
+define(`r3',  `%l7')
+
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
 PROLOGUE(mpn_addmul_1)
 	save	%sp, -176, %sp
-	subcc	n, 1, n
-	be	L(final_one)
-	 subcc	%g0, %g0, %o5
+	ldx	[up+0], %g1
 
-L(top):
-	ldx	[up+0], %l0
-	ldx	[up+8], %l1
-	ldx	[rp+0], %l2
-	ldx	[rp+8], %l3
-	mulx	%l0, v0, %o0
-	add	up, 16, up
-	umulxhi(%l0, v0, %o1)
-	add	rp, 16, rp
-	mulx	%l1, v0, %o2
-	sub	n, 2, n
-	umulxhi(%l1, v0, %o3)
-	addxccc(%o5, %o0, %o0)
-	addxccc(%o1, %o2, %o2)
-	addxc(	%g0, %o3, %o5)
-	addcc	%l2, %o0, %o0
-	stx	%o0, [rp-16]
-	addxccc(%l3, %o2, %o2)
-	brgz	n, L(top)
-	 stx	%o2, [rp-8]
+	and	n, 3, %g3
+	brz	%g3, L(b0)
+	 addcc	%g0, %g0, %g5			C clear carry limb, flag
+	cmp	%g3, 2
+	bcs	%xcc, L(b01)
+	 nop
+	be	%xcc, L(b10)
+	 ldx	[up+8], %g5
 
-	brlz,pt	n, L(done)
+L(b11):	ldx	[up+16], u3
+	mulx	%g1, v0, %o2
+	umulxhi(%g1, v0, %o3)
+	ldx	[rp+0], r1
+	mulx	%g5, v0, %o4
+	ldx	[rp+8], r2
+	umulxhi(%g5, v0, %o5)
+	ldx	[rp+16], r3
+	mulx	u3, v0, %g4
+	umulxhi(u3, v0, %g5)
+	addcc	%o3, %o4, %o4
+	addxccc(%o5, %g4, %g4)
+	addxc(	%g0, %g5, %g5)
+	addcc	r1, %o2, r1
+	stx	r1, [rp+0]
+	addxccc(r2, %o4, r2)
+	stx	r2, [rp+8]
+	addxccc(r3, %g4, r3)
+	stx	r3, [rp+16]
+	add	n, -3, n
+	add	up, 24, up
+	brz	n, L(xit)
+	 add	rp, 24, rp
+	b	L(com)
 	 nop
 
-L(final_one):
-	ldx	[up+0], %l0
-	ldx	[rp+0], %l2
-	mulx	%l0, v0, %o0
-	umulxhi(%l0, v0, %o1)
-	addxccc(%o5, %o0, %o0)
-	addxc(	%g0, %o1, %o5)
-	addcc	%l2, %o0, %o0
-	stx	%o0, [rp+0]
+L(b10):	mulx	%g1, v0, %o4
+	ldx	[rp+0], r2
+	umulxhi(%g1, v0, %o5)
+	ldx	[rp+8], r3
+	mulx	%g5, v0, %g4
+	umulxhi(%g5, v0, %g5)
+	addcc	%o5, %g4, %g4
+	addxc(	%g0, %g5, %g5)
+	addcc	r2, %o4, r2
+	stx	r2, [rp+0]
+	addxccc(r3, %g4, r3)
+	stx	r3, [rp+8]
+	add	n, -2, n
+	add	up, 16, up
+	brz	n, L(xit)
+	 add	rp, 16, rp
+	b	L(com)
+	 nop
 
-L(done):
-	addxc(	%g0, %o5, %i0)
+L(b01):	ldx	[rp+0], r3
+	mulx	%g1, v0, %g4
+	umulxhi(%g1, v0, %g5)
+	addcc	r3, %g4, r3
+	stx	r3, [rp+0]
+	add	n, -1, n
+	add	up, 8, up
+	brz	n, L(xit)
+	 add	rp, 8, rp
+
+L(com):	ldx	[up+0], %g1
+L(b0):	ldx	[up+8], u1
+	ldx	[up+16], u2
+	ldx	[up+24], u3
+	mulx	%g1, v0, %o0
+	umulxhi(%g1, v0, %o1)
+	b	L(lo0)
+	 nop
+
+	ALIGN(16)
+L(top):	ldx	[up+0], u0
+	addxc(	%g0, %g5, %g5)		C propagate carry into carry limb
+	ldx	[up+8], u1
+	addcc	r0, %o0, r0
+	ldx	[up+16], u2
+	addxccc(r1, %o2, r1)
+	ldx	[up+24], u3
+	addxccc(r2, %o4, r2)
+	stx	r0, [rp-32]
+	addxccc(r3, %g4, r3)
+	stx	r1, [rp-24]
+	mulx	u0, v0, %o0
+	stx	r2, [rp-16]
+	umulxhi(u0, v0, %o1)
+	stx	r3, [rp-8]
+L(lo0):	mulx	u1, v0, %o2
+	ldx	[rp+0], r0
+	umulxhi(u1, v0, %o3)
+	ldx	[rp+8], r1
+	mulx	u2, v0, %o4
+	ldx	[rp+16], r2
+	umulxhi(u2, v0, %o5)
+	ldx	[rp+24], r3
+	mulx	u3, v0, %g4
+	addxccc(%g5, %o0, %o0)
+	umulxhi(u3, v0, %g5)
+	add	up, 32, up
+	addxccc(%o1, %o2, %o2)
+	add	rp, 32, rp
+	addxccc(%o3, %o4, %o4)
+	add	n, -4, n
+	addxccc(%o5, %g4, %g4)
+	brgz	n, L(top)
+	 nop
+
+	addxc(	%g0, %g5, %g5)
+	addcc	r0, %o0, r0
+	stx	r0, [rp-32]
+	addxccc(r1, %o2, r1)
+	stx	r1, [rp-24]
+	addxccc(r2, %o4, r2)
+	stx	r2, [rp-16]
+	addxccc(r3, %g4, r3)
+	stx	r3, [rp-8]
+L(xit):	addxc(	%g0, %g5, %i0)
 	ret
 	 restore
 EPILOGUE()
diff -r 4fc35535b362 -r db90f82f171c mpn/sparc64/ultrasparct3/cnd_aors_n.asm
--- a/mpn/sparc64/ultrasparct3/cnd_aors_n.asm	Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/cnd_aors_n.asm	Tue Apr 30 00:15:23 2013 +0200
@@ -25,8 +25,8 @@
 C UltraSPARC T3:	 ?
 C UltraSPARC T4:	 3 hopefully
 
-C We use a double-pointer trick to allow indexed addressing.  Its setup cost
-C might be prohibitive in these functions, since we don't expect huge n
+C We use a double-pointer trick to allow indexed addressing.  Its setup
+C cost might be a problem in these functions, since we don't expect huge n
 C arguments.
 C
 C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
@@ -49,21 +49,21 @@
 define(`w0',  `%g1')  define(`w1',  `%g3')
 
 ifdef(`OPERATION_cnd_add_n',`
-      define(`LOGOP',   `and	$1, $2, $3')
-      define(`MAKEMASK',`cmp	%g0, $1
-      			subc	%g0, %g0, $2')
-      define(`INITCY',  `addcc	%g0, 0, %g0')
-      define(`RETVAL',  `addxc(	%g0, %g0, %i0)')
-      define(`func',    `mpn_cnd_add_n')
+  define(`LOGOP',   `and	$1, $2, $3')
+  define(`MAKEMASK',`cmp	%g0, $1
+		     subc	%g0, %g0, $2')
+  define(`INITCY',  `addcc	%g0, 0, %g0')
+  define(`RETVAL',  `addxc(	%g0, %g0, %i0)')
+  define(`func',    `mpn_cnd_add_n')
 ')
 ifdef(`OPERATION_cnd_sub_n',`
-      define(`LOGOP',   `orn	$2, $1, $3')
-      define(`MAKEMASK',`cmp	$1, 1
-      			subc	%g0, %g0, $2')
-      define(`INITCY',  `subcc	%g0, 1, %g0')
-      define(`RETVAL',  `addxc(	%g0, %g0, %i0)
-      			xor	%i0, 1, %i0')
-      define(`func',    `mpn_cnd_sub_n')
+  define(`LOGOP',   `orn	$2, $1, $3')
+  define(`MAKEMASK',`cmp	$1, 1
+		     subc	%g0, %g0, $2')
+  define(`INITCY',  `subcc	%g0, 1, %g0')
+  define(`RETVAL',  `addxc(	%g0, %g0, %i0)
+		     xor	%i0, 1, %i0')
+  define(`func',    `mpn_cnd_sub_n')
 ')
 
 MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)


More information about the gmp-commit mailing list