[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Apr 30 00:17:05 CEST 2013
details: /var/hg/gmp/rev/1c177a3e78aa
changeset: 15769:1c177a3e78aa
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 29 23:56:37 2013 +0200
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/9452afe9d2b2
changeset: 15770:9452afe9d2b2
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 29 23:57:14 2013 +0200
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/00dea7960959
changeset: 15771:00dea7960959
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 29 23:57:38 2013 +0200
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/fb637ceca47e
changeset: 15772:fb637ceca47e
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Apr 30 00:15:07 2013 +0200
description:
Rewrite SPARC T3 addmul_1.
details: /var/hg/gmp/rev/db90f82f171c
changeset: 15773:db90f82f171c
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Apr 30 00:15:23 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 2 +
mpn/arm/v7a/cora15/com.asm | 2 +-
mpn/arm/v7a/cora15/logops_n.asm | 2 +-
mpn/sparc64/ultrasparct3/addmul_1.asm | 168 ++++++++++++++++++++++++-------
mpn/sparc64/ultrasparct3/cnd_aors_n.asm | 30 ++--
5 files changed, 148 insertions(+), 56 deletions(-)
diffs (284 lines):
diff -r 4fc35535b362 -r db90f82f171c ChangeLog
--- a/ChangeLog Mon Apr 29 23:45:12 2013 +0200
+++ b/ChangeLog Tue Apr 30 00:15:23 2013 +0200
@@ -1,5 +1,7 @@
2013-04-29 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/sparc64/ultrasparct3/addmul_1.asm: Rewrite.
+
* mpn/sparc64/ultrasparct3/submul_1.asm: Rewrite.
* mpn/sparc64/ultrasparct3/cnd_aors_n.asm: New file.
diff -r 4fc35535b362 -r db90f82f171c mpn/arm/v7a/cora15/com.asm
--- a/mpn/arm/v7a/cora15/com.asm Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/arm/v7a/cora15/com.asm Tue Apr 30 00:15:23 2013 +0200
@@ -41,7 +41,7 @@
C v7a -
define(`FEEDIN_VARIANT', 1) C alternatives: 0 1 2
-define(`UNROLL', 4x2) C alternatives: 4 4x2
+define(`UNROLL', 4x2) C alternatives: 4 4x2
define(`rp', `r0')
define(`up', `r1')
diff -r 4fc35535b362 -r db90f82f171c mpn/arm/v7a/cora15/logops_n.asm
--- a/mpn/arm/v7a/cora15/logops_n.asm Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/arm/v7a/cora15/logops_n.asm Tue Apr 30 00:15:23 2013 +0200
@@ -42,7 +42,7 @@
C v7a -
define(`FEEDIN_VARIANT', 1) C alternatives: 0 1 2
-define(`UNROLL', 4x2) C alternatives: 4 4x2
+define(`UNROLL', 4x2) C alternatives: 4 4x2
define(`rp', `r0')
define(`up', `r1')
diff -r 4fc35535b362 -r db90f82f171c mpn/sparc64/ultrasparct3/addmul_1.asm
--- a/mpn/sparc64/ultrasparct3/addmul_1.asm Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/addmul_1.asm Tue Apr 30 00:15:23 2013 +0200
@@ -1,6 +1,6 @@
-dnl SPARC v9 mpn_addmul_1 for T3/T4.
+dnl SPARC v9 mpn_addmul_1 for T3/T4/T5.
-dnl Contributed to the GNU project by David Miller.
+dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
dnl Copyright 2013 Free Software Foundation, Inc.
@@ -22,8 +22,8 @@
include(`../config.m4')
C cycles/limb
-C UltraSPARC T3: 28
-C UltraSPARC T4: 5.5
+C UltraSPARC T3: ?
+C UltraSPARC T4: 4.25 hopefully
C INPUT PARAMETERS
define(`rp', `%i0')
@@ -31,51 +31,141 @@
define(`n', `%i2')
define(`v0', `%i3')
+define(`u0', `%l0')
+define(`u1', `%l1')
+define(`u2', `%l2')
+define(`u3', `%l3')
+define(`r0', `%l4')
+define(`r1', `%l5')
+define(`r2', `%l6')
+define(`r3', `%l7')
+
ASM_START()
REGISTER(%g2,#scratch)
REGISTER(%g3,#scratch)
PROLOGUE(mpn_addmul_1)
save %sp, -176, %sp
- subcc n, 1, n
- be L(final_one)
- subcc %g0, %g0, %o5
+ ldx [up+0], %g1
-L(top):
- ldx [up+0], %l0
- ldx [up+8], %l1
- ldx [rp+0], %l2
- ldx [rp+8], %l3
- mulx %l0, v0, %o0
- add up, 16, up
- umulxhi(%l0, v0, %o1)
- add rp, 16, rp
- mulx %l1, v0, %o2
- sub n, 2, n
- umulxhi(%l1, v0, %o3)
- addxccc(%o5, %o0, %o0)
- addxccc(%o1, %o2, %o2)
- addxc( %g0, %o3, %o5)
- addcc %l2, %o0, %o0
- stx %o0, [rp-16]
- addxccc(%l3, %o2, %o2)
- brgz n, L(top)
- stx %o2, [rp-8]
+ and n, 3, %g3
+ brz %g3, L(b0)
+ addcc %g0, %g0, %g5 C clear carry limb, flag
+ cmp %g3, 2
+ bcs %xcc, L(b01)
+ nop
+ be %xcc, L(b10)
+ ldx [up+8], %g5
- brlz,pt n, L(done)
+L(b11): ldx [up+16], u3
+ mulx %g1, v0, %o2
+ umulxhi(%g1, v0, %o3)
+ ldx [rp+0], r1
+ mulx %g5, v0, %o4
+ ldx [rp+8], r2
+ umulxhi(%g5, v0, %o5)
+ ldx [rp+16], r3
+ mulx u3, v0, %g4
+ umulxhi(u3, v0, %g5)
+ addcc %o3, %o4, %o4
+ addxccc(%o5, %g4, %g4)
+ addxc( %g0, %g5, %g5)
+ addcc r1, %o2, r1
+ stx r1, [rp+0]
+ addxccc(r2, %o4, r2)
+ stx r2, [rp+8]
+ addxccc(r3, %g4, r3)
+ stx r3, [rp+16]
+ add n, -3, n
+ add up, 24, up
+ brz n, L(xit)
+ add rp, 24, rp
+ b L(com)
nop
-L(final_one):
- ldx [up+0], %l0
- ldx [rp+0], %l2
- mulx %l0, v0, %o0
- umulxhi(%l0, v0, %o1)
- addxccc(%o5, %o0, %o0)
- addxc( %g0, %o1, %o5)
- addcc %l2, %o0, %o0
- stx %o0, [rp+0]
+L(b10): mulx %g1, v0, %o4
+ ldx [rp+0], r2
+ umulxhi(%g1, v0, %o5)
+ ldx [rp+8], r3
+ mulx %g5, v0, %g4
+ umulxhi(%g5, v0, %g5)
+ addcc %o5, %g4, %g4
+ addxc( %g0, %g5, %g5)
+ addcc r2, %o4, r2
+ stx r2, [rp+0]
+ addxccc(r3, %g4, r3)
+ stx r3, [rp+8]
+ add n, -2, n
+ add up, 16, up
+ brz n, L(xit)
+ add rp, 16, rp
+ b L(com)
+ nop
-L(done):
- addxc( %g0, %o5, %i0)
+L(b01): ldx [rp+0], r3
+ mulx %g1, v0, %g4
+ umulxhi(%g1, v0, %g5)
+ addcc r3, %g4, r3
+ stx r3, [rp+0]
+ add n, -1, n
+ add up, 8, up
+ brz n, L(xit)
+ add rp, 8, rp
+
+L(com): ldx [up+0], %g1
+L(b0): ldx [up+8], u1
+ ldx [up+16], u2
+ ldx [up+24], u3
+ mulx %g1, v0, %o0
+ umulxhi(%g1, v0, %o1)
+ b L(lo0)
+ nop
+
+ ALIGN(16)
+L(top): ldx [up+0], u0
+ addxc( %g0, %g5, %g5) C propagate carry into carry limb
+ ldx [up+8], u1
+ addcc r0, %o0, r0
+ ldx [up+16], u2
+ addxccc(r1, %o2, r1)
+ ldx [up+24], u3
+ addxccc(r2, %o4, r2)
+ stx r0, [rp-32]
+ addxccc(r3, %g4, r3)
+ stx r1, [rp-24]
+ mulx u0, v0, %o0
+ stx r2, [rp-16]
+ umulxhi(u0, v0, %o1)
+ stx r3, [rp-8]
+L(lo0): mulx u1, v0, %o2
+ ldx [rp+0], r0
+ umulxhi(u1, v0, %o3)
+ ldx [rp+8], r1
+ mulx u2, v0, %o4
+ ldx [rp+16], r2
+ umulxhi(u2, v0, %o5)
+ ldx [rp+24], r3
+ mulx u3, v0, %g4
+ addxccc(%g5, %o0, %o0)
+ umulxhi(u3, v0, %g5)
+ add up, 32, up
+ addxccc(%o1, %o2, %o2)
+ add rp, 32, rp
+ addxccc(%o3, %o4, %o4)
+ add n, -4, n
+ addxccc(%o5, %g4, %g4)
+ brgz n, L(top)
+ nop
+
+ addxc( %g0, %g5, %g5)
+ addcc r0, %o0, r0
+ stx r0, [rp-32]
+ addxccc(r1, %o2, r1)
+ stx r1, [rp-24]
+ addxccc(r2, %o4, r2)
+ stx r2, [rp-16]
+ addxccc(r3, %g4, r3)
+ stx r3, [rp-8]
+L(xit): addxc( %g0, %g5, %i0)
ret
restore
EPILOGUE()
diff -r 4fc35535b362 -r db90f82f171c mpn/sparc64/ultrasparct3/cnd_aors_n.asm
--- a/mpn/sparc64/ultrasparct3/cnd_aors_n.asm Mon Apr 29 23:45:12 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/cnd_aors_n.asm Tue Apr 30 00:15:23 2013 +0200
@@ -25,8 +25,8 @@
C UltraSPARC T3: ?
C UltraSPARC T4: 3 hopefully
-C We use a double-pointer trick to allow indexed addressing. Its setup cost
-C might be prohibitive in these functions, since we don't expect huge n
+C We use a double-pointer trick to allow indexed addressing. Its setup
+C cost might be a problem in these functions, since we don't expect huge n
C arguments.
C
C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
@@ -49,21 +49,21 @@
define(`w0', `%g1') define(`w1', `%g3')
ifdef(`OPERATION_cnd_add_n',`
- define(`LOGOP', `and $1, $2, $3')
- define(`MAKEMASK',`cmp %g0, $1
- subc %g0, %g0, $2')
- define(`INITCY', `addcc %g0, 0, %g0')
- define(`RETVAL', `addxc( %g0, %g0, %i0)')
- define(`func', `mpn_cnd_add_n')
+ define(`LOGOP', `and $1, $2, $3')
+ define(`MAKEMASK',`cmp %g0, $1
+ subc %g0, %g0, $2')
+ define(`INITCY', `addcc %g0, 0, %g0')
+ define(`RETVAL', `addxc( %g0, %g0, %i0)')
+ define(`func', `mpn_cnd_add_n')
')
ifdef(`OPERATION_cnd_sub_n',`
- define(`LOGOP', `orn $2, $1, $3')
- define(`MAKEMASK',`cmp $1, 1
- subc %g0, %g0, $2')
- define(`INITCY', `subcc %g0, 1, %g0')
- define(`RETVAL', `addxc( %g0, %g0, %i0)
- xor %i0, 1, %i0')
- define(`func', `mpn_cnd_sub_n')
+ define(`LOGOP', `orn $2, $1, $3')
+ define(`MAKEMASK',`cmp $1, 1
+ subc %g0, %g0, $2')
+ define(`INITCY', `subcc %g0, 1, %g0')
+ define(`RETVAL', `addxc( %g0, %g0, %i0)
+ xor %i0, 1, %i0')
+ define(`func', `mpn_cnd_sub_n')
')
MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
More information about the gmp-commit mailing list