[Gmp-commit] /var/hg/gmp: Rewrite switching-into-loop code.
mercurial at gmplib.org
mercurial at gmplib.org
Wed Jun 19 17:25:59 CEST 2013
details: /var/hg/gmp/rev/53d1f20e8169
changeset: 15840:53d1f20e8169
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Jun 19 17:25:56 2013 +0200
description:
Rewrite switching-into-loop code.
diffstat:
ChangeLog | 6 ++++++
mpn/powerpc64/p6/lshift.asm | 25 +++++++++++--------------
mpn/powerpc64/p6/lshiftc.asm | 25 +++++++++++--------------
mpn/powerpc64/p6/rshift.asm | 17 +++++++----------
4 files changed, 35 insertions(+), 38 deletions(-)
diffs (195 lines):
diff -r bb95c3d7691e -r 53d1f20e8169 ChangeLog
--- a/ChangeLog Mon Jun 17 23:08:05 2013 +0200
+++ b/ChangeLog Wed Jun 19 17:25:56 2013 +0200
@@ -1,3 +1,9 @@
+2013-06-19 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/p6/lshift.asm: Rewrite switching-into-loop code.
+ * mpn/powerpc64/p6/rshift.asm: Likewise.
+ * mpn/powerpc64/p6/lshiftc.asm: Likewise.
+
2013-06-17 Torbjorn Granlund <tege at gmplib.org>
* mpn/powerpc64/p6/lshift.asm: Fix typo in label reference.
diff -r bb95c3d7691e -r 53d1f20e8169 mpn/powerpc64/p6/lshift.asm
--- a/mpn/powerpc64/p6/lshift.asm Mon Jun 17 23:08:05 2013 +0200
+++ b/mpn/powerpc64/p6/lshift.asm Wed Jun 19 17:25:56 2013 +0200
@@ -27,7 +27,7 @@
C TODO
C * Micro-optimise header code
-C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
C bytes, 4-way code would become about 50% larger.
C INPUT PARAMETERS
@@ -47,38 +47,34 @@
rldicl n, n, 0,32 C FIXME: avoid this zero extend
')
mflr r12
- bcl 20, 31, L(r) C get pc using a local "call"
-L(r): mflr r11
- sldi r0, n, 3
+ sldi r8, n, 3
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
- addi r11, r11, L(e1)-L(r)-64 C address of L(e1) label in SHIFT(1)
- add up, up, r0 C make up point at end of up[]
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ add up, up, r8 C make up point at end of up[]
add r11, r11, r10 C address of L(oN) for N = cnt
- add rp, rp_param, r0 C make rp point at end of rp[]
+ srdi r10, n, 1
+ add rp, rp_param, r8 C make rp point at end of rp[]
subfic tnc, cnt, 64
- rlwinm. r8, n, 0,31,31 C extract bit 0
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
beq L(evn)
L(odd): ld r9, -8(up)
cmpdi cr0, n, 1 C n = 1?
beq L(1)
ld r8, -16(up)
- addi r11, r11, L(o1)-L(e1)
+ addi r11, r11, -84 C L(o1) - L(e1) - 64
mtlr r11
- srdi r11, n, 1
srd r3, r9, tnc C retval
addi up, up, 8
addi rp, rp, -8
- mtctr r11
blr C branch to L(oN)
L(evn): ld r8, -8(up)
ld r9, -16(up)
+ addi r11, r11, -64
mtlr r11
- addi n, n, 1
- srdi r10, n, 1
srd r3, r8, tnc C retval
- mtctr r10
blr C branch to L(eN)
L(1): srd r3, r9, tnc C retval
@@ -122,3 +118,4 @@
')
blr
EPILOGUE()
+ASM_END()
diff -r bb95c3d7691e -r 53d1f20e8169 mpn/powerpc64/p6/lshiftc.asm
--- a/mpn/powerpc64/p6/lshiftc.asm Mon Jun 17 23:08:05 2013 +0200
+++ b/mpn/powerpc64/p6/lshiftc.asm Wed Jun 19 17:25:56 2013 +0200
@@ -27,7 +27,7 @@
C TODO
C * Micro-optimise header code
-C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
C bytes, 4-way code would become about 50% larger.
C INPUT PARAMETERS
@@ -47,38 +47,34 @@
rldicl n, n, 0,32 C FIXME: avoid this zero extend
')
mflr r12
- bcl 20, 31, L(r) C get pc using a local "call"
-L(r): mflr r11
- sldi r0, n, 3
+ sldi r8, n, 3
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
- addi r11, r11, L(e1)-L(r)-64 C address of L(e1) label in SHIFT(1)
- add up, up, r0 C make up point at end of up[]
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ add up, up, r8 C make up point at end of up[]
add r11, r11, r10 C address of L(oN) for N = cnt
- add rp, rp_param, r0 C make rp point at end of rp[]
+ srdi r10, n, 1
+ add rp, rp_param, r8 C make rp point at end of rp[]
subfic tnc, cnt, 64
- rlwinm. r8, n, 0,31,31 C extract bit 0
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
beq L(evn)
L(odd): ld r9, -8(up)
cmpdi cr0, n, 1 C n = 1?
beq L(1)
ld r8, -16(up)
- addi r11, r11, L(o1)-L(e1)
+ addi r11, r11, -88 C L(o1) - L(e1) - 64
mtlr r11
- srdi r11, n, 1
srd r3, r9, tnc C retval
addi up, up, 8
addi rp, rp, -8
- mtctr r11
blr C branch to L(oN)
L(evn): ld r8, -8(up)
ld r9, -16(up)
+ addi r11, r11, -64
mtlr r11
- addi n, n, 1
- srdi r10, n, 1
srd r3, r8, tnc C retval
- mtctr r10
blr C branch to L(eN)
L(1): srd r3, r9, tnc C retval
@@ -126,3 +122,4 @@
')
blr
EPILOGUE()
+ASM_END()
diff -r bb95c3d7691e -r 53d1f20e8169 mpn/powerpc64/p6/rshift.asm
--- a/mpn/powerpc64/p6/rshift.asm Mon Jun 17 23:08:05 2013 +0200
+++ b/mpn/powerpc64/p6/rshift.asm Wed Jun 19 17:25:56 2013 +0200
@@ -47,37 +47,33 @@
rldicl n, n, 0,32 C FIXME: avoid this zero extend
')
mflr r12
- bcl 20, 31, L(r) C get pc using a local "call"
-L(r): mflr r11
- addi r11, r11, L(e1)-L(r)-64 C address of L(e1) label in SHIFT(1)
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
add r11, r11, r10 C address of L(oN) for N = cnt
+ srdi r10, n, 1
mr rp, rp_param
subfic tnc, cnt, 64
- rlwinm. r8, n, 0,31,31 C extract bit 0
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
beq L(evn)
L(odd): ld r9, 0(up)
cmpdi cr0, n, 1 C n = 1?
beq L(1)
ld r8, 8(up)
- addi r11, r11, L(o1)-L(e1)
+ addi r11, r11, -84 C L(o1) - L(e1) - 64
mtlr r11
- srdi r11, n, 1
sld r3, r9, tnc C retval
addi up, up, 8
addi rp, rp, 8
- mtctr r11
blr C branch to L(oN)
L(evn): ld r8, 0(up)
ld r9, 8(up)
+ addi r11, r11, -64
mtlr r11
- addi n, n, 1
- srdi r10, n, 1
sld r3, r8, tnc C retval
addi up, up, 16
- mtctr r10
blr C branch to L(eN)
L(1): sld r3, r9, tnc C retval
@@ -121,3 +117,4 @@
')
blr
EPILOGUE()
+ASM_END()
More information about the gmp-commit mailing list