[Gmp-commit] /var/hg/gmp-5.1: Rewrite switching-into-loop code.
mercurial at gmplib.org
mercurial at gmplib.org
Sat Sep 28 14:34:01 CEST 2013
details: /var/hg/gmp-5.1/rev/fffa6dbb5c08
changeset: 15440:fffa6dbb5c08
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Sep 28 14:33:58 2013 +0200
description:
Rewrite switching-into-loop code.
diffstat:
mpn/powerpc64/p6/lshift.asm | 39 ++++++++++++++++++++++++---------------
mpn/powerpc64/p6/lshiftc.asm | 39 ++++++++++++++++++++++++---------------
mpn/powerpc64/p6/rshift.asm | 31 ++++++++++++++++++++-----------
3 files changed, 68 insertions(+), 41 deletions(-)
diffs (270 lines):
diff -r ef2244af18e0 -r fffa6dbb5c08 mpn/powerpc64/p6/lshift.asm
--- a/mpn/powerpc64/p6/lshift.asm Fri Sep 27 23:08:02 2013 +0200
+++ b/mpn/powerpc64/p6/lshift.asm Sat Sep 28 14:33:58 2013 +0200
@@ -1,6 +1,6 @@
dnl PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
-dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -27,7 +27,7 @@
C TODO
C * Micro-optimise header code
-C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
C bytes, 4-way code would become about 50% larger.
C INPUT PARAMETERS
@@ -42,45 +42,49 @@
ASM_START()
PROLOGUE(mpn_lshift)
+
+ifdef(`HAVE_ABI_mode32',`
+ rldicl n, n, 0,32 C FIXME: avoid this zero extend
+')
mflr r12
- bcl 20, 31, L(r) C get pc using a local "call"
-L(r): mflr r11
- sldi r0, n, 3
+ sldi r8, n, 3
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
- addi r11, r11, L(e1)-L(r)-64 C address of L(e1) label in SHIFT(1)
- add up, up, r0 C make up point at end of up[]
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ add up, up, r8 C make up point at end of up[]
add r11, r11, r10 C address of L(oN) for N = cnt
- add rp, rp_param, r0 C make rp point at end of rp[]
+ srdi r10, n, 1
+ add rp, rp_param, r8 C make rp point at end of rp[]
subfic tnc, cnt, 64
- rlwinm. r8, n, 0,31,31 C extract bit 0
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
beq L(evn)
L(odd): ld r9, -8(up)
cmpdi cr0, n, 1 C n = 1?
beq L(1)
ld r8, -16(up)
- addi r11, r11, L(o1)-L(e1)
+ addi r11, r11, -84 C L(o1) - L(e1) - 64
mtlr r11
- srdi r11, n, 1
srd r3, r9, tnc C retval
addi up, up, 8
addi rp, rp, -8
- mtctr r11
blr C branch to L(oN)
L(evn): ld r8, -8(up)
ld r9, -16(up)
+ addi r11, r11, -64
mtlr r11
- addi n, n, 1
- srdi r10, n, 1
srd r3, r8, tnc C retval
- mtctr r10
blr C branch to L(eN)
L(1): srd r3, r9, tnc C retval
sld r8, r9, cnt
std r8, -8(rp)
mtlr r12
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
blr
@@ -108,5 +112,10 @@
L(com): std r10, -16(rp)
mtlr r12
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
blr
EPILOGUE()
+ASM_END()
diff -r ef2244af18e0 -r fffa6dbb5c08 mpn/powerpc64/p6/lshiftc.asm
--- a/mpn/powerpc64/p6/lshiftc.asm Fri Sep 27 23:08:02 2013 +0200
+++ b/mpn/powerpc64/p6/lshiftc.asm Sat Sep 28 14:33:58 2013 +0200
@@ -1,6 +1,6 @@
dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
-dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -27,7 +27,7 @@
C TODO
C * Micro-optimise header code
-C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
C bytes, 4-way code would become about 50% larger.
C INPUT PARAMETERS
@@ -42,39 +42,39 @@
ASM_START()
PROLOGUE(mpn_lshiftc)
+
+ifdef(`HAVE_ABI_mode32',`
+ rldicl n, n, 0,32 C FIXME: avoid this zero extend
+')
mflr r12
- bcl 20, 31, L(r) C get pc using a local "call"
-L(r): mflr r11
- sldi r0, n, 3
+ sldi r8, n, 3
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
- addi r11, r11, L(e1)-L(r)-64 C address of L(e1) label in SHIFT(1)
- add up, up, r0 C make up point at end of up[]
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ add up, up, r8 C make up point at end of up[]
add r11, r11, r10 C address of L(oN) for N = cnt
- add rp, rp_param, r0 C make rp point at end of rp[]
+ srdi r10, n, 1
+ add rp, rp_param, r8 C make rp point at end of rp[]
subfic tnc, cnt, 64
- rlwinm. r8, n, 0,31,31 C extract bit 0
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
beq L(evn)
L(odd): ld r9, -8(up)
cmpdi cr0, n, 1 C n = 1?
beq L(1)
ld r8, -16(up)
- addi r11, r11, L(o1)-L(e1)
+ addi r11, r11, -88 C L(o1) - L(e1) - 64
mtlr r11
- srdi r11, n, 1
srd r3, r9, tnc C retval
addi up, up, 8
addi rp, rp, -8
- mtctr r11
blr C branch to L(oN)
L(evn): ld r8, -8(up)
ld r9, -16(up)
+ addi r11, r11, -64
mtlr r11
- addi n, n, 1
- srdi r10, n, 1
srd r3, r8, tnc C retval
- mtctr r10
blr C branch to L(eN)
L(1): srd r3, r9, tnc C retval
@@ -82,6 +82,10 @@
nor r8, r8, r8
std r8, -8(rp)
mtlr r12
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
blr
@@ -112,5 +116,10 @@
std r11, -8(rp)
std r10, -16(rp)
mtlr r12
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
blr
EPILOGUE()
+ASM_END()
diff -r ef2244af18e0 -r fffa6dbb5c08 mpn/powerpc64/p6/rshift.asm
--- a/mpn/powerpc64/p6/rshift.asm Fri Sep 27 23:08:02 2013 +0200
+++ b/mpn/powerpc64/p6/rshift.asm Sat Sep 28 14:33:58 2013 +0200
@@ -1,6 +1,6 @@
dnl PowerPC-64 mpn_rshift -- rp[] = up[] << cnt
-dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -42,44 +42,48 @@
ASM_START()
PROLOGUE(mpn_rshift)
+
+ifdef(`HAVE_ABI_mode32',`
+ rldicl n, n, 0,32 C FIXME: avoid this zero extend
+')
mflr r12
- bcl 20, 31, L(r) C get pc using a local "call"
-L(r): mflr r11
- addi r11, r11, L(e1)-L(r)-64 C address of L(e1) label in SHIFT(1)
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
add r11, r11, r10 C address of L(oN) for N = cnt
+ srdi r10, n, 1
mr rp, rp_param
subfic tnc, cnt, 64
- rlwinm. r8, n, 0,31,31 C extract bit 0
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
beq L(evn)
L(odd): ld r9, 0(up)
cmpdi cr0, n, 1 C n = 1?
beq L(1)
ld r8, 8(up)
- addi r11, r11, L(o1)-L(e1)
+ addi r11, r11, -84 C L(o1) - L(e1) - 64
mtlr r11
- srdi r11, n, 1
sld r3, r9, tnc C retval
addi up, up, 8
addi rp, rp, 8
- mtctr r11
blr C branch to L(oN)
L(evn): ld r8, 0(up)
ld r9, 8(up)
+ addi r11, r11, -64
mtlr r11
- addi n, n, 1
- srdi r10, n, 1
sld r3, r8, tnc C retval
addi up, up, 16
- mtctr r10
blr C branch to L(eN)
L(1): sld r3, r9, tnc C retval
srd r8, r9, cnt
std r8, 0(rp)
mtlr r12
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
blr
@@ -107,5 +111,10 @@
L(com): std r10, 8(rp)
mtlr r12
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
blr
EPILOGUE()
+ASM_END()
More information about the gmp-commit mailing list