[Gmp-commit] /var/hg/gmp-5.1: Rewrite switching-into-loop code.

mercurial at gmplib.org mercurial at gmplib.org
Sat Sep 28 14:34:01 CEST 2013


details:   /var/hg/gmp-5.1/rev/fffa6dbb5c08
changeset: 15440:fffa6dbb5c08
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Sep 28 14:33:58 2013 +0200
description:
Rewrite switching-into-loop code.

diffstat:

 mpn/powerpc64/p6/lshift.asm  |  39 ++++++++++++++++++++++++---------------
 mpn/powerpc64/p6/lshiftc.asm |  39 ++++++++++++++++++++++++---------------
 mpn/powerpc64/p6/rshift.asm  |  31 ++++++++++++++++++++-----------
 3 files changed, 68 insertions(+), 41 deletions(-)

diffs (270 lines):

diff -r ef2244af18e0 -r fffa6dbb5c08 mpn/powerpc64/p6/lshift.asm
--- a/mpn/powerpc64/p6/lshift.asm	Fri Sep 27 23:08:02 2013 +0200
+++ b/mpn/powerpc64/p6/lshift.asm	Sat Sep 28 14:33:58 2013 +0200
@@ -1,6 +1,6 @@
 dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
 
-dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -27,7 +27,7 @@
 
 C TODO
 C  * Micro-optimise header code
-C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
+C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
 C    bytes, 4-way code would become about 50% larger.
 
 C INPUT PARAMETERS
@@ -42,45 +42,49 @@
 
 ASM_START()
 PROLOGUE(mpn_lshift)
+
+ifdef(`HAVE_ABI_mode32',`
+	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
+')
 	mflr	r12
-	bcl	20, 31, L(r)		C get pc using a local "call"
-L(r):	mflr	r11
-	sldi	r0, n, 3
+	sldi	r8, n, 3
 	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
-	addi	r11, r11, L(e1)-L(r)-64	C address of L(e1) label in SHIFT(1)
-	add	up, up, r0		C make up point at end of up[]
+	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
+	add	up, up, r8		C make up point at end of up[]
 	add	r11, r11, r10		C L(e1) + 64*cnt; adjusted to L(eN)/L(oN) below
-	add	rp, rp_param, r0	C make rp point at end of rp[]
+	srdi	r10, n, 1
+	add	rp, rp_param, r8	C make rp point at end of rp[]
 	subfic	tnc, cnt, 64
-	rlwinm.  r8, n, 0,31,31		C extract bit 0
+	rlwinm.	r8, n, 0,31,31		C extract bit 0
+	mtctr	r10
 	beq	L(evn)
 
 L(odd):	ld	r9, -8(up)
 	cmpdi	cr0, n, 1		C n = 1?
 	beq	L(1)
 	ld	r8, -16(up)
-	addi	r11, r11, L(o1)-L(e1)
+	addi	r11, r11, -84		C L(o1) - L(e1) - 64
 	mtlr	r11
-	srdi	r11, n, 1
 	srd	r3, r9, tnc		C retval
 	addi	up, up, 8
 	addi	rp, rp, -8
-	mtctr	r11
 	blr				C branch to L(oN)
 
 L(evn):	ld	r8, -8(up)
 	ld	r9, -16(up)
+	addi	r11, r11, -64
 	mtlr	r11
-	addi	n, n, 1
-	srdi	r10, n, 1
 	srd	r3, r8, tnc		C retval
-	mtctr	r10
 	blr				C branch to L(eN)
 
 L(1):	srd	r3, r9, tnc		C retval
 	sld	r8, r9, cnt
 	std	r8, -8(rp)
 	mtlr	r12
+ifdef(`HAVE_ABI_mode32',
+`	mr	r4, r3
+	srdi	r3, r3, 32
+')
 	blr
 
 
@@ -108,5 +112,10 @@
 
 L(com):	std	r10, -16(rp)
 	mtlr	r12
+ifdef(`HAVE_ABI_mode32',
+`	mr	r4, r3
+	srdi	r3, r3, 32
+')
 	blr
 EPILOGUE()
+ASM_END()
diff -r ef2244af18e0 -r fffa6dbb5c08 mpn/powerpc64/p6/lshiftc.asm
--- a/mpn/powerpc64/p6/lshiftc.asm	Fri Sep 27 23:08:02 2013 +0200
+++ b/mpn/powerpc64/p6/lshiftc.asm	Sat Sep 28 14:33:58 2013 +0200
@@ -1,6 +1,6 @@
 dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
 
-dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -27,7 +27,7 @@
 
 C TODO
 C  * Micro-optimise header code
-C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
+C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
 C    bytes, 4-way code would become about 50% larger.
 
 C INPUT PARAMETERS
@@ -42,39 +42,39 @@
 
 ASM_START()
 PROLOGUE(mpn_lshiftc)
+
+ifdef(`HAVE_ABI_mode32',`
+	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
+')
 	mflr	r12
-	bcl	20, 31, L(r)		C get pc using a local "call"
-L(r):	mflr	r11
-	sldi	r0, n, 3
+	sldi	r8, n, 3
 	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
-	addi	r11, r11, L(e1)-L(r)-64	C address of L(e1) label in SHIFT(1)
-	add	up, up, r0		C make up point at end of up[]
+	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
+	add	up, up, r8		C make up point at end of up[]
 	add	r11, r11, r10		C L(e1) + 64*cnt; adjusted to L(eN)/L(oN) below
-	add	rp, rp_param, r0	C make rp point at end of rp[]
+	srdi	r10, n, 1
+	add	rp, rp_param, r8	C make rp point at end of rp[]
 	subfic	tnc, cnt, 64
-	rlwinm.  r8, n, 0,31,31		C extract bit 0
+	rlwinm.	r8, n, 0,31,31		C extract bit 0
+	mtctr	r10
 	beq	L(evn)
 
 L(odd):	ld	r9, -8(up)
 	cmpdi	cr0, n, 1		C n = 1?
 	beq	L(1)
 	ld	r8, -16(up)
-	addi	r11, r11, L(o1)-L(e1)
+	addi	r11, r11, -88		C L(o1) - L(e1) - 64
 	mtlr	r11
-	srdi	r11, n, 1
 	srd	r3, r9, tnc		C retval
 	addi	up, up, 8
 	addi	rp, rp, -8
-	mtctr	r11
 	blr				C branch to L(oN)
 
 L(evn):	ld	r8, -8(up)
 	ld	r9, -16(up)
+	addi	r11, r11, -64
 	mtlr	r11
-	addi	n, n, 1
-	srdi	r10, n, 1
 	srd	r3, r8, tnc		C retval
-	mtctr	r10
 	blr				C branch to L(eN)
 
 L(1):	srd	r3, r9, tnc		C retval
@@ -82,6 +82,10 @@
 	nor	r8, r8, r8
 	std	r8, -8(rp)
 	mtlr	r12
+ifdef(`HAVE_ABI_mode32',
+`	mr	r4, r3
+	srdi	r3, r3, 32
+')
 	blr
 
 
@@ -112,5 +116,10 @@
 	std	r11, -8(rp)
 	std	r10, -16(rp)
 	mtlr	r12
+ifdef(`HAVE_ABI_mode32',
+`	mr	r4, r3
+	srdi	r3, r3, 32
+')
 	blr
 EPILOGUE()
+ASM_END()
diff -r ef2244af18e0 -r fffa6dbb5c08 mpn/powerpc64/p6/rshift.asm
--- a/mpn/powerpc64/p6/rshift.asm	Fri Sep 27 23:08:02 2013 +0200
+++ b/mpn/powerpc64/p6/rshift.asm	Sat Sep 28 14:33:58 2013 +0200
@@ -1,6 +1,6 @@
 dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
 
-dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -42,44 +42,48 @@
 
 ASM_START()
 PROLOGUE(mpn_rshift)
+
+ifdef(`HAVE_ABI_mode32',`
+	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
+')
 	mflr	r12
-	bcl	20, 31, L(r)		C get pc using a local "call"
-L(r):	mflr	r11
-	addi	r11, r11, L(e1)-L(r)-64	C address of L(e1) label in SHIFT(1)
+	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
 	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
 	add	r11, r11, r10		C L(e1) + 64*cnt; adjusted to L(eN)/L(oN) below
+	srdi	r10, n, 1
 	mr	rp, rp_param
 	subfic	tnc, cnt, 64
-	rlwinm.  r8, n, 0,31,31		C extract bit 0
+	rlwinm.	r8, n, 0,31,31		C extract bit 0
+	mtctr	r10
 	beq	L(evn)
 
 L(odd):	ld	r9, 0(up)
 	cmpdi	cr0, n, 1		C n = 1?
 	beq	L(1)
 	ld	r8, 8(up)
-	addi	r11, r11, L(o1)-L(e1)
+	addi	r11, r11, -84		C L(o1) - L(e1) - 64
 	mtlr	r11
-	srdi	r11, n, 1
 	sld	r3, r9, tnc		C retval
 	addi	up, up, 8
 	addi	rp, rp, 8
-	mtctr	r11
 	blr				C branch to L(oN)
 
 L(evn):	ld	r8, 0(up)
 	ld	r9, 8(up)
+	addi	r11, r11, -64
 	mtlr	r11
-	addi	n, n, 1
-	srdi	r10, n, 1
 	sld	r3, r8, tnc		C retval
 	addi	up, up, 16
-	mtctr	r10
 	blr				C branch to L(eN)
 
 L(1):	sld	r3, r9, tnc		C retval
 	srd	r8, r9, cnt
 	std	r8, 0(rp)
 	mtlr	r12
+ifdef(`HAVE_ABI_mode32',
+`	mr	r4, r3
+	srdi	r3, r3, 32
+')
 	blr
 
 
@@ -107,5 +111,10 @@
 
 L(com):	std	r10, 8(rp)
 	mtlr	r12
+ifdef(`HAVE_ABI_mode32',
+`	mr	r4, r3
+	srdi	r3, r3, 32
+')
 	blr
 EPILOGUE()
+ASM_END()


More information about the gmp-commit mailing list