[Gmp-commit] /var/hg/gmp: 7 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Jul 4 23:56:45 CEST 2013


details:   /var/hg/gmp/rev/117cf9e01474
changeset: 15854:117cf9e01474
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 03 11:47:57 2013 +0200
description:
Provide special power7 copyi, copyd.

details:   /var/hg/gmp/rev/e5f9e729ca6a
changeset: 15855:e5f9e729ca6a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 03 14:42:00 2013 +0200
description:
Rewrite mpn/powerpc64/com.asm.

details:   /var/hg/gmp/rev/dff28d38bc3b
changeset: 15856:dff28d38bc3b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Jul 04 23:36:00 2013 +0200
description:
(EXTRA_REGISTER): New define.

details:   /var/hg/gmp/rev/024764375bc7
changeset: 15857:024764375bc7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Jul 04 23:37:25 2013 +0200
description:
Provide two ppc64 gcd_1 variants.

details:   /var/hg/gmp/rev/cc0b53d04e4a
changeset: 15858:cc0b53d04e4a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Jul 04 23:38:26 2013 +0200
description:
Update cycle table.

details:   /var/hg/gmp/rev/e8c445383f6e
changeset: 15859:e8c445383f6e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Jul 04 23:54:34 2013 +0200
description:
Provide mul_2 and addmul_2 for POWER7.

details:   /var/hg/gmp/rev/c5a3e773e185
changeset: 15860:c5a3e773e185
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Jul 04 23:56:26 2013 +0200
description:
Update ChangeLog.

diffstat:

 ChangeLog                            |   20 +++++
 mpn/powerpc64/aix.m4                 |    2 +
 mpn/powerpc64/com.asm                |  124 ++++++++++++++++++++++++----------
 mpn/powerpc64/darwin.m4              |    2 +
 mpn/powerpc64/elf.m4                 |    2 +
 mpn/powerpc64/mode64/bdiv_dbm1c.asm  |    3 +-
 mpn/powerpc64/mode64/gcd_1.asm       |  112 +++++++++++++++++++++++++++++++
 mpn/powerpc64/mode64/p7/aormul_2.asm |  124 +++++++++++++++++++++++++++++++++++
 mpn/powerpc64/mode64/p7/gcd_1.asm    |  100 ++++++++++++++++++++++++++++
 mpn/powerpc64/p7/copyd.asm           |  115 ++++++++++++++++++++++++++++++++
 mpn/powerpc64/p7/copyi.asm           |  116 ++++++++++++++++++++++++++++++++
 11 files changed, 680 insertions(+), 40 deletions(-)

diffs (truncated from 814 to 300 lines):

diff -r 4c646d573f21 -r c5a3e773e185 ChangeLog
--- a/ChangeLog	Tue Jul 02 13:47:04 2013 +0200
+++ b/ChangeLog	Thu Jul 04 23:56:26 2013 +0200
@@ -1,3 +1,23 @@
+2013-07-04  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/mode64/p7/aormul_2.asm: New file.
+
+	* mpn/powerpc64/darwin.m4 (EXTRA_REGISTER): New define.
+	* mpn/powerpc64/aix.m4: New define (actually undefine).
+	* mpn/powerpc64/elf.m4: Likewise.
+
+2013-07-03  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/com.asm: Rewrite.
+
+	* mpn/powerpc64/p7/copyi.asm: New file.
+	* mpn/powerpc64/p7/copyd.asm: New file.
+
+2013-07-02  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/mode64/gcd_1.asm: New file.
+	* mpn/powerpc64/mode64/p7/gcd_1.asm: New file.
+
 2013-07-01  Torbjorn Granlund  <tege at gmplib.org>
 
 	* configure.ac: Comment out AC_PROG_F77.
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/aix.m4
--- a/mpn/powerpc64/aix.m4	Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/aix.m4	Thu Jul 04 23:56:26 2013 +0200
@@ -81,4 +81,6 @@
 
 define(`ASM_END', `TOC_ENTRY')
 
+undefine(`EXTRA_REGISTER')
+
 divert
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/com.asm
--- a/mpn/powerpc64/com.asm	Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/com.asm	Thu Jul 04 23:56:26 2013 +0200
@@ -1,6 +1,6 @@
 dnl  PowerPC-64 mpn_com.
 
-dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,58 +20,106 @@
 include(`../config.m4')
 
 C                  cycles/limb
-C POWER3/PPC630          1?
-C POWER4/PPC970          1.6
+C POWER3/PPC630          ?
+C POWER4/PPC970          1.25
 C POWER5                 ?
-C POWER6                 ?
-C POWER7                 1.45
-
-C TODO
-C  * 8-way unrolling brings timing down to about 1.3 cycles/limb.
+C POWER6                 1.32
+C POWER7                 1.13
 
 C INPUT PARAMETERS
-C rp	r3
-C up	r4
-C n	r5
+define(`rp',	`r3')
+define(`up',	`r4')
+define(`n',	`r5')
 
 ASM_START()
 PROLOGUE(mpn_com)
-	rldic.	r0, r5, 3, 59	C r0 = (r5 & 3) << 3; cr0 = (n == 4t)?
-	cmpldi	cr6, r0, 16	C cr6 = (n cmp 4t + 2)?
 
-	addi	r5, r5, 3	C compute...
 ifdef(`HAVE_ABI_mode32',
-`	rldicl	r5, r5, 62,34',	C ...branch count
-`	rldicl	r5, r5, 62, 2')	C ...branch count
-	mtctr	r5
+`	rldicl	n, n, 0,32')
 
-	add	r4, r4, r0	C offset up
-	add	r3, r3, r0	C offset rp
+	cmpdi	cr0, n, 4
+	blt	L(sml)
 
-	beq	cr0, L(L00)
-	blt	cr6, L(L01)
-	beq	cr6, L(L10)
-	b	L(L11)
+	addi	r10, n, 4
+	srdi	r10, r10, 3
+	mtctr	r10
 
-L(L00):	addi	r4, r4, 32
-	addi	r3, r3, 32
+	andi.	r0, n, 1
+	rlwinm	r11, n, 0,30,30
+	rlwinm	r12, n, 0,29,29
+	cmpdi	cr6, r11, 0
+	cmpdi	cr7, r12, 0
 
-	ALIGN(16)
-L(oop):	ld	r6, -32(r4)
+	beq	cr0, L(xx0)
+L(xx1):	ld	r6, 0(up)
+	addi	up, up, 8
 	nor	r6, r6, r6
-	std	r6, -32(r3)
-L(L11):	ld	r6, -24(r4)
+	std	r6, 0(rp)
+	addi	rp, rp, 8
+
+L(xx0):	bne	cr6, L(x10)
+L(x00):	ld	r6, 0(r4)
+	ld	r7, 8(r4)
+	bne	cr7, L(100)
+L(000):	addi	rp, rp, -32
+	b	L(lo0)
+L(100):	addi	up, up, -32
+	b	L(lo4)
+L(x10):	ld	r8, 0(r4)
+	ld	r9, 8(r4)
+	bne	cr7, L(110)
+L(010):	addi	up, up, 16
+	addi	rp, rp, -16
+	b	L(lo2)
+L(110):	addi	up, up, -16
+	addi	rp, rp, -48
+	b	L(lo6)
+
+L(sml):	mtctr	n
+L(t):	ld	r6, 0(up)
+	addi	up, up, 8
 	nor	r6, r6, r6
-	std	r6, -24(r3)
-L(L10):	ld	r6, -16(r4)
+	std	r6, 0(rp)
+	addi	rp, rp, 8
+	bdnz	L(t)
+	blr
+
+	ALIGN(32)
+L(top):	nor	r6, r6, r6
+	nor	r7, r7, r7
+	std	r6, 0(rp)
+	std	r7, 8(rp)
+L(lo2):	ld	r6, 0(up)
+	ld	r7, 8(up)
+	nor	r8, r8, r8
+	nor	r9, r9, r9
+	std	r8, 16(rp)
+	std	r9, 24(rp)
+L(lo0):	ld	r8, 16(up)
+	ld	r9, 24(up)
 	nor	r6, r6, r6
-	std	r6, -16(r3)
-L(L01):	ld	r6, -8(r4)
-	nor	r6, r6, r6
-	addi	r4, r4, 32
-	std	r6, -8(r3)
-	addi	r3, r3, 32
-	bdnz	L(oop)
+	nor	r7, r7, r7
+	std	r6, 32(rp)
+	std	r7, 40(rp)
+L(lo6):	ld	r6, 32(up)
+	ld	r7, 40(up)
+	nor	r8, r8, r8
+	nor	r9, r9, r9
+	std	r8, 48(rp)
+	std	r9, 56(rp)
+	addi	rp, rp, 64
+L(lo4):	ld	r8, 48(up)
+	ld	r9, 56(up)
+	addi	up, up, 64
+	bdnz	L(top)
 
+L(end):	nor	r6, r6, r6
+	nor	r7, r7, r7
+	std	r6, 0(rp)
+	std	r7, 8(rp)
+	nor	r8, r8, r8
+	nor	r9, r9, r9
+	std	r8, 16(rp)
+	std	r9, 24(rp)
 	blr
 EPILOGUE()
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/darwin.m4
--- a/mpn/powerpc64/darwin.m4	Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/darwin.m4	Thu Jul 04 23:56:26 2013 +0200
@@ -101,4 +101,6 @@
 
 define(`ASM_END', `dnl')
 
+define(`EXTRA_REGISTER', r2)
+
 divert
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/elf.m4
--- a/mpn/powerpc64/elf.m4	Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/elf.m4	Thu Jul 04 23:56:26 2013 +0200
@@ -84,4 +84,6 @@
 
 define(`ASM_END', `TOC_ENTRY')
 
+undefine(`EXTRA_REGISTER')
+
 divert
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/mode64/bdiv_dbm1c.asm
--- a/mpn/powerpc64/mode64/bdiv_dbm1c.asm	Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/mode64/bdiv_dbm1c.asm	Thu Jul 04 23:56:26 2013 +0200
@@ -21,10 +21,9 @@
 
 C                 cycles/limb
 C POWER3/PPC630       6-18
-C POWER4/PPC970       8.5?
+C POWER4/PPC970       8.25
 C POWER5              8.5  fluctuating as function of n % 3
 C POWER6             15
-C POWER6             15
 C POWER7              4.75
 
 C TODO
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/mode64/gcd_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/gcd_1.asm	Thu Jul 04 23:56:26 2013 +0200
@@ -0,0 +1,112 @@
+dnl  PowerPC-64 mpn_gcd_1.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012, 2013 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	 	    cycles/bit (approx)
+C POWER3/PPC630		 ?
+C POWER4/PPC970		 8.5
+C POWER5		 ?
+C POWER6		10.1
+C POWER7		 9.4
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+C INPUT PARAMETERS
+define(`up',    `r3')
+define(`n',     `r4')
+define(`v0',    `r5')
+
+EXTERN_FUNC(mpn_mod_1)
+EXTERN_FUNC(mpn_modexact_1c_odd)
+
+ASM_START()
+PROLOGUE(mpn_gcd_1)
+	mflr	r0
+	std	r30, -16(r1)
+	std	r31, -8(r1)
+	std	r0, 16(r1)
+	stdu	r1, -128(r1)
+
+	ld	r7, 0(up)		C U low limb
+	or	r0, r5, r7		C x | y
+
+	neg	r6, r0
+	and	r6, r6, r0
+	cntlzd	r31, r6			C common twos
+	subfic	r31, r31, 63
+
+	neg	r6, r5
+	and	r6, r6, r5
+	cntlzd	r8, r6
+	subfic	r8, r8, 63
+	srd	r5, r5, r8
+	mr	r30, r5			C v0 saved
+
+	cmpdi	r4, BMOD_1_TO_MOD_1_THRESHOLD
+	blt	L(bmod)
+	CALL(	mpn_mod_1)
+	b	L(reduced)
+L(bmod):
+	li	r6, 0
+	CALL(	mpn_modexact_1c_odd)
+L(reduced):


More information about the gmp-commit mailing list