[Gmp-commit] /var/hg/gmp: 7 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Jul 4 23:56:45 CEST 2013
details: /var/hg/gmp/rev/117cf9e01474
changeset: 15854:117cf9e01474
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Jul 03 11:47:57 2013 +0200
description:
Provide special power7 copyi, copyd.
details: /var/hg/gmp/rev/e5f9e729ca6a
changeset: 15855:e5f9e729ca6a
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Jul 03 14:42:00 2013 +0200
description:
Rewrite.
details: /var/hg/gmp/rev/dff28d38bc3b
changeset: 15856:dff28d38bc3b
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 04 23:36:00 2013 +0200
description:
(EXTRA_REGISTER): New define.
details: /var/hg/gmp/rev/024764375bc7
changeset: 15857:024764375bc7
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 04 23:37:25 2013 +0200
description:
Provide two ppc64 gcd_1 variants.
details: /var/hg/gmp/rev/cc0b53d04e4a
changeset: 15858:cc0b53d04e4a
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 04 23:38:26 2013 +0200
description:
Update cycle table.
details: /var/hg/gmp/rev/e8c445383f6e
changeset: 15859:e8c445383f6e
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 04 23:54:34 2013 +0200
description:
Provide mul_2 and addmul_2 for POWER7.
details: /var/hg/gmp/rev/c5a3e773e185
changeset: 15860:c5a3e773e185
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 04 23:56:26 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 20 +++++
mpn/powerpc64/aix.m4 | 2 +
mpn/powerpc64/com.asm | 124 ++++++++++++++++++++++++----------
mpn/powerpc64/darwin.m4 | 2 +
mpn/powerpc64/elf.m4 | 2 +
mpn/powerpc64/mode64/bdiv_dbm1c.asm | 3 +-
mpn/powerpc64/mode64/gcd_1.asm | 112 +++++++++++++++++++++++++++++++
mpn/powerpc64/mode64/p7/aormul_2.asm | 124 +++++++++++++++++++++++++++++++++++
mpn/powerpc64/mode64/p7/gcd_1.asm | 100 ++++++++++++++++++++++++++++
mpn/powerpc64/p7/copyd.asm | 115 ++++++++++++++++++++++++++++++++
mpn/powerpc64/p7/copyi.asm | 116 ++++++++++++++++++++++++++++++++
11 files changed, 680 insertions(+), 40 deletions(-)
diffs (truncated from 814 to 300 lines):
diff -r 4c646d573f21 -r c5a3e773e185 ChangeLog
--- a/ChangeLog Tue Jul 02 13:47:04 2013 +0200
+++ b/ChangeLog Thu Jul 04 23:56:26 2013 +0200
@@ -1,3 +1,23 @@
+2013-07-04 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/mode64/p7/aormul_2.asm: New file.
+
+ * mpn/powerpc64/darwin.m4 (EXTRA_REGISTER): New define.
+ * mpn/powerpc64/aix.m4: New define (actually undefine).
+ * mpn/powerpc64/elf.m4: Likewise.
+
+2013-07-03 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/com.asm: Rewrite.
+
+ * mpn/powerpc64/p7/copyi.asm: New file.
+ * mpn/powerpc64/p7/copyd.asm: New file.
+
+2013-07-02 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/mode64/gcd_1.asm: New file.
+ * mpn/powerpc64/mode64/p7/gcd_1.asm: New file.
+
2013-07-01 Torbjorn Granlund <tege at gmplib.org>
* configure.ac: Comment out AC_PROG_F77.
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/aix.m4
--- a/mpn/powerpc64/aix.m4 Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/aix.m4 Thu Jul 04 23:56:26 2013 +0200
@@ -81,4 +81,6 @@
define(`ASM_END', `TOC_ENTRY')
+undefine(`EXTRA_REGISTER')
+
divert
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/com.asm
--- a/mpn/powerpc64/com.asm Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/com.asm Thu Jul 04 23:56:26 2013 +0200
@@ -1,6 +1,6 @@
dnl PowerPC-64 mpn_com.
-dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -20,58 +20,106 @@
include(`../config.m4')
C cycles/limb
-C POWER3/PPC630 1?
-C POWER4/PPC970 1.6
+C POWER3/PPC630 ?
+C POWER4/PPC970 1.25
C POWER5 ?
-C POWER6 ?
-C POWER7 1.45
-
-C TODO
-C * 8-way unrolling brings timing down to about 1.3 cycles/limb.
+C POWER6 1.32
+C POWER7 1.13
C INPUT PARAMETERS
-C rp r3
-C up r4
-C n r5
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
ASM_START()
PROLOGUE(mpn_com)
- rldic. r0, r5, 3, 59 C r0 = (r5 & 3) << 3; cr0 = (n == 4t)?
- cmpldi cr6, r0, 16 C cr6 = (n cmp 4t + 2)?
- addi r5, r5, 3 C compute...
ifdef(`HAVE_ABI_mode32',
-` rldicl r5, r5, 62,34', C ...branch count
-` rldicl r5, r5, 62, 2') C ...branch count
- mtctr r5
+` rldicl n, n, 0,32')
- add r4, r4, r0 C offset up
- add r3, r3, r0 C offset rp
+ cmpdi cr0, n, 4
+ blt L(sml)
- beq cr0, L(L00)
- blt cr6, L(L01)
- beq cr6, L(L10)
- b L(L11)
+ addi r10, n, 4
+ srdi r10, r10, 3
+ mtctr r10
-L(L00): addi r4, r4, 32
- addi r3, r3, 32
+ andi. r0, n, 1
+ rlwinm r11, n, 0,30,30
+ rlwinm r12, n, 0,29,29
+ cmpdi cr6, r11, 0
+ cmpdi cr7, r12, 0
- ALIGN(16)
-L(oop): ld r6, -32(r4)
+ beq cr0, L(xx0)
+L(xx1): ld r6, 0(up)
+ addi up, up, 8
nor r6, r6, r6
- std r6, -32(r3)
-L(L11): ld r6, -24(r4)
+ std r6, 0(rp)
+ addi rp, rp, 8
+
+L(xx0): bne cr6, L(x10)
+L(x00): ld r6, 0(r4)
+ ld r7, 8(r4)
+ bne cr7, L(100)
+L(000): addi rp, rp, -32
+ b L(lo0)
+L(100): addi up, up, -32
+ b L(lo4)
+L(x10): ld r8, 0(r4)
+ ld r9, 8(r4)
+ bne cr7, L(110)
+L(010): addi up, up, 16
+ addi rp, rp, -16
+ b L(lo2)
+L(110): addi up, up, -16
+ addi rp, rp, -48
+ b L(lo6)
+
+L(sml): mtctr n
+L(t): ld r6, 0(up)
+ addi up, up, 8
nor r6, r6, r6
- std r6, -24(r3)
-L(L10): ld r6, -16(r4)
+ std r6, 0(rp)
+ addi rp, rp, 8
+ bdnz L(t)
+ blr
+
+ ALIGN(32)
+L(top): nor r6, r6, r6
+ nor r7, r7, r7
+ std r6, 0(rp)
+ std r7, 8(rp)
+L(lo2): ld r6, 0(up)
+ ld r7, 8(up)
+ nor r8, r8, r8
+ nor r9, r9, r9
+ std r8, 16(rp)
+ std r9, 24(rp)
+L(lo0): ld r8, 16(up)
+ ld r9, 24(up)
nor r6, r6, r6
- std r6, -16(r3)
-L(L01): ld r6, -8(r4)
- nor r6, r6, r6
- addi r4, r4, 32
- std r6, -8(r3)
- addi r3, r3, 32
- bdnz L(oop)
+ nor r7, r7, r7
+ std r6, 32(rp)
+ std r7, 40(rp)
+L(lo6): ld r6, 32(up)
+ ld r7, 40(up)
+ nor r8, r8, r8
+ nor r9, r9, r9
+ std r8, 48(rp)
+ std r9, 56(rp)
+ addi rp, rp, 64
+L(lo4): ld r8, 48(up)
+ ld r9, 56(up)
+ addi up, up, 64
+ bdnz L(top)
+L(end): nor r6, r6, r6
+ nor r7, r7, r7
+ std r6, 0(rp)
+ std r7, 8(rp)
+ nor r8, r8, r8
+ nor r9, r9, r9
+ std r8, 16(rp)
+ std r9, 24(rp)
blr
EPILOGUE()
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/darwin.m4
--- a/mpn/powerpc64/darwin.m4 Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/darwin.m4 Thu Jul 04 23:56:26 2013 +0200
@@ -101,4 +101,6 @@
define(`ASM_END', `dnl')
+define(`EXTRA_REGISTER', r2)
+
divert
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/elf.m4
--- a/mpn/powerpc64/elf.m4 Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/elf.m4 Thu Jul 04 23:56:26 2013 +0200
@@ -84,4 +84,6 @@
define(`ASM_END', `TOC_ENTRY')
+undefine(`EXTRA_REGISTER')
+
divert
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/mode64/bdiv_dbm1c.asm
--- a/mpn/powerpc64/mode64/bdiv_dbm1c.asm Tue Jul 02 13:47:04 2013 +0200
+++ b/mpn/powerpc64/mode64/bdiv_dbm1c.asm Thu Jul 04 23:56:26 2013 +0200
@@ -21,10 +21,9 @@
C cycles/limb
C POWER3/PPC630 6-18
-C POWER4/PPC970 8.5?
+C POWER4/PPC970 8.25
C POWER5 8.5 fluctuating as function of n % 3
C POWER6 15
-C POWER6 15
C POWER7 4.75
C TODO
diff -r 4c646d573f21 -r c5a3e773e185 mpn/powerpc64/mode64/gcd_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/gcd_1.asm Thu Jul 04 23:56:26 2013 +0200
@@ -0,0 +1,112 @@
+dnl PowerPC-64 mpn_gcd_1.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012, 2013 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C POWER3/PPC630 ?
+C POWER4/PPC970 8.5
+C POWER5 ?
+C POWER6 10.1
+C POWER7 9.4
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+C INPUT PARAMETERS
+define(`up', `r3')
+define(`n', `r4')
+define(`v0', `r5')
+
+EXTERN_FUNC(mpn_mod_1)
+EXTERN_FUNC(mpn_modexact_1c_odd)
+
+ASM_START()
+PROLOGUE(mpn_gcd_1)
+ mflr r0
+ std r30, -16(r1)
+ std r31, -8(r1)
+ std r0, 16(r1)
+ stdu r1, -128(r1)
+
+ ld r7, 0(up) C U low limb
+ or r0, r5, r7 C x | y
+
+ neg r6, r0
+ and r6, r6, r0
+ cntlzd r31, r6 C common twos
+ subfic r31, r31, 63
+
+ neg r6, r5
+ and r6, r6, r5
+ cntlzd r8, r6
+ subfic r8, r8, 63
+ srd r5, r5, r8
+ mr r30, r5 C v0 saved
+
+ cmpdi r4, BMOD_1_TO_MOD_1_THRESHOLD
+ blt L(bmod)
+ CALL( mpn_mod_1)
+ b L(reduced)
+L(bmod):
+ li r6, 0
+ CALL( mpn_modexact_1c_odd)
+L(reduced):
More information about the gmp-commit mailing list