[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Nov 3 01:21:06 CET 2011
details: /var/hg/gmp/rev/e86cdcbabc9d
changeset: 14415:e86cdcbabc9d
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Nov 03 01:02:27 2011 +0100
description:
Provide gmp-mparam.h for POWER7.
details: /var/hg/gmp/rev/cdb092533e53
changeset: 14416:cdb092533e53
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Nov 03 01:19:16 2011 +0100
description:
Add POWER7 cycle counts.
details: /var/hg/gmp/rev/24ef457368dc
changeset: 14417:24ef457368dc
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Nov 03 01:20:56 2011 +0100
description:
*** empty log message ***
diffstat:
ChangeLog | 4 +
mpn/powerpc64/com.asm | 9 +-
mpn/powerpc64/copyd.asm | 9 +-
mpn/powerpc64/copyi.asm | 9 +-
mpn/powerpc64/logops_n.asm | 9 +-
mpn/powerpc64/lshift.asm | 11 +-
mpn/powerpc64/mode64/aors_n.asm | 11 +-
mpn/powerpc64/mode64/aorslshC_n.asm | 11 +-
mpn/powerpc64/mode64/aorsmul_1.asm | 13 +-
mpn/powerpc64/mode64/bdiv_dbm1c.asm | 4 +-
mpn/powerpc64/mode64/dive_1.asm | 11 +-
mpn/powerpc64/mode64/divrem_1.asm | 13 +-
mpn/powerpc64/mode64/divrem_2.asm | 11 +-
mpn/powerpc64/mode64/invert_limb.asm | 11 +-
mpn/powerpc64/mode64/lshiftc.asm | 11 +-
mpn/powerpc64/mode64/mod_1_1.asm | 11 +-
mpn/powerpc64/mode64/mod_1_4.asm | 11 +-
mpn/powerpc64/mode64/mod_34lsub1.asm | 11 +-
mpn/powerpc64/mode64/mode1o.asm | 10 +-
mpn/powerpc64/mode64/mul_1.asm | 11 +-
mpn/powerpc64/mode64/mul_basecase.asm | 10 +-
mpn/powerpc64/mode64/p5/gmp-mparam.h | 2 +-
mpn/powerpc64/mode64/p6/gmp-mparam.h | 2 +-
mpn/powerpc64/mode64/p7/gmp-mparam.h | 155 ++++++++++++++++++++++++++++++
mpn/powerpc64/mode64/rsh1add_n.asm | 11 +-
mpn/powerpc64/mode64/rsh1sub_n.asm | 11 +-
mpn/powerpc64/mode64/sqr_diag_addlsh1.asm | 11 +-
mpn/powerpc64/rshift.asm | 11 +-
28 files changed, 303 insertions(+), 111 deletions(-)
diffs (truncated from 685 to 300 lines):
diff -r ead1147b1211 -r 24ef457368dc ChangeLog
--- a/ChangeLog Wed Nov 02 15:09:48 2011 +0100
+++ b/ChangeLog Thu Nov 03 01:20:56 2011 +0100
@@ -1,3 +1,7 @@
+2011-11-03 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
+
2011-11-02 Torbjorn Granlund <tege at gmplib.org>
* mpn/s390_64/invert_limb.asm: Slight optimisation.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/com.asm
--- a/mpn/powerpc64/com.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/com.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,9 +19,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1?
-C POWER4/PPC970: 1.6
+C cycles/limb
+C POWER3/PPC630 1?
+C POWER4/PPC970 1.6
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.45
C TODO
C * 8-way unrolling brings timing down to about 1.3 cycles/limb.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/copyd.asm
--- a/mpn/powerpc64/copyd.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/copyd.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,9 +19,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1
-C POWER4/PPC970: 1
+C cycles/limb
+C POWER3/PPC630 1
+C POWER4/PPC970 1
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.4
C INPUT PARAMETERS
C rp r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/copyi.asm
--- a/mpn/powerpc64/copyi.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/copyi.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,9 +19,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1
-C POWER4/PPC970: 1
+C cycles/limb
+C POWER3/PPC630 1
+C POWER4/PPC970 1
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.4
C INPUT PARAMETERS
C rp r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/logops_n.asm
--- a/mpn/powerpc64/logops_n.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/logops_n.asm Thu Nov 03 01:20:56 2011 +0100
@@ -20,9 +20,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.75
-C POWER4/PPC970: 2.10
+C cycles/limb
+C POWER3/PPC630 1.75
+C POWER4/PPC970 2.10
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.75
C n POWER3/PPC630 POWER4/PPC970
C 1 15.00 15.33
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/lshift.asm
--- a/mpn/powerpc64/lshift.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/lshift.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630 ?
-C POWER4/PPC970 ?
-C POWER5 2.25
-C POWER6 9.75
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 9.75
+C POWER7 2.15
C TODO
C * Try to reduce the number of needed live registers
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/aors_n.asm
--- a/mpn/powerpc64/mode64/aors_n.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/aors_n.asm Thu Nov 03 01:20:56 2011 +0100
@@ -20,11 +20,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630 1.5
-C POWER4/PPC970 2
-C POWER5 2.25
-C POWER6 2.63
+C cycles/limb
+C POWER3/PPC630 1.5
+C POWER4/PPC970 2
+C POWER5 2.25
+C POWER6 2.63
+C POWER7 2.25-2.87
C This code is a little bit slower for POWER3/PPC630 than the simple code used
C previously, but it is much faster for POWER4/PPC970. The reason for the
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/aorslshC_n.asm
--- a/mpn/powerpc64/mode64/aorslshC_n.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/aorslshC_n.asm Thu Nov 03 01:20:56 2011 +0100
@@ -17,11 +17,12 @@
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-C cycles/limb
-C POWER3/PPC630 1.83 (1.5 c/l should be possible)
-C POWER4/PPC970 3 (2.0 c/l should be possible)
-C POWER5 3
-C POWER6 3.5-47
+C cycles/limb
+C POWER3/PPC630 1.83 (1.5 c/l should be possible)
+C POWER4/PPC970 3 (2.0 c/l should be possible)
+C POWER5 3
+C POWER6 3.5-47
+C POWER7 3
C STATUS
C * Try combining upx+up, and vpx+vp.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/aorsmul_1.asm
--- a/mpn/powerpc64/mode64/aorsmul_1.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/aorsmul_1.asm Thu Nov 03 01:20:56 2011 +0100
@@ -20,12 +20,13 @@
include(`../config.m4')
-C mpn_addmul_1 mpn_submul_1
-C cycles/limb cycles/limb
-C POWER3/PPC630 6-18 6-18
-C POWER4/PPC970 8 8.3
-C POWER5 8 8.25
-C POWER6 16.25 16.75
+C mpn_addmul_1 mpn_submul_1
+C cycles/limb cycles/limb
+C POWER3/PPC630 6-18 6-18
+C POWER4/PPC970 8 8.3
+C POWER5 8 8.25
+C POWER6 16.25 16.75
+C POWER7 3.77 4.9
C TODO
C * Try to reduce the number of needed live registers
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/bdiv_dbm1c.asm
--- a/mpn/powerpc64/mode64/bdiv_dbm1c.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/bdiv_dbm1c.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,13 @@
include(`../config.m4')
-C cycles/limb
+C cycles/limb
C POWER3/PPC630 6-18
C POWER4/PPC970 8.5?
C POWER5 8.5 fluctuating as function of n % 3
C POWER6 15
+C POWER6 15
+C POWER7 4.75
C TODO
C * Nothing to do...
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/dive_1.asm
--- a/mpn/powerpc64/mode64/dive_1.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/dive_1.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,12 +19,13 @@
include(`../config.m4')
-C cycles/limb
-C norm unorm
+C cycles/limb
+C norm unorm
C POWER3/PPC630 13-19
-C POWER4/PPC970 16
-C POWER5 16 16
-C POWER6 37 46
+C POWER4/PPC970 16
+C POWER5 16 16
+C POWER6 37 46
+C POWER7 12 12
C TODO
C * Check if n=1 code is really an improvement. It probably isn't.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/divrem_1.asm
--- a/mpn/powerpc64/mode64/divrem_1.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/divrem_1.asm Thu Nov 03 01:20:56 2011 +0100
@@ -20,12 +20,13 @@
include(`../config.m4')
-C cycles/limb
-C norm unorm frac
-C POWER3/PPC630 16-34 16-34 ~11
-C POWER4/PPC970 29 19
-C POWER5 29 29 ~20
-C POWER6 50 59 ~42
+C cycles/limb
+C norm unorm frac
+C POWER3/PPC630 16-34 16-34 ~11
+C POWER4/PPC970 29 19
+C POWER5 29 29 ~20
+C POWER6 50 59 ~42
+C POWER7 25 25 ~14
C INPUT PARAMETERS
C qp = r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/divrem_2.asm
--- a/mpn/powerpc64/mode64/divrem_2.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/divrem_2.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,12 +19,13 @@
include(`../config.m4')
-C cycles/limb
-C norm frac
+C cycles/limb
+C norm frac
C POWER3/PPC630
-C POWER4/PPC970 ? ?
-C POWER5 37 ?
-C POWER6 62 ?
+C POWER4/PPC970 ? ?
+C POWER5 37 ?
+C POWER6 62 ?
+C POWER7 30.5 ?
C INPUT PARAMETERS
C qp = r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/invert_limb.asm
--- a/mpn/powerpc64/mode64/invert_limb.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/invert_limb.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,12 @@
include(`../config.m4')
-C cycles/limb (approximate)
-C POWER3/PPC630 80
-C POWER4/PPC970 86
-C POWER5 86
-C POWER6 170
+C cycles/limb (approximate)
+C POWER3/PPC630 80
+C POWER4/PPC970 86
+C POWER5 86
+C POWER6 170
+C POWER7 66
ASM_START()
PROLOGUE(mpn_invert_limb)
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/lshiftc.asm
--- a/mpn/powerpc64/mode64/lshiftc.asm Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/lshiftc.asm Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,12 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630 ?
-C POWER4/PPC970 ?
-C POWER5 2.25
-C POWER6 9.5
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 9.5
+C POWER7 2.15
C TODO
C * Try to reduce the number of needed live registers
More information about the gmp-commit
mailing list