[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Nov 3 01:21:06 CET 2011


details:   /var/hg/gmp/rev/e86cdcbabc9d
changeset: 14415:e86cdcbabc9d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Nov 03 01:02:27 2011 +0100
description:
Provide gmp-mparam.h for POWER7.

details:   /var/hg/gmp/rev/cdb092533e53
changeset: 14416:cdb092533e53
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Nov 03 01:19:16 2011 +0100
description:
Add POWER7 cycle counts.

details:   /var/hg/gmp/rev/24ef457368dc
changeset: 14417:24ef457368dc
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Nov 03 01:20:56 2011 +0100
description:
*** empty log message ***

diffstat:

 ChangeLog                                 |    4 +
 mpn/powerpc64/com.asm                     |    9 +-
 mpn/powerpc64/copyd.asm                   |    9 +-
 mpn/powerpc64/copyi.asm                   |    9 +-
 mpn/powerpc64/logops_n.asm                |    9 +-
 mpn/powerpc64/lshift.asm                  |   11 +-
 mpn/powerpc64/mode64/aors_n.asm           |   11 +-
 mpn/powerpc64/mode64/aorslshC_n.asm       |   11 +-
 mpn/powerpc64/mode64/aorsmul_1.asm        |   13 +-
 mpn/powerpc64/mode64/bdiv_dbm1c.asm       |    4 +-
 mpn/powerpc64/mode64/dive_1.asm           |   11 +-
 mpn/powerpc64/mode64/divrem_1.asm         |   13 +-
 mpn/powerpc64/mode64/divrem_2.asm         |   11 +-
 mpn/powerpc64/mode64/invert_limb.asm      |   11 +-
 mpn/powerpc64/mode64/lshiftc.asm          |   11 +-
 mpn/powerpc64/mode64/mod_1_1.asm          |   11 +-
 mpn/powerpc64/mode64/mod_1_4.asm          |   11 +-
 mpn/powerpc64/mode64/mod_34lsub1.asm      |   11 +-
 mpn/powerpc64/mode64/mode1o.asm           |   10 +-
 mpn/powerpc64/mode64/mul_1.asm            |   11 +-
 mpn/powerpc64/mode64/mul_basecase.asm     |   10 +-
 mpn/powerpc64/mode64/p5/gmp-mparam.h      |    2 +-
 mpn/powerpc64/mode64/p6/gmp-mparam.h      |    2 +-
 mpn/powerpc64/mode64/p7/gmp-mparam.h      |  155 ++++++++++++++++++++++++++++++
 mpn/powerpc64/mode64/rsh1add_n.asm        |   11 +-
 mpn/powerpc64/mode64/rsh1sub_n.asm        |   11 +-
 mpn/powerpc64/mode64/sqr_diag_addlsh1.asm |   11 +-
 mpn/powerpc64/rshift.asm                  |   11 +-
 28 files changed, 303 insertions(+), 111 deletions(-)

diffs (truncated from 685 to 300 lines):

diff -r ead1147b1211 -r 24ef457368dc ChangeLog
--- a/ChangeLog	Wed Nov 02 15:09:48 2011 +0100
+++ b/ChangeLog	Thu Nov 03 01:20:56 2011 +0100
@@ -1,3 +1,7 @@
+2011-11-03  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
+
 2011-11-02  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/s390_64/invert_limb.asm: Slight optimisation.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/com.asm
--- a/mpn/powerpc64/com.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/com.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,9 +19,12 @@
 
 include(`../config.m4')
 
-C		cycles/limb
-C POWER3/PPC630:     1?
-C POWER4/PPC970:     1.6
+C                  cycles/limb
+C POWER3/PPC630          1?
+C POWER4/PPC970          1.6
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.45
 
 C TODO
 C  * 8-way unrolling brings timing down to about 1.3 cycles/limb.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/copyd.asm
--- a/mpn/powerpc64/copyd.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/copyd.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,9 +19,12 @@
 
 include(`../config.m4')
 
-C		cycles/limb
-C POWER3/PPC630:     1
-C POWER4/PPC970:     1
+C                  cycles/limb
+C POWER3/PPC630          1
+C POWER4/PPC970          1
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.4
 
 C INPUT PARAMETERS
 C rp	r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/copyi.asm
--- a/mpn/powerpc64/copyi.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/copyi.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,9 +19,12 @@
 
 include(`../config.m4')
 
-C		cycles/limb
-C POWER3/PPC630:     1
-C POWER4/PPC970:     1
+C                  cycles/limb
+C POWER3/PPC630          1
+C POWER4/PPC970          1
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.4
 
 C INPUT PARAMETERS
 C rp	r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/logops_n.asm
--- a/mpn/powerpc64/logops_n.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/logops_n.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -20,9 +20,12 @@
 
 include(`../config.m4')
 
-C		cycles/limb
-C POWER3/PPC630:     1.75
-C POWER4/PPC970:     2.10
+C                  cycles/limb
+C POWER3/PPC630          1.75
+C POWER4/PPC970          2.10
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.75
 
 C   n	   POWER3/PPC630   POWER4/PPC970
 C     1	       15.00	       15.33
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/lshift.asm
--- a/mpn/powerpc64/lshift.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/lshift.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,12 @@
 
 include(`../config.m4')
 
-C		    cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 2.25
-C POWER6		 9.75
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          ?
+C POWER5                 2.25
+C POWER6                 9.75
+C POWER7                 2.15
 
 C TODO
 C  * Try to reduce the number of needed live registers
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/aors_n.asm
--- a/mpn/powerpc64/mode64/aors_n.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/aors_n.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -20,11 +20,12 @@
 
 include(`../config.m4')
 
-C		    cycles/limb
-C POWER3/PPC630		 1.5
-C POWER4/PPC970		 2
-C POWER5		 2.25
-C POWER6		 2.63
+C                   cycles/limb
+C POWER3/PPC630          1.5
+C POWER4/PPC970          2
+C POWER5                 2.25
+C POWER6                 2.63
+C POWER7               2.25-2.87
 
 C This code is a little bit slower for POWER3/PPC630 than the simple code used
 C previously, but it is much faster for POWER4/PPC970.  The reason for the
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/aorslshC_n.asm
--- a/mpn/powerpc64/mode64/aorslshC_n.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/aorslshC_n.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -17,11 +17,12 @@
 dnl  You should have received a copy of the GNU Lesser General Public License
 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-C		   cycles/limb
-C POWER3/PPC630		 1.83	(1.5 c/l should be possible)
-C POWER4/PPC970		 3	(2.0 c/l should be possible)
-C POWER5		 3
-C POWER6	      3.5-47
+C                  cycles/limb
+C POWER3/PPC630          1.83   (1.5 c/l should be possible)
+C POWER4/PPC970          3      (2.0 c/l should be possible)
+C POWER5                 3
+C POWER6              3.5-47
+C POWER7                 3
 
 C STATUS
 C  * Try combining upx+up, and vpx+vp.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/aorsmul_1.asm
--- a/mpn/powerpc64/mode64/aorsmul_1.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/aorsmul_1.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -20,12 +20,13 @@
 
 include(`../config.m4')
 
-C		mpn_addmul_1	mpn_submul_1
-C		cycles/limb	cycles/limb
-C POWER3/PPC630   6-18		   6-18
-C POWER4/PPC970	   8		    8.3
-C POWER5	   8		    8.25
-C POWER6	  16.25		   16.75
+C               mpn_addmul_1    mpn_submul_1
+C               cycles/limb     cycles/limb
+C POWER3/PPC630   6-18             6-18
+C POWER4/PPC970    8                8.3
+C POWER5           8                8.25
+C POWER6          16.25            16.75
+C POWER7           3.77             4.9
 
 C TODO
 C  * Try to reduce the number of needed live registers
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/bdiv_dbm1c.asm
--- a/mpn/powerpc64/mode64/bdiv_dbm1c.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/bdiv_dbm1c.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,13 @@
 
 include(`../config.m4')
 
-C		  cycles/limb
+C                 cycles/limb
 C POWER3/PPC630       6-18
 C POWER4/PPC970       8.5?
 C POWER5              8.5  fluctuating as function of n % 3
 C POWER6             15
+C POWER6             15
+C POWER7              4.75
 
 C TODO
 C  * Nothing to do...
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/dive_1.asm
--- a/mpn/powerpc64/mode64/dive_1.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/dive_1.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,12 +19,13 @@
 
 include(`../config.m4')
 
-C			cycles/limb
-C			norm	unorm
+C                       cycles/limb
+C                       norm    unorm
 C POWER3/PPC630        13-19
-C POWER4/PPC970		16
-C POWER5		16	16
-C POWER6		37	46
+C POWER4/PPC970         16
+C POWER5                16      16
+C POWER6                37      46
+C POWER7                12      12
 
 C TODO
 C  * Check if n=1 code is really an improvement.  It probably isn't.
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/divrem_1.asm
--- a/mpn/powerpc64/mode64/divrem_1.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/divrem_1.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -20,12 +20,13 @@
 
 include(`../config.m4')
 
-C			    cycles/limb
-C			norm	unorm	frac
-C POWER3/PPC630		16-34	16-34	~11
-C POWER4/PPC970		 29		 19
-C POWER5		 29	 29	~20
-C POWER6		 50	 59	~42
+C                           cycles/limb
+C                       norm    unorm   frac
+C POWER3/PPC630         16-34   16-34   ~11
+C POWER4/PPC970          29              19
+C POWER5                 29      29     ~20
+C POWER6                 50      59     ~42
+C POWER7                 25      25     ~14
 
 C INPUT PARAMETERS
 C qp  = r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/divrem_2.asm
--- a/mpn/powerpc64/mode64/divrem_2.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/divrem_2.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,12 +19,13 @@
 
 include(`../config.m4')
 
-C			cycles/limb
-C			norm	frac
+C                       cycles/limb
+C                       norm    frac
 C POWER3/PPC630
-C POWER4/PPC970		?	?
-C POWER5		37	?
-C POWER6		62	?
+C POWER4/PPC970         ?       ?
+C POWER5                37      ?
+C POWER6                62      ?
+C POWER7                30.5    ?
 
 C INPUT PARAMETERS
 C qp  = r3
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/invert_limb.asm
--- a/mpn/powerpc64/mode64/invert_limb.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/invert_limb.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,12 @@
 
 include(`../config.m4')
 
-C		   cycles/limb (approximate)
-C POWER3/PPC630		80
-C POWER4/PPC970		86
-C POWER5		86
-C POWER6	       170
+C                  cycles/limb (approximate)
+C POWER3/PPC630         80
+C POWER4/PPC970         86
+C POWER5                86
+C POWER6               170
+C POWER7                66
 
 ASM_START()
 PROLOGUE(mpn_invert_limb)
diff -r ead1147b1211 -r 24ef457368dc mpn/powerpc64/mode64/lshiftc.asm
--- a/mpn/powerpc64/mode64/lshiftc.asm	Wed Nov 02 15:09:48 2011 +0100
+++ b/mpn/powerpc64/mode64/lshiftc.asm	Thu Nov 03 01:20:56 2011 +0100
@@ -19,11 +19,12 @@
 
 include(`../config.m4')
 
-C		    cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 2.25
-C POWER6		 9.5
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          ?
+C POWER5                 2.25
+C POWER6                 9.5
+C POWER7                 2.15
 
 C TODO
 C  * Try to reduce the number of needed live registers


More information about the gmp-commit mailing list