[Gmp-commit] /home/hgfiles/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon May 3 02:29:58 CEST 2010


details:   /home/hgfiles/gmp/rev/2ee6f52ba733
changeset: 13587:2ee6f52ba733
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 02 21:51:45 2010 +0200
description:
Major overhaul of powerpc support.

details:   /home/hgfiles/gmp/rev/34b31e21e1ee
changeset: 13588:34b31e21e1ee
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 02 21:52:13 2010 +0200
description:
Recognise power7.

details:   /home/hgfiles/gmp/rev/dff63af3a2c3
changeset: 13589:dff63af3a2c3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 03 02:26:07 2010 +0200
description:
New file.

details:   /home/hgfiles/gmp/rev/4a1d128f5d4e
changeset: 13590:4a1d128f5d4e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 03 02:28:33 2010 +0200
description:
Complete rewrite.

details:   /home/hgfiles/gmp/rev/acb26f202896
changeset: 13591:acb26f202896
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 03 02:29:52 2010 +0200
description:
Fix some alignments, add cycle counts coherently.

diffstat:

 ChangeLog                             |   10 +
 config.guess                          |    3 +
 configure.in                          |   64 +++-----
 mpn/powerpc64/mode64/aors_n.asm       |   40 +----
 mpn/powerpc64/mode64/aorslshC_n.asm   |   12 +-
 mpn/powerpc64/mode64/aorsmul_1.asm    |    4 +-
 mpn/powerpc64/mode64/invert_limb.asm  |    9 +-
 mpn/powerpc64/mode64/mod_1_4.asm      |  256 ++++++++++++++++++++++++++++++++++
 mpn/powerpc64/mode64/mod_34lsub1.asm  |   11 +-
 mpn/powerpc64/mode64/mul_basecase.asm |   10 +-
 mpn/powerpc64/mode64/rsh1add_n.asm    |   25 +-
 mpn/powerpc64/mode64/rsh1sub_n.asm    |   25 +-
 mpn/powerpc64/sqr_diagonal.asm        |   97 +++++++++---
 13 files changed, 421 insertions(+), 145 deletions(-)

diffs (truncated from 837 to 300 lines):

diff -r 3ee4dc316ee6 -r acb26f202896 ChangeLog
--- a/ChangeLog	Sun May 02 11:24:00 2010 +0200
+++ b/ChangeLog	Mon May 03 02:29:52 2010 +0200
@@ -1,5 +1,15 @@
+2010-05-03  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/sqr_diagonal.asm: Complete rewrite.
+
+	* mpn/powerpc64/mode64/mod_1_4.asm: New file.
+
 2010-05-02  Torbjorn Granlund  <tege at gmplib.org>
 
+	* config.guess: Recognise power7.
+
+	* configure.in: Major overhaul of powerpc support.
+
 	* mpn/powerpc64/p6/lshift.asm: New file.
 	* mpn/powerpc64/p6/lshiftc.asm: Likewise.
 	* mpn/powerpc64/p6/rshift.asm: Likewise.
diff -r 3ee4dc316ee6 -r acb26f202896 config.guess
--- a/config.guess	Sun May 02 11:24:00 2010 +0200
+++ b/config.guess	Mon May 03 02:29:52 2010 +0200
@@ -454,6 +454,9 @@
 #ifdef POWER_6
   case POWER_6:    puts ("power6"); break;
 #endif
+#ifdef POWER_7
+  case POWER_7:    puts ("power7"); break;
+#endif
   default:
     if (_system_configuration.architecture == POWER_RS)
       puts ("power");
diff -r 3ee4dc316ee6 -r acb26f202896 configure.in
--- a/configure.in	Sun May 02 11:24:00 2010 +0200
+++ b/configure.in	Mon May 03 02:29:52 2010 +0200
@@ -885,32 +885,27 @@
         path="powerpc32" ;;
     esac
 
-    # gcc 2.7.2 knows -mcpu=403, 601, 603, 604.
-    # gcc 2.95 adds 401, 505, 602, 603e, ec603e, 604e, 620, 740, 750,
-    #   801, 821, 823, 860.
-    # gcc 3.0 adds 630, rs64a.
-    # gcc 3.1 adds 405, 7400, 7450.
-    # gcc 3.2 adds nothing.
-    # gcc 3.3 adds power3, power4, 8540.  power3 seems to be a synonym for 630.
-    # gcc pre-release 3.4 adds 405fp, 440, 440fp, 970.
-    #
-    # FIXME: The way 603e falls back to 603 for gcc 2.7.2 should be
-    # done for all the others too.  But what would be the correct
-    # arrangements?
-    #
     case $host_cpu in
       powerpc401)   gcc_cflags_cpu="-mcpu=401" ;;
-      powerpc403)   gcc_cflags_cpu="-mcpu=403" ;;
+      powerpc403)   gcc_cflags_cpu="-mcpu=403"
+      		    xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
       powerpc405)   gcc_cflags_cpu="-mcpu=405" ;;
       powerpc505)   gcc_cflags_cpu="-mcpu=505" ;;
-      powerpc601)   gcc_cflags_cpu="-mcpu=601" ;;
-      powerpc602)   gcc_cflags_cpu="-mcpu=602" ;;
-      powerpc603)   gcc_cflags_cpu="-mcpu=603" ;;
-      powerpc603e)  gcc_cflags_cpu="-mcpu=603e -mcpu=603" ;;
-      powerpc604)   gcc_cflags_cpu="-mcpu=604" ;;
-      powerpc604e)  gcc_cflags_cpu="-mcpu=604e -mcpu=604" ;;
+      powerpc601)   gcc_cflags_cpu="-mcpu=601"
+      		    xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
+      powerpc602)   gcc_cflags_cpu="-mcpu=602"
+      		    xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
+      powerpc603)   gcc_cflags_cpu="-mcpu=603"
+      		    xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc603e)  gcc_cflags_cpu="-mcpu=603e -mcpu=603"
+      		    xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc604)   gcc_cflags_cpu="-mcpu=604"
+      		    xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc604e)  gcc_cflags_cpu="-mcpu=604e -mcpu=604"
+      		    xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
       powerpc620)   gcc_cflags_cpu="-mcpu=620" ;;
       powerpc630)   gcc_cflags_cpu="-mcpu=630"
+      		    xlc_cflags_arch="-qarch=pwr3"
 		    cpu_path="p3" ;;
       powerpc740)   gcc_cflags_cpu="-mcpu=740" ;;
       powerpc7400 | powerpc7410)
@@ -925,14 +920,21 @@
       powerpc823)   gcc_cflags_cpu="-mcpu=823" ;;
       powerpc860)   gcc_cflags_cpu="-mcpu=860" ;;
       powerpc970)   gcc_cflags_cpu="-mtune=970"
+      		    xlc_cflags_arch="-qarch=970 -qarch=pwr3"
 		    vmx_path="powerpc64/vmx"
 		    cpu_path="p4" ;;
       power4)	    gcc_cflags_cpu="-mtune=power4"
+      		    xlc_cflags_arch="-qarch=pwr4"
 		    cpu_path="p4" ;;
       power5)	    gcc_cflags_cpu="-mtune=power5 -mtune=power4"
+      		    xlc_cflags_arch="-qarch=pwr5"
 		    cpu_path="p5 p4" ;;
       power6)	    gcc_cflags_cpu="-mtune=power6"
+      		    xlc_cflags_arch="-qarch=pwr6"
 		    cpu_path="p6" ;;
+      power7)	    gcc_cflags_cpu="-mtune=power7 -mtune=power5"
+      		    xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
+		    cpu_path="p7 p5 p4" ;;
     esac
 
     case $host in
@@ -944,23 +946,6 @@
 	xlc_32_cflags_maybe="-q32"
 	ar_32_flags="-X32"
 	nm_32_flags="-X32"
-
-        # xlc (what version?) knows -qarch=ppc, ppcgr, 601, 602, 603, 604,
-        # 403, rs64a
-        # -qarch=ppc is needed, so ensure everything falls back to that.
-        # FIXME: Perhaps newer versions know more flavours.
-        #
-	case $host_cpu in
-	  powerpc403)   xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
-	  powerpc601)   xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
-	  powerpc602)   xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
-	  powerpc603)   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
-	  powerpc603e)  xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
-	  powerpc604)   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
-	  powerpc604e)  xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
-	  *)            xlc_cflags_arch="-qarch=ppc" ;;
-        esac
-        ;;
     esac
 
     case $host in
@@ -974,7 +959,8 @@
             gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
             gcc_aix64_cflags_optlist="cpu"
 	    gcc_aix64_ldflags="-Wc,-maix64"
-            xlc_aix64_cflags="-O2 -q64 -qtune=pwr3 -qmaxmem=20000"
+            xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
+            xlc_aix64_cflags_optlist="arch"
 	    xlc_aix64_ldflags="-Wc,-q64"
             # Must indicate object type to ar and nm
 	    ar_aix64_flags="-X64"
@@ -1074,7 +1060,7 @@
     ;;
 
 
-  # POWER
+  # POWER 32-bit
   [power-*-* | power[12]-*-* | power2sc-*-*])
     AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
     HAVE_HOST_CPU_FAMILY_power=1
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/aors_n.asm
--- a/mpn/powerpc64/mode64/aors_n.asm	Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/aors_n.asm	Mon May 03 02:29:52 2010 +0200
@@ -20,37 +20,11 @@
 
 include(`../config.m4')
 
-C		cycles/limb
-C POWER3/PPC630:     1.5
-C POWER4/PPC970:     2
-
-C   n	   POWER3/PPC630   POWER4/PPC970
-C     1	       17.00	       19.00
-C     2		9.00	       10.49
-C     3		5.33		7.66
-C     4		4.50		5.14
-C     5		4.20		4.80
-C     6		3.83		4.33
-C     7		3.00		3.99
-C     8		2.87		3.55
-C     9		2.89		3.40
-C    10		2.60		3.42
-C    11		2.45		3.15
-C    12		2.41		2.99
-C    13		2.46		3.01
-C    14		2.42		2.97
-C    15		2.20		2.85
-C    50		1.78		2.44
-C   100		1.83		2.20
-C   200		1.55		2.12
-C   400		1.53		2.05
-C  1000		1.98		2.02#
-C  2000		1.50#		2.04
-C  4000		2.55		2.50
-C  8000		2.70		2.45
-C 16000		2.65		5.94
-C 32000		2.62	       16.41
-C 64000		2.73	       18.94
+C		    cycles/limb
+C POWER3/PPC630		 1.5
+C POWER4/PPC970		 2
+C POWER5		 2.25
+C POWER6		 2.63
 
 C This code is a little bit slower for POWER3/PPC630 than the simple code used
 C previously, but it is much faster for POWER4/PPC970.  The reason for the
@@ -162,7 +136,7 @@
 	addi	r4, r4, 32
 	addi	r5, r5, 32
 
-L(oop):	ADDSUBC	r28, r7, r6
+L(top):	ADDSUBC	r28, r7, r6
 	ld	r6, 0(r4)	C load s1 limb
 	ld	r7, 0(r5)	C load s2 limb
 	ADDSUBC	r29, r9, r8
@@ -181,7 +155,7 @@
 	std	r30, 16(r3)
 	std	r31, 24(r3)
 	addi	r3, r3, 32
-	bdnz	L(oop)		C decrement ctr and loop back
+	bdnz	L(top)		C decrement ctr and loop back
 
 L(end):	ADDSUBC	r28, r7, r6
 	ADDSUBC	r29, r9, r8
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/aorslshC_n.asm
--- a/mpn/powerpc64/mode64/aorslshC_n.asm	Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/aorslshC_n.asm	Mon May 03 02:29:52 2010 +0200
@@ -19,11 +19,11 @@
 
 include(`../config.m4')
 
-C		cycles/limb
-C POWER3/PPC630:     1.83	(1.5 c/l should be possible)
-C POWER4/PPC970:     3		(2.0 c/l should be possible)
-C POWER5:	     3
-C POWER6:	     3.5-47
+C		   cycles/limb
+C POWER3/PPC630		 1.83	(1.5 c/l should be possible)
+C POWER4/PPC970		 3	(2.0 c/l should be possible)
+C POWER5		 3
+C POWER6	      3.5-47
 
 C STATUS
 C  * Try combining upx+up, and vpx+vp.
@@ -130,7 +130,7 @@
 	addi	vp, vp, 16
 	addi	vpx, vpx, 24
 
-	ALIGN(16)
+	ALIGN(32)
 L(top):	ldx	u0, rp, up
 	ldx	v0, rp, vp
 	rldimi	s1, v1, LSH, 0
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/aorsmul_1.asm
--- a/mpn/powerpc64/mode64/aorsmul_1.asm	Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/aorsmul_1.asm	Mon May 03 02:29:52 2010 +0200
@@ -24,8 +24,8 @@
 C		cycles/limb	cycles/limb
 C POWER3/PPC630   6-18		   6-18
 C POWER4/PPC970	   8?		    8.3?  not updated for last file revision
-C POWER5	   8		    8.75
-C POWER6	  16		   16.5
+C POWER5	   8		    8.63
+C POWER6	  16.25		   16.75
 
 C TODO
 C  * Try to reduce the number of needed live registers
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/invert_limb.asm
--- a/mpn/powerpc64/mode64/invert_limb.asm	Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/invert_limb.asm	Mon May 03 02:29:52 2010 +0200
@@ -19,10 +19,11 @@
 
 include(`../config.m4')
 
-C		cycles/limb (approximate)
-C POWER3/PPC630:     80
-C POWER4/PPC970:     86
-C POWER5:	     86
+C		   cycles/limb (approximate)
+C POWER3/PPC630		80
+C POWER4/PPC970		86
+C POWER5		86
+C POWER6	       170
 
 ASM_START()
 PROLOGUE(mpn_invert_limb)
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/mod_1_4.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/mod_1_4.asm	Mon May 03 02:29:52 2010 +0200
@@ -0,0 +1,256 @@
+dnl  PowerPC-64 mpn_mod_1s_4p
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		    cycles/limb


More information about the gmp-commit mailing list