[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Nov 21 01:06:06 CET 2011


details:   /var/hg/gmp/rev/60293e00f755
changeset: 14456:60293e00f755
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Nov 20 21:47:49 2011 +0100
description:
Configure improvements for powerpc64 with abi=32.

details:   /var/hg/gmp/rev/34d47e7a3fd4
changeset: 14457:34d47e7a3fd4
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Nov 20 21:49:10 2011 +0100
description:
Provide special powerpc64 add_n/sub_n abi=32 code.

details:   /var/hg/gmp/rev/5e479471bb50
changeset: 14458:5e479471bb50
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Nov 20 22:55:07 2011 +0100
description:
Split x86 CPUs into more subtypes for more accurate passing of gcc flags.

details:   /var/hg/gmp/rev/f29ad42873fe
changeset: 14459:f29ad42873fe
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Nov 20 22:56:02 2011 +0100
description:
Retune.

diffstat:

 ChangeLog                        |   12 ++
 configure.in                     |   34 +++++-
 mpn/powerpc32/p3-p7/aors_n.asm   |  176 +++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/coreisbr/gmp-mparam.h |   36 ++++---
 4 files changed, 235 insertions(+), 23 deletions(-)

diffs (truncated from 347 to 300 lines):

diff -r 42b23f369c5e -r f29ad42873fe ChangeLog
--- a/ChangeLog	Thu Nov 17 22:19:28 2011 +0100
+++ b/ChangeLog	Sun Nov 20 22:56:02 2011 +0100
@@ -1,3 +1,15 @@
+2011-11-20  Torbjorn Granlund  <tege at gmplib.org>
+
+	* configure.in: Split x86 CPUs into more subtypes for more accurate
+	passing of gcc flags.
+
+	* mpn/powerpc32/p3-p7/aors_n.asm: New file.
+
+	* configure.in: Pass -m32 for powerpc64 with abi=32, via the _maybe
+	mechanism.
+
+	* configure.in: Support powerpc32/p3-p7 directory for affected CPUs.
+
 2011-11-17  Torbjorn Granlund  <tege at gmplib.org>
 
 	* tune/speed.c (routine): Add mpn_tabselect.
diff -r 42b23f369c5e -r f29ad42873fe configure.in
--- a/configure.in	Thu Nov 17 22:19:28 2011 +0100
+++ b/configure.in	Sun Nov 20 22:56:02 2011 +0100
@@ -919,7 +919,7 @@
       powerpc620)   gcc_cflags_cpu="-mcpu=620" ;;
       powerpc630)   gcc_cflags_cpu="-mcpu=630"
 		    xlc_cflags_arch="-qarch=pwr3"
-		    cpu_path="p3" ;;
+		    cpu_path="p3 p3-p7" ;;
       powerpc740)   gcc_cflags_cpu="-mcpu=740" ;;
       powerpc7400 | powerpc7410)
 		    gcc_cflags_asm="-Wa,-maltivec"
@@ -935,19 +935,19 @@
       powerpc970)   gcc_cflags_cpu="-mtune=970"
 		    xlc_cflags_arch="-qarch=970 -qarch=pwr3"
 		    vmx_path="powerpc64/vmx"
-		    cpu_path="p4" ;;
+		    cpu_path="p4 p3-p7" ;;
       power4)	    gcc_cflags_cpu="-mtune=power4"
 		    xlc_cflags_arch="-qarch=pwr4"
-		    cpu_path="p4" ;;
+		    cpu_path="p4 p3-p7" ;;
       power5)	    gcc_cflags_cpu="-mtune=power5 -mtune=power4"
 		    xlc_cflags_arch="-qarch=pwr5"
-		    cpu_path="p5 p4" ;;
+		    cpu_path="p5 p4 p3-p7" ;;
       power6)	    gcc_cflags_cpu="-mtune=power6"
 		    xlc_cflags_arch="-qarch=pwr6"
-		    cpu_path="p6" ;;
+		    cpu_path="p6 p3-p7" ;;
       power7)	    gcc_cflags_cpu="-mtune=power7 -mtune=power5"
 		    xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
-		    cpu_path="p7 p5 p4" ;;
+		    cpu_path="p7 p5 p4 p3-p7" ;;
     esac
 
     case $host in
@@ -1012,6 +1012,7 @@
 	    # incompatible with a shared library.
 	    #
 	    abilist="mode64 mode32 $abilist"
+	    gcc_32_cflags_maybe="-m32"
 	    gcc_cflags_opt="-O3 -O2 -O1"	# will this become used?
 	    cclist_mode32="gcc"
 	    gcc_mode32_cflags_maybe="-m32"
@@ -1057,6 +1058,7 @@
 	    # 64-bits.
 	    #
 	    abilist="mode64 mode32 $abilist"
+	    gcc_32_cflags_maybe="-m32"
 	    cclist_mode32="gcc"
 	    gcc_mode32_cflags_maybe="-m32"
 	    gcc_mode32_cflags="-mpowerpc64"
@@ -1534,14 +1536,30 @@
         gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
         gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
         ;;
-      athlon64 | k8 | k10 | bobcat | bulldozer | x86_64)
+      athlon64 | k8 | x86_64)
         gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
         gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
         ;;
-      core2 | corei | coreinhm | coreiwsm | coreisbr)
+      k10)
+        gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
+        gcc_cflags_arch="-march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+        ;;
+      bobcat)
+        gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
+        gcc_cflags_arch="-march=btver1 -march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+        ;;
+      bulldozer)
+        gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
+        gcc_cflags_arch="-march=bdver1 -march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+        ;;
+      core2)
         gcc_cflags_cpu="-mtune=core2 -mtune=k8"
         gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
         ;;
+      corei | coreinhm | coreiwsm | coreisbr)
+        gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+        gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+        ;;
       atom)
         gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
         gcc_cflags_arch="-march=atom -march=pentium3"
diff -r 42b23f369c5e -r f29ad42873fe mpn/powerpc32/p3-p7/aors_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc32/p3-p7/aors_n.asm	Sun Nov 20 22:56:02 2011 +0100
@@ -0,0 +1,176 @@
+dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          1.5
+C POWER4/PPC970          2
+C POWER5                 2
+C POWER6                 2.78
+C POWER7               2.15-2.87
+
+C This code is based on powerpc64/aors_n.asm.
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C vp	r5
+C n	r6
+
+ifdef(`OPERATION_add_n',`
+  define(ADDSUBC,	adde)		C per-limb operation: add with carry-in and carry-out
+  define(ADDSUB,	addc)		C defined, but not referenced by the code below
+  define(func,		mpn_add_n)
+  define(func_nc,	mpn_add_nc)
+  define(GENRVAL,	`addi	r3, r3, 1')	C turns r3 = CA - 1 into r3 = CA
+  define(SETCBR,	`addic	r0, $1, -1')	C CA = 1 iff the register argument is nonzero
+  define(CLRCB,		`addic	r0, r0, 0')	C CA = 0 (adding 0 never carries)
+')
+ifdef(`OPERATION_sub_n',`
+  define(ADDSUBC,	subfe)		C per-limb operation: subtract with borrow, CA = 1 means no borrow
+  define(ADDSUB,	subfc)		C defined, but not referenced by the code below
+  define(func,		mpn_sub_n)
+  define(func_nc,	mpn_sub_nc)
+  define(GENRVAL,	`neg	r3, r3')	C turns r3 = CA - 1 into r3 = 1 - CA (the borrow)
+  define(SETCBR,	`subfic	r0, $1, 0')	C CA = 1 iff the register argument is zero
+  define(CLRCB,		`addic	r0, r1, -1')	C CA = 1 (no borrow) as long as r1 is nonzero
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+	SETCBR(r7)	C carry-in entry: set carry bit from r7 (presumably the carry-in argument)
+	b	L(ent)	C continue in the body shared with func
+EPILOGUE()
+
+PROLOGUE(func)
+	CLRCB			C entry without carry-in: preset the carry bit accordingly
+L(ent):	stw	r31, -4(r1)	C shared entry (func_nc branches here)
+	stw	r30, -8(r1)	C r28-r31 are stored just below r1...
+	stw	r29, -12(r1)	C ...and reloaded at L(ret)
+	stw	r28, -16(r1)
+
+	rlwinm.	r0, r6, 0,30,31	C r0 = n & 3, set cr0
+	cmpwi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
+	addi	r6, r6, 3	C compute count...
+	srwi	r6, r6, 2	C ...for ctr
+	mtctr	r6		C copy count into ctr
+	beq	cr0, L(b00)	C n & 3 = 0
+	blt	cr6, L(b01)	C n & 3 = 1
+	beq	cr6, L(b10)	C n & 3 = 2; otherwise fall through
+
+L(b11):	lwz	r8, 0(r4)	C n & 3 = 3: load s1 limb
+	lwz	r9, 0(r5)	C load s2 limb
+	lwz	r10, 4(r4)	C load s1 limb
+	lwz	r11, 4(r5)	C load s2 limb
+	lwz	r12, 8(r4)	C load s1 limb
+	addi	r4, r4, 12
+	lwz	r0, 8(r5)	C load s2 limb
+	addi	r5, r5, 12
+	ADDSUBC	r29, r9, r8	C add/sub with carry
+	ADDSUBC	r30, r11, r10
+	ADDSUBC	r31, r0, r12
+	stw	r29, 0(r3)	C store the 3 result limbs
+	stw	r30, 4(r3)
+	stw	r31, 8(r3)
+	addi	r3, r3, 12
+	bdnz	L(go)		C more limbs left, in groups of 4
+	b	L(ret)		C nothing left
+
+L(b01):	lwz	r12, 0(r4)	C n & 3 = 1: load s1 limb
+	addi	r4, r4, 4
+	lwz	r0, 0(r5)	C load s2 limb
+	addi	r5, r5, 4
+	ADDSUBC	r31, r0, r12	C add/sub with carry
+	stw	r31, 0(r3)
+	addi	r3, r3, 4
+	bdnz	L(go)		C more limbs left, in groups of 4
+	b	L(ret)		C nothing left
+
+L(b10):	lwz	r10, 0(r4)	C n & 3 = 2: load s1 limb
+	lwz	r11, 0(r5)	C load s2 limb
+	lwz	r12, 4(r4)	C load s1 limb
+	addi	r4, r4, 8
+	lwz	r0, 4(r5)	C load s2 limb
+	addi	r5, r5, 8
+	ADDSUBC	r30, r11, r10	C add/sub with carry
+	ADDSUBC	r31, r0, r12	C add/sub with carry
+	stw	r30, 0(r3)
+	stw	r31, 4(r3)
+	addi	r3, r3, 8
+	bdnz	L(go)		C more limbs left, in groups of 4
+	b	L(ret)		C nothing left
+
+L(b00):	C n & 3 = 0; the carry bit was already set up by CLRCB or SETCBR
+L(go):	lwz	r6, 0(r4)	C load s1 limb
+	lwz	r7, 0(r5)	C load s2 limb
+	lwz	r8, 4(r4)	C load s1 limb
+	lwz	r9, 4(r5)	C load s2 limb
+	lwz	r10, 8(r4)	C load s1 limb
+	lwz	r11, 8(r5)	C load s2 limb
+	lwz	r12, 12(r4)	C load s1 limb
+	lwz	r0, 12(r5)	C load s2 limb
+	bdz	L(end)		C that was the last group: skip the loop
+
+	addi	r4, r4, 16	C step both source pointers past the loaded group
+	addi	r5, r5, 16
+
+	ALIGN(16)
+L(top):	ADDSUBC	r28, r7, r6	C add/sub the limbs loaded previously...
+	lwz	r6, 0(r4)	C load s1 limb
+	lwz	r7, 0(r5)	C load s2 limb
+	ADDSUBC	r29, r9, r8	C ...while loading the next group of 4
+	lwz	r8, 4(r4)	C load s1 limb
+	lwz	r9, 4(r5)	C load s2 limb
+	ADDSUBC	r30, r11, r10
+	lwz	r10, 8(r4)	C load s1 limb
+	lwz	r11, 8(r5)	C load s2 limb
+	ADDSUBC	r31, r0, r12
+	lwz	r12, 12(r4)	C load s1 limb
+	lwz	r0, 12(r5)	C load s2 limb
+	stw	r28, 0(r3)	C store the 4 result limbs
+	addi	r4, r4, 16
+	stw	r29, 4(r3)
+	addi	r5, r5, 16
+	stw	r30, 8(r3)
+	stw	r31, 12(r3)
+	addi	r3, r3, 16
+	bdnz	L(top)		C decrement ctr and loop back
+
+L(end):	ADDSUBC	r28, r7, r6	C add/sub the final group of 4 loaded limbs
+	ADDSUBC	r29, r9, r8
+	ADDSUBC	r30, r11, r10
+	ADDSUBC	r31, r0, r12
+	stw	r28, 0(r3)
+	stw	r29, 4(r3)
+	stw	r30, 8(r3)
+	stw	r31, 12(r3)
+
+L(ret):	lwz	r31, -4(r1)	C reload r28-r31 stored at L(ent)
+	lwz	r30, -8(r1)
+	lwz	r29, -12(r1)
+	lwz	r28, -16(r1)
+
+	subfe	r3, r0, r0	C r3 = CA - 1: 0 if the carry bit is set, else -1
+	GENRVAL			C map that to the 0/1 return value
+	blr
+EPILOGUE()
diff -r 42b23f369c5e -r f29ad42873fe mpn/x86_64/coreisbr/gmp-mparam.h
--- a/mpn/x86_64/coreisbr/gmp-mparam.h	Thu Nov 17 22:19:28 2011 +0100
+++ b/mpn/x86_64/coreisbr/gmp-mparam.h	Sun Nov 20 22:56:02 2011 +0100
@@ -53,9 +53,13 @@
 #define SQR_TOOM6_THRESHOLD                  0
 #define SQR_TOOM8_THRESHOLD                458
 
-#define MULMOD_BNM1_THRESHOLD               13
+#define MULMID_TOOM42_THRESHOLD             24
+
+#define MULMOD_BNM1_THRESHOLD               14
 #define SQRMOD_BNM1_THRESHOLD               14
 
+#define POWM_SEC_TABLE  4,35,130,713,2080
+


More information about the gmp-commit mailing list