[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Nov 21 01:06:06 CET 2011
details: /var/hg/gmp/rev/60293e00f755
changeset: 14456:60293e00f755
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Nov 20 21:47:49 2011 +0100
description:
Configure improvements powerpc64 with abi=32.
details: /var/hg/gmp/rev/34d47e7a3fd4
changeset: 14457:34d47e7a3fd4
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Nov 20 21:49:10 2011 +0100
description:
Provide special powerpc64 add_n/sub_n abi=32 code.
details: /var/hg/gmp/rev/5e479471bb50
changeset: 14458:5e479471bb50
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Nov 20 22:55:07 2011 +0100
description:
Split x86 CPUs into more subtypes for more accurate passing of gcc flags.
details: /var/hg/gmp/rev/f29ad42873fe
changeset: 14459:f29ad42873fe
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Nov 20 22:56:02 2011 +0100
description:
Retune.
diffstat:
ChangeLog | 12 ++
configure.in | 34 +++++-
mpn/powerpc32/p3-p7/aors_n.asm | 176 +++++++++++++++++++++++++++++++++++++++
mpn/x86_64/coreisbr/gmp-mparam.h | 36 ++++---
4 files changed, 235 insertions(+), 23 deletions(-)
diffs (truncated from 347 to 300 lines):
diff -r 42b23f369c5e -r f29ad42873fe ChangeLog
--- a/ChangeLog Thu Nov 17 22:19:28 2011 +0100
+++ b/ChangeLog Sun Nov 20 22:56:02 2011 +0100
@@ -1,3 +1,15 @@
+2011-11-20 Torbjorn Granlund <tege at gmplib.org>
+
+ * configure.in: Split x86 CPUs into more subtypes for more accurate
+ passing of gcc flags.
+
+ * mpn/powerpc32/p3-p7/aors_n.asm: New file.
+
+ * configure.in: Pass -m32 for powerpc64 with abi=32, via the _maybe
+ mechanism.
+
+ * configure.in: Support powerpc32/p3-p7 directory for affected CPUs.
+
2011-11-17 Torbjorn Granlund <tege at gmplib.org>
* tune/speed.c (routine): Add mpn_tabselect.
diff -r 42b23f369c5e -r f29ad42873fe configure.in
--- a/configure.in Thu Nov 17 22:19:28 2011 +0100
+++ b/configure.in Sun Nov 20 22:56:02 2011 +0100
@@ -919,7 +919,7 @@
powerpc620) gcc_cflags_cpu="-mcpu=620" ;;
powerpc630) gcc_cflags_cpu="-mcpu=630"
xlc_cflags_arch="-qarch=pwr3"
- cpu_path="p3" ;;
+ cpu_path="p3 p3-p7" ;;
powerpc740) gcc_cflags_cpu="-mcpu=740" ;;
powerpc7400 | powerpc7410)
gcc_cflags_asm="-Wa,-maltivec"
@@ -935,19 +935,19 @@
powerpc970) gcc_cflags_cpu="-mtune=970"
xlc_cflags_arch="-qarch=970 -qarch=pwr3"
vmx_path="powerpc64/vmx"
- cpu_path="p4" ;;
+ cpu_path="p4 p3-p7" ;;
power4) gcc_cflags_cpu="-mtune=power4"
xlc_cflags_arch="-qarch=pwr4"
- cpu_path="p4" ;;
+ cpu_path="p4 p3-p7" ;;
power5) gcc_cflags_cpu="-mtune=power5 -mtune=power4"
xlc_cflags_arch="-qarch=pwr5"
- cpu_path="p5 p4" ;;
+ cpu_path="p5 p4 p3-p7" ;;
power6) gcc_cflags_cpu="-mtune=power6"
xlc_cflags_arch="-qarch=pwr6"
- cpu_path="p6" ;;
+ cpu_path="p6 p3-p7" ;;
power7) gcc_cflags_cpu="-mtune=power7 -mtune=power5"
xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
- cpu_path="p7 p5 p4" ;;
+ cpu_path="p7 p5 p4 p3-p7" ;;
esac
case $host in
@@ -1012,6 +1012,7 @@
# incompatible with a shared library.
#
abilist="mode64 mode32 $abilist"
+ gcc_32_cflags_maybe="-m32"
gcc_cflags_opt="-O3 -O2 -O1" # will this become used?
cclist_mode32="gcc"
gcc_mode32_cflags_maybe="-m32"
@@ -1057,6 +1058,7 @@
# 64-bits.
#
abilist="mode64 mode32 $abilist"
+ gcc_32_cflags_maybe="-m32"
cclist_mode32="gcc"
gcc_mode32_cflags_maybe="-m32"
gcc_mode32_cflags="-mpowerpc64"
@@ -1534,14 +1536,30 @@
gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
;;
- athlon64 | k8 | k10 | bobcat | bulldozer | x86_64)
+ athlon64 | k8 | x86_64)
gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
;;
- core2 | corei | coreinhm | coreiwsm | coreisbr)
+ k10)
+ gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+ ;;
+ bobcat)
+ gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=btver1 -march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+ ;;
+ bulldozer)
+ gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=bdver1 -march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+ ;;
+ core2)
gcc_cflags_cpu="-mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
;;
+ corei | coreinhm | coreiwsm | coreisbr)
+ gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+ gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+ ;;
atom)
gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
gcc_cflags_arch="-march=atom -march=pentium3"
diff -r 42b23f369c5e -r f29ad42873fe mpn/powerpc32/p3-p7/aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc32/p3-p7/aors_n.asm Sun Nov 20 22:56:02 2011 +0100
@@ -0,0 +1,176 @@
+dnl PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 1.5
+C POWER4/PPC970 2
+C POWER5 2
+C POWER6 2.78
+C POWER7 2.15-2.87
+
+C This code is based on powerpc64/aors_n.asm.
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C vp r5
+C n r6
+
+ifdef(`OPERATION_add_n',`
+ define(ADDSUBC, adde) C limb op that consumes and produces the carry bit CA
+ define(ADDSUB, addc) C carry-out only variant (not referenced in the code shown)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc) C entry point that seeds CA from r7 via SETCBR
+ define(GENRVAL, `addi r3, r3, 1') C map r3 = CA - 1 to the returned carry, CA
+ define(SETCBR, `addic r0, $1, -1') C CA = 1 iff the argument register is nonzero
+ define(CLRCB, `addic r0, r0, 0') C adding 0 cannot carry, so CA = 0
+')
+ifdef(`OPERATION_sub_n',`
+ define(ADDSUBC, subfe) C limb op that consumes and produces the carry bit CA
+ define(ADDSUB, subfc) C carry-out only variant (not referenced in the code shown)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc) C entry point that seeds CA from r7 via SETCBR
+ define(GENRVAL, `neg r3, r3') C map r3 = CA - 1 to the returned borrow, 1 - CA
+ define(SETCBR, `subfic r0, $1, 0') C CA = 1 (no borrow) iff the argument register is zero
+ define(CLRCB, `addic r0, r1, -1') C CA = 1 (no borrow) provided r1 is nonzero
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+ SETCBR(r7) C seed CA from r7 (presumably the carry/borrow-in argument)
+ b L(ent) C join func just past its CLRCB
+EPILOGUE()
+
+PROLOGUE(func)
+ CLRCB C no carry/borrow in: put CA in its neutral state
+L(ent): stw r31, -4(r1) C save r28-r31 below r1 without moving r1
+ stw r30, -8(r1) C NOTE(review): relies on the ABI allowing
+ stw r29, -12(r1) C stores below the stack pointer; confirm
+ stw r28, -16(r1) C for every 32-bit ABI this is built for
+
+ rlwinm. r0, r6, 0,30,31 C r0 = n & 3, set cr0
+ cmpwi cr6, r0, 2 C cr6 = (n & 3) compared with 2
+ addi r6, r6, 3 C compute count...
+ srwi r6, r6, 2 C ...for ctr
+ mtctr r6 C ctr = (n + 3) >> 2; NOTE(review): n = 0 is not handled, presumably n >= 1
+ beq cr0, L(b00) C n mod 4 = 0
+ blt cr6, L(b01) C n mod 4 = 1
+ beq cr6, L(b10) C n mod 4 = 2, else fall through with n mod 4 = 3
+
+L(b11): lwz r8, 0(r4) C n mod 4 = 3: peel 3 limbs; load s1 limb
+ lwz r9, 0(r5) C load s2 limb
+ lwz r10, 4(r4) C load s1 limb
+ lwz r11, 4(r5) C load s2 limb
+ lwz r12, 8(r4) C load s1 limb
+ addi r4, r4, 12 C up += 3 limbs
+ lwz r0, 8(r5) C load s2 limb
+ addi r5, r5, 12 C vp += 3 limbs
+ ADDSUBC r29, r9, r8 C s1 op s2 with carry, lowest limb first
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ stw r29, 0(r3)
+ stw r30, 4(r3)
+ stw r31, 8(r3)
+ addi r3, r3, 12 C rp += 3 limbs
+ bdnz L(go) C ctr still nonzero: 4-limb groups remain
+ b L(ret) C nothing left beyond the peeled limbs
+
+L(b01): lwz r12, 0(r4) C n mod 4 = 1: peel 1 limb; load s1 limb
+ addi r4, r4, 4 C up += 1 limb
+ lwz r0, 0(r5) C load s2 limb
+ addi r5, r5, 4 C vp += 1 limb
+ ADDSUBC r31, r0, r12 C s1 op s2 with carry
+ stw r31, 0(r3)
+ addi r3, r3, 4 C rp += 1 limb
+ bdnz L(go) C ctr still nonzero: 4-limb groups remain
+ b L(ret) C nothing left beyond the peeled limb
+
+L(b10): lwz r10, 0(r4) C n mod 4 = 2: peel 2 limbs; load s1 limb
+ lwz r11, 0(r5) C load s2 limb
+ lwz r12, 4(r4) C load s1 limb
+ addi r4, r4, 8 C up += 2 limbs
+ lwz r0, 4(r5) C load s2 limb
+ addi r5, r5, 8 C vp += 2 limbs
+ ADDSUBC r30, r11, r10 C s1 op s2 with carry, low limb
+ ADDSUBC r31, r0, r12 C s1 op s2 with carry, high limb
+ stw r30, 0(r3)
+ stw r31, 4(r3)
+ addi r3, r3, 8 C rp += 2 limbs
+ bdnz L(go) C ctr still nonzero: 4-limb groups remain
+ b L(ret) C nothing left beyond the peeled limbs
+
+L(b00): C n mod 4 = 0: nothing to peel; CA is still as set up at entry
+L(go): lwz r6, 0(r4) C load s1 limb (r6 and r7 are free: n now lives in ctr)
+ lwz r7, 0(r5) C load s2 limb
+ lwz r8, 4(r4) C load s1 limb
+ lwz r9, 4(r5) C load s2 limb
+ lwz r10, 8(r4) C load s1 limb
+ lwz r11, 8(r5) C load s2 limb
+ lwz r12, 12(r4) C load s1 limb
+ lwz r0, 12(r5) C load s2 limb
+ bdz L(end) C last group already loaded: skip the loop
+
+ addi r4, r4, 16 C point up at the next group
+ addi r5, r5, 16 C point vp at the next group
+
+ ALIGN(16)
+L(top): ADDSUBC r28, r7, r6 C combine the group loaded earlier while loading the next
+ lwz r6, 0(r4) C load s1 limb
+ lwz r7, 0(r5) C load s2 limb
+ ADDSUBC r29, r9, r8
+ lwz r8, 4(r4) C load s1 limb
+ lwz r9, 4(r5) C load s2 limb
+ ADDSUBC r30, r11, r10
+ lwz r10, 8(r4) C load s1 limb
+ lwz r11, 8(r5) C load s2 limb
+ ADDSUBC r31, r0, r12
+ lwz r12, 12(r4) C load s1 limb
+ lwz r0, 12(r5) C load s2 limb
+ stw r28, 0(r3) C store the four result limbs just computed
+ addi r4, r4, 16
+ stw r29, 4(r3)
+ addi r5, r5, 16
+ stw r30, 8(r3)
+ stw r31, 12(r3)
+ addi r3, r3, 16
+ bdnz L(top) C decrement ctr and loop back
+
+L(end): ADDSUBC r28, r7, r6 C final group: limbs are already in registers
+ ADDSUBC r29, r9, r8
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ stw r28, 0(r3)
+ stw r29, 4(r3)
+ stw r30, 8(r3)
+ stw r31, 12(r3)
+
+L(ret): lwz r31, -4(r1) C restore r28-r31 from below r1
+ lwz r30, -8(r1)
+ lwz r29, -12(r1)
+ lwz r28, -16(r1)
+
+ subfe r3, r0, r0 C r3 = CA - 1, i.e. 0 or -1, whatever r0 holds
+ GENRVAL C convert that into the carry/borrow return value in r3
+ blr
+EPILOGUE()
diff -r 42b23f369c5e -r f29ad42873fe mpn/x86_64/coreisbr/gmp-mparam.h
--- a/mpn/x86_64/coreisbr/gmp-mparam.h Thu Nov 17 22:19:28 2011 +0100
+++ b/mpn/x86_64/coreisbr/gmp-mparam.h Sun Nov 20 22:56:02 2011 +0100
@@ -53,9 +53,13 @@
#define SQR_TOOM6_THRESHOLD 0
#define SQR_TOOM8_THRESHOLD 458
-#define MULMOD_BNM1_THRESHOLD 13
+#define MULMID_TOOM42_THRESHOLD 24
+
+#define MULMOD_BNM1_THRESHOLD 14
#define SQRMOD_BNM1_THRESHOLD 14
+#define POWM_SEC_TABLE 4,35,130,713,2080
+
More information about the gmp-commit
mailing list