[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Nov 25 23:58:16 CET 2011
details: /var/hg/gmp/rev/d6c6cdd7ca6b
changeset: 14478:d6c6cdd7ca6b
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Nov 25 23:55:30 2011 +0100
description:
Overhaul x86/x86_64 support, merging three case statements into one.
details: /var/hg/gmp/rev/5faab8df9eb2
changeset: 14479:5faab8df9eb2
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Nov 25 23:57:06 2011 +0100
description:
Many new gmp-mparam.h files for 64-bit CPUs in 32-bit mode.
details: /var/hg/gmp/rev/06e562700261
changeset: 14480:06e562700261
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Nov 25 23:57:13 2011 +0100
description:
*** empty log message ***
diffstat:
ChangeLog | 7 +
configure.in | 112 +++++++++++++++---------------
mpn/x86/bobcat/gmp-mparam.h | 141 ++++++++++++++++++++++++++++++++++++++
mpn/x86/core2/gmp-mparam.h | 141 ++++++++++++++++++++++++++++++++++++++
mpn/x86/coreinhm/gmp-mparam.h | 141 ++++++++++++++++++++++++++++++++++++++
mpn/x86/coreisbr/gmp-mparam.h | 140 ++++++++++++++++++++++++++++++++++++++
mpn/x86/k10/gmp-mparam.h | 142 +++++++++++++++++++++++++++++++++++++++
mpn/x86/k8/gmp-mparam.h | 144 +++++++++++++++++++++++++++++++++++++++
mpn/x86/nano/gmp-mparam.h | 152 ++++++++++++++++++++++++++++++++++++++++++
9 files changed, 1064 insertions(+), 56 deletions(-)
diffs (truncated from 1269 to 300 lines):
diff -r da32b78d3376 -r 06e562700261 ChangeLog
--- a/ChangeLog Thu Nov 24 22:11:22 2011 +0100
+++ b/ChangeLog Fri Nov 25 23:57:13 2011 +0100
@@ -1,3 +1,10 @@
+2011-11-25 Torbjorn Granlund <tege at gmplib.org>
+
+ * x86/*: Many new gmp-mparam.h file for 64-bit CPUs in 32-bit mode.
+
+ * configure.in: Overhaul x86/x86_64 support, merging three case
+ statements into one.
+
2011-11-24 Torbjorn Granlund <tege at gmplib.org>
* doc/gmp.texi (Formatted Output Strings): Clarify rules for mpf_t
diff -r da32b78d3376 -r 06e562700261 configure.in
--- a/configure.in Thu Nov 24 22:11:22 2011 +0100
+++ b/configure.in Fri Nov 25 23:57:13 2011 +0100
@@ -1468,46 +1468,62 @@
i386*)
gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
gcc_cflags_arch="-march=i386"
+ path="x86"
;;
i486*)
gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
gcc_cflags_arch="-march=i486"
+ path="x86/i486 x86"
;;
i586 | pentium)
gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
gcc_cflags_arch="-march=pentium"
+ path="x86/pentium x86"
;;
pentiummmx)
gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
gcc_cflags_arch="-march=pentium-mmx -march=pentium"
+ path="x86/pentium/mmx x86/pentium x86"
;;
i686 | pentiumpro)
gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=pentiumpro -march=pentium"
+ path="x86/p6 x86"
;;
pentium2)
gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
+ path="x86/p6/mmx x86/p6 x86"
;;
- pentium3 | pentiumm)
+ pentium3)
gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+ path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ ;;
+ pentiumm)
+ gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+ path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
;;
k6)
gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
gcc_cflags_arch="-march=k6"
+ path="x86/k6/mmx x86/k6 x86"
;;
k62)
gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
gcc_cflags_arch="-march=k6-2 -march=k6"
+ path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
;;
k63)
gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
gcc_cflags_arch="-march=k6-3 -march=k6"
+ path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
;;
geode)
gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
gcc_cflags_arch="-march=k6-3 -march=k6"
+ path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
;;
athlon)
# Athlon instruction costs are close to P6 (3 cycle load latency,
@@ -1515,6 +1531,7 @@
# know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
+ path="x86/k7/mmx x86/k7 x86"
;;
i786 | pentium4)
# pentiumpro is the primary fallback when gcc doesn't know pentium4.
@@ -1524,77 +1541,84 @@
gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
gcc_64_cflags_cpu="-mtune=nocona"
+ path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
+ path_64="x86_64/pentium4 x86_64"
;;
viac32)
# Not sure of the best fallbacks here for -mcpu.
# c3-2 has sse and mmx, so pentium3 is good for -march.
gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
+ path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
;;
viac3*)
# Not sure of the best fallbacks here.
gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
+ path="x86/pentium/mmx x86/pentium x86"
;;
athlon64 | k8 | x86_64)
gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
+ path="x86/k8 x86"
+ path_64="x86_64/k8 x86_64"
;;
k10)
gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
- gcc_cflags_arch="-march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+ gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+ path="x86/k10 x86/k8 x86"
+ path_64="x86_64/k10 x86_64/k8 x86_64"
;;
bobcat)
gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
- gcc_cflags_arch="-march=btver1 -march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+ gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+ path="x86/bobcat x86"
+ path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
;;
- bulldozer)
+ bulldozer | bd1)
gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
- gcc_cflags_arch="-march=bdver1 -march=amdfam10 -mtune=k8 -march=k8~-mno-sse2"
+ gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+ path="x86/bd1 x86"
+ path_64="x86_64/bd1 x86_64"
;;
core2)
gcc_cflags_cpu="-mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- ;;
- corei | coreinhm | coreiwsm | coreisbr)
+ path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ path_64="x86_64/core2 x86_64"
+ ;;
+ corei | coreinhm | coreiwsm)
gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+ path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ path_64="x86_64/coreinhm x86_64/core2 x86_64"
+ ;;
+ coreisbr)
+ gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+ gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+ path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
;;
atom)
gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
gcc_cflags_arch="-march=atom -march=pentium3"
+ path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
+ path_64="x86_64/atom x86_64"
+ ;;
+ nano)
+ gcc_cflags_cpu="-mtune=nano"
+ gcc_cflags_arch="-march=nano"
+ path="x86/nano x86"
+ path_64="x86_64/nano x86_64"
;;
*)
gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
gcc_cflags_arch="-march=i486"
+ path="x86"
+ path_64="x86_64"
;;
esac
- case $host_cpu in
- i386*) path="x86" ;;
- i486*) path="x86/i486 x86" ;;
- i586 | pentium) path="x86/pentium x86" ;;
- pentiummmx) path="x86/pentium/mmx x86/pentium x86" ;;
- i686 | pentiumpro) path="x86/p6 x86" ;;
- pentium2) path="x86/p6/mmx x86/p6 x86" ;;
- pentium3) path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
- pentiumm | core2 | corei | coreinhm | coreiwsm | coreisbr)
- path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
- [k6[23]]) path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86" ;;
- k6) path="x86/k6/mmx x86/k6 x86" ;;
- geode) path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86" ;;
- # we don't have any specific 32-bit code for athlon64/opteron, the
- # athlon code should be reasonable
- athlon | athlon64 | k8 | k10 | bobcat | bulldozer)
- path="x86/k7/mmx x86/k7 x86" ;;
- i786 | pentium4) path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86" ;;
- # VIA/Centaur processors, sold as CyrixIII and C3.
- viac32) path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
- viac3*) path="x86/pentium/mmx x86/pentium x86";;
- atom) path="x86/atom/sse2 x86/atom/mmx x86/atom x86" ;;
- *) path="x86" ;;
- esac
-
case $host in
X86_64_PATTERN)
cclist_64="gcc"
@@ -1604,34 +1628,10 @@
SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
cyclecounter_size_64=2
abilist="64 32"
- path_64="x86_64"
if test "$enable_assembly" = "yes" ; then
extra_functions_64="invert_limb_table"
fi
- case $host_cpu in
- x86_64)
- ;;
- k10 | bulldozer)
- path_64="x86_64/k10 x86_64/k8 $path_64" ;;
- athlon64 | k8)
- path_64="x86_64/k8 $path_64" ;;
- bobcat)
- path_64="x86_64/bobcat x86_64/k10 x86_64/k8 $path_64" ;;
- pentium4)
- path_64="x86_64/pentium4 $path_64" ;;
- core2)
- path_64="x86_64/core2 $path_64" ;;
- corei | coreinhm | coreiwsm)
- path_64="x86_64/coreinhm x86_64/core2 $path_64" ;;
- coreisbr)
- path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 $path_64" ;;
- atom)
- path_64="x86_64/atom $path_64" ;;
- nano)
- path_64="x86_64/nano $path_64" ;;
- esac
-
case $host in
*-*-solaris*)
# Sun cc.
diff -r da32b78d3376 -r 06e562700261 mpn/x86/bobcat/gmp-mparam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/bobcat/gmp-mparam.h Fri Nov 25 23:57:13 2011 +0100
@@ -0,0 +1,141 @@
+/* x86/bobcat gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 12
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 40
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 85
+#define MUL_TOOM44_THRESHOLD 147
+#define MUL_TOOM6H_THRESHOLD 270
+#define MUL_TOOM8H_THRESHOLD 454
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 107
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 111
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 38
+#define SQR_TOOM3_THRESHOLD 101
+#define SQR_TOOM4_THRESHOLD 220
+#define SQR_TOOM6_THRESHOLD 303
+#define SQR_TOOM8_THRESHOLD 454
+
+#define MULMID_TOOM42_THRESHOLD 76
More information about the gmp-commit
mailing list