[Gmp-commit] /home/hgfiles/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon May 3 02:29:58 CEST 2010
details: /home/hgfiles/gmp/rev/2ee6f52ba733
changeset: 13587:2ee6f52ba733
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun May 02 21:51:45 2010 +0200
description:
Major overhaul of powerpc support.
details: /home/hgfiles/gmp/rev/34b31e21e1ee
changeset: 13588:34b31e21e1ee
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun May 02 21:52:13 2010 +0200
description:
Recognise power7.
details: /home/hgfiles/gmp/rev/dff63af3a2c3
changeset: 13589:dff63af3a2c3
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 03 02:26:07 2010 +0200
description:
New file.
details: /home/hgfiles/gmp/rev/4a1d128f5d4e
changeset: 13590:4a1d128f5d4e
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 03 02:28:33 2010 +0200
description:
Complete rewrite.
details: /home/hgfiles/gmp/rev/acb26f202896
changeset: 13591:acb26f202896
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 03 02:29:52 2010 +0200
description:
Fix some alignments, add cycle counts coherently.
diffstat:
ChangeLog | 10 +
config.guess | 3 +
configure.in | 64 +++-----
mpn/powerpc64/mode64/aors_n.asm | 40 +----
mpn/powerpc64/mode64/aorslshC_n.asm | 12 +-
mpn/powerpc64/mode64/aorsmul_1.asm | 4 +-
mpn/powerpc64/mode64/invert_limb.asm | 9 +-
mpn/powerpc64/mode64/mod_1_4.asm | 256 ++++++++++++++++++++++++++++++++++
mpn/powerpc64/mode64/mod_34lsub1.asm | 11 +-
mpn/powerpc64/mode64/mul_basecase.asm | 10 +-
mpn/powerpc64/mode64/rsh1add_n.asm | 25 +-
mpn/powerpc64/mode64/rsh1sub_n.asm | 25 +-
mpn/powerpc64/sqr_diagonal.asm | 97 +++++++++---
13 files changed, 421 insertions(+), 145 deletions(-)
diffs (truncated from 837 to 300 lines):
diff -r 3ee4dc316ee6 -r acb26f202896 ChangeLog
--- a/ChangeLog Sun May 02 11:24:00 2010 +0200
+++ b/ChangeLog Mon May 03 02:29:52 2010 +0200
@@ -1,5 +1,15 @@
+2010-05-03 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/sqr_diagonal.asm: Complete rewrite.
+
+ * mpn/powerpc64/mode64/mod_1_4.asm: New file.
+
2010-05-02 Torbjorn Granlund <tege at gmplib.org>
+ * config.guess: Recognise power7.
+
+ * configure.in: Major overhaul of powerpc support.
+
* mpn/powerpc64/p6/lshift.asm: New file.
* mpn/powerpc64/p6/lshiftc.asm: Likewise.
* mpn/powerpc64/p6/rshift.asm: Likewise.
diff -r 3ee4dc316ee6 -r acb26f202896 config.guess
--- a/config.guess Sun May 02 11:24:00 2010 +0200
+++ b/config.guess Mon May 03 02:29:52 2010 +0200
@@ -454,6 +454,9 @@
#ifdef POWER_6
case POWER_6: puts ("power6"); break;
#endif
+#ifdef POWER_7
+ case POWER_7: puts ("power7"); break;
+#endif
default:
if (_system_configuration.architecture == POWER_RS)
puts ("power");
diff -r 3ee4dc316ee6 -r acb26f202896 configure.in
--- a/configure.in Sun May 02 11:24:00 2010 +0200
+++ b/configure.in Mon May 03 02:29:52 2010 +0200
@@ -885,32 +885,27 @@
path="powerpc32" ;;
esac
- # gcc 2.7.2 knows -mcpu=403, 601, 603, 604.
- # gcc 2.95 adds 401, 505, 602, 603e, ec603e, 604e, 620, 740, 750,
- # 801, 821, 823, 860.
- # gcc 3.0 adds 630, rs64a.
- # gcc 3.1 adds 405, 7400, 7450.
- # gcc 3.2 adds nothing.
- # gcc 3.3 adds power3, power4, 8540. power3 seems to be a synonym for 630.
- # gcc pre-release 3.4 adds 405fp, 440, 440fp, 970.
- #
- # FIXME: The way 603e falls back to 603 for gcc 2.7.2 should be
- # done for all the others too. But what would be the correct
- # arrangements?
- #
case $host_cpu in
powerpc401) gcc_cflags_cpu="-mcpu=401" ;;
- powerpc403) gcc_cflags_cpu="-mcpu=403" ;;
+ powerpc403) gcc_cflags_cpu="-mcpu=403"
+ xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
powerpc405) gcc_cflags_cpu="-mcpu=405" ;;
powerpc505) gcc_cflags_cpu="-mcpu=505" ;;
- powerpc601) gcc_cflags_cpu="-mcpu=601" ;;
- powerpc602) gcc_cflags_cpu="-mcpu=602" ;;
- powerpc603) gcc_cflags_cpu="-mcpu=603" ;;
- powerpc603e) gcc_cflags_cpu="-mcpu=603e -mcpu=603" ;;
- powerpc604) gcc_cflags_cpu="-mcpu=604" ;;
- powerpc604e) gcc_cflags_cpu="-mcpu=604e -mcpu=604" ;;
+ powerpc601) gcc_cflags_cpu="-mcpu=601"
+ xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
+ powerpc602) gcc_cflags_cpu="-mcpu=602"
+ xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
+ powerpc603) gcc_cflags_cpu="-mcpu=603"
+ xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+ powerpc603e) gcc_cflags_cpu="-mcpu=603e -mcpu=603"
+ xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+ powerpc604) gcc_cflags_cpu="-mcpu=604"
+ xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+ powerpc604e) gcc_cflags_cpu="-mcpu=604e -mcpu=604"
+ xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
powerpc620) gcc_cflags_cpu="-mcpu=620" ;;
powerpc630) gcc_cflags_cpu="-mcpu=630"
+ xlc_cflags_arch="-qarch=pwr3"
cpu_path="p3" ;;
powerpc740) gcc_cflags_cpu="-mcpu=740" ;;
powerpc7400 | powerpc7410)
@@ -925,14 +920,21 @@
powerpc823) gcc_cflags_cpu="-mcpu=823" ;;
powerpc860) gcc_cflags_cpu="-mcpu=860" ;;
powerpc970) gcc_cflags_cpu="-mtune=970"
+ xlc_cflags_arch="-qarch=970 -qarch=pwr3"
vmx_path="powerpc64/vmx"
cpu_path="p4" ;;
power4) gcc_cflags_cpu="-mtune=power4"
+ xlc_cflags_arch="-qarch=pwr4"
cpu_path="p4" ;;
power5) gcc_cflags_cpu="-mtune=power5 -mtune=power4"
+ xlc_cflags_arch="-qarch=pwr5"
cpu_path="p5 p4" ;;
power6) gcc_cflags_cpu="-mtune=power6"
+ xlc_cflags_arch="-qarch=pwr6"
cpu_path="p6" ;;
+ power7) gcc_cflags_cpu="-mtune=power7 -mtune=power5"
+ xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
+ cpu_path="p7 p5 p4" ;;
esac
case $host in
@@ -944,23 +946,6 @@
xlc_32_cflags_maybe="-q32"
ar_32_flags="-X32"
nm_32_flags="-X32"
-
- # xlc (what version?) knows -qarch=ppc, ppcgr, 601, 602, 603, 604,
- # 403, rs64a
- # -qarch=ppc is needed, so ensure everything falls back to that.
- # FIXME: Perhaps newer versions know more flavours.
- #
- case $host_cpu in
- powerpc403) xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
- powerpc601) xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
- powerpc602) xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
- powerpc603) xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
- powerpc603e) xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
- powerpc604) xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
- powerpc604e) xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
- *) xlc_cflags_arch="-qarch=ppc" ;;
- esac
- ;;
esac
case $host in
@@ -974,7 +959,8 @@
gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
gcc_aix64_cflags_optlist="cpu"
gcc_aix64_ldflags="-Wc,-maix64"
- xlc_aix64_cflags="-O2 -q64 -qtune=pwr3 -qmaxmem=20000"
+ xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
+ xlc_aix64_cflags_optlist="arch"
xlc_aix64_ldflags="-Wc,-q64"
# Must indicate object type to ar and nm
ar_aix64_flags="-X64"
@@ -1074,7 +1060,7 @@
;;
- # POWER
+ # POWER 32-bit
[power-*-* | power[12]-*-* | power2sc-*-*])
AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
HAVE_HOST_CPU_FAMILY_power=1
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/aors_n.asm
--- a/mpn/powerpc64/mode64/aors_n.asm Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/aors_n.asm Mon May 03 02:29:52 2010 +0200
@@ -20,37 +20,11 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.5
-C POWER4/PPC970: 2
-
-C n POWER3/PPC630 POWER4/PPC970
-C 1 17.00 19.00
-C 2 9.00 10.49
-C 3 5.33 7.66
-C 4 4.50 5.14
-C 5 4.20 4.80
-C 6 3.83 4.33
-C 7 3.00 3.99
-C 8 2.87 3.55
-C 9 2.89 3.40
-C 10 2.60 3.42
-C 11 2.45 3.15
-C 12 2.41 2.99
-C 13 2.46 3.01
-C 14 2.42 2.97
-C 15 2.20 2.85
-C 50 1.78 2.44
-C 100 1.83 2.20
-C 200 1.55 2.12
-C 400 1.53 2.05
-C 1000 1.98 2.02#
-C 2000 1.50# 2.04
-C 4000 2.55 2.50
-C 8000 2.70 2.45
-C 16000 2.65 5.94
-C 32000 2.62 16.41
-C 64000 2.73 18.94
+C cycles/limb
+C POWER3/PPC630 1.5
+C POWER4/PPC970 2
+C POWER5 2.25
+C POWER6 2.63
C This code is a little bit slower for POWER3/PPC630 than the simple code used
C previously, but it is much faster for POWER4/PPC970. The reason for the
@@ -162,7 +136,7 @@
addi r4, r4, 32
addi r5, r5, 32
-L(oop): ADDSUBC r28, r7, r6
+L(top): ADDSUBC r28, r7, r6
ld r6, 0(r4) C load s1 limb
ld r7, 0(r5) C load s2 limb
ADDSUBC r29, r9, r8
@@ -181,7 +155,7 @@
std r30, 16(r3)
std r31, 24(r3)
addi r3, r3, 32
- bdnz L(oop) C decrement ctr and loop back
+ bdnz L(top) C decrement ctr and loop back
L(end): ADDSUBC r28, r7, r6
ADDSUBC r29, r9, r8
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/aorslshC_n.asm
--- a/mpn/powerpc64/mode64/aorslshC_n.asm Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/aorslshC_n.asm Mon May 03 02:29:52 2010 +0200
@@ -19,11 +19,11 @@
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.83 (1.5 c/l should be possible)
-C POWER4/PPC970: 3 (2.0 c/l should be possible)
-C POWER5: 3
-C POWER6: 3.5-47
+C cycles/limb
+C POWER3/PPC630 1.83 (1.5 c/l should be possible)
+C POWER4/PPC970 3 (2.0 c/l should be possible)
+C POWER5 3
+C POWER6 3.5-47
C STATUS
C * Try combining upx+up, and vpx+vp.
@@ -130,7 +130,7 @@
addi vp, vp, 16
addi vpx, vpx, 24
- ALIGN(16)
+ ALIGN(32)
L(top): ldx u0, rp, up
ldx v0, rp, vp
rldimi s1, v1, LSH, 0
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/aorsmul_1.asm
--- a/mpn/powerpc64/mode64/aorsmul_1.asm Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/aorsmul_1.asm Mon May 03 02:29:52 2010 +0200
@@ -24,8 +24,8 @@
C cycles/limb cycles/limb
C POWER3/PPC630 6-18 6-18
C POWER4/PPC970 8? 8.3? not updated for last file revision
-C POWER5 8 8.75
-C POWER6 16 16.5
+C POWER5 8 8.63
+C POWER6 16.25 16.75
C TODO
C * Try to reduce the number of needed live registers
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/invert_limb.asm
--- a/mpn/powerpc64/mode64/invert_limb.asm Sun May 02 11:24:00 2010 +0200
+++ b/mpn/powerpc64/mode64/invert_limb.asm Mon May 03 02:29:52 2010 +0200
@@ -19,10 +19,11 @@
include(`../config.m4')
-C cycles/limb (approximate)
-C POWER3/PPC630: 80
-C POWER4/PPC970: 86
-C POWER5: 86
+C cycles/limb (approximate)
+C POWER3/PPC630 80
+C POWER4/PPC970 86
+C POWER5 86
+C POWER6 170
ASM_START()
PROLOGUE(mpn_invert_limb)
diff -r 3ee4dc316ee6 -r acb26f202896 mpn/powerpc64/mode64/mod_1_4.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/mod_1_4.asm Mon May 03 02:29:52 2010 +0200
@@ -0,0 +1,256 @@
+dnl PowerPC-64 mpn_mod_1s_4p
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
More information about the gmp-commit mailing list