[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 27 00:31:13 UTC 2017
details: /var/hg/gmp/rev/2c1de5efc3f9
changeset: 17507:2c1de5efc3f9
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Dec 27 00:19:25 2017 +0100
description:
Recognise POWER9 and more variants of POWER8.
Reorder recog code to favour PVR over proc/cpuinfo.
details: /var/hg/gmp/rev/2355dc8177e6
changeset: 17508:2355dc8177e6
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Dec 27 00:20:16 2017 +0100
description:
Separate handling of POWER8 and POWER9.
details: /var/hg/gmp/rev/19d4782aaaca
changeset: 17509:19d4782aaaca
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Dec 27 00:27:52 2017 +0100
description:
Provide POWER9 addmul_1.asm, utilising maddld/maddhdu.
diffstat:
config.guess | 53 +++++++++------
configure.ac | 5 +-
mpn/powerpc64/p9/addmul_1.asm | 136 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 171 insertions(+), 23 deletions(-)
diffs (241 lines):
diff -r ceb91e187c23 -r 19d4782aaaca config.guess
--- a/config.guess Thu Dec 14 23:48:09 2017 +0100
+++ b/config.guess Wed Dec 27 00:27:52 2017 +0100
@@ -393,27 +393,9 @@
# development", so it doesn't seem wise to use it just yet, not while
# there's an alternative.
- # Grep the /proc/cpuinfo pseudo-file.
- # Anything unrecognised is ignored, since of course we mustn't spit out
- # a cpu type config.sub doesn't know.
- if test -z "$exact_cpu" && test -f /proc/cpuinfo; then
- x=`grep "^cpu[ ]" /proc/cpuinfo | head -n 1`
- x=`echo $x | sed -n 's/^cpu[ ]*:[ ]*\([A-Za-z0-9]*\).*/\1/p'`
- x=`echo $x | sed 's/PPC//'`
- case $x in
- 601) exact_cpu="power" ;;
- 603ev) exact_cpu="powerpc603e" ;;
- 604ev5) exact_cpu="powerpc604e" ;;
- 970??) exact_cpu="powerpc970" ;;
- 603 | 603e | 604 | 604e | 750 | 821 | 860)
- exact_cpu="powerpc$x" ;;
- POWER[4-9]*)
- exact_cpu=`echo $x | sed -e "s;POWER;power;" -e "s;[a-zA-Z]*$;;"` ;;
- esac
- fi
-
- # Try to read the PVR. mfpvr is a protected instruction, NetBSD, MacOS
- # and AIX don't allow it in user mode, but the Linux kernel does.
+ # Try to read the PVR. mfpvr is a protected instruction, NetBSD, MacOS and
+ # AIX don't allow it in user mode, but the Linux kernel does. We prefer this
+ # to /proc/cpuinfo since the latter lags for newer CPUs.
#
# Note this is no good on AIX, since a C function there is the address of
# a function descriptor, not actual code. But this doesn't matter since
@@ -442,7 +424,10 @@
case 0x000c: puts ("powerpc7400"); break;
case 0x0041: puts ("powerpc630"); break;
case 0x003f: puts ("power7"); break;
- case 0x004b: puts ("power8"); break;
+ case 0x004b:
+ case 0x004c:
+ case 0x004d: puts ("power8"); break;
+ case 0x004e: puts ("power9"); break;
case 0x0050: puts ("powerpc860"); break;
case 0x8000: puts ("powerpc7450"); break;
case 0x8001: puts ("powerpc7455"); break;
@@ -463,6 +448,27 @@
fi
fi
+
+ # Grep the /proc/cpuinfo pseudo-file.
+ # Anything unrecognised is ignored, since of course we mustn't spit out
+ # a cpu type config.sub doesn't know.
+ if test -z "$exact_cpu" && test -f /proc/cpuinfo; then
+ x=`grep "^cpu[ ]" /proc/cpuinfo | head -n 1`
+ x=`echo $x | sed -n 's/^cpu[ ]*:[ ]*\([A-Za-z0-9]*\).*/\1/p'`
+ x=`echo $x | sed 's/PPC//'`
+ case $x in
+ 601) exact_cpu="power" ;;
+ 603ev) exact_cpu="powerpc603e" ;;
+ 604ev5) exact_cpu="powerpc604e" ;;
+ 970??) exact_cpu="powerpc970" ;;
+ 603 | 603e | 604 | 604e | 750 | 821 | 860)
+ exact_cpu="powerpc$x" ;;
+ POWER[4-9]*)
+ exact_cpu=`echo $x | sed -e "s;POWER;power;" -e "s;[a-zA-Z]*$;;"` ;;
+ esac
+ fi
+
+
if test -z "$exact_cpu"; then
# On AIX, try looking at _system_configuration. This is present in
# version 4 at least.
@@ -514,6 +520,9 @@
#ifdef POWER_8
case POWER_8: puts ("power8"); break;
#endif
+#ifdef POWER_9
+ case POWER_9: puts ("power9"); break;
+#endif
default:
if (_system_configuration.architecture == POWER_RS)
puts ("power");
diff -r ceb91e187c23 -r 19d4782aaaca configure.ac
--- a/configure.ac Thu Dec 14 23:48:09 2017 +0100
+++ b/configure.ac Wed Dec 27 00:27:52 2017 +0100
@@ -1119,9 +1119,12 @@
power7) gcc_cflags_cpu="-mtune=power7 -mtune=power5"
xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
cpu_path="p7 p5 p4 p3-p7" ;;
- [power[89]]) gcc_cflags_cpu="-mtune=power8 -mtune=power7 -mtune=power5"
+ power8) gcc_cflags_cpu="-mtune=power8 -mtune=power7 -mtune=power5"
xlc_cflags_arch="-qarch=pwr8 -qarch=pwr7 -qarch=pwr5"
cpu_path="p8 p7 p5 p4 p3-p7" ;;
+ power9) gcc_cflags_cpu="-mtune=power9 -mtune=power8 -mtune=power7 -mtune=power5"
+ xlc_cflags_arch="-qarch=pwr9 -qarch=pwr8 -qarch=pwr7 -qarch=pwr5"
+ cpu_path="p9 p8 p7 p5 p4 p3-p7" ;;
esac
case $host in
diff -r ceb91e187c23 -r 19d4782aaaca mpn/powerpc64/p9/addmul_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/p9/addmul_1.asm Wed Dec 27 00:27:52 2017 +0100
@@ -0,0 +1,136 @@
+dnl POWER9 mpn_addmul_1.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 -
+C POWER4/PPC970 -
+C POWER5 -
+C POWER6 -
+C POWER7 -
+C POWER8 -
+C POWER9 ?
+
+C TODO
+C * Schedule for POWER9 pipeline.
+C * Unroll to at least 4x if that proves beneficial.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`v0', `r6')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ std r31, -8(r1) C save r31 below the stack pointer; reloaded before each blr
+C Adds the n-limb product up[0..n-1] * v0 into rp[0..n-1] (64-bit limbs) and returns the carry-out limb in r3. Assumes n >= 1 -- TODO confirm.
+ cmpdi cr6, n, 2 C cr6 = n compared with 2, tested by the ble cr6 branches below
+
+ addi r0, n, -1
+ srdi r0, r0, 1
+ mtctr r0 C ctr = (n - 1) >> 1
+
+ rldicl. r0, n, 0,63 C r0 = n & 1, set cr0
+ bne cr0, L(b1) C odd n takes the L(b1) entry
+C Even n: start with limbs 0 and 1.
+L(b0): ld r10, 0(rp)
+ ld r12, 0(up)
+ ld r11, 8(rp)
+ ld r31, 8(up)
+ maddld r0, r12, v0, r10 C r0 = low 64 bits of up[0] * v0 + rp[0]
+ maddhdu r7, r12, v0, r10 C r7 = high 64 bits of the same sum
+ ble cr6, L(2) C n <= 2: finish in the two-limb tail
+ ld r10, 16(rp)
+ ld r12, 16(up)
+ maddld r8, r31, v0, r11
+ maddhdu r5, r31, v0, r11
+ addic up, up, 16 C addic also writes CA; presumably relied on to clear it before the adde chain -- TODO confirm
+ addi rp, rp, -8 C bias rp so that 8(rp) at L(mid) addresses result limb 0
+ b L(mid)
+C Odd n: start with limb 0 alone.
+L(b1): ld r11, 0(rp)
+ ld r31, 0(up)
+ ble cr6, L(1) C n <= 2: finish in the one-limb tail
+ ld r10, 8(rp)
+ ld r12, 8(up)
+ maddld r0, r31, v0, r11
+ maddhdu r5, r31, v0, r11
+ ld r11, 16(rp)
+ ld r31, 16(up)
+ maddld r9, r12, v0, r10
+ maddhdu r7, r12, v0, r10
+ addic up, up, 24 C addic also writes CA; presumably relied on to clear it before the adde chain -- TODO confirm
+ bdz L(end) C ctr hits 0 here when n = 3
+C Main loop, two limbs per iteration; L(mid) is the entry point for even n.
+ ALIGN(16)
+L(top): ld r10, 24(rp)
+ ld r12, 0(up)
+ std r0, 0(rp) C store the result limb completed by the previous adde
+ maddld r8, r31, v0, r11 C W:0,2,4
+ adde r0, r5, r9 C earlier high limb + low limb + CA; r5 is read before maddhdu rewrites it
+ maddhdu r5, r31, v0, r11 C W:1,3,5
+L(mid): ld r11, 32(rp)
+ ld r31, 8(up)
+ std r0, 8(rp)
+ maddld r9, r12, v0, r10 C W:1,3,5
+ adde r0, r7, r8 C earlier high limb + low limb + CA; r7 is read before maddhdu rewrites it
+ maddhdu r7, r12, v0, r10 C W:2,4,6
+ addi rp, rp, 16
+ addi up, up, 16
+ bdnz L(top)
+C Wind-down: one last multiply, then the final three result limbs are stored.
+L(end): std r0, 0(rp)
+ maddld r8, r31, v0, r11
+ adde r0, r5, r9
+ maddhdu r5, r31, v0, r11
+ std r0, 8(rp)
+ adde r0, r7, r8
+ std r0, 16(rp)
+ addze r3, r5 C return value = last high limb + CA
+ ld r31, -8(r1) C restore r31
+ blr
+C Two-limb tail, reached only from L(b0).
+L(2): maddld r8, r31, v0, r11
+ maddhdu r5, r31, v0, r11
+ std r0, 0(rp)
+ addc r0, r7, r8 C addc ignores the incoming CA and sets a fresh one
+ std r0, 8(rp)
+ addze r3, r5 C return value = high limb + CA
+ ld r31, -8(r1) C restore r31
+ blr
+C One-limb tail, reached only from L(b1).
+L(1): maddld r0, r31, v0, r11
+ std r0, 0(rp) C must come before the next line, since rp is r3
+ maddhdu r3, r31, v0, r11 C return value = high 64 bits of up[0] * v0 + rp[0]
+ ld r31, -8(r1) C restore r31
+ blr
+EPILOGUE()
More information about the gmp-commit
mailing list