[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Dec 27 00:31:13 UTC 2017


details:   /var/hg/gmp/rev/2c1de5efc3f9
changeset: 17507:2c1de5efc3f9
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Dec 27 00:19:25 2017 +0100
description:
Recognise POWER9 and more variants of POWER8.
Reorder recog code to favour PVR over proc/cpuinfo.

details:   /var/hg/gmp/rev/2355dc8177e6
changeset: 17508:2355dc8177e6
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Dec 27 00:20:16 2017 +0100
description:
Separate handling of POWER8 and POWER9.

details:   /var/hg/gmp/rev/19d4782aaaca
changeset: 17509:19d4782aaaca
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Dec 27 00:27:52 2017 +0100
description:
Provide POWER9 addmul_1.asm, utilising maddld/maddhdu.

diffstat:

 config.guess                  |   53 +++++++++------
 configure.ac                  |    5 +-
 mpn/powerpc64/p9/addmul_1.asm |  136 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 171 insertions(+), 23 deletions(-)

diffs (241 lines):

diff -r ceb91e187c23 -r 19d4782aaaca config.guess
--- a/config.guess	Thu Dec 14 23:48:09 2017 +0100
+++ b/config.guess	Wed Dec 27 00:27:52 2017 +0100
@@ -393,27 +393,9 @@
   # development", so it doesn't seem wise to use it just yet, not while
   # there's an alternative.
 
-  # Grep the /proc/cpuinfo pseudo-file.
-  # Anything unrecognised is ignored, since of course we mustn't spit out
-  # a cpu type config.sub doesn't know.
-  if test -z "$exact_cpu" && test -f /proc/cpuinfo; then
-    x=`grep "^cpu[ 	]" /proc/cpuinfo | head -n 1`
-    x=`echo $x | sed -n 's/^cpu[ 	]*:[ 	]*\([A-Za-z0-9]*\).*/\1/p'`
-    x=`echo $x | sed 's/PPC//'`
-    case $x in
-      601)     exact_cpu="power" ;;
-      603ev)   exact_cpu="powerpc603e" ;;
-      604ev5)  exact_cpu="powerpc604e" ;;
-      970??)   exact_cpu="powerpc970" ;;
-      603 | 603e | 604 | 604e | 750 | 821 | 860)
-        exact_cpu="powerpc$x" ;;
-      POWER[4-9]*)
-        exact_cpu=`echo $x | sed -e "s;POWER;power;" -e "s;[a-zA-Z]*$;;"` ;;
-    esac
-  fi
-
-  # Try to read the PVR.  mfpvr is a protected instruction, NetBSD, MacOS
-  # and AIX don't allow it in user mode, but the Linux kernel does.
+  # Try to read the PVR.  mfpvr is a protected instruction, NetBSD, MacOS and
+  # AIX don't allow it in user mode, but the Linux kernel does.  We prefer this
+  # to /proc/cpuinfo since the latter lags for newer CPUs.
   #
   # Note this is no good on AIX, since a C function there is the address of
   # a function descriptor, not actual code.  But this doesn't matter since
@@ -442,7 +424,10 @@
   case 0x000c: puts ("powerpc7400"); break;
   case 0x0041: puts ("powerpc630");  break;
   case 0x003f: puts ("power7");      break;
-  case 0x004b: puts ("power8");      break;
+  case 0x004b:
+  case 0x004c:
+  case 0x004d: puts ("power8");      break;
+  case 0x004e: puts ("power9");      break;
   case 0x0050: puts ("powerpc860");  break;
   case 0x8000: puts ("powerpc7450"); break;
   case 0x8001: puts ("powerpc7455"); break;
@@ -463,6 +448,27 @@
     fi
   fi
 
+
+  # Fall back to the "cpu" line of the /proc/cpuinfo pseudo-file; this runs
+  # only while $exact_cpu is still empty.  Anything unrecognised is ignored,
+  # since of course we mustn't spit out a cpu type config.sub doesn't know.
+  if test -z "$exact_cpu" && test -f /proc/cpuinfo; then
+    x=`grep "^cpu[ 	]" /proc/cpuinfo | head -n 1`    # first "cpu" line only
+    x=`echo $x | sed -n 's/^cpu[ 	]*:[ 	]*\([A-Za-z0-9]*\).*/\1/p'`    # leading alphanumeric run after the colon
+    x=`echo $x | sed 's/PPC//'`    # drop the first "PPC" if present
+    case $x in
+      601)     exact_cpu="power" ;;
+      603ev)   exact_cpu="powerpc603e" ;;
+      604ev5)  exact_cpu="powerpc604e" ;;
+      970??)   exact_cpu="powerpc970" ;;
+      603 | 603e | 604 | 604e | 750 | 821 | 860)
+        exact_cpu="powerpc$x" ;;
+      POWER[4-9]*)
+        exact_cpu=`echo $x | sed -e "s;POWER;power;" -e "s;[a-zA-Z]*$;;"` ;;    # lower-case the name, strip trailing letters: POWER8E -> power8
+    esac
+  fi
+
+
   if test -z "$exact_cpu"; then
     # On AIX, try looking at _system_configuration.  This is present in
     # version 4 at least.
@@ -514,6 +520,9 @@
 #ifdef POWER_8
   case POWER_8:    puts ("power8"); break;
 #endif
+#ifdef POWER_9
+  case POWER_9:    puts ("power9"); break;
+#endif
   default:
     if (_system_configuration.architecture == POWER_RS)
       puts ("power");
diff -r ceb91e187c23 -r 19d4782aaaca configure.ac
--- a/configure.ac	Thu Dec 14 23:48:09 2017 +0100
+++ b/configure.ac	Wed Dec 27 00:27:52 2017 +0100
@@ -1119,9 +1119,12 @@
       power7)	    gcc_cflags_cpu="-mtune=power7 -mtune=power5"
 		    xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
 		    cpu_path="p7 p5 p4 p3-p7" ;;
-      [power[89]])  gcc_cflags_cpu="-mtune=power8 -mtune=power7 -mtune=power5"
+      power8)	    gcc_cflags_cpu="-mtune=power8 -mtune=power7 -mtune=power5"
 		    xlc_cflags_arch="-qarch=pwr8 -qarch=pwr7 -qarch=pwr5"
 		    cpu_path="p8 p7 p5 p4 p3-p7" ;;
+      power9)	    gcc_cflags_cpu="-mtune=power9 -mtune=power8 -mtune=power7 -mtune=power5"
+		    xlc_cflags_arch="-qarch=pwr9 -qarch=pwr8 -qarch=pwr7 -qarch=pwr5"
+		    cpu_path="p9 p8 p7 p5 p4 p3-p7" ;;
     esac
 
     case $host in
diff -r ceb91e187c23 -r 19d4782aaaca mpn/powerpc64/p9/addmul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/p9/addmul_1.asm	Wed Dec 27 00:27:52 2017 +0100
@@ -0,0 +1,136 @@
+dnl  POWER9 mpn_addmul_1.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630		 -
+C POWER4/PPC970		 -
+C POWER5		 -
+C POWER6		 -
+C POWER7		 -
+C POWER8		 -
+C POWER9		 ?
+
+C TODO
+C  * Schedule for POWER9 pipeline.
+C  * Unroll to at least 4x if that proves beneficial.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`v0', `r6')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)	C {rp,n} += {up,n} * v0, carry-out limb left in r3
+	std	r31, -8(r1)	C save r31 below r1; reloaded before every blr
+
+	cmpdi	cr6, n, 2	C cr6 = n vs 2, consumed by the ble below
+
+	addi	r0, n, -1
+	srdi	r0, r0, 1
+	mtctr	r0		C ctr = (n-1) >> 1
+
+	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
+	bne	cr0, L(b1)	C odd n
+
+L(b0):	ld	r10, 0(rp)	C even n: r10 = rp[0]
+	ld	r12, 0(up)	C r12 = up[0]
+	ld	r11, 8(rp)	C r11 = rp[1]
+	ld	r31, 8(up)	C r31 = up[1]
+	maddld	r0, r12, v0, r10	C r0 = low half of up[0]*v0 + rp[0]
+	maddhdu	r7, r12, v0, r10	C r7 = high half of the same sum
+	ble	cr6, L(2)	C n <= 2: finish in the two-limb tail
+	ld	r10, 16(rp)
+	ld	r12, 16(up)
+	maddld	r8, r31, v0, r11	C r8 = low half of up[1]*v0 + rp[1]
+	maddhdu	r5, r31, v0, r11	C r5 = high half; n (r5) is not read again
+	addic	up, up, 16	C addic also writes CA, which the adde at L(mid) reads
+	addi	rp, rp, -8	C bias rp so the offsets used at L(mid) fit
+	b	L(mid)
+
+L(b1):	ld	r11, 0(rp)	C odd n: r11 = rp[0]
+	ld	r31, 0(up)	C r31 = up[0]
+	ble	cr6, L(1)	C n <= 2: finish in the one-limb tail
+	ld	r10, 8(rp)
+	ld	r12, 8(up)
+	maddld	r0, r31, v0, r11	C r0 = low half of up[0]*v0 + rp[0]
+	maddhdu	r5, r31, v0, r11	C r5 = high half; n (r5) is not read again
+	ld	r11, 16(rp)
+	ld	r31, 16(up)
+	maddld	r9, r12, v0, r10	C r9 = low half of up[1]*v0 + rp[1]
+	maddhdu	r7, r12, v0, r10	C r7 = high half of the same sum
+	addic	up, up, 24	C addic also writes CA, which the adde chain reads
+	bdz	L(end)		C ctr--; skip the loop when it reaches 0
+
+	ALIGN(16)
+L(top):	ld	r10, 24(rp)
+	ld	r12, 0(up)
+	std	r0, 0(rp)	C store a finished limb
+	maddld	r8, r31, v0, r11	C W:0,2,4
+	adde	r0, r5, r9	C high half + next low half + CA
+	maddhdu	r5, r31, v0, r11	C W:1,3,5
+L(mid):	ld	r11, 32(rp)	C loop entry point for even n
+	ld	r31, 8(up)
+	std	r0, 8(rp)
+	maddld	r9, r12, v0, r10	C W:1,3,5
+	adde	r0, r7, r8	C high half + next low half + CA
+	maddhdu	r7, r12, v0, r10	C W:2,4,6
+	addi	rp, rp, 16	C addi does not write CA, so the chain survives
+	addi	up, up, 16
+	bdnz	L(top)		C two limbs per iteration
+
+L(end):	std	r0, 0(rp)	C drain: three more limbs are stored here
+	maddld	r8, r31, v0, r11
+	adde	r0, r5, r9
+	maddhdu	r5, r31, v0, r11
+	std	r0, 8(rp)
+	adde	r0, r7, r8
+	std	r0, 16(rp)
+	addze	r3, r5		C r3 = last high half + CA
+	ld	r31, -8(r1)	C restore r31
+	blr
+
+L(2):	maddld	r8, r31, v0, r11	C tail reached from L(b0) when n <= 2
+	maddhdu	r5, r31, v0, r11
+	std	r0, 0(rp)
+	addc	r0, r7, r8	C addc ignores incoming CA and sets it for addze
+	std	r0, 8(rp)
+	addze	r3, r5		C r3 = last high half + CA
+	ld	r31, -8(r1)	C restore r31
+	blr
+
+L(1):	maddld	r0,  r31, v0, r11	C tail reached from L(b1) when n <= 2
+	std	r0, 0(rp)
+	maddhdu	r3, r31, v0, r11	C r3 = high half, no carry to add
+	ld	r31, -8(r1)	C restore r31
+	blr
+EPILOGUE()


More information about the gmp-commit mailing list