[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org
Sat Apr 15 00:08:28 UTC 2017


details:   /var/hg/gmp/rev/50970e9d2cf3
changeset: 17342:50970e9d2cf3
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Apr 15 00:26:38 2017 +0200
description:
Move x86_64/mulx code to new x86_64/zen subdir.

details:   /var/hg/gmp/rev/2ce5c60f5372
changeset: 17343:2ce5c60f5372
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Apr 15 02:08:10 2017 +0200
description:
Recognise AMD zen.

diffstat:

 acinclude.m4                  |    4 +-
 config.guess                  |    3 +
 config.sub                    |    2 +-
 configure.ac                  |   14 ++-
 mpn/x86_64/fat/fat.c          |    7 +-
 mpn/x86_64/mulx/aorsmul_1.asm |  161 ----------------------------------------
 mpn/x86_64/mulx/mul_1.asm     |  154 --------------------------------------
 mpn/x86_64/zen/aorsmul_1.asm  |  168 ++++++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/zen/mul_1.asm      |  161 ++++++++++++++++++++++++++++++++++++++++
 9 files changed, 351 insertions(+), 323 deletions(-)

diffs (truncated from 768 to 300 lines):

diff -r c32d616089b4 -r 2ce5c60f5372 acinclude.m4
--- a/acinclude.m4	Sun Apr 02 06:14:40 2017 +0200
+++ b/acinclude.m4	Sat Apr 15 02:08:10 2017 +0200
@@ -63,7 +63,7 @@
 [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
 
 define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
 
 dnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
 dnl  ---------------------------------
@@ -741,7 +741,7 @@
   long i;
   for (i = 0; i < 88 + 1; i++)
     a[i] = ~0L;
-  r = malloc (10000 * sizeof (unsigned long));
+  r = calloc (10000, sizeof (unsigned long));
   r2 = r;
   for (i = 0; i < 528; i += 23)
     {
diff -r c32d616089b4 -r 2ce5c60f5372 config.guess
--- a/config.guess	Sun Apr 02 06:14:40 2017 +0200
+++ b/config.guess	Sat Apr 15 02:08:10 2017 +0200
@@ -951,6 +951,9 @@
 	case 22:		/* Jaguar, an improved bobcat */
 	  cpu_64bit = 1, cpu_avx = 1, modelstr = "jaguar";
 	  break;
+	case 23:		/* Zen */
+	  cpu_64bit = 1, cpu_avx = 1, modelstr = "zen";
+	  break;
 	}
     }
   else if (strcmp (vendor_string, "CyrixInstead") == 0)
diff -r c32d616089b4 -r 2ce5c60f5372 config.sub
--- a/config.sub	Sun Apr 02 06:14:40 2017 +0200
+++ b/config.sub	Sat Apr 15 02:08:10 2017 +0200
@@ -102,7 +102,7 @@
   test_cpu=ia64 ;;
 pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
   test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
+athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
   test_cpu=x86_64 ;;
 power[2-9] | power2sc)
   test_cpu=power ;;
diff -r c32d616089b4 -r 2ce5c60f5372 configure.ac
--- a/configure.ac	Sun Apr 02 06:14:40 2017 +0200
+++ b/configure.ac	Sat Apr 15 02:08:10 2017 +0200
@@ -1828,6 +1828,12 @@
 	path="x86/bd4 x86/bd3 x86/bd2 x86/bd1 x86/k7/mmx x86/k7 x86/mmx x86"
 	path_64="x86_64/bd4 x86_64/bd3 x86_64/bd2 x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
 	;;
+      zen)
+	gcc_cflags_cpu="-mtune=znver1 -mtune=amdfam10 -mtune=k8"
+	gcc_cflags_arch="-march=znver1 -march=amdfam10 -march=k8"
+	path="x86/k7/mmx x86/k7 x86/mmx x86"
+	path_64="x86_64/zen x86_64/k10 x86_64/k8 x86_64"
+	;;
       core2)
 	gcc_cflags_cpu="-mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
@@ -2225,10 +2231,10 @@
 	extra_functions_64="$extra_functions_64 fat fat_entry"
 	path_64="x86_64/fat x86_64"
 	fat_path="x86_64 x86_64/fat
-		  x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat x86_64/pentium4
-		  x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/coreihwl
-		  x86_64/coreibwl x86_64/skylake x86_64/atom x86_64/silvermont
-		  x86_64/nano"
+		  x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat x86_64/zen
+		  x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
+		  x86_64/coreihwl x86_64/coreibwl x86_64/skylake x86_64/atom
+		  x86_64/silvermont x86_64/nano"
 	fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
       fi
 
diff -r c32d616089b4 -r 2ce5c60f5372 mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c	Sun Apr 02 06:14:40 2017 +0200
+++ b/mpn/x86_64/fat/fat.c	Sat Apr 15 02:08:10 2017 +0200
@@ -91,6 +91,7 @@
   { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
   { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
   { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
+  { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },
 
   { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
 };
@@ -400,7 +401,6 @@
 	case 0x0f:		/* k8 */
 	case 0x11:		/* "fam 11h", mix of k8 and k10 */
 	case 0x13:
-	case 0x17:
 	  CPUVEC_SETUP_k8;
 	  break;
 
@@ -421,6 +421,11 @@
 	  CPUVEC_SETUP_k8;
 	  CPUVEC_SETUP_k10;
 	  CPUVEC_SETUP_bd1;
+
+	case 0x17:	    /* zen */
+	  CPUVEC_SETUP_k8;
+	  CPUVEC_SETUP_k10;
+	  CPUVEC_SETUP_zen;
 	}
     }
   else if (strcmp (vendor_string, "CentaurHauls") == 0)
diff -r c32d616089b4 -r 2ce5c60f5372 mpn/x86_64/mulx/aorsmul_1.asm
--- a/mpn/x86_64/mulx/aorsmul_1.asm	Sun Apr 02 06:14:40 2017 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-dnl  AMD64 mpn_addmul_1 and mpn_submul_1 for CPUs with mulx.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 -
-C AMD K10	 -
-C AMD bd1	 -
-C AMD bd2	 ?
-C AMD bobcat	 -
-C AMD jaguar	 ?
-C Intel P4	 -
-C Intel PNR	 -
-C Intel NHM	 -
-C Intel SBR	 -
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 -
-C VIA nano	 -
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0_param',`%rcx')   C r9
-
-define(`n',       `%rcx')
-define(`v0',      `%rdx')
-
-ifdef(`OPERATION_addmul_1',`
-      define(`ADDSUB',        `add')
-      define(`ADCSBB',        `adc')
-      define(`func',  `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-      define(`ADDSUB',        `sub')
-      define(`ADCSBB',        `sbb')
-      define(`func',  `mpn_submul_1')
-')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`vl', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``r11'')	') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	mov	(up), %r8
-
-	push	%rbx
-	push	%r12
-	push	%r13
-
-	lea	(up,n_param,8), up
-	lea	-32(rp,n_param,8), rp
-	mov	R32(n_param), R32(%rax)
-	xchg	v0_param, v0		C FIXME: is this insn fast?
-
-	neg	n
-
-	and	$3, R8(%rax)
-	jz	L(b0)
-	cmp	$2, R8(%rax)
-	jz	L(b2)
-	jg	L(b3)
-
-L(b1):	mulx	%r8, %rbx, %rax
-	sub	$-1, n
-	jz	L(wd1)
-	mulx	(up,n,8), %r9, %r8
-	mulx	8(up,n,8), %r11, %r10
-	test	R32(%rax), R32(%rax)		C clear cy
-	jmp	L(lo1)
-
-L(b0):	mulx	%r8, %r9, %r8
-	mulx	8(up,n,8), %r11, %r10
-	mulx	16(up,n,8), %r13, %r12
-	xor	R32(%rax), R32(%rax)
-	jmp	L(lo0)
-
-L(b3):	mulx	%r8, %r11, %r10
-	mulx	8(up,n,8), %r13, %r12
-	mulx	16(up,n,8), %rbx, %rax
-	add	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	sub	$-3, n
-	jz	L(wd3)
-	test	R32(%rax), R32(%rax)		C clear cy
-	jmp	L(lo3)
-
-L(b2):	mulx	%r8, %r13, %r12
-	mulx	8(up,n,8), %rbx, %rax
-	add	%r12, %rbx
-	adc	$0, %rax
-	sub	$-2, n
-	jz	L(wd2)
-	mulx	(up,n,8), %r9, %r8
-	test	R32(%rax), R32(%rax)		C clear cy
-	jmp	L(lo2)
-
-L(top):	ADDSUB	%r9, (rp,n,8)
-L(lo3):	mulx	(up,n,8), %r9, %r8
-	ADCSBB	%r11, 8(rp,n,8)
-L(lo2):	mulx	8(up,n,8), %r11, %r10
-	ADCSBB	%r13, 16(rp,n,8)
-L(lo1):	mulx	16(up,n,8), %r13, %r12
-	ADCSBB	%rbx, 24(rp,n,8)
-	adc	%rax, %r9
-L(lo0):	mulx	24(up,n,8), %rbx, %rax
-	adc	%r8, %r11
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax		C rax = carry limb
-	add	$4, n
-	js	L(top)
-
-L(end):	ADDSUB	%r9, (rp)
-L(wd3):	ADCSBB	%r11, 8(rp)
-L(wd2):	ADCSBB	%r13, 16(rp)
-L(wd1):	ADCSBB	%rbx, 24(rp)
-	adc	n, %rax
-	pop	%r13
-	pop	%r12
-	pop	%rbx
-	ret
-EPILOGUE()
-ASM_END()
diff -r c32d616089b4 -r 2ce5c60f5372 mpn/x86_64/mulx/mul_1.asm
--- a/mpn/x86_64/mulx/mul_1.asm	Sun Apr 02 06:14:40 2017 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-dnl  AMD64 mpn_mul_1 for CPUs with mulx.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY


More information about the gmp-commit mailing list