[Gmp-commit] /var/hg/gmp-6.1: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Dec 14 16:27:08 UTC 2015


details:   /var/hg/gmp-6.1/rev/67d4ee9dead1
changeset: 16933:67d4ee9dead1
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Dec 14 17:05:09 2015 +0100
description:
Add FUNC_EXITs.

details:   /var/hg/gmp-6.1/rev/8985e668c308
changeset: 16934:8985e668c308
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Dec 14 17:15:52 2015 +0100
description:
Work around skylake cpuid bug.

details:   /var/hg/gmp-6.1/rev/8fa02c71fdb4
changeset: 16935:8fa02c71fdb4
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Dec 14 17:17:40 2015 +0100
description:
Fix spelling of kabylake.

details:   /var/hg/gmp-6.1/rev/587cb8eab1fd
changeset: 16936:587cb8eab1fd
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Dec 14 17:18:35 2015 +0100
description:
Handle more BMI2 crippled CPUs.

details:   /var/hg/gmp-6.1/rev/85315d4a5197
changeset: 16937:85315d4a5197
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Dec 14 17:21:19 2015 +0100
description:
ChangeLog

details:   /var/hg/gmp-6.1/rev/73fe4c9774e0
changeset: 16938:73fe4c9774e0
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Dec 14 17:23:28 2015 +0100
description:
White spaces in ChangeLog

diffstat:

 ChangeLog                            |  43 +++++++++++++++++++------
 NEWS                                 |   5 +++
 acinclude.m4                         |   2 +-
 config.guess                         |  57 +++++++++++++++++++++++++++++++---
 config.sub                           |   2 +-
 configure.ac                         |   2 +-
 mpn/x86_64/coreibwl/mul_basecase.asm |   4 ++
 mpn/x86_64/fat/fat.c                 |  58 +++++++++++++++++++++++++++++++++--
 8 files changed, 149 insertions(+), 24 deletions(-)

diffs (truncated from 345 to 300 lines):

diff -r 20bf21d5f600 -r 73fe4c9774e0 ChangeLog
--- a/ChangeLog	Mon Dec 14 08:37:18 2015 +0100
+++ b/ChangeLog	Mon Dec 14 17:23:28 2015 +0100
@@ -1,8 +1,29 @@
+2015-12-14  Torbjörn Granlund  <torbjorng at google.com>
+
+	* mpn/x86_64/fat/fat.c (gmp_workaround_skylake_cpuid_bug):
+	New function.
+	(__gmpn_cpuvec_init): Handle more BMI2 crippled CPUs.
+
+2015-12-13  Torbjörn Granlund  <torbjorng at google.com>
+
+	* config.sub: Fix spelling of kabylake.
+	* acinclude.m4: Likewise.
+	* mpn/x86_64/fat/fat.c: Likewise.
+
 2015-12-10 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* tests/misc/t-printf.c: Test a sequence of '%'.
 	* printf/doprnt.c: Avoid buffer overread with long long limbs.
 
+2015-12-03  Torbjörn Granlund  <torbjorng at google.com>
+
+	* config.guess: Work around skylake cpuid bug.
+	Fix spelling of kabylake.
+
+2015-12-01  Torbjörn Granlund  <torbjorng at google.com>
+
+	* mpn/x86_64/coreibwl/mul_basecase.asm: Add FUNC_EXITs.
+
 2015-11-21 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* gmp-impl.h (MPN_TOOM22_MUL_MINSIZE): Consider ToomX2 limits
@@ -684,7 +705,7 @@
 
 	* tune/tune-gcd-p.c: Add casts for C++ compatibility.
 
-	* tune/tuneup.c:  Add casts for C++ compatibility.
+	* tune/tuneup.c: Add casts for C++ compatibility.
 	(mpn_divrem_1_tune, mpn_mod_1_tune): Mark as extern "C".
 	(INSERT_FFTTAB): Produce sentinels differently to silence compiler.
 
@@ -712,12 +733,12 @@
 
 	* mpn/powerpc32/addmul_1.asm: Avoid negative stack pointer references.
 	* mpn/powerpc32/lshift.asm: Likewise.
-	* mpn/powerpc32/lshiftc.asm:  Likewise.
-	* mpn/powerpc32/p3-p7/aors_n.asm:  Likewise.
-	* mpn/powerpc32/rshift.asm:  Likewise.
-	* mpn/powerpc32/sec_tabselect.asm:  Likewise.
-	* mpn/powerpc32/submul_1.asm:  Likewise.
-	* mpn/powerpc32/vmx/mod_34lsub1.asm:  Likewise.
+	* mpn/powerpc32/lshiftc.asm: Likewise.
+	* mpn/powerpc32/p3-p7/aors_n.asm: Likewise.
+	* mpn/powerpc32/rshift.asm: Likewise.
+	* mpn/powerpc32/sec_tabselect.asm: Likewise.
+	* mpn/powerpc32/submul_1.asm: Likewise.
+	* mpn/powerpc32/vmx/mod_34lsub1.asm: Likewise.
 
 2014-10-13  Torbjörn Granlund  <torbjorng at google.com>
 
@@ -2044,8 +2065,8 @@
 
 	* mpn/powerpc64/mode64/divrem_1.asm: Remove explicit nop after CALL.
 	* mpn/powerpc64/mode64/divrem_2.asm: Likewise.
-	* mpn/powerpc64/mode64/mod_1_1.asm:  Likewise.
-	* mpn/powerpc64/mode64/mod_1_4.asm:  Likewise.
+	* mpn/powerpc64/mode64/mod_1_1.asm: Likewise.
+	* mpn/powerpc64/mode64/mod_1_4.asm: Likewise.
 
 2013-07-13  Torbjorn Granlund  <tege at gmplib.org>
 
@@ -2440,7 +2461,7 @@
 	* mpn/arm/dive_1.asm: New file.
 	* mpn/arm/v6/dive_1.asm: New file.
 
-	* mpn/arm/v6t2/mode1o.asm:  Make trivial change to avoid v6t2...
+	* mpn/arm/v6t2/mode1o.asm: Make trivial change to avoid v6t2...
 	* mpn/arm/v6/mode1o.asm: ...instruction, move file accordingly.
 
 	* mpn/powerpc64/mode64/invert_limb.asm: Put all multiplies low-limb first.
@@ -4788,7 +4809,7 @@
 	Reintroduce previously removed RP argument.
 	* mpn/x86_64/redc_1.asm: Likewise.
 
-	* mpn/generic/redc_2.c:  Remove mpn_sub_n call, return carry from
+	* mpn/generic/redc_2.c: Remove mpn_sub_n call, return carry from
 	mpn_add_n call.
 
 	* gmp-impl.h (mpn_redc_1, mpn_redc_2): Now return an mp_limb_t.
diff -r 20bf21d5f600 -r 73fe4c9774e0 NEWS
--- a/NEWS	Mon Dec 14 08:37:18 2015 +0100
+++ b/NEWS	Mon Dec 14 17:23:28 2015 +0100
@@ -4,6 +4,11 @@
 medium, provided this notice is preserved.
 
 
+Changes between GMP version 6.1.1 and 6.1.0
+
+  BUGS FIXED
+  * .
+
 Changes between GMP version 6.0.* and 6.1.0
 
   BUGS FIXED
diff -r 20bf21d5f600 -r 73fe4c9774e0 acinclude.m4
--- a/acinclude.m4	Mon Dec 14 08:37:18 2015 +0100
+++ b/acinclude.m4	Mon Dec 14 17:23:28 2015 +0100
@@ -63,7 +63,7 @@
 [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
 
 define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | cabylake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
 
 dnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
 dnl  ---------------------------------
diff -r 20bf21d5f600 -r 73fe4c9774e0 config.guess
--- a/config.guess	Mon Dec 14 08:37:18 2015 +0100
+++ b/config.guess	Mon Dec 14 17:23:28 2015 +0100
@@ -762,6 +762,48 @@
 extern "C"
 #endif
 unsigned int cpuid (int, char *, char *, int);
+
+int
+gmp_workaround_skylake_cpuid_bug ()
+{
+  char feature_string[49];
+  char processor_name_string[49];
+  static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ };
+  int i;
+
+  /* Example strings:                                   */
+  /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz"          */
+  /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz"         */
+  /*                  ^               ^               ^ */
+  /*     0x80000002       0x80000003      0x80000004    */
+  /* We match out just the 0x80000003 part here. */
+
+  /* In their infinite wisdom, Intel decided to use one register order for
+     the vendor string, and another for the processor name string.  We shuffle
+     things about here, rather than write a new variant of our assembly cpuid.
+  */
+
+  unsigned int eax, ebx, ecx, edx;
+  eax = CPUID (feature_string, 0x80000003);
+  ebx = ((unsigned int *)feature_string)[0];
+  edx = ((unsigned int *)feature_string)[1];
+  ecx = ((unsigned int *)feature_string)[2];
+
+  ((unsigned int *) (processor_name_string))[0] = eax;
+  ((unsigned int *) (processor_name_string))[1] = ebx;
+  ((unsigned int *) (processor_name_string))[2] = ecx;
+  ((unsigned int *) (processor_name_string))[3] = edx;
+
+  processor_name_string[16] = 0;
+
+  for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++)
+    {
+      if (strstr (processor_name_string, bad_cpus[i]) != 0)
+	return 1;
+    }
+  return 0;
+}
+
 int
 main ()
 {
@@ -839,16 +881,19 @@
           else if (model == 0x5c) cpu_64bit = 1,            modelstr = "goldmont";   /* Goldmont */
           else if (model == 0x5e) cpu_64bit = 1, cpu_avx=1, modelstr = "skylake";    /* Skylake */
           else if (model == 0x5f) cpu_64bit = 1,            modelstr = "goldmont";   /* Goldmont */
-          else if (model == 0x8e) cpu_64bit = 1, cpu_avx=1, modelstr = "cabylake";   /* Capylake Y/U */
-          else if (model == 0x9e) cpu_64bit = 1, cpu_avx=1, modelstr = "cabylake";   /* Capylake desktop */
+          else if (model == 0x8e) cpu_64bit = 1, cpu_avx=1, modelstr = "kabylake";   /* Kabylake Y/U */
+          else if (model == 0x9e) cpu_64bit = 1, cpu_avx=1, modelstr = "kabylake";   /* Kabylake desktop */
           else                    cpu_64bit = 1,            modelstr = "nehalem";    /* default */
 
-	  if (strcmp (modelstr, "haswell") == 0)
+	  if (strcmp (modelstr, "haswell") == 0 ||
+	      strcmp (modelstr, "broadwell") == 0 ||
+	      strcmp (modelstr, "skylake") == 0)
 	    {
-	      /* Some Haswells lack BMI2.  Let them appear as Sandybridges for
-		 now.  */
+	      /* Some haswell, broadwell, skylake lack BMI2.  Let them appear
+		 as sandybridge for now.  */
 	      CPUID (feature_string, 7);
-	      if ((feature_string[0 + 8 / 8] & (1 << (8 % 8))) == 0)
+	      if ((feature_string[0 + 8 / 8] & (1 << (8 % 8))) == 0
+		  || gmp_workaround_skylake_cpuid_bug ())
 		modelstr = "sandybridge";
 	    }
 
diff -r 20bf21d5f600 -r 73fe4c9774e0 config.sub
--- a/config.sub	Mon Dec 14 08:37:18 2015 +0100
+++ b/config.sub	Mon Dec 14 17:23:28 2015 +0100
@@ -102,7 +102,7 @@
   test_cpu=ia64 ;;
 pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
   test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | cabylake* | knightslanding)
+athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
   test_cpu=x86_64 ;;
 power[2-9] | power2sc)
   test_cpu=power ;;
diff -r 20bf21d5f600 -r 73fe4c9774e0 configure.ac
--- a/configure.ac	Mon Dec 14 08:37:18 2015 +0100
+++ b/configure.ac	Mon Dec 14 17:23:28 2015 +0100
@@ -1800,7 +1800,7 @@
 	path_64="x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	# extra_functions_64="missing"	 # enable for bmi2/adx simulation
 	;;
-      skylake | skylakenoavx | cabylake | cabylakenoavx)
+      skylake | skylakenoavx | kabylake | kabylakenoavx)
 	gcc_cflags_cpu="-mtune=skylake -mtune=broadwell -mtune=corei7 -mtune=core2 -mtune=k8"
 	# Don't pass -march=skylake for now as then some compilers emit AVX512.
 	gcc_cflags_arch="-march=broadwell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
diff -r 20bf21d5f600 -r 73fe4c9774e0 mpn/x86_64/coreibwl/mul_basecase.asm
--- a/mpn/x86_64/coreibwl/mul_basecase.asm	Mon Dec 14 08:37:18 2015 +0100
+++ b/mpn/x86_64/coreibwl/mul_basecase.asm	Mon Dec 14 17:23:28 2015 +0100
@@ -94,6 +94,7 @@
 
 L(s11):	mov	%rax, (rp)
 	mov	%r9, 8(rp)
+	FUNC_EXIT()
 	ret
 
 L(s2x):	cmp	$2, vn
@@ -105,6 +106,7 @@
 	mov	%rax, (rp)
 	mov	%r9, 8(rp)
 	mov	%r10, 16(rp)
+	FUNC_EXIT()
 	ret
 
 L(s22):	add	%r8, %r9		C 1
@@ -121,6 +123,7 @@
 	mov	%r9, 8(rp)
 	mov	%r10, 16(rp)
 	mov	%rdx, 24(rp)
+	FUNC_EXIT()
 	ret
 
 	ALIGN(16)
@@ -294,6 +297,7 @@
 	pop	%r12
 	pop	%rbp
 	pop	%rbx
+	FUNC_EXIT()
 	ret
 
 L(f2):
diff -r 20bf21d5f600 -r 73fe4c9774e0 mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c	Mon Dec 14 08:37:18 2015 +0100
+++ b/mpn/x86_64/fat/fat.c	Mon Dec 14 17:23:28 2015 +0100
@@ -206,6 +206,49 @@
    asm routines only operate correctly up to their own defined threshold,
    not an arbitrary value.  */
 
+static int
+gmp_workaround_skylake_cpuid_bug ()
+{
+  char feature_string[49];
+  char processor_name_string[49];
+  static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ };
+  int i;
+
+  /* Example strings:                                   */
+  /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz"          */
+  /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz"         */
+  /*                  ^               ^               ^ */
+  /*     0x80000002       0x80000003      0x80000004    */
+  /* We match out just the 0x80000003 part here. */
+
+  /* In their infinite wisdom, Intel decided to use one register order for
+     the vendor string, and another for the processor name string.  We shuffle
+     things about here, rather than write a new variant of our assembly cpuid.
+  */
+
+  unsigned int eax, ebx, ecx, edx;
+  eax = __gmpn_cpuid (feature_string, 0x80000003);
+  ebx = ((unsigned int *)feature_string)[0];
+  edx = ((unsigned int *)feature_string)[1];
+  ecx = ((unsigned int *)feature_string)[2];
+
+  ((unsigned int *) (processor_name_string))[0] = eax;
+  ((unsigned int *) (processor_name_string))[1] = ebx;
+  ((unsigned int *) (processor_name_string))[2] = ecx;
+  ((unsigned int *) (processor_name_string))[3] = edx;
+
+  processor_name_string[16] = 0;
+
+  for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++)
+    {
+      if (strstr (processor_name_string, bad_cpus[i]) != 0)
+	return 1;
+    }
+  return 0;


More information about the gmp-commit mailing list