[Gmp-commit] /var/hg/gmp: Add Skylake support.

mercurial at gmplib.org mercurial at gmplib.org
Mon Aug 17 19:47:25 UTC 2015


details:   /var/hg/gmp/rev/7850a6d24896
changeset: 16768:7850a6d24896
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Aug 17 21:47:23 2015 +0200
description:
Add Skylake support.

diffstat:

 ChangeLog                       |   14 ++-
 acinclude.m4                    |    2 +-
 config.guess                    |    1 +
 config.sub                      |    2 +-
 configure.ac                    |    7 +
 mpn/x86_64/skylake/gmp-mparam.h |  194 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 217 insertions(+), 3 deletions(-)

diffs (278 lines):

diff -r 451c0f77758f -r 7850a6d24896 ChangeLog
--- a/ChangeLog	Sat Aug 15 21:08:15 2015 +0200
+++ b/ChangeLog	Mon Aug 17 21:47:23 2015 +0200
@@ -1,3 +1,15 @@
+2015-08-17  Torbjörn Granlund  <torbjorng at google.com>
+
+	* acinclude.m4 (X86_64_PATTERN): Add skylake.
+	* config.guess: Corresponding changes.
+	* config.sub: Corresponding changes.
+	* configure.ac: Corresponding changes.
+	* mpn/x86_64/skylake/gmp-mparam.h: New file.
+	
+2015-08-15  Torbjörn Granlund  <torbjorng at google.com>
+
+	* mpn/generic/mullo_basecase.c: Provide alternative code, make default.
+
 2015-08-04 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* tests/refmpn.c (refmpn_sqrlo): New function.
@@ -12,7 +24,7 @@
 	* tests/mpn/Makefile.am (check_PROGRAMS): Add new test.
 	* tests/devel/try.c: Support mpn_sqrlo and mpn_sqrlo_basecase.
 
-	* tune/common.c (speed_mpn_sqrlo{,_basecase}): New functions. 
+	* tune/common.c (speed_mpn_sqrlo{,_basecase}): New functions.
 	* tune/speed.c: Support new functions.
 	* tune/speed.h (SPEED_ROUTINE_MPN_MULLO_BASECASE): Update.
 	(SPEED_ROUTINE_MPN_SQRLO): New macro.
diff -r 451c0f77758f -r 7850a6d24896 acinclude.m4
--- a/acinclude.m4	Sat Aug 15 21:08:15 2015 +0200
+++ b/acinclude.m4	Mon Aug 17 21:47:23 2015 +0200
@@ -63,7 +63,7 @@
 [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
 
 define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-*]])
 
 dnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
 dnl  ---------------------------------
diff -r 451c0f77758f -r 7850a6d24896 config.guess
--- a/config.guess	Sat Aug 15 21:08:15 2015 +0200
+++ b/config.guess	Mon Aug 17 21:47:23 2015 +0200
@@ -832,6 +832,7 @@
           else if (model == 0x4d) cpu_64bit = 1,            modelstr = "silvermont"; /* Silvermont/Avoton */
           else if (model == 0x4f) cpu_64bit = 1, cpu_avx=1, modelstr = "broadwell";  /* Broadwell server */
           else if (model == 0x56) cpu_64bit = 1, cpu_avx=1, modelstr = "broadwell";  /* Broadwell microserver */
+          else if (model == 0x5e) cpu_64bit = 1, cpu_avx=1, modelstr = "skylake";    /* Skylake */
           else                    cpu_64bit = 1,            modelstr = "nehalem";    /* default */
 
 	  if (strcmp (modelstr, "haswell") == 0)
diff -r 451c0f77758f -r 7850a6d24896 config.sub
--- a/config.sub	Sat Aug 15 21:08:15 2015 +0200
+++ b/config.sub	Mon Aug 17 21:47:23 2015 +0200
@@ -102,7 +102,7 @@
   test_cpu=ia64 ;;
 pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
   test_cpu=i386 ;;
-athlon64 | atom | silvermont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell*)
+athlon64 | atom | silvermont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake*)
   test_cpu=x86_64 ;;
 power[2-9] | power2sc)
   test_cpu=power ;;
diff -r 451c0f77758f -r 7850a6d24896 configure.ac
--- a/configure.ac	Sat Aug 15 21:08:15 2015 +0200
+++ b/configure.ac	Mon Aug 17 21:47:23 2015 +0200
@@ -1800,6 +1800,13 @@
 	path_64="x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	# extra_functions_64="missing"	 # enable for bmi2/adx simulation
 	;;
+      skylake | skylakenoavx)
+	gcc_cflags_cpu="-mtune=skylake -mtune=broadwell -mtune=corei7 -mtune=core2 -mtune=k8"
+	gcc_cflags_arch="-march=skylake -march=broadwell -march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
+	path_64="x86_64/skylake x86_64/coreibwl x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+	# extra_functions_64="missing"	 # enable for bmi2/adx simulation
+	;;
       atom)
 	gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
 	gcc_cflags_arch="-march=atom -march=pentium3"
diff -r 451c0f77758f -r 7850a6d24896 mpn/x86_64/skylake/gmp-mparam.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/skylake/gmp-mparam.h	Mon Aug 17 21:47:23 2015 +0200
@@ -0,0 +1,194 @@
+/* Skylake gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* Disable use of slow functions.  FIXME: We should disable lib inclusion.  */
+#undef HAVE_NATIVE_mpn_mul_2
+#undef HAVE_NATIVE_mpn_addmul_2
+
+/* 3500 MHz i5-6600K Skylake */
+/* Generated by tuneup.c, 2015-04-23, gcc 4.5 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        13
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1_NORM_THRESHOLD              1
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           22
+
+#define MUL_TOOM22_THRESHOLD                26
+#define MUL_TOOM33_THRESHOLD                73
+#define MUL_TOOM44_THRESHOLD               208
+#define MUL_TOOM6H_THRESHOLD               418
+#define MUL_TOOM8H_THRESHOLD                 0  /* always */
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     152
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     137
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     163
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     203
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 33
+#define SQR_TOOM3_THRESHOLD                105
+#define SQR_TOOM4_THRESHOLD                450
+#define SQR_TOOM6_THRESHOLD                552
+#define SQR_TOOM8_THRESHOLD                597
+
+#define MULMID_TOOM42_THRESHOLD             48
+
+#define MULMOD_BNM1_THRESHOLD               15
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             400  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    400, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     16, 6}, \
+    {     33, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
+    {     15, 9}, {     43,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     83,10}, {     47, 9}, {     99,10}, {     55,11}, \
+    {     31,10}, {     63, 9}, {    127,10}, {     79,11}, \
+    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
+    {    135,11}, {     79,10}, {    167,11}, {     95,10}, \
+    {    191, 9}, {    383,11}, {    111,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,11}, {    143,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
+    {    383,11}, {    207,10}, {    415,13}, {     63, 9}, \
+    {   1023,11}, {    271,10}, {    543, 9}, {   1087,10}, \
+    {    575,11}, {    303,10}, {    607,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
+    {    127,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,12}, {    287,11}, {    607,12}, {    319,11}, \
+    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
+    {    479,14}, {    127,12}, {    543,11}, {   1087,12}, \
+    {    607,13}, {    319,12}, {    735,13}, {    383,12}, \
+    {    831,13}, {    447,12}, {    895,11}, {   1791,12}, \
+    {    959,13}, {    511,12}, {   1023,11}, {   2047,12}, \
+    {   1087,13}, {    575,12}, {   1215,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 138
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             376  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    376, 5}, {     23, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    271,10}, {    575, 6}, \
+    {   9727, 7}, {   4991, 5}, {  19967, 7}, {   5375, 9}, \
+    {   1407,10}, {    735,11}, {    383,10}, {    767, 9}, \
+    {   1535,10}, {    799,11}, {    415,10}, {    831,11}, \
+    {    447,10}, {    895,11}, {    479,13}, {    127,11}, \
+    {    511,10}, {   1023,11}, {    575,10}, {   1151,11}, \
+    {    607,12}, {    319,11}, {    671,12}, {    351,11}, \
+    {    703,12}, {    383,11}, {    799,12}, {    415,11}, \
+    {    831,12}, {    479,14}, {    127,13}, {    255,12}, \
+    {    607,13}, {    319,12}, {    671,11}, {   1343,12}, \
+    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
+    {    895,11}, {   1791,12}, {    959,13}, {    511,12}, \
+    {   1023,13}, {    575,12}, {   1215,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 122
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  25
+#define MULLO_MUL_N_THRESHOLD             9174
+
+#define DC_DIV_QR_THRESHOLD                 52
+#define DC_DIVAPPR_Q_THRESHOLD             167
+#define DC_BDIV_QR_THRESHOLD                86
+#define DC_BDIV_Q_THRESHOLD                152
+
+#define INV_MULMOD_BNM1_THRESHOLD           50
+#define INV_NEWTON_THRESHOLD               157
+#define INV_APPR_THRESHOLD                 157
+
+#define BINV_NEWTON_THRESHOLD              294
+#define REDC_1_TO_REDC_2_THRESHOLD          30
+#define REDC_2_TO_REDC_N_THRESHOLD          75
+
+#define MU_DIV_QR_THRESHOLD               1620
+#define MU_DIVAPPR_Q_THRESHOLD            1652
+#define MUPI_DIV_QR_THRESHOLD               69
+#define MU_BDIV_QR_THRESHOLD              1620
+#define MU_BDIV_Q_THRESHOLD               1620
+
+#define POWM_SEC_TABLE  1,10,361,960,1099,2577
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        19
+#define SET_STR_DC_THRESHOLD               987
+#define SET_STR_PRECOMPUTE_THRESHOLD      1948
+
+#define FAC_DSC_THRESHOLD                  781
+#define FAC_ODD_THRESHOLD                    0  /* always */
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      71
+#define HGCD_APPR_THRESHOLD                 55
+#define HGCD_REDUCE_THRESHOLD             3014
+#define GCD_DC_THRESHOLD                   706
+#define GCDEXT_DC_THRESHOLD                393
+#define JACOBI_BASE_METHOD                   3


More information about the gmp-commit mailing list