[PATCH] Add Zhaoxin x86 processor support
DylanFan-oc
DylanFan-oc at zhaoxin.com
Mon Dec 14 01:53:51 UTC 2020
Add Zhaoxin x86 processor support
We add code to support Zhaoxin processors because the CPU vendor IDs
"Shanghai" and "CentaurHauls" now belong to Zhaoxin, and this part
of the code is maintained and upgraded by Zhaoxin.
Background:
Shanghai Zhaoxin Semiconductor Co., Ltd. ("Zhaoxin") was established in 2013
and is headquartered in Zhangjiang, Shanghai, China. Zhaoxin aims to provide
general-purpose x86 processors.
Related Zhaoxin Linux Kernel patch can be found at
https://lore.kernel.org/lkml/01042674b2f741b2aed1f797359bdffb@zhaoxin.com
acinclude.m4 | 2 +-
config.guess | 20 ++-
config.sub | 2 +-
configure.ac | 21 +++-
mpn/x86/fat/fat.c | 36 +++++-
mpn/x86_64/fat/fat.c | 51 +++++++-
mpn/x86_64/kx5000/gmp-mparam.h | 172 +++++++++++++++++++++++++++++
mpn/x86_64/kx6000/gmp-mparam.h | 161 +++++++++++++++++++++++++++
mpn/x86_64/zxc/gmp-mparam.h | 239 +++++++++++++++++++++++++++++++++++++++++
9 files changed, 688 insertions(+), 16 deletions(-)
diff -r 1a5f0f394f98 -r a6637d676396 acinclude.m4
--- a/acinclude.m4 Mon Nov 30 09:43:41 2020 +0100
+++ b/acinclude.m4 Wed Dec 09 14:25:52 2020 +0800
@@ -63,7 +63,7 @@
[[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | zxc-*-* | kx5000-*-* | kx6000-*-*]])
dnl GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
dnl ---------------------------------
diff -r 1a5f0f394f98 -r a6637d676396 config.guess
--- a/config.guess Mon Nov 30 09:43:41 2020 +0100
+++ b/config.guess Wed Dec 09 14:25:52 2020 +0800
@@ -847,7 +847,7 @@
char vendor_string[13];
char feature_string[12];
long fms;
- int family, model;
+ int family, model, stepping;
const char *modelstr, *suffix;
int cpu_64bit = 0, cpu_avx = 0;
int cpuid_64bit, cpuid_avx, cpuid_osxsave;
@@ -859,6 +859,7 @@
family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+ stepping = (fms & 0xf);
cpuid_avx = (feature_string[11] >> 4) & 1;
cpuid_osxsave = (feature_string[11] >> 3) & 1;
@@ -1018,15 +1019,28 @@
{
/* Should recognize Cyrix' processors too. */
}
- else if (strcmp (vendor_string, "CentaurHauls") == 0)
+ else if (strcmp (vendor_string, "CentaurHauls") == 0 ||
+ strcmp (vendor_string, "  Shanghai  ") == 0)
{
switch (family)
{
case 6:
if (model < 9) modelstr = "viac3";
else if (model < 15) modelstr = "viac32";
- else cpu_64bit = 1, modelstr = "nano";
+ else if (model == 15)
+ {
+ if (stepping == 0xE)
+ cpu_64bit=1, modelstr = "zxc";
+ else cpu_64bit = 1, modelstr = "nano";
+ }
+ else cpu_64bit=1, modelstr = "zxc";
break;
+ case 7:
+ if(model == 0x1B)
+ cpu_64bit = 1, cpu_avx = 1, modelstr = "kx5000";
+ else if(model == 0x3B)
+ cpu_64bit = 1, cpu_avx = 1, modelstr = "kx6000";
+ break;
}
}
diff -r 1a5f0f394f98 -r a6637d676396 config.sub
--- a/config.sub Mon Nov 30 09:43:41 2020 +0100
+++ b/config.sub Wed Dec 09 14:25:52 2020 +0800
@@ -102,7 +102,7 @@
test_cpu=ia64 ;;
pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
+athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding | zxc | kx5000 | kx6000)
test_cpu=x86_64 ;;
power[2-9] | power2sc)
test_cpu=power ;;
diff -r 1a5f0f394f98 -r a6637d676396 configure.ac
--- a/configure.ac Mon Nov 30 09:43:41 2020 +0100
+++ b/configure.ac Wed Dec 09 14:25:52 2020 +0800
@@ -1971,6 +1971,24 @@
path="x86/nano x86/mmx x86"
path_64="x86_64/nano x86_64"
;;
+ zxc)
+ gcc_cflags_cpu="-mtune=k8"
+ gcc_cflags_arch="-march=k8"
+ path="x86/mmx x86"
+ path_64="x86_64/zxc x86_64/nano x86_64/zen x86_64"
+ ;;
+ kx5000)
+ gcc_cflags_cpu="-mtune=k8"
+ gcc_cflags_arch="-march=k8"
+ path="x86/mmx x86"
+ path_64="x86_64/kx5000 x86_64/nano x86_64/zen x86_64"
+ ;;
+ kx6000)
+ gcc_cflags_cpu="-mtune=k8"
+ gcc_cflags_arch="-march=k8"
+ path="x86/mmx x86"
+ path_64="x86_64/kx6000 x86_64/nano x86_64/zen x86_64"
+ ;;
*)
gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
gcc_cflags_arch="-march=i486"
@@ -2316,7 +2334,8 @@
x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bt1 x86_64/bt2 x86_64/zen
x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
x86_64/coreihwl x86_64/coreibwl x86_64/skylake x86_64/atom
- x86_64/silvermont x86_64/goldmont x86_64/nano"
+ x86_64/silvermont x86_64/goldmont x86_64/nano
+ x86_64/zxc x86_64/kx5000 x86_64/kx6000"
fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
fi
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86/fat/fat.c
--- a/mpn/x86/fat/fat.c Mon Nov 30 09:43:41 2020 +0100
+++ b/mpn/x86/fat/fat.c Wed Dec 09 14:25:52 2020 +0800
@@ -110,6 +110,9 @@
{ "viac3", "CentaurHauls", MAKE_FMS (6, 0) },
{ "viac32", "CentaurHauls", MAKE_FMS (6, 9) },
{ "nano", "CentaurHauls", MAKE_FMS (6, 15) },
+ { "zxc", "CentaurHauls", 0x000006FE },
+ { "kx5000", "CentaurHauls", MAKE_FMS (7, 0x1b) },
+ { "kx6000", "CentaurHauls", MAKE_FMS (7, 0x3b) },
};
static int
@@ -242,7 +245,7 @@
char vendor_string[13];
char dummy_string[12];
long fms;
- int family, model;
+ int family, model, stepping;
__gmpn_cpuid (vendor_string, 0);
vendor_string[12] = 0;
@@ -250,6 +253,7 @@
fms = __gmpn_cpuid (dummy_string, 1);
family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+ stepping = (fms & 0xf);
if (strcmp (vendor_string, "GenuineIntel") == 0)
{
@@ -488,7 +492,8 @@
break;
}
}
- else if (strcmp (vendor_string, "CentaurHauls") == 0)
+ else if (strcmp (vendor_string, "CentaurHauls") == 0 ||
+ strcmp (vendor_string, "  Shanghai  ") == 0)
{
switch (family)
{
@@ -498,11 +503,32 @@
{
TRACE (printf (" viac32\n"));
}
- if (model >= 15)
+ if (model == 15)
{
- TRACE (printf (" nano\n"));
- CPUVEC_SETUP_nano;
+ if (stepping == 0xE)
+ {
+ TRACE(printf(" zxc\n"));
+ }
+ else
+ {
+ TRACE (printf (" nano\n"));
+ CPUVEC_SETUP_nano;
+ }
}
+ if (model == 0x19)
+ {
+ TRACE(printf(" zxc\n"));
+ }
+ break;
+ case 7:
+ if(model == 0x1B)
+ {
+ TRACE(printf(" kx5000\n"));
+ }
+ else if (model == 0x3B)
+ {
+ TRACE(printf(" kx6000\n"));
+ }
break;
}
}
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c Mon Nov 30 09:43:41 2020 +0100
+++ b/mpn/x86_64/fat/fat.c Wed Dec 09 14:25:52 2020 +0800
@@ -94,6 +94,9 @@
{ "zen", "AuthenticAMD", MAKE_FMS (23, 1) },
{ "nano", "CentaurHauls", MAKE_FMS (6, 15) },
+ { "zxc", "CentaurHauls", 0x000006FE },
+ { "kx5000", "CentaurHauls", MAKE_FMS (7, 0x1b) },
+ { "kx6000", "CentaurHauls", MAKE_FMS (7, 0x3b) },
};
static int
@@ -256,7 +259,7 @@
char vendor_string[13];
char dummy_string[12];
long fms;
- int family, model;
+ int family, model, stepping;
TRACE (printf ("__gmpn_cpuvec_init:\n"));
@@ -271,6 +274,7 @@
fms = __gmpn_cpuid (dummy_string, 1);
family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+ stepping = (fms & 0xf);
/* Check extended feature flags */
__gmpn_cpuid (dummy_string, 0x80000001);
@@ -445,14 +449,51 @@
break;
}
}
- else if (strcmp (vendor_string, "CentaurHauls") == 0)
+ else if (strcmp (vendor_string, "CentaurHauls") == 0 ||
+ strcmp (vendor_string, "  Shanghai  ") == 0 )
{
switch (family)
- {
+ {
case 6:
- if (model >= 15)
- CPUVEC_SETUP_nano;
+ if (model == 15)
+ {
+ if (stepping == 0xE)
+ {
+ TRACE(printf(" zxc\n"));
+ CPUVEC_SETUP_zen;
+ CPUVEC_SETUP_nano;
+ CPUVEC_SETUP_zxc;
+ }
+ else
+ {
+ TRACE (printf (" nano\n"));
+ CPUVEC_SETUP_nano;
+ }
+ }
+ if (model == 0x19)
+ {
+ TRACE(printf(" zxc\n"));
+ CPUVEC_SETUP_zen;
+ CPUVEC_SETUP_nano;
+ CPUVEC_SETUP_zxc;
+ }
break;
+ case 7:
+ if(model == 0x1B)
+ {
+ TRACE(printf(" kx5000\n"));
+ CPUVEC_SETUP_zen;
+ CPUVEC_SETUP_nano;
+ CPUVEC_SETUP_kx5000;
+ }
+ else if (model == 0x3B)
+ {
+ TRACE(printf(" kx6000\n"));
+ CPUVEC_SETUP_zen;
+ CPUVEC_SETUP_nano;
+ CPUVEC_SETUP_kx6000;
+ }
+ break;
}
}
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/kx5000/gmp-mparam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/kx5000/gmp-mparam.h Wed Dec 09 14:25:52 2020 +0800
@@ -0,0 +1,172 @@
+/* Zhaoxin kx5000 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+/* Generated by tuneup.c, 2020-11-23, gcc 7.5 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 39
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD 18
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
+
+#define DIV_1_VS_MUL_1_PERCENT 347
+
+#define MUL_TOOM22_THRESHOLD 15
+#define MUL_TOOM33_THRESHOLD 266
+#define MUL_TOOM44_THRESHOLD 674
+#define MUL_TOOM6H_THRESHOLD 1095
+#define MUL_TOOM8H_THRESHOLD 1193
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 439
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 430
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 425
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 498
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 830
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 22
+#define SQR_TOOM3_THRESHOLD 396
+#define SQR_TOOM4_THRESHOLD 987
+#define SQR_TOOM6_THRESHOLD 1095
+#define SQR_TOOM8_THRESHOLD 1193
+
+#define MULMID_TOOM42_THRESHOLD 26
+
+#define MULMOD_BNM1_THRESHOLD 13
+#define SQRMOD_BNM1_THRESHOLD 21
+
+#define MUL_FFT_MODF_THRESHOLD 940 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 940, 5}, { 34, 7}, { 9, 5}, { 37, 6}, \
+ { 19, 5}, { 39, 6}, { 20, 7}, { 11, 6}, \
+ { 24, 7}, { 13, 8}, { 7, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
+ { 15, 7}, { 33, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 25, 7}, { 51, 8}, { 27, 4}, { 464, 8}, \
+ { 31, 5}, { 284, 6}, { 166, 7}, { 84, 6}, \
+ { 170, 7}, { 86, 6}, { 174, 7}, { 90,10}, \
+ { 15, 9}, { 31, 8}, { 63, 7}, { 127, 9}, \
+ { 35, 8}, { 71, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 95, 8}, { 191,10}, { 1024,11}, \
+ { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 69
+#define MUL_FFT_THRESHOLD 3712
+
+#define SQR_FFT_MODF_THRESHOLD 913 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 913, 5}, { 35, 7}, { 9, 5}, { 37, 6}, \
+ { 19, 5}, { 39, 6}, { 20, 7}, { 11, 6}, \
+ { 24, 7}, { 13, 8}, { 7, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
+ { 15, 7}, { 33, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 25, 7}, { 51, 8}, { 27, 7}, { 55, 9}, \
+ { 15, 8}, { 31, 7}, { 63, 8}, { 35, 7}, \
+ { 71, 9}, { 19, 8}, { 39, 7}, { 79, 9}, \
+ { 23, 8}, { 47, 5}, { 382, 4}, { 767, 5}, \
+ { 385, 4}, { 831, 6}, { 224, 4}, { 959, 7}, \
+ { 120, 6}, { 242, 7}, { 122, 6}, { 246, 7}, \
+ { 126, 6}, { 254, 7}, { 131,10}, { 23, 9}, \
+ { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 95,11}, { 2048,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 75
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 53
+#define MULLO_MUL_N_THRESHOLD 6831
+#define SQRLO_BASECASE_THRESHOLD 9
+#define SQRLO_DC_THRESHOLD 21
+#define SQRLO_SQR_THRESHOLD 7246
+
+#define DC_DIV_QR_THRESHOLD 43
+#define DC_DIVAPPR_Q_THRESHOLD 125
+#define DC_BDIV_QR_THRESHOLD 35
+#define DC_BDIV_Q_THRESHOLD 42
+
+#define INV_MULMOD_BNM1_THRESHOLD 28
+#define INV_NEWTON_THRESHOLD 242
+#define INV_APPR_THRESHOLD 140
+
+#define BINV_NEWTON_THRESHOLD 300
+#define REDC_1_TO_REDC_N_THRESHOLD 43
+
+#define MU_DIV_QR_THRESHOLD 3689
+#define MU_DIVAPPR_Q_THRESHOLD 4404
+#define MUPI_DIV_QR_THRESHOLD 66
+#define MU_BDIV_QR_THRESHOLD 3913
+#define MU_BDIV_Q_THRESHOLD 4492
+
+#define POWM_SEC_TABLE 1,28,200,712,1730
+
+#define GET_STR_DC_THRESHOLD 10
+#define GET_STR_PRECOMPUTE_THRESHOLD 18
+#define SET_STR_DC_THRESHOLD 1105
+#define SET_STR_PRECOMPUTE_THRESHOLD 2018
+
+#define FAC_DSC_THRESHOLD 1623
+#define FAC_ODD_THRESHOLD 52
+
+#define MATRIX22_STRASSEN_THRESHOLD 21
+#define HGCD2_DIV1_METHOD 5 /* 1.87% faster than 2 */
+#define HGCD_THRESHOLD 133
+#define HGCD_APPR_THRESHOLD 169
+#define HGCD_REDUCE_THRESHOLD 4818
+#define GCD_DC_THRESHOLD 375
+#define GCDEXT_DC_THRESHOLD 361
+#define JACOBI_BASE_METHOD 4 /* 1.10% faster than 2 */
+
+/* Tuneup completed successfully, took 199 seconds */
+
+
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/kx6000/gmp-mparam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/kx6000/gmp-mparam.h Wed Dec 09 14:25:52 2020 +0800
@@ -0,0 +1,161 @@
+/* Zhaoxin kx6000 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+/* Generated by tuneup.c, 2020-11-18, gcc 7.5 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 36
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD 13
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
+
+#define DIV_1_VS_MUL_1_PERCENT 408
+
+#define MUL_TOOM22_THRESHOLD 18
+#define MUL_TOOM33_THRESHOLD 53
+#define MUL_TOOM44_THRESHOLD 178
+#define MUL_TOOM6H_THRESHOLD 318
+#define MUL_TOOM8H_THRESHOLD 512
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 103
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 115
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 118
+
+#define SQR_BASECASE_THRESHOLD 10
+#define SQR_TOOM2_THRESHOLD 46
+#define SQR_TOOM3_THRESHOLD 71
+#define SQR_TOOM4_THRESHOLD 300
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 381
+
+#define MULMID_TOOM42_THRESHOLD 32
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 8
+
+#define MUL_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 19, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 35, 5}, \
+ { 575, 6}, { 319, 7}, { 175, 8}, { 89,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 1024,11}, \
+ { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 53
+#define MUL_FFT_THRESHOLD 3712
+
+#define SQR_FFT_MODF_THRESHOLD 272 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 272, 5}, { 17, 6}, { 19, 7}, { 10, 6}, \
+ { 21, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 19, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 33, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 47,11}, { 15, 8}, { 125,10}, \
+ { 47,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 42
+#define SQR_FFT_THRESHOLD 2496
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 56
+#define MULLO_MUL_N_THRESHOLD 6633
+#define SQRLO_BASECASE_THRESHOLD 6
+#define SQRLO_DC_THRESHOLD 212
+#define SQRLO_SQR_THRESHOLD 4940
+
+#define DC_DIV_QR_THRESHOLD 76
+#define DC_DIVAPPR_Q_THRESHOLD 212
+#define DC_BDIV_QR_THRESHOLD 74
+#define DC_BDIV_Q_THRESHOLD 126
+
+#define INV_MULMOD_BNM1_THRESHOLD 34
+#define INV_NEWTON_THRESHOLD 212
+#define INV_APPR_THRESHOLD 214
+
+#define BINV_NEWTON_THRESHOLD 232
+#define REDC_1_TO_REDC_N_THRESHOLD 68
+
+#define MU_DIV_QR_THRESHOLD 1210
+#define MU_DIVAPPR_Q_THRESHOLD 1360
+#define MUPI_DIV_QR_THRESHOLD 84
+#define MU_BDIV_QR_THRESHOLD 942
+#define MU_BDIV_Q_THRESHOLD 1187
+
+#define POWM_SEC_TABLE 1,22,82,712,904,1985
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 21
+#define SET_STR_DC_THRESHOLD 686
+#define SET_STR_PRECOMPUTE_THRESHOLD 1947
+
+#define FAC_DSC_THRESHOLD 426
+#define FAC_ODD_THRESHOLD 0 /* always */
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD2_DIV1_METHOD 5 /* 0.53% faster than 2 */
+#define HGCD_THRESHOLD 143
+#define HGCD_APPR_THRESHOLD 175
+#define HGCD_REDUCE_THRESHOLD 3810
+#define GCD_DC_THRESHOLD 435
+#define GCDEXT_DC_THRESHOLD 382
+#define JACOBI_BASE_METHOD 2 /* 6.72% faster than 4 */
+
+/* Tuneup completed successfully, took 65 seconds */
+
+
+
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/zxc/gmp-mparam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/zxc/gmp-mparam.h Wed Dec 09 14:25:52 2020 +0800
@@ -0,0 +1,239 @@
+/* Zhaoxin zxc gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 22
+
+#define MUL_TOOM22_THRESHOLD 27
+#define MUL_TOOM33_THRESHOLD 38
+#define MUL_TOOM44_THRESHOLD 324
+#define MUL_TOOM6H_THRESHOLD 450
+#define MUL_TOOM8H_THRESHOLD 632
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 207
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 211
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 219
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 315
+
+#define SQR_BASECASE_THRESHOLD 10
+#define SQR_TOOM2_THRESHOLD 52
+#define SQR_TOOM3_THRESHOLD 73
+#define SQR_TOOM4_THRESHOLD 387
+#define SQR_TOOM6_THRESHOLD 662
+#define SQR_TOOM8_THRESHOLD 781
+
+#define MULMID_TOOM42_THRESHOLD 32
+
+#define MULMOD_BNM1_THRESHOLD 14
+#define SQRMOD_BNM1_THRESHOLD 15
+
+#define MUL_FFT_MODF_THRESHOLD 376 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 376, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 23, 7}, { 12, 6}, { 25, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 15, 7}, { 31, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 49, 9}, { 27,10}, { 15, 9}, { 43,10}, \
+ { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 83,10}, { 47, 9}, \
+ { 95,10}, { 79,11}, { 47,10}, { 103,12}, \
+ { 31,11}, { 63,10}, { 143,11}, { 79,10}, \
+ { 159, 9}, { 319,10}, { 175,11}, { 95, 9}, \
+ { 383, 8}, { 767,10}, { 207,11}, { 111,12}, \
+ { 63,11}, { 127,10}, { 255,11}, { 143, 9}, \
+ { 575, 8}, { 1151,10}, { 303,11}, { 159,10}, \
+ { 319, 9}, { 639, 8}, { 1279,10}, { 335,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 207,10}, { 415, 9}, { 831, 8}, { 1663,10}, \
+ { 447,13}, { 63,12}, { 127,11}, { 255,10}, \
+ { 511, 9}, { 1023,11}, { 271,10}, { 543, 9}, \
+ { 1087,10}, { 575, 9}, { 1215,12}, { 159,11}, \
+ { 319,10}, { 639, 9}, { 1279,11}, { 335,10}, \
+ { 671, 9}, { 1343,11}, { 351,10}, { 703, 9}, \
+ { 1407,12}, { 191,11}, { 383,10}, { 767, 9}, \
+ { 1535,10}, { 831, 9}, { 1663,12}, { 223,11}, \
+ { 447,10}, { 895,13}, { 127,12}, { 255,11}, \
+ { 511,10}, { 1023,11}, { 543,10}, { 1087,12}, \
+ { 287,11}, { 575,10}, { 1151,11}, { 607,10}, \
+ { 1215,12}, { 319,11}, { 639,10}, { 1279,11}, \
+ { 671,10}, { 1343,12}, { 351,11}, { 703,10}, \
+ { 1407,13}, { 191,12}, { 383,11}, { 767,10}, \
+ { 1535,12}, { 415,11}, { 831,10}, { 1663,12}, \
+ { 447,11}, { 895,10}, { 1791,14}, { 127,13}, \
+ { 255,12}, { 511,11}, { 1023,12}, { 543,11}, \
+ { 1087,12}, { 575,11}, { 1151,12}, { 607,11}, \
+ { 1215,13}, { 319,12}, { 639,11}, { 1279,12}, \
+ { 671,11}, { 1343,12}, { 703,11}, { 1407,13}, \
+ { 383,12}, { 767,11}, { 1535,12}, { 831,11}, \
+ { 1663,13}, { 447,12}, { 895,11}, { 1791,13}, \
+ { 511,12}, { 1023,11}, { 2047,12}, { 1087,13}, \
+ { 575,12}, { 1151,11}, { 2303,12}, { 1215,13}, \
+ { 639,12}, { 1279,11}, { 2559,12}, { 1343,13}, \
+ { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \
+ { 1535,13}, { 831,12}, { 1663,13}, { 895,12}, \
+ { 1791,13}, { 959,14}, { 511,13}, { 1023,12}, \
+ { 2047,13}, { 1087,12}, { 2175,13}, { 1151,12}, \
+ { 2303,13}, { 1215,14}, { 639,13}, { 1279,12}, \
+ { 2559,13}, { 1407,12}, { 2815,13}, { 1471,14}, \
+ { 767,13}, { 1535,12}, { 3071,13}, { 1663,14}, \
+ { 895,13}, { 1791,12}, { 3583,13}, { 1919,15}, \
+ { 511,14}, { 1023,13}, { 2047,12}, { 4095,13}, \
+ { 2175,14}, { 1151,13}, { 2303,12}, { 4607,13}, \
+ { 2431,14}, { 1279,13}, { 2559,12}, { 5119,14}, \
+ { 1407,13}, { 2815,12}, { 5631,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 224
+#define MUL_FFT_THRESHOLD 3520
+
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 340, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
+ { 31,10}, { 63, 9}, { 127,10}, { 71, 9}, \
+ { 143,10}, { 79,11}, { 47,10}, { 95, 9}, \
+ { 191,10}, { 103,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 135, 7}, { 1087, 9}, \
+ { 287,11}, { 79, 9}, { 319, 8}, { 639,10}, \
+ { 167,11}, { 95,10}, { 191, 9}, { 383, 8}, \
+ { 767,11}, { 111,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511, 8}, { 1023,10}, { 271, 9}, \
+ { 543, 8}, { 1087,11}, { 143, 9}, { 575, 8}, \
+ { 1151,10}, { 303, 9}, { 639, 8}, { 1279,10}, \
+ { 335, 9}, { 671,10}, { 351, 9}, { 703,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 207,10}, { 415, 9}, { 831,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511, 9}, { 1023,11}, \
+ { 271,10}, { 543, 9}, { 1087,10}, { 575, 9}, \
+ { 1151,11}, { 303,10}, { 607, 9}, { 1215,12}, \
+ { 159,11}, { 319,10}, { 639, 9}, { 1279,10}, \
+ { 671, 9}, { 1343,11}, { 351,10}, { 703, 9}, \
+ { 1407,12}, { 191,11}, { 383,10}, { 767, 9}, \
+ { 1535,11}, { 415,10}, { 831, 9}, { 1663,12}, \
+ { 223,11}, { 447,10}, { 959,13}, { 127,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
+ { 1087,11}, { 575,10}, { 1215,12}, { 319,11}, \
+ { 639,10}, { 1279,11}, { 671,10}, { 1343,12}, \
+ { 351,11}, { 703,10}, { 1407,13}, { 191,12}, \
+ { 383,11}, { 767,10}, { 1535,12}, { 415,11}, \
+ { 831,10}, { 1663,12}, { 447,11}, { 895,10}, \
+ { 1791,12}, { 479,11}, { 959,14}, { 127,12}, \
+ { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \
+ { 575,11}, { 1151,12}, { 607,11}, { 1215,13}, \
+ { 319,12}, { 639,11}, { 1279,12}, { 671,11}, \
+ { 1343,12}, { 703,11}, { 1407,13}, { 383,12}, \
+ { 767,11}, { 1535,12}, { 831,11}, { 1663,13}, \
+ { 447,12}, { 895,11}, { 1791,12}, { 959,13}, \
+ { 511,12}, { 1023,11}, { 2047,12}, { 1087,13}, \
+ { 575,12}, { 1215,13}, { 639,12}, { 1343,13}, \
+ { 703,12}, { 1407,11}, { 2815,13}, { 767,12}, \
+ { 1535,13}, { 831,12}, { 1663,13}, { 895,12}, \
+ { 1791,13}, { 959,14}, { 511,13}, { 1023,12}, \
+ { 2047,13}, { 1087,12}, { 2175,13}, { 1215,14}, \
+ { 639,13}, { 1279,12}, { 2559,13}, { 1407,12}, \
+ { 2815,14}, { 767,13}, { 1535,12}, { 3071,13}, \
+ { 1663,14}, { 895,13}, { 1791,12}, { 3583,13}, \
+ { 1919,15}, { 511,14}, { 1023,13}, { 2047,12}, \
+ { 4095,13}, { 2175,14}, { 1151,13}, { 2303,12}, \
+ { 4607,14}, { 1279,13}, { 2559,14}, { 1407,13}, \
+ { 2815,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 230
+#define SQR_FFT_THRESHOLD 2496
+
+#define MULLO_BASECASE_THRESHOLD 13
+#define MULLO_DC_THRESHOLD 38
+#define MULLO_MUL_N_THRESHOLD 6633
+
+#define DC_DIV_QR_THRESHOLD 56
+#define DC_DIVAPPR_Q_THRESHOLD 173
+#define DC_BDIV_QR_THRESHOLD 55
+#define DC_BDIV_Q_THRESHOLD 96
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 202
+#define INV_APPR_THRESHOLD 166
+
+#define BINV_NEWTON_THRESHOLD 246
+#define REDC_1_TO_REDC_2_THRESHOLD 7
+#define REDC_2_TO_REDC_N_THRESHOLD 85
+
+#define MU_DIV_QR_THRESHOLD 1499
+#define MU_DIVAPPR_Q_THRESHOLD 1652
+#define MUPI_DIV_QR_THRESHOLD 83
+#define MU_BDIV_QR_THRESHOLD 1210
+#define MU_BDIV_Q_THRESHOLD 1499
+
+#define POWM_SEC_TABLE 1,28,129,642,2387
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 127
+#define HGCD_APPR_THRESHOLD 214
+#define HGCD_REDUCE_THRESHOLD 2479
+#define GCD_DC_THRESHOLD 487
+#define GCDEXT_DC_THRESHOLD 505
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 802
+#define SET_STR_PRECOMPUTE_THRESHOLD 2042
+
+#define FAC_DSC_THRESHOLD 1737
+#define FAC_ODD_THRESHOLD 44
More information about the gmp-devel
mailing list