[PATCH] Add Zhaoxin x86 processor support

DylanFan-oc DylanFan-oc at zhaoxin.com
Mon Dec 14 01:53:51 UTC 2020


Add Zhaoxin x86 processor support

This patch adds support for Zhaoxin processors. The CPU vendor IDs
"Shanghai" and "CentaurHauls" now belong to Zhaoxin, and this part
of the code is now maintained and upgraded by Zhaoxin.

Background:
Shanghai Zhaoxin Semiconductor Co., Ltd. ("Zhaoxin") was established in 2013
and is headquartered in Zhangjiang, Shanghai, China. Zhaoxin aims to provide
general-purpose x86 processors.

The related Zhaoxin Linux kernel patch can be found at
https://lore.kernel.org/lkml/01042674b2f741b2aed1f797359bdffb@zhaoxin.com

 acinclude.m4                   |    2 +-
 config.guess                   |   20 ++-
 config.sub                     |    2 +-
 configure.ac                   |   21 +++-
 mpn/x86/fat/fat.c              |   36 +++++-
 mpn/x86_64/fat/fat.c           |   51 +++++++-
 mpn/x86_64/kx5000/gmp-mparam.h |  172 +++++++++++++++++++++++++++++
 mpn/x86_64/kx6000/gmp-mparam.h |  161 +++++++++++++++++++++++++++
 mpn/x86_64/zxc/gmp-mparam.h    |  239 +++++++++++++++++++++++++++++++++++++++++
 9 files changed, 688 insertions(+), 16 deletions(-)

diff -r 1a5f0f394f98 -r a6637d676396 acinclude.m4
--- a/acinclude.m4    Mon Nov 30 09:43:41 2020 +0100
+++ b/acinclude.m4    Wed Dec 09 14:25:52 2020 +0800
@@ -63,7 +63,7 @@
 [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])

 define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | zxc-*-* | kx5000-*-* | kx6000-*-*]])

 dnl  GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
 dnl  ---------------------------------
diff -r 1a5f0f394f98 -r a6637d676396 config.guess
--- a/config.guess    Mon Nov 30 09:43:41 2020 +0100
+++ b/config.guess    Wed Dec 09 14:25:52 2020 +0800
@@ -847,7 +847,7 @@
   char vendor_string[13];
   char feature_string[12];
   long fms;
-  int family, model;
+  int family, model, stepping;
   const char *modelstr, *suffix;
   int cpu_64bit = 0, cpu_avx = 0;
   int cpuid_64bit, cpuid_avx, cpuid_osxsave;
@@ -859,6 +859,7 @@

   family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
   model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+  stepping = (fms & 0xf);

   cpuid_avx     = (feature_string[11] >> 4) & 1;
   cpuid_osxsave = (feature_string[11] >> 3) & 1;
@@ -1018,15 +1019,28 @@
     {
       /* Should recognize Cyrix' processors too.  */
     }
-  else if (strcmp (vendor_string, "CentaurHauls") == 0)
+  else if (strcmp (vendor_string, "CentaurHauls") == 0 ||
+           strcmp (vendor_string, "  Shanghai  ") == 0)
     {
       switch (family)
     {
     case 6:
       if (model < 9)    modelstr = "viac3";
       else if (model < 15)    modelstr = "viac32";
-      else            cpu_64bit = 1, modelstr = "nano";
+      else if (model == 15)
+          {
+            if (stepping == 0xE)
+              cpu_64bit=1, modelstr = "zxc";
+            else cpu_64bit = 1, modelstr = "nano";
+          }
+      else cpu_64bit=1, modelstr = "zxc";
       break;
+        case 7:
+          if(model == 0x1B)
+            cpu_64bit = 1, cpu_avx = 1, modelstr = "kx5000";
+          else if(model == 0x3B)
+            cpu_64bit = 1, cpu_avx = 1, modelstr = "kx6000";
+          break;
     }
     }

diff -r 1a5f0f394f98 -r a6637d676396 config.sub
--- a/config.sub    Mon Nov 30 09:43:41 2020 +0100
+++ b/config.sub    Wed Dec 09 14:25:52 2020 +0800
@@ -102,7 +102,7 @@
   test_cpu=ia64 ;;
 pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
   test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
+athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen* | nano | nehalem | westmere | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding | zxc | kx5000 | kx6000)
   test_cpu=x86_64 ;;
 power[2-9] | power2sc)
   test_cpu=power ;;
diff -r 1a5f0f394f98 -r a6637d676396 configure.ac
--- a/configure.ac    Mon Nov 30 09:43:41 2020 +0100
+++ b/configure.ac    Wed Dec 09 14:25:52 2020 +0800
@@ -1971,6 +1971,24 @@
     path="x86/nano x86/mmx x86"
     path_64="x86_64/nano x86_64"
     ;;
+      zxc)
+    gcc_cflags_cpu="-mtune=k8"
+    gcc_cflags_arch="-march=k8"
+    path="x86/mmx x86"
+    path_64="x86_64/zxc x86_64/nano x86_64/zen x86_64"
+    ;;
+      kx5000)
+      gcc_cflags_cpu="-mtune=k8"
+    gcc_cflags_arch="-march=k8"
+    path="x86/mmx x86"
+    path_64="x86_64/kx5000 x86_64/nano x86_64/zen x86_64"
+    ;;
+      kx6000)
+    gcc_cflags_cpu="-mtune=k8"
+    gcc_cflags_arch="-march=k8"
+    path="x86/mmx x86"
+    path_64="x86_64/kx6000 x86_64/nano x86_64/zen x86_64"
+     ;;
       *)
     gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
     gcc_cflags_arch="-march=i486"
@@ -2316,7 +2334,8 @@
           x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bt1 x86_64/bt2 x86_64/zen
           x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
           x86_64/coreihwl x86_64/coreibwl x86_64/skylake x86_64/atom
-          x86_64/silvermont x86_64/goldmont x86_64/nano"
+          x86_64/silvermont x86_64/goldmont x86_64/nano
+          x86_64/zxc x86_64/kx5000 x86_64/kx6000"
     fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
       fi

diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86/fat/fat.c
--- a/mpn/x86/fat/fat.c    Mon Nov 30 09:43:41 2020 +0100
+++ b/mpn/x86/fat/fat.c    Wed Dec 09 14:25:52 2020 +0800
@@ -110,6 +110,9 @@
   { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
   { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
   { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
+  { "zxc",        "CentaurHauls", 0x000006FE },
+  { "kx5000",     "CentaurHauls", MAKE_FMS (7, 0x1b) },
+  { "kx6000",     "CentaurHauls", MAKE_FMS (7, 0x3b) },
 };

 static int
@@ -242,7 +245,7 @@
       char vendor_string[13];
       char dummy_string[12];
       long fms;
-      int family, model;
+      int family, model, stepping;

       __gmpn_cpuid (vendor_string, 0);
       vendor_string[12] = 0;
@@ -250,6 +253,7 @@
       fms = __gmpn_cpuid (dummy_string, 1);
       family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
       model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+      stepping = (fms & 0xf);

       if (strcmp (vendor_string, "GenuineIntel") == 0)
         {
@@ -488,7 +492,8 @@
           break;
             }
         }
-      else if (strcmp (vendor_string, "CentaurHauls") == 0)
+      else if (strcmp (vendor_string, "CentaurHauls") == 0 ||
+               strcmp (vendor_string, "  Shanghai  ") == 0)
         {
           switch (family)
             {
@@ -498,11 +503,32 @@
                 {
                   TRACE (printf ("  viac32\n"));
                 }
-          if (model >= 15)
+          if (model == 15)
         {
-                  TRACE (printf ("  nano\n"));
-          CPUVEC_SETUP_nano;
+            if (stepping == 0xE)
+              {
+                TRACE(printf("  zxc\n"));
+          }
+        else
+              {
+                    TRACE (printf ("  nano\n"));
+            CPUVEC_SETUP_nano;
+          }
         }
+              if (model == 0x19)
+                {
+                  TRACE(printf("  zxc\n"));
+                }
+              break;
+        case 7:
+              if(model == 0x1B)
+              {
+                  TRACE(printf("  kx5000\n"));
+              }
+              else if (model == 0x3B)
+              {
+                TRACE(printf("  kx6000\n"));
+              }
               break;
             }
         }
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c    Mon Nov 30 09:43:41 2020 +0100
+++ b/mpn/x86_64/fat/fat.c    Wed Dec 09 14:25:52 2020 +0800
@@ -94,6 +94,9 @@
   { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },

   { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
+  { "zxc",        "CentaurHauls", 0x000006FE },
+  { "kx5000",     "CentaurHauls", MAKE_FMS (7, 0x1b) },
+  { "kx6000",     "CentaurHauls", MAKE_FMS (7, 0x3b) },
 };

 static int
@@ -256,7 +259,7 @@
   char vendor_string[13];
   char dummy_string[12];
   long fms;
-  int family, model;
+  int family, model, stepping;

   TRACE (printf ("__gmpn_cpuvec_init:\n"));

@@ -271,6 +274,7 @@
   fms = __gmpn_cpuid (dummy_string, 1);
   family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
   model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+  stepping = (fms & 0xf);

   /* Check extended feature flags */
   __gmpn_cpuid (dummy_string, 0x80000001);
@@ -445,14 +449,51 @@
       break;
     }
     }
-  else if (strcmp (vendor_string, "CentaurHauls") == 0)
+  else if (strcmp (vendor_string, "CentaurHauls") == 0 ||
+           strcmp (vendor_string, "  Shanghai  ") == 0 )
     {
       switch (family)
-    {
+       {
     case 6:
-      if (model >= 15)
-        CPUVEC_SETUP_nano;
+      if (model == 15)
+            {
+            if (stepping == 0xE)
+              {
+                TRACE(printf("  zxc\n"));
+                CPUVEC_SETUP_zen;
+                CPUVEC_SETUP_nano;
+                CPUVEC_SETUP_zxc;
+              }
+            else
+              {
+                TRACE (printf ("  nano\n"));
+                CPUVEC_SETUP_nano;
+              }
+            }
+          if (model == 0x19)
+            {
+              TRACE(printf("  zxc\n"));
+              CPUVEC_SETUP_zen;
+              CPUVEC_SETUP_nano;
+              CPUVEC_SETUP_zxc;
+            }
       break;
+    case 7:
+          if(model == 0x1B)
+            {
+              TRACE(printf("  kx5000\n"));
+              CPUVEC_SETUP_zen;
+              CPUVEC_SETUP_nano;
+              CPUVEC_SETUP_kx5000;
+            }
+          else if (model == 0x3B)
+            {
+              TRACE(printf("  kx6000\n"));
+              CPUVEC_SETUP_zen;
+              CPUVEC_SETUP_nano;
+              CPUVEC_SETUP_kx6000;
+            }
+          break;
     }
     }

diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/kx5000/gmp-mparam.h
--- /dev/null    Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/kx5000/gmp-mparam.h    Wed Dec 09 14:25:52 2020 +0800
@@ -0,0 +1,172 @@
+/* Zhaoxin kx5000 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+/* Generated by tuneup.c, 2020-11-23, gcc 7.5 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        39
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1_NORM_THRESHOLD              1
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD              18
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
+
+#define DIV_1_VS_MUL_1_PERCENT             347
+
+#define MUL_TOOM22_THRESHOLD                15
+#define MUL_TOOM33_THRESHOLD               266
+#define MUL_TOOM44_THRESHOLD               674
+#define MUL_TOOM6H_THRESHOLD              1095
+#define MUL_TOOM8H_THRESHOLD              1193
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     439
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     430
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     425
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     498
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     830
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 22
+#define SQR_TOOM3_THRESHOLD                396
+#define SQR_TOOM4_THRESHOLD                987
+#define SQR_TOOM6_THRESHOLD               1095
+#define SQR_TOOM8_THRESHOLD               1193
+
+#define MULMID_TOOM42_THRESHOLD             26
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               21
+
+#define MUL_FFT_MODF_THRESHOLD             940  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    940, 5}, {     34, 7}, {      9, 5}, {     37, 6}, \
+    {     19, 5}, {     39, 6}, {     20, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     15, 7}, {     33, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     25, 7}, {     51, 8}, {     27, 4}, {    464, 8}, \
+    {     31, 5}, {    284, 6}, {    166, 7}, {     84, 6}, \
+    {    170, 7}, {     86, 6}, {    174, 7}, {     90,10}, \
+    {     15, 9}, {     31, 8}, {     63, 7}, {    127, 9}, \
+    {     35, 8}, {     71, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71,10}, {     39, 9}, {     79,10}, \
+    {     47, 9}, {     95, 8}, {    191,10}, {   1024,11}, \
+    {   2048,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 69
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             913  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    913, 5}, {     35, 7}, {      9, 5}, {     37, 6}, \
+    {     19, 5}, {     39, 6}, {     20, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     15, 7}, {     33, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     25, 7}, {     51, 8}, {     27, 7}, {     55, 9}, \
+    {     15, 8}, {     31, 7}, {     63, 8}, {     35, 7}, \
+    {     71, 9}, {     19, 8}, {     39, 7}, {     79, 9}, \
+    {     23, 8}, {     47, 5}, {    382, 4}, {    767, 5}, \
+    {    385, 4}, {    831, 6}, {    224, 4}, {    959, 7}, \
+    {    120, 6}, {    242, 7}, {    122, 6}, {    246, 7}, \
+    {    126, 6}, {    254, 7}, {    131,10}, {     23, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71,10}, {     39, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {   2048,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 75
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  53
+#define MULLO_MUL_N_THRESHOLD             6831
+#define SQRLO_BASECASE_THRESHOLD             9
+#define SQRLO_DC_THRESHOLD                  21
+#define SQRLO_SQR_THRESHOLD               7246
+
+#define DC_DIV_QR_THRESHOLD                 43
+#define DC_DIVAPPR_Q_THRESHOLD             125
+#define DC_BDIV_QR_THRESHOLD                35
+#define DC_BDIV_Q_THRESHOLD                 42
+
+#define INV_MULMOD_BNM1_THRESHOLD           28
+#define INV_NEWTON_THRESHOLD               242
+#define INV_APPR_THRESHOLD                 140
+
+#define BINV_NEWTON_THRESHOLD              300
+#define REDC_1_TO_REDC_N_THRESHOLD          43
+
+#define MU_DIV_QR_THRESHOLD               3689
+#define MU_DIVAPPR_Q_THRESHOLD            4404
+#define MUPI_DIV_QR_THRESHOLD               66
+#define MU_BDIV_QR_THRESHOLD              3913
+#define MU_BDIV_Q_THRESHOLD               4492
+
+#define POWM_SEC_TABLE  1,28,200,712,1730
+
+#define GET_STR_DC_THRESHOLD                10
+#define GET_STR_PRECOMPUTE_THRESHOLD        18
+#define SET_STR_DC_THRESHOLD              1105
+#define SET_STR_PRECOMPUTE_THRESHOLD      2018
+
+#define FAC_DSC_THRESHOLD                 1623
+#define FAC_ODD_THRESHOLD                   52
+
+#define MATRIX22_STRASSEN_THRESHOLD         21
+#define HGCD2_DIV1_METHOD                    5  /* 1.87% faster than 2 */
+#define HGCD_THRESHOLD                     133
+#define HGCD_APPR_THRESHOLD                169
+#define HGCD_REDUCE_THRESHOLD             4818
+#define GCD_DC_THRESHOLD                   375
+#define GCDEXT_DC_THRESHOLD                361
+#define JACOBI_BASE_METHOD                   4  /* 1.10% faster than 2 */
+
+/* Tuneup completed successfully, took 199 seconds */
+
+
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/kx6000/gmp-mparam.h
--- /dev/null    Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/kx6000/gmp-mparam.h    Wed Dec 09 14:25:52 2020 +0800
@@ -0,0 +1,161 @@
+/* Zhaoxin kx6000 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+/* Generated by tuneup.c, 2020-11-18, gcc 7.5 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        36
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1_NORM_THRESHOLD              1
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD              13
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
+
+#define DIV_1_VS_MUL_1_PERCENT             408
+
+#define MUL_TOOM22_THRESHOLD                18
+#define MUL_TOOM33_THRESHOLD                53
+#define MUL_TOOM44_THRESHOLD               178
+#define MUL_TOOM6H_THRESHOLD               318
+#define MUL_TOOM8H_THRESHOLD               512
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     103
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     115
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     118
+
+#define SQR_BASECASE_THRESHOLD              10
+#define SQR_TOOM2_THRESHOLD                 46
+#define SQR_TOOM3_THRESHOLD                 71
+#define SQR_TOOM4_THRESHOLD                300
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                381
+
+#define MULMID_TOOM42_THRESHOLD             32
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD                8
+
+#define MUL_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     19, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     35, 5}, \
+    {    575, 6}, {    319, 7}, {    175, 8}, {     89,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {   1024,11}, \
+    {   2048,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 53
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    272, 5}, {     17, 6}, {     19, 7}, {     10, 6}, \
+    {     21, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     19, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     47,11}, {     15, 8}, {    125,10}, \
+    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 42
+#define SQR_FFT_THRESHOLD                 2496
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  56
+#define MULLO_MUL_N_THRESHOLD             6633
+#define SQRLO_BASECASE_THRESHOLD             6
+#define SQRLO_DC_THRESHOLD                 212
+#define SQRLO_SQR_THRESHOLD               4940
+
+#define DC_DIV_QR_THRESHOLD                 76
+#define DC_DIVAPPR_Q_THRESHOLD             212
+#define DC_BDIV_QR_THRESHOLD                74
+#define DC_BDIV_Q_THRESHOLD                126
+
+#define INV_MULMOD_BNM1_THRESHOLD           34
+#define INV_NEWTON_THRESHOLD               212
+#define INV_APPR_THRESHOLD                 214
+
+#define BINV_NEWTON_THRESHOLD              232
+#define REDC_1_TO_REDC_N_THRESHOLD          68
+
+#define MU_DIV_QR_THRESHOLD               1210
+#define MU_DIVAPPR_Q_THRESHOLD            1360
+#define MUPI_DIV_QR_THRESHOLD               84
+#define MU_BDIV_QR_THRESHOLD               942
+#define MU_BDIV_Q_THRESHOLD               1187
+
+#define POWM_SEC_TABLE  1,22,82,712,904,1985
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        21
+#define SET_STR_DC_THRESHOLD               686
+#define SET_STR_PRECOMPUTE_THRESHOLD      1947
+
+#define FAC_DSC_THRESHOLD                  426
+#define FAC_ODD_THRESHOLD                    0  /* always */
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD2_DIV1_METHOD                    5  /* 0.53% faster than 2 */
+#define HGCD_THRESHOLD                     143
+#define HGCD_APPR_THRESHOLD                175
+#define HGCD_REDUCE_THRESHOLD             3810
+#define GCD_DC_THRESHOLD                   435
+#define GCDEXT_DC_THRESHOLD                382
+#define JACOBI_BASE_METHOD                   2  /* 6.72% faster than 4 */
+
+/* Tuneup completed successfully, took 65 seconds */
+
+
+
diff -r 1a5f0f394f98 -r a6637d676396 mpn/x86_64/zxc/gmp-mparam.h
--- /dev/null    Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/zxc/gmp-mparam.h    Wed Dec 09 14:25:52 2020 +0800
@@ -0,0 +1,239 @@
+/* Zhaoxin zxc gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1_NORM_THRESHOLD              1
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           22
+
+#define MUL_TOOM22_THRESHOLD                27
+#define MUL_TOOM33_THRESHOLD                38
+#define MUL_TOOM44_THRESHOLD               324
+#define MUL_TOOM6H_THRESHOLD               450
+#define MUL_TOOM8H_THRESHOLD               632
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     207
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     211
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     219
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     315
+
+#define SQR_BASECASE_THRESHOLD              10
+#define SQR_TOOM2_THRESHOLD                 52
+#define SQR_TOOM3_THRESHOLD                 73
+#define SQR_TOOM4_THRESHOLD                387
+#define SQR_TOOM6_THRESHOLD                662
+#define SQR_TOOM8_THRESHOLD                781
+
+#define MULMID_TOOM42_THRESHOLD             32
+
+#define MULMOD_BNM1_THRESHOLD               14
+#define SQRMOD_BNM1_THRESHOLD               15
+
+#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    376, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     49, 9}, {     27,10}, {     15, 9}, {     43,10}, \
+    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
+    {     95,10}, {     79,11}, {     47,10}, {    103,12}, \
+    {     31,11}, {     63,10}, {    143,11}, {     79,10}, \
+    {    159, 9}, {    319,10}, {    175,11}, {     95, 9}, \
+    {    383, 8}, {    767,10}, {    207,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255,11}, {    143, 9}, \
+    {    575, 8}, {   1151,10}, {    303,11}, {    159,10}, \
+    {    319, 9}, {    639, 8}, {   1279,10}, {    335,12}, \
+    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
+    {    207,10}, {    415, 9}, {    831, 8}, {   1663,10}, \
+    {    447,13}, {     63,12}, {    127,11}, {    255,10}, \
+    {    511, 9}, {   1023,11}, {    271,10}, {    543, 9}, \
+    {   1087,10}, {    575, 9}, {   1215,12}, {    159,11}, \
+    {    319,10}, {    639, 9}, {   1279,11}, {    335,10}, \
+    {    671, 9}, {   1343,11}, {    351,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    383,10}, {    767, 9}, \
+    {   1535,10}, {    831, 9}, {   1663,12}, {    223,11}, \
+    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    607,10}, \
+    {   1215,12}, {    319,11}, {    639,10}, {   1279,11}, \
+    {    671,10}, {   1343,12}, {    351,11}, {    703,10}, \
+    {   1407,13}, {    191,12}, {    383,11}, {    767,10}, \
+    {   1535,12}, {    415,11}, {    831,10}, {   1663,12}, \
+    {    447,11}, {    895,10}, {   1791,14}, {    127,13}, \
+    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
+    {   1087,12}, {    575,11}, {   1151,12}, {    607,11}, \
+    {   1215,13}, {    319,12}, {    639,11}, {   1279,12}, \
+    {    671,11}, {   1343,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
+    {   1663,13}, {    447,12}, {    895,11}, {   1791,13}, \
+    {    511,12}, {   1023,11}, {   2047,12}, {   1087,13}, \
+    {    575,12}, {   1151,11}, {   2303,12}, {   1215,13}, \
+    {    639,12}, {   1279,11}, {   2559,12}, {   1343,13}, \
+    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    895,12}, \
+    {   1791,13}, {    959,14}, {    511,13}, {   1023,12}, \
+    {   2047,13}, {   1087,12}, {   2175,13}, {   1151,12}, \
+    {   2303,13}, {   1215,14}, {    639,13}, {   1279,12}, \
+    {   2559,13}, {   1407,12}, {   2815,13}, {   1471,14}, \
+    {    767,13}, {   1535,12}, {   3071,13}, {   1663,14}, \
+    {    895,13}, {   1791,12}, {   3583,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2047,12}, {   4095,13}, \
+    {   2175,14}, {   1151,13}, {   2303,12}, {   4607,13}, \
+    {   2431,14}, {   1279,13}, {   2559,12}, {   5119,14}, \
+    {   1407,13}, {   2815,12}, {   5631,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 224
+#define MUL_FFT_THRESHOLD                 3520
+
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    340, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     63, 9}, {    127,10}, {     71, 9}, \
+    {    143,10}, {     79,11}, {     47,10}, {     95, 9}, \
+    {    191,10}, {    103,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    135, 7}, {   1087, 9}, \
+    {    287,11}, {     79, 9}, {    319, 8}, {    639,10}, \
+    {    167,11}, {     95,10}, {    191, 9}, {    383, 8}, \
+    {    767,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511, 8}, {   1023,10}, {    271, 9}, \
+    {    543, 8}, {   1087,11}, {    143, 9}, {    575, 8}, \
+    {   1151,10}, {    303, 9}, {    639, 8}, {   1279,10}, \
+    {    335, 9}, {    671,10}, {    351, 9}, {    703,12}, \
+    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
+    {    207,10}, {    415, 9}, {    831,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511, 9}, {   1023,11}, \
+    {    271,10}, {    543, 9}, {   1087,10}, {    575, 9}, \
+    {   1151,11}, {    303,10}, {    607, 9}, {   1215,12}, \
+    {    159,11}, {    319,10}, {    639, 9}, {   1279,10}, \
+    {    671, 9}, {   1343,11}, {    351,10}, {    703, 9}, \
+    {   1407,12}, {    191,11}, {    383,10}, {    767, 9}, \
+    {   1535,11}, {    415,10}, {    831, 9}, {   1663,12}, \
+    {    223,11}, {    447,10}, {    959,13}, {    127,12}, \
+    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
+    {   1087,11}, {    575,10}, {   1215,12}, {    319,11}, \
+    {    639,10}, {   1279,11}, {    671,10}, {   1343,12}, \
+    {    351,11}, {    703,10}, {   1407,13}, {    191,12}, \
+    {    383,11}, {    767,10}, {   1535,12}, {    415,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,10}, \
+    {   1791,12}, {    479,11}, {    959,14}, {    127,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
+    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
+    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
+    {    447,12}, {    895,11}, {   1791,12}, {    959,13}, \
+    {    511,12}, {   1023,11}, {   2047,12}, {   1087,13}, \
+    {    575,12}, {   1215,13}, {    639,12}, {   1343,13}, \
+    {    703,12}, {   1407,11}, {   2815,13}, {    767,12}, \
+    {   1535,13}, {    831,12}, {   1663,13}, {    895,12}, \
+    {   1791,13}, {    959,14}, {    511,13}, {   1023,12}, \
+    {   2047,13}, {   1087,12}, {   2175,13}, {   1215,14}, \
+    {    639,13}, {   1279,12}, {   2559,13}, {   1407,12}, \
+    {   2815,14}, {    767,13}, {   1535,12}, {   3071,13}, \
+    {   1663,14}, {    895,13}, {   1791,12}, {   3583,13}, \
+    {   1919,15}, {    511,14}, {   1023,13}, {   2047,12}, \
+    {   4095,13}, {   2175,14}, {   1151,13}, {   2303,12}, \
+    {   4607,14}, {   1279,13}, {   2559,14}, {   1407,13}, \
+    {   2815,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 230
+#define SQR_FFT_THRESHOLD                 2496
+
+#define MULLO_BASECASE_THRESHOLD            13
+#define MULLO_DC_THRESHOLD                  38
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 56
+#define DC_DIVAPPR_Q_THRESHOLD             173
+#define DC_BDIV_QR_THRESHOLD                55
+#define DC_BDIV_Q_THRESHOLD                 96
+
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD               202
+#define INV_APPR_THRESHOLD                 166
+
+#define BINV_NEWTON_THRESHOLD              246
+#define REDC_1_TO_REDC_2_THRESHOLD           7
+#define REDC_2_TO_REDC_N_THRESHOLD          85
+
+#define MU_DIV_QR_THRESHOLD               1499
+#define MU_DIVAPPR_Q_THRESHOLD            1652
+#define MUPI_DIV_QR_THRESHOLD               83
+#define MU_BDIV_QR_THRESHOLD              1210
+#define MU_BDIV_Q_THRESHOLD               1499
+
+#define POWM_SEC_TABLE  1,28,129,642,2387
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     127
+#define HGCD_APPR_THRESHOLD                214
+#define HGCD_REDUCE_THRESHOLD             2479
+#define GCD_DC_THRESHOLD                   487
+#define GCDEXT_DC_THRESHOLD                505
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD               802
+#define SET_STR_PRECOMPUTE_THRESHOLD      2042
+
+#define FAC_DSC_THRESHOLD                 1737
+#define FAC_ODD_THRESHOLD                   44




More information about the gmp-devel mailing list