[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org
Fri Jan 24 14:53:51 UTC 2014


details:   /var/hg/gmp/rev/de7e929dba06
changeset: 16250:de7e929dba06
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Jan 24 14:43:52 2014 +0100
description:
Align loop, add z10 cycle numbers.

details:   /var/hg/gmp/rev/f65156b5764b
changeset: 16251:f65156b5764b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Jan 24 14:55:40 2014 +0100
description:
Add z10 cycle numbers.

details:   /var/hg/gmp/rev/c6f9a3ad3e7f
changeset: 16252:c6f9a3ad3e7f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Jan 24 14:56:57 2014 +0100
description:
Amend last AMD change.

details:   /var/hg/gmp/rev/c608a3f48a7c
changeset: 16253:c608a3f48a7c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Jan 24 15:52:58 2014 +0100
description:
(fake_cpuid_table): Use proper steamroller and excavator values.

details:   /var/hg/gmp/rev/ad7a0224eacb
changeset: 16254:ad7a0224eacb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Jan 24 15:53:31 2014 +0100
description:
Retune.

details:   /var/hg/gmp/rev/486d03cb7507
changeset: 16255:486d03cb7507
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Jan 24 15:53:41 2014 +0100
description:
Update ChangeLog.

diffstat:

 ChangeLog                    |   12 ++
 config.guess                 |    8 +-
 mpn/alpha/ev5/gmp-mparam.h   |  192 ++++++++++++++++++------------------------
 mpn/s390_64/addmul_1.asm     |    2 +-
 mpn/s390_64/aorrlsh1_n.asm   |    2 +-
 mpn/s390_64/aors_n.asm       |    2 +-
 mpn/s390_64/bdiv_dbm1c.asm   |    2 +-
 mpn/s390_64/copyd.asm        |    2 +-
 mpn/s390_64/copyi.asm        |    2 +-
 mpn/s390_64/invert_limb.asm  |    2 +-
 mpn/s390_64/logops_n.asm     |    2 +-
 mpn/s390_64/lshift.asm       |    6 +-
 mpn/s390_64/lshiftc.asm      |    6 +-
 mpn/s390_64/mod_34lsub1.asm  |    4 +-
 mpn/s390_64/mul_1.asm        |    2 +-
 mpn/s390_64/mul_basecase.asm |    2 +-
 mpn/s390_64/rshift.asm       |    6 +-
 mpn/s390_64/sqr_basecase.asm |    2 +-
 mpn/s390_64/sublsh1_n.asm    |    2 +-
 mpn/s390_64/submul_1.asm     |    2 +-
 mpn/x86_64/fat/fat.c         |    4 +-
 21 files changed, 125 insertions(+), 139 deletions(-)

diffs (truncated from 558 to 300 lines):

diff -r 6b3e031bcd37 -r 486d03cb7507 ChangeLog
--- a/ChangeLog	Thu Jan 23 21:17:43 2014 +0100
+++ b/ChangeLog	Fri Jan 24 15:53:41 2014 +0100
@@ -1,3 +1,15 @@
+2014-01-24  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/fat/fat.c (fake_cpuid_table): Use proper steamroller and
+	excavator values.
+
+	* config.guess: Amend last AMD change.
+
+	* mpn/s390_64/lshift.asm: Align loop.
+	* mpn/s390_64/rshift.asm: Likewise.
+	* mpn/s390_64/lshiftc.asm: Likewise.
+	* mpn/s390_64: Add z10 cycle numbers.
+
 2014-01-23 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* printf/repl-vsnprintf.c: Feed case 'z' in switch (type) with case 'z'
diff -r 6b3e031bcd37 -r 486d03cb7507 config.guess
--- a/config.guess	Thu Jan 23 21:17:43 2014 +0100
+++ b/config.guess	Fri Jan 24 15:53:41 2014 +0100
@@ -844,13 +844,13 @@
 	  break;
 	case 21:		/* Bulldozer */
 	  cpu_64bit = 1;
-	  if (model == 1)
+	  if (model <= 1)
 	    modelstr = "bulldozer";
-	  else if (model == 2)
+	  else if (model < 0x20)	/* really 2, [0x10-0x20) */
 	    modelstr = "piledriver";
-	  else if (model == 3)
+	  else if (model < 0x40)	/* really [0x30-0x40) */
 	    modelstr = "steamroller";
-	  else /* if (model < 0x9) */
+	  else				/* really [0x60-0x70) */
 	    modelstr = "excavator";
 	  break;
 	case 22:		/* Jaguar, an improved bobcat */
diff -r 6b3e031bcd37 -r 486d03cb7507 mpn/alpha/ev5/gmp-mparam.h
--- a/mpn/alpha/ev5/gmp-mparam.h	Thu Jan 23 21:17:43 2014 +0100
+++ b/mpn/alpha/ev5/gmp-mparam.h	Fri Jan 24 15:53:41 2014 +0100
@@ -29,156 +29,130 @@
 #define MOD_1_1P_METHOD                      2
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        78
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        20
 #define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
 #define USE_PREINV_DIVREM_1                  1  /* preinv always */
-#define DIV_QR_2_PI2_THRESHOLD              25
+#define DIV_QR_1N_PI1_METHOD                 1
+#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
 #define DIVEXACT_1_THRESHOLD                 0  /* always */
 #define BMOD_1_TO_MOD_1_THRESHOLD           80
 
 #define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                66
+#define MUL_TOOM33_THRESHOLD                45
 #define MUL_TOOM44_THRESHOLD               118
 #define MUL_TOOM6H_THRESHOLD               157
 #define MUL_TOOM8H_THRESHOLD               236
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      84
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      57
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      77
 #define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
 #define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
 #define MUL_TOOM43_TO_TOOM54_THRESHOLD      66
 
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 26
-#define SQR_TOOM3_THRESHOLD                 77
-#define SQR_TOOM4_THRESHOLD                130
-#define SQR_TOOM6_THRESHOLD                173
-#define SQR_TOOM8_THRESHOLD                260
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 22
+#define SQR_TOOM3_THRESHOLD                 74
+#define SQR_TOOM4_THRESHOLD                171
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                357
 
-#define MULMID_TOOM42_THRESHOLD             20
+#define MULMID_TOOM42_THRESHOLD             18
 
-#define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               13
+#define MULMOD_BNM1_THRESHOLD               10
+#define SQRMOD_BNM1_THRESHOLD               12
 
-#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             284  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    244, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
+  { {    284, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
     {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 7}, {     27, 9}, {      7, 8}, {     21, 9}, \
-    {     11, 8}, {     25,10}, {      7, 9}, {     15, 8}, \
-    {     33, 9}, {     23,10}, {     15, 9}, {     39,10}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {     13, 8}, \
+    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23, 8}, {     47,10}, {     15, 9}, {     39,10}, \
     {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
     {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     63, 9}, \
-    {    127,10}, {     71, 9}, {    143, 8}, {    287,10}, \
-    {     79,11}, {     47,10}, {     95, 9}, {    191,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
-    {    319, 8}, {    639,10}, {    175,11}, {     95,10}, \
-    {    191, 9}, {    383,10}, {    207, 9}, {    415,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319,11}, {    175,10}, {    351,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
-    {    223,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    287,10}, {    575,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    415,12}, {    223,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
-    {    575,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    575,13}, {    319,12}, {    703,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
-    {   1151,13}, {    703,12}, {   1407,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 141
+    {     95,10}, {     55,11}, {     31,10}, {     63, 8}, \
+    {    255, 7}, {    511,10}, {     71, 9}, {    143, 8}, \
+    {    287, 7}, {    575, 9}, {    159, 8}, {    319,11}, \
+    {     47,12}, {     31,11}, {     63, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287,11}, {     79,10}, \
+    {    159, 9}, {    319, 8}, {    639,10}, {    175, 9}, \
+    {    351, 8}, {    703,10}, {    191, 9}, {    383,10}, \
+    {    207, 9}, {    415,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 82
 #define MUL_FFT_THRESHOLD                 3008
 
-#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             236  /* k = 5 */
 #define SQR_FFT_TABLE3                                      \
-  { {    220, 5}, {     13, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
-    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 7}, {     30, 8}, \
-    {     19, 4}, {    319, 9}, {     11, 8}, {     25,10}, \
-    {      7, 9}, {     15, 8}, {     31, 7}, {     64, 9}, \
-    {     19, 8}, {     39, 7}, {     79, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     47,11}, \
-    {     31,10}, {     63, 9}, {    127,10}, {     71, 9}, \
-    {    143, 8}, {    287,10}, {     79,11}, {     47,10}, \
-    {     95, 9}, {    191,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287,11}, \
-    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255,11}, {    143,10}, {    287,11}, {    159,10}, \
-    {    319,11}, {    175,10}, {    351,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
-    {    223,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    287,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    351,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,12}, {    223,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
-    {    575,12}, {    319,11}, {    639,12}, {    351,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
-    {    575,13}, {    319,12}, {    703,13}, {    383,12}, \
-    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
-    {    703,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+  { {    236, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23, 8}, {     47, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     71, 9}, {    143, 8}, {    287,10}, \
+    {     79,11}, {     47,10}, {     95,12}, {     31,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
+    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
+    {    319, 8}, {    639,10}, {    175, 9}, {    351,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
+    {    415,11}, {    111,10}, {    223,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
     { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
     {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 135
-#define SQR_FFT_THRESHOLD                 1984
+#define SQR_FFT_TABLE3_SIZE 79
+#define SQR_FFT_THRESHOLD                 2240
 
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  50
-#define MULLO_MUL_N_THRESHOLD             5397
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  45
+#define MULLO_MUL_N_THRESHOLD             5558
 
-#define DC_DIV_QR_THRESHOLD                 52
-#define DC_DIVAPPR_Q_THRESHOLD             172
-#define DC_BDIV_QR_THRESHOLD                51
-#define DC_BDIV_Q_THRESHOLD                112
+#define DC_DIV_QR_THRESHOLD                 51
+#define DC_DIVAPPR_Q_THRESHOLD             168
+#define DC_BDIV_QR_THRESHOLD                48
+#define DC_BDIV_Q_THRESHOLD                108
 
-#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_MULMOD_BNM1_THRESHOLD           26
 #define INV_NEWTON_THRESHOLD               179
 #define INV_APPR_THRESHOLD                 180
 
-#define BINV_NEWTON_THRESHOLD              197
+#define BINV_NEWTON_THRESHOLD              182
 #define REDC_1_TO_REDC_N_THRESHOLD          51
 
 #define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD             998
+#define MU_DIVAPPR_Q_THRESHOLD            1078
 #define MUPI_DIV_QR_THRESHOLD               90
-#define MU_BDIV_QR_THRESHOLD               807
-#define MU_BDIV_Q_THRESHOLD               1078
+#define MU_BDIV_QR_THRESHOLD               855
+#define MU_BDIV_Q_THRESHOLD                942
 
-#define POWM_SEC_TABLE  2,17,188,393
+#define POWM_SEC_TABLE  2,17,91,453,1168
 
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                     105
-#define HGCD_APPR_THRESHOLD                105
-#define HGCD_REDUCE_THRESHOLD             1494
-#define GCD_DC_THRESHOLD                   285
-#define GCDEXT_DC_THRESHOLD                206
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      94
+#define HGCD_APPR_THRESHOLD                102
+#define HGCD_REDUCE_THRESHOLD             1615
+#define GCD_DC_THRESHOLD                   309
+#define GCDEXT_DC_THRESHOLD                201
 #define JACOBI_BASE_METHOD                   3
 
 #define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        29
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
 #define SET_STR_DC_THRESHOLD               426
-#define SET_STR_PRECOMPUTE_THRESHOLD      1535
+#define SET_STR_PRECOMPUTE_THRESHOLD      1505
 
-#define FAC_DSC_THRESHOLD                 1502
+#define FAC_DSC_THRESHOLD                 1404
 #define FAC_ODD_THRESHOLD                    0  /* always */
diff -r 6b3e031bcd37 -r 486d03cb7507 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm	Thu Jan 23 21:17:43 2014 +0100
+++ b/mpn/s390_64/addmul_1.asm	Fri Jan 24 15:53:41 2014 +0100
@@ -23,7 +23,7 @@
 C z900		34
 C z990		23
 C z9		 ?
-C z10		 ?
+C z10		28
 C z196		 ?
 
 C INPUT PARAMETERS
diff -r 6b3e031bcd37 -r 486d03cb7507 mpn/s390_64/aorrlsh1_n.asm
--- a/mpn/s390_64/aorrlsh1_n.asm	Thu Jan 23 21:17:43 2014 +0100
+++ b/mpn/s390_64/aorrlsh1_n.asm	Fri Jan 24 15:53:41 2014 +0100
@@ -23,7 +23,7 @@
 C z900		 9


More information about the gmp-commit mailing list