[Gmp-commit] /home/hgfiles/gmp: 9 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu May 6 17:27:28 CEST 2010


details:   /home/hgfiles/gmp/rev/7ac751797c58
changeset: 13602:7ac751797c58
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed May 05 16:26:30 2010 +0200
description:
Retune.

details:   /home/hgfiles/gmp/rev/982db8b95dd4
changeset: 13603:982db8b95dd4
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed May 05 22:46:11 2010 +0200
description:
New mpn_mod_1_1p files.

details:   /home/hgfiles/gmp/rev/750344c15eb6
changeset: 13604:750344c15eb6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed May 05 22:48:22 2010 +0200
description:
Undo 2009-03-01 change.

details:   /home/hgfiles/gmp/rev/027bd43953e3
changeset: 13605:027bd43953e3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed May 05 22:50:08 2010 +0200
description:
Micro-optimise.

details:   /home/hgfiles/gmp/rev/a9e03172ff72
changeset: 13606:a9e03172ff72
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed May 05 22:53:05 2010 +0200
description:
Optimise cps function.

details:   /home/hgfiles/gmp/rev/2d398d05e8c8
changeset: 13607:2d398d05e8c8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu May 06 11:02:26 2010 +0200
description:
Fix typo.

details:   /home/hgfiles/gmp/rev/255744aa1378
changeset: 13608:255744aa1378
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu May 06 17:26:00 2010 +0200
description:
Complete rewrite.

details:   /home/hgfiles/gmp/rev/5470d1a8b83d
changeset: 13609:5470d1a8b83d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu May 06 17:26:19 2010 +0200
description:
Retune.

details:   /home/hgfiles/gmp/rev/96bb4ff88d86
changeset: 13610:96bb4ff88d86
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu May 06 17:27:16 2010 +0200
description:
Don't call tune_divrem_2.

diffstat:

 ChangeLog                            |   19 +++
 longlong.h                           |    2 +-
 mpn/alpha/ev5/gmp-mparam.h           |   10 +-
 mpn/arm/gmp-mparam.h                 |  150 ++++++++++--------------
 mpn/generic/divrem_2.c               |  144 ++++++-----------------
 mpn/generic/mod_1_1.c                |    7 +-
 mpn/ia64/gmp-mparam.h                |    2 +-
 mpn/powerpc32/gmp-mparam.h           |    8 +-
 mpn/powerpc64/mode64/mod_1_1.asm     |  150 +++++++++++++++++++++++++
 mpn/powerpc64/mode64/p3/gmp-mparam.h |    6 +-
 mpn/powerpc64/mode64/p5/gmp-mparam.h |   72 ++++++------
 mpn/powerpc64/mode64/p6/gmp-mparam.h |  179 +++++++++++++++--------------
 mpn/x86/pentium4/sse2/gmp-mparam.h   |  208 ++++++++++++++--------------------
 mpn/x86/pentium4/sse2/mod_1_1.asm    |  172 ++++++++++++++++++++++++++++
 mpn/x86_64/core2/gmp-mparam.h        |   26 ++--
 mpn/x86_64/corei/gmp-mparam.h        |    6 +-
 mpn/x86_64/gmp-mparam.h              |    2 +-
 mpn/x86_64/mod_1_1.asm               |   94 +++++++-------
 mpn/x86_64/nano/gmp-mparam.h         |   12 +-
 tune/tuneup.c                        |    4 +-
 20 files changed, 746 insertions(+), 527 deletions(-)

diffs (truncated from 1708 to 300 lines):

diff -r a3175b030a40 -r 96bb4ff88d86 ChangeLog
--- a/ChangeLog	Wed May 05 16:15:33 2010 +0200
+++ b/ChangeLog	Thu May 06 17:27:16 2010 +0200
@@ -1,5 +1,24 @@
+2010-05-06  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c (all): Don't call tune_divrem_2.
+
+	* mpn/generic/divrem_2.c: Complete rewrite.
+
+	* tune/tuneup.c (tune_mod_1): Fix typo.
+
 2010-05-05  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Use macro register names.
+	(mpn_mod_1_1p_cps): Rewrite.
+
+	* mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Micro-optimise.
+
+	* longlong.h: Undo 2009-03-01 change for powerpc64, it gives poor code.
+
+	* mpn/x86/pentium4/sse2/mod_1_1.asm: New file.
+
+	* mpn/powerpc64/mode64/mod_1_1.asm: New file.
+
 	* tune/tuneup.c (tune_mod_1): Use more typical divisor, for the benefit
 	of machines with early-out multipliers.
 
diff -r a3175b030a40 -r 96bb4ff88d86 longlong.h
--- a/longlong.h	Wed May 05 16:15:33 2010 +0200
+++ b/longlong.h	Thu May 06 17:27:16 2010 +0200
@@ -1271,7 +1271,7 @@
 #define count_leading_zeros(count, x) \
   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 #define COUNT_LEADING_ZEROS_0 64
-#if __GMP_GNUC_PREREQ (4,4)
+#if 0 && __GMP_GNUC_PREREQ (4,4) /* Disable, this results in libcalls! */
 #define umul_ppmm(w1, w0, u, v) \
   do {									\
     typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
diff -r a3175b030a40 -r 96bb4ff88d86 mpn/alpha/ev5/gmp-mparam.h
--- a/mpn/alpha/ev5/gmp-mparam.h	Wed May 05 16:15:33 2010 +0200
+++ b/mpn/alpha/ev5/gmp-mparam.h	Thu May 06 17:27:16 2010 +0200
@@ -29,13 +29,13 @@
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
 #define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
 #define USE_PREINV_DIVREM_1                  1  /* preinv always */
 #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define BMOD_1_TO_MOD_1_THRESHOLD           74
 
 #define MUL_TOOM22_THRESHOLD                16
 #define MUL_TOOM33_THRESHOLD                53
@@ -56,7 +56,7 @@
 #define SQR_TOOM8_THRESHOLD                260
 
 #define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               13
 
 #define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
diff -r a3175b030a40 -r 96bb4ff88d86 mpn/arm/gmp-mparam.h
--- a/mpn/arm/gmp-mparam.h	Wed May 05 16:15:33 2010 +0200
+++ b/mpn/arm/gmp-mparam.h	Thu May 06 17:27:16 2010 +0200
@@ -21,122 +21,96 @@
 #define GMP_LIMB_BITS 32
 #define BYTES_PER_MP_LIMB 4
 
-/* 593MHz ARM (gcc50.fsffrance.org) */
+/* 1193MHz ARM (gcc55.fsffrance.org) */
 
 #define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
 #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         17
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         56
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
 #define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     27
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     71
 #define USE_PREINV_DIVREM_1                  1  /* preinv always */
 #define DIVREM_2_THRESHOLD                   0  /* preinv always */
 #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           44
+#define BMOD_1_TO_MOD_1_THRESHOLD           41
 
-#define MUL_TOOM22_THRESHOLD                34
-#define MUL_TOOM33_THRESHOLD               121
-#define MUL_TOOM44_THRESHOLD               191
-#define MUL_TOOM6H_THRESHOLD               366
-#define MUL_TOOM8H_THRESHOLD               547
+#define MUL_TOOM22_THRESHOLD                36
+#define MUL_TOOM33_THRESHOLD               125
+#define MUL_TOOM44_THRESHOLD               193
+#define MUL_TOOM6H_THRESHOLD               303
+#define MUL_TOOM8H_THRESHOLD               418
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     191
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     117
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     137
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     125
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     176
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
 
-#define SQR_BASECASE_THRESHOLD              13
+#define SQR_BASECASE_THRESHOLD              12
 #define SQR_TOOM2_THRESHOLD                 78
-#define SQR_TOOM3_THRESHOLD                141
+#define SQR_TOOM3_THRESHOLD                137
 #define SQR_TOOM4_THRESHOLD                212
-#define SQR_TOOM6_THRESHOLD                330
+#define SQR_TOOM6_THRESHOLD                306
 #define SQR_TOOM8_THRESHOLD                422
 
-#define MULMOD_BNM1_THRESHOLD               21
-#define SQRMOD_BNM1_THRESHOLD               25
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               26
 
-#define MUL_FFT_MODF_THRESHOLD             404  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             436  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    404, 5}, {     21, 6}, {     11, 5}, {     25, 6}, \
-    {     13, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
+  { {    436, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
     {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     21, 6}, {     43, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     31, 7}, {     63, 8}, {     43, 9}, \
-    {     23, 8}, {     55, 9}, {     31, 8}, {     71, 9}, \
-    {     39, 8}, {     83, 9}, {     47, 8}, {     99, 9}, \
-    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287,10}, {    159, 9}, {    319,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
-    {     63,11}, {    127,10}, {    287,11}, {    159,10}, \
-    {    351,11}, {    191,10}, {    415,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    319,10}, \
-    {    639,11}, {    351,12}, {    191,11}, {    415,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 79
+    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {    256, 9}, {    512,10}, {   1024,11}, {   2048,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 28
 #define MUL_FFT_THRESHOLD                 5760
 
-#define SQR_FFT_MODF_THRESHOLD             400  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             404  /* k = 5 */
 #define SQR_FFT_TABLE3                                      \
-  { {    400, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
-    {     32, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     39, 9}, {     23, 8}, {     55,10}, \
-    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159, 8}, {    319,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    271,10}, \
-    {    143, 9}, {    303,10}, {    159,11}, {     95,10}, \
-    {    191, 9}, {    383,10}, {    207,12}, {     63,11}, \
-    {    127,10}, {    303,11}, {    159,10}, {    367,11}, \
-    {    191,10}, {    415,11}, {    223,10}, {    447,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    639,11}, {    351,12}, \
-    {    191,11}, {    447,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 77
-#define SQR_FFT_THRESHOLD                 3136
+  { {    404, 5}, {     13, 4}, {     27, 5}, {     27, 6}, \
+    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
+    {     35, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {    512,10}, \
+    {   1024,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 26
+#define SQR_FFT_THRESHOLD                 3776
 
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 120
-#define MULLO_MUL_N_THRESHOLD            11317
+#define MULLO_DC_THRESHOLD                 137
+#define MULLO_MUL_N_THRESHOLD            11479
 
-#define DC_DIV_QR_THRESHOLD                134
-#define DC_DIVAPPR_Q_THRESHOLD             442
-#define DC_BDIV_QR_THRESHOLD               127
-#define DC_BDIV_Q_THRESHOLD                296
+#define DC_DIV_QR_THRESHOLD                150
+#define DC_DIVAPPR_Q_THRESHOLD             494
+#define DC_BDIV_QR_THRESHOLD               148
+#define DC_BDIV_Q_THRESHOLD                345
 
-#define INV_MULMOD_BNM1_THRESHOLD           66
-#define INV_NEWTON_THRESHOLD               458
-#define INV_APPR_THRESHOLD                 454
+#define INV_MULMOD_BNM1_THRESHOLD           70
+#define INV_NEWTON_THRESHOLD               474
+#define INV_APPR_THRESHOLD                 478
 
-#define BINV_NEWTON_THRESHOLD              494
-#define REDC_1_TO_REDC_N_THRESHOLD         116
+#define BINV_NEWTON_THRESHOLD              542
+#define REDC_1_TO_REDC_N_THRESHOLD         117
 
-#define MU_DIV_QR_THRESHOLD               2914
-#define MU_DIVAPPR_Q_THRESHOLD            3091
-#define MUPI_DIV_QR_THRESHOLD              221
-#define MU_BDIV_QR_THRESHOLD              2259
-#define MU_BDIV_Q_THRESHOLD               2747
+#define MU_DIV_QR_THRESHOLD               2089
+#define MU_DIVAPPR_Q_THRESHOLD            2172
+#define MUPI_DIV_QR_THRESHOLD              225
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               2089
 
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     109
-#define GCD_DC_THRESHOLD                   697
-#define GCDEXT_DC_THRESHOLD                535
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     197
+#define GCD_DC_THRESHOLD                   902
+#define GCDEXT_DC_THRESHOLD                650
 #define JACOBI_BASE_METHOD                   2
 
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        29
-#define SET_STR_DC_THRESHOLD               321
-#define SET_STR_PRECOMPUTE_THRESHOLD      1037
+#define GET_STR_DC_THRESHOLD                20
+#define GET_STR_PRECOMPUTE_THRESHOLD        39
+#define SET_STR_DC_THRESHOLD              1045
+#define SET_STR_PRECOMPUTE_THRESHOLD      2147
diff -r a3175b030a40 -r 96bb4ff88d86 mpn/generic/divrem_2.c
--- a/mpn/generic/divrem_2.c	Wed May 05 16:15:33 2010 +0200
+++ b/mpn/generic/divrem_2.c	Thu May 06 17:27:16 2010 +0200
@@ -30,43 +30,28 @@
 #include "longlong.h"
 
 
-/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
-   meaning the quotient size where that should happen, the quotient size
-   being how many udiv divisions will be done.
-
-   The default is to use preinv always, CPUs where this doesn't suit have
-   tuned thresholds.  Note in particular that preinv should certainly be
-   used if that's the only division available (USE_PREINV_ALWAYS).  */
-
-#ifndef DIVREM_2_THRESHOLD
-#define DIVREM_2_THRESHOLD  0
-#endif
-
-
-/* Divide num (NP/NN) by den (DP/2) and write the NN-2 least significant
-   quotient limbs at QP and the 2 long remainder at NP.  If qxn is non-zero,
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
+   quotient limbs at qp and the 2 long remainder at np.  If qxn is non-zero,
    generate that many fraction bits and append them after the other quotient
    limbs.  Return the most significant limb of the quotient, this is always 0
    or 1.
 
    Preconditions:
    1. The most significant bit of the divisor must be set.
-   2. QP must either not overlap with the input operands at all, or
-      QP + 2 >= NP must hold true.  (This means that it's possible to put
-      the quotient in the high part of NUM, right after the remainder in NUM.
-   3. NN >= 2, even if qxn is non-zero.  */
+   2. qp must either not overlap with the input operands at all, or
+      qp + 2 >= np must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+   3. nn >= 2, even if qxn is non-zero.  */
 
 mp_limb_t
 mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
 	      mp_ptr np, mp_size_t nn,


More information about the gmp-commit mailing list