[Gmp-commit] /var/hg/gmp: 6 new changesets
From: mercurial at gmplib.org
Mon Oct 31 23:35:25 CET 2011
details: /var/hg/gmp/rev/ad3b5b0da77a
changeset: 14399:ad3b5b0da77a
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Oct 31 22:42:53 2011 +0100
description:
Rewrite sqr_diag_addlsh1 code.
details: /var/hg/gmp/rev/5f65248c61d8
changeset: 14400:5f65248c61d8
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Oct 31 22:47:56 2011 +0100
description:
Rewrite sqr_diag_addlsh1 code.
details: /var/hg/gmp/rev/5e1e3150a03b
changeset: 14401:5e1e3150a03b
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Oct 31 23:14:39 2011 +0100
description:
New file.
details: /var/hg/gmp/rev/563b90c01d27
changeset: 14402:563b90c01d27
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Oct 31 23:15:43 2011 +0100
description:
Move file into powerpc64/mode32.
details: /var/hg/gmp/rev/0d3214876bdf
changeset: 14403:0d3214876bdf
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Oct 31 23:16:23 2011 +0100
description:
*** empty log message ***
details: /var/hg/gmp/rev/0e4ad7d7ed57
changeset: 14404:0e4ad7d7ed57
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Oct 31 23:17:17 2011 +0100
description:
Add cycle counts.
diffstat:
.bootstrap | 4 +
ChangeLog | 10 +
dumbmp.c | 2 +
mpn/generic/mul_fft.c | 3 +
mpn/ia64/addmul_1.asm | 33 +--
mpn/ia64/gmp-mparam.h | 144 +++++++++++------
mpn/powerpc32/addlsh1_n.asm | 74 +--------
mpn/powerpc32/sublsh1_n.asm | 75 +--------
mpn/powerpc64/mode32/sqr_diagonal.asm | 107 +++++++++++++
mpn/powerpc64/mode64/sqr_diag_addlsh1.asm | 238 ++++++++++++++++++++++++++++++
mpn/powerpc64/sqr_diagonal.asm | 107 -------------
mpn/s390_32/esame/sqr_basecase.asm | 44 ++---
mpn/s390_64/bdiv_dbm1c.asm | 2 +-
mpn/s390_64/lshift.asm | 2 +-
mpn/s390_64/lshiftc.asm | 2 +-
mpn/s390_64/rshift.asm | 2 +-
mpn/s390_64/sqr_basecase.asm | 44 ++---
mpn/x86/fat/fat_entry.asm | 6 +-
mpn/x86_64/addmul_2.asm | 21 +-
mpn/x86_64/bdiv_dbm1c.asm | 10 +-
mpn/x86_64/coreisbr/gmp-mparam.h | 126 +++++++++++----
mpn/x86_64/divrem_2.asm | 93 +++++-----
mpn/x86_64/invert_limb.asm | 13 +-
mpn/x86_64/sqr_basecase.asm | 2 +
tune/tuneup.c | 68 +++++---
25 files changed, 732 insertions(+), 500 deletions(-)
diffs (truncated from 1794 to 300 lines):
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 .bootstrap
--- a/.bootstrap Sat Oct 29 23:54:39 2011 +0200
+++ b/.bootstrap Mon Oct 31 23:17:17 2011 +0100
@@ -10,6 +10,10 @@
# script.
aclocal && libtoolize && autoconf && autoheader && automake
+cp -L ltmain.sh foo
+rm ltmain.sh
+mv foo ltmain.sh
+
cat >doc/version.texi <<EOF
@set UPDATED 19 January 2038
@set UPDATED-MONTH January 2038
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 ChangeLog
--- a/ChangeLog Sat Oct 29 23:54:39 2011 +0200
+++ b/ChangeLog Mon Oct 31 23:17:17 2011 +0100
@@ -1,3 +1,13 @@
+2011-10-31 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/sqr_diagonal.asm: Move from here...
+ * mpn/powerpc64/mode32/sqr_diagonal.asm: ...to here.
+
+ * mpn/powerpc64/mode64/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/s390_64/sqr_basecase.asm: Rewrite sqr_diag_addlsh1 code.
+ * mpn/s390_32/esame/sqr_basecase.asm: Likewise.
+
2011-10-29 Torbjorn Granlund <tege at gmplib.org>
* mpn/s390_64/lshift.asm: Complete rewrite.
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 dumbmp.c
--- a/dumbmp.c Sat Oct 29 23:54:39 2011 +0200
+++ b/dumbmp.c Mon Oct 31 23:17:17 2011 +0100
@@ -421,6 +421,8 @@
mp_limb_t *tp; int tn;
tn = an; an = bn; bn = tn;
tp = ap; ap = bp; bp = tp;
+ /* This needs sign change, not done so abort. */
+ abort ();
}
cy = 0;
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c Sat Oct 29 23:54:39 2011 +0200
+++ b/mpn/generic/mul_fft.c Mon Oct 31 23:17:17 2011 +0100
@@ -271,6 +271,9 @@
/* now subtract cc and rd from r[d..n] */
+/* FIXME: Use mpn_decr_u here, first zeroing rp[n].
+ Can we also combine cc and rd to one mpn_decr_u? */
+
r[n] = -mpn_sub_1 (r + d, r + d, n - d, cc);
r[n] -= mpn_sub_1 (r + d, r + d, n - d, rd);
if (r[n] & GMP_LIMB_HIGHBIT)
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 mpn/ia64/addmul_1.asm
--- a/mpn/ia64/addmul_1.asm Sat Oct 29 23:54:39 2011 +0200
+++ b/mpn/ia64/addmul_1.asm Mon Oct 31 23:17:17 2011 +0100
@@ -55,39 +55,28 @@
zxt4 n = n C I
;;
')
-{.mmi
- adds r15 = -1, n C M I
+.mmi; adds r15 = -1, n C M I
mov r20 = rp C M I
mov.i r2 = ar.lc C I0
-}
-{.mmi
- ldf8 f7 = [up], 8 C M
+.mmi; ldf8 f7 = [up], 8 C M
ldf8 f8 = [rp], 8 C M
and r14 = 3, n C M I
;;
-}
-{.mmi
- setf.sig f6 = vl C M2 M3
+.mmi; setf.sig f6 = vl C M2 M3
cmp.eq p10, p0 = 0, r14 C M I
shr.u r31 = r15, 2 C I0
-}
-{.mmi
- cmp.eq p11, p0 = 2, r14 C M I
+.mmi; cmp.eq p11, p0 = 2, r14 C M I
cmp.eq p12, p0 = 3, r14 C M I
nop.i 0 C I
;;
-}
-{.mii
- cmp.ne p6, p7 = r0, r0 C M I
+.mii; cmp.ne p6, p7 = r0, r0 C M I
mov.i ar.lc = r31 C I0
cmp.ne p8, p9 = r0, r0 C M I
-}
-{.bbb
- (p10) br.dptk .Lb00 C B
- (p11) br.dptk .Lb10 C B
- (p12) br.dptk .Lb11 C B
+.bbb;
+ (p10) br.dptk.few .Lb00 C B
+ (p11) br.dptk.few .Lb10 C B
+ (p12) br.dptk.few .Lb11 C B
;;
-}
.Lb01: br.cloop.dptk .grt1 C B
@@ -182,7 +171,7 @@
stf8 [r20] = f38, 8
getf.sig r27 = f39
getf.sig r8 = f43
- br .Lcj2
+ br.few .Lcj2
.grt2:
ldf8 f32 = [up], 8
@@ -569,6 +558,7 @@
.pred.rel "mutex", p8, p9
(p8) add r16 = r29, r26, 1 C
(p9) add r16 = r29, r26 C
+ nop 0
;;
.pred.rel "mutex", p8, p9
st8 [r20] = r16, 8 C
@@ -579,6 +569,7 @@
.pred.rel "mutex", p6, p7
(p6) add r14 = r30, r27, 1 C
(p7) add r14 = r30, r27 C
+ nop 0
;;
.pred.rel "mutex", p6, p7
st8 [r20] = r14 C
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 mpn/ia64/gmp-mparam.h
--- a/mpn/ia64/gmp-mparam.h Sat Oct 29 23:54:39 2011 +0200
+++ b/mpn/ia64/gmp-mparam.h Mon Oct 31 23:17:17 2011 +0100
@@ -1,6 +1,6 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010 Free Software
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010, 2011 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
@@ -21,70 +21,90 @@
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
-/* 1300MHz Itanium2 (babe.fsffrance.org) */
+/* 900MHz Itanium2 (titanic.gmplib.org) */
-
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 21
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD 12
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define MUL_TOOM22_THRESHOLD 40
-#define MUL_TOOM33_THRESHOLD 122
-#define MUL_TOOM44_THRESHOLD 212
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 214
#define MUL_TOOM6H_THRESHOLD 318
#define MUL_TOOM8H_THRESHOLD 430
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 146
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 145
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 126
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 151
#define SQR_BASECASE_THRESHOLD 11
#define SQR_TOOM2_THRESHOLD 84
-#define SQR_TOOM3_THRESHOLD 125
+#define SQR_TOOM3_THRESHOLD 135
#define SQR_TOOM4_THRESHOLD 494
-#define SQR_TOOM6_THRESHOLD 0 /* never toom4 */
-#define SQR_TOOM8_THRESHOLD 0 /* never toom6 */
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 0 /* always */
#define MULMOD_BNM1_THRESHOLD 23
-#define SQRMOD_BNM1_THRESHOLD 25
+#define SQRMOD_BNM1_THRESHOLD 28
-#define MUL_FFT_MODF_THRESHOLD 444 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 444, 5}, { 27, 6}, { 14, 5}, { 29, 6}, \
- { 35, 7}, { 18, 6}, { 37, 7}, { 19, 6}, \
+ { { 476, 5}, { 27, 6}, { 14, 5}, { 29, 6}, \
+ { 33, 7}, { 17, 6}, { 37, 7}, { 19, 6}, \
{ 39, 7}, { 21, 6}, { 43, 7}, { 33, 8}, \
{ 17, 7}, { 37, 8}, { 19, 7}, { 39, 8}, \
- { 21, 7}, { 43, 8}, { 29, 9}, { 15, 8}, \
- { 37, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
- { 49, 9}, { 27, 8}, { 57, 9}, { 31, 8}, \
- { 63, 9}, { 35, 8}, { 71, 9}, { 43,10}, \
+ { 21, 7}, { 43, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 57, 9}, { 31, 8}, { 63, 9}, { 43,10}, \
{ 23, 9}, { 59,10}, { 31, 9}, { 71,10}, \
- { 39, 9}, { 87,10}, { 47, 9}, { 99,10}, \
+ { 39, 9}, { 83,10}, { 47, 9}, { 99,10}, \
{ 55,11}, { 31,10}, { 87,11}, { 47,10}, \
{ 111,12}, { 31,11}, { 63,10}, { 143,11}, \
{ 79,10}, { 167,11}, { 95,10}, { 191,11}, \
{ 111,12}, { 63,11}, { 143,10}, { 287, 9}, \
{ 575,10}, { 303,11}, { 159,10}, { 319,12}, \
{ 95,11}, { 191,10}, { 399,11}, { 207,10}, \
- { 431,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 76
+ { 431,13}, { 63,12}, { 127,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
+ { 159,11}, { 335,10}, { 671,11}, { 367,12}, \
+ { 191,11}, { 399,10}, { 799,11}, { 431,12}, \
+ { 223,11}, { 447,13}, { 127,12}, { 255,11}, \
+ { 543,12}, { 287,11}, { 607,12}, { 319,11}, \
+ { 671,12}, { 351,11}, { 703,13}, { 191,12}, \
+ { 415,11}, { 863,12}, { 447,14}, { 127,13}, \
+ { 255,12}, { 607,13}, { 319,12}, { 735,13}, \
+ { 383,12}, { 799,11}, { 1599,12}, { 863,13}, \
+ { 447,12}, { 927,11}, { 1855,14}, { 255,13}, \
+ { 511,12}, { 1055,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1279,13}, { 703,14}, { 383,13}, \
+ { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \
+ { 895,12}, { 1791,15}, { 255,14}, { 511,13}, \
+ { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1343,12}, { 2687,13}, { 1471,14}, { 767,13}, \
+ { 1599,12}, { 3199,13}, { 1663,14}, { 895,13}, \
+ { 1855,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,13}, { 2431,14}, { 1279,13}, { 2687,14}, \
+ { 1407,15}, { 767,14}, { 1535,13}, { 3199,14}, \
+ { 1663,13}, { 3455,14}, { 1791,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 155
#define MUL_FFT_THRESHOLD 5760
-#define SQR_FFT_MODF_THRESHOLD 440 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 436 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 440, 5}, { 14, 4}, { 29, 5}, { 29, 6}, \
- { 15, 5}, { 31, 6}, { 35, 7}, { 18, 6}, \
- { 37, 7}, { 33, 8}, { 17, 7}, { 37, 8}, \
+ { { 436, 5}, { 14, 4}, { 29, 5}, { 31, 6}, \
+ { 35, 7}, { 18, 6}, { 37, 7}, { 37, 8}, \
{ 19, 7}, { 40, 8}, { 37, 9}, { 19, 8}, \
{ 43, 9}, { 23, 8}, { 49, 9}, { 27, 8}, \
{ 57, 9}, { 43,10}, { 23, 9}, { 55,10}, \
@@ -93,45 +113,67 @@
{ 87,11}, { 47,10}, { 111,12}, { 31,11}, \
{ 63,10}, { 135,11}, { 79,10}, { 167,11}, \
{ 95,10}, { 191,11}, { 111,12}, { 63,11}, \
- { 127,10}, { 255,11}, { 143,10}, { 303,11}, \
- { 159,10}, { 319,12}, { 95,11}, { 191,10}, \
- { 399,11}, { 207,10}, { 431,13}, { 8192,14}, \
- { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 66
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319,12}, \
+ { 95,11}, { 191,10}, { 399,11}, { 207,10}, \
+ { 431,13}, { 63,12}, { 127,11}, { 271,10}, \
+ { 543,11}, { 303,12}, { 159,11}, { 335,10}, \
+ { 671,11}, { 367,10}, { 735,12}, { 191,11}, \
+ { 399,10}, { 799,11}, { 431,12}, { 223,11}, \
+ { 463,13}, { 127,12}, { 255,11}, { 543,12}, \
+ { 287,11}, { 607,12}, { 319,11}, { 671,12}, \
+ { 351,11}, { 735,13}, { 191,12}, { 383,11}, \
+ { 799,12}, { 415,11}, { 863,12}, { 447,11}, \
+ { 895,14}, { 127,13}, { 255,12}, { 543,11}, \
+ { 1087,12}, { 607,13}, { 319,12}, { 735,13}, \
+ { 383,12}, { 863,13}, { 447,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
+ { 1183,13}, { 639,12}, { 1279,13}, { 703,12}, \
+ { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
+ { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1471,14}, { 767,13}, { 1663,14}, \
+ { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \
+ { 2175,14}, { 1151,13}, { 2431,14}, { 1279,13}, \
+ { 2687,14}, { 1407,15}, { 767,14}, { 1535,13}, \
+ { 3199,14}, { 1663,13}, { 3455,14}, { 1791,13}, \
More information about the gmp-commit mailing list