[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Oct 31 23:35:25 CET 2011


details:   /var/hg/gmp/rev/ad3b5b0da77a
changeset: 14399:ad3b5b0da77a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Oct 31 22:42:53 2011 +0100
description:
Rewrite sqr_diag_addlsh1 code.

details:   /var/hg/gmp/rev/5f65248c61d8
changeset: 14400:5f65248c61d8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Oct 31 22:47:56 2011 +0100
description:
Rewrite sqr_diag_addlsh1 code.

details:   /var/hg/gmp/rev/5e1e3150a03b
changeset: 14401:5e1e3150a03b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Oct 31 23:14:39 2011 +0100
description:
New file.

details:   /var/hg/gmp/rev/563b90c01d27
changeset: 14402:563b90c01d27
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Oct 31 23:15:43 2011 +0100
description:
Move file into powerpc64/mode32.

details:   /var/hg/gmp/rev/0d3214876bdf
changeset: 14403:0d3214876bdf
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Oct 31 23:16:23 2011 +0100
description:
*** empty log message ***

details:   /var/hg/gmp/rev/0e4ad7d7ed57
changeset: 14404:0e4ad7d7ed57
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Oct 31 23:17:17 2011 +0100
description:
Add cycle counts.

diffstat:

 .bootstrap                                |    4 +
 ChangeLog                                 |   10 +
 dumbmp.c                                  |    2 +
 mpn/generic/mul_fft.c                     |    3 +
 mpn/ia64/addmul_1.asm                     |   33 +--
 mpn/ia64/gmp-mparam.h                     |  144 +++++++++++------
 mpn/powerpc32/addlsh1_n.asm               |   74 +--------
 mpn/powerpc32/sublsh1_n.asm               |   75 +--------
 mpn/powerpc64/mode32/sqr_diagonal.asm     |  107 +++++++++++++
 mpn/powerpc64/mode64/sqr_diag_addlsh1.asm |  238 ++++++++++++++++++++++++++++++
 mpn/powerpc64/sqr_diagonal.asm            |  107 -------------
 mpn/s390_32/esame/sqr_basecase.asm        |   44 ++---
 mpn/s390_64/bdiv_dbm1c.asm                |    2 +-
 mpn/s390_64/lshift.asm                    |    2 +-
 mpn/s390_64/lshiftc.asm                   |    2 +-
 mpn/s390_64/rshift.asm                    |    2 +-
 mpn/s390_64/sqr_basecase.asm              |   44 ++---
 mpn/x86/fat/fat_entry.asm                 |    6 +-
 mpn/x86_64/addmul_2.asm                   |   21 +-
 mpn/x86_64/bdiv_dbm1c.asm                 |   10 +-
 mpn/x86_64/coreisbr/gmp-mparam.h          |  126 +++++++++++----
 mpn/x86_64/divrem_2.asm                   |   93 +++++-----
 mpn/x86_64/invert_limb.asm                |   13 +-
 mpn/x86_64/sqr_basecase.asm               |    2 +
 tune/tuneup.c                             |   68 +++++---
 25 files changed, 732 insertions(+), 500 deletions(-)

diffs (truncated from 1794 to 300 lines):

diff -r 09fabd6fab76 -r 0e4ad7d7ed57 .bootstrap
--- a/.bootstrap	Sat Oct 29 23:54:39 2011 +0200
+++ b/.bootstrap	Mon Oct 31 23:17:17 2011 +0100
@@ -10,6 +10,10 @@
 # script.
 aclocal && libtoolize && autoconf && autoheader && automake
 
+cp -L ltmain.sh foo
+rm ltmain.sh
+mv foo ltmain.sh
+
 cat >doc/version.texi <<EOF
 @set UPDATED 19 January 2038
 @set UPDATED-MONTH January 2038
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 ChangeLog
--- a/ChangeLog	Sat Oct 29 23:54:39 2011 +0200
+++ b/ChangeLog	Mon Oct 31 23:17:17 2011 +0100
@@ -1,3 +1,13 @@
+2011-10-31  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/sqr_diagonal.asm: Move from here...
+	* mpn/powerpc64/mode32/sqr_diagonal.asm: ...to here.
+
+	* mpn/powerpc64/mode64/sqr_diag_addlsh1.asm: New file.
+
+	* mpn/s390_64/sqr_basecase.asm: Rewrite sqr_diag_addlsh1 code.
+	* mpn/s390_32/esame/sqr_basecase.asm: Likewise.
+
 2011-10-29  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/s390_64/lshift.asm: Complete rewrite.
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 dumbmp.c
--- a/dumbmp.c	Sat Oct 29 23:54:39 2011 +0200
+++ b/dumbmp.c	Mon Oct 31 23:17:17 2011 +0100
@@ -421,6 +421,8 @@
       mp_limb_t *tp;  int tn;
       tn = an; an = bn; bn = tn;
       tp = ap; ap = bp; bp = tp;
+      /* This needs sign change, not done so abort.  */
+      abort ();
     }
 
   cy = 0;
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c	Sat Oct 29 23:54:39 2011 +0200
+++ b/mpn/generic/mul_fft.c	Mon Oct 31 23:17:17 2011 +0100
@@ -271,6 +271,9 @@
 
       /* now subtract cc and rd from r[d..n] */
 
+/* FIXME: Use mpn_decr_u here, first zeroing rp[n].
+   Can we also combine cc and rd to one mpn_decr_u?  */
+
       r[n] = -mpn_sub_1 (r + d, r + d, n - d, cc);
       r[n] -= mpn_sub_1 (r + d, r + d, n - d, rd);
       if (r[n] & GMP_LIMB_HIGHBIT)
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 mpn/ia64/addmul_1.asm
--- a/mpn/ia64/addmul_1.asm	Sat Oct 29 23:54:39 2011 +0200
+++ b/mpn/ia64/addmul_1.asm	Mon Oct 31 23:17:17 2011 +0100
@@ -55,39 +55,28 @@
 	zxt4		n = n			C I
 	;;
 ')
-{.mmi
-	adds		r15 = -1, n		C M I
+.mmi;	adds		r15 = -1, n		C M I
 	mov		r20 = rp		C M I
 	mov.i		r2 = ar.lc		C I0
-}
-{.mmi
-	ldf8		f7 = [up], 8		C M
+.mmi;	ldf8		f7 = [up], 8		C M
 	ldf8		f8 = [rp], 8		C M
 	and		r14 = 3, n		C M I
 	;;
-}
-{.mmi
-	setf.sig	f6 = vl			C M2 M3
+.mmi;	setf.sig	f6 = vl			C M2 M3
 	cmp.eq		p10, p0 = 0, r14	C M I
 	shr.u		r31 = r15, 2		C I0
-}
-{.mmi
-	cmp.eq		p11, p0 = 2, r14	C M I
+.mmi;	cmp.eq		p11, p0 = 2, r14	C M I
 	cmp.eq		p12, p0 = 3, r14	C M I
 	nop.i		0			C I
 	;;
-}
-{.mii
-	cmp.ne		p6, p7 = r0, r0		C M I
+.mii;	cmp.ne		p6, p7 = r0, r0		C M I
 	mov.i		ar.lc = r31		C I0
 	cmp.ne		p8, p9 = r0, r0		C M I
-}
-{.bbb
-  (p10)	br.dptk		.Lb00			C B
-  (p11)	br.dptk		.Lb10			C B
-  (p12)	br.dptk		.Lb11			C B
+.bbb;
+  (p10)	br.dptk.few	.Lb00			C B
+  (p11)	br.dptk.few	.Lb10			C B
+  (p12)	br.dptk.few	.Lb11			C B
 	;;
-}
 
 .Lb01:	br.cloop.dptk	.grt1			C B
 
@@ -182,7 +171,7 @@
 	stf8		[r20] = f38, 8
 	getf.sig	r27 = f39
 	getf.sig	r8 = f43
-	br		.Lcj2
+	br.few		.Lcj2
 
 .grt2:
 	ldf8		f32 = [up], 8
@@ -569,6 +558,7 @@
 	.pred.rel "mutex", p8, p9
    (p8)	add		r16 = r29, r26, 1	C
    (p9)	add		r16 = r29, r26		C
+	nop		0
 	;;
 	.pred.rel "mutex", p8, p9
 	st8		[r20] = r16, 8		C
@@ -579,6 +569,7 @@
 	.pred.rel "mutex", p6, p7
    (p6)	add		r14 = r30, r27, 1	C
    (p7)	add		r14 = r30, r27		C
+	nop		0
 	;;
 	.pred.rel "mutex", p6, p7
 	st8		[r20] = r14		C
diff -r 09fabd6fab76 -r 0e4ad7d7ed57 mpn/ia64/gmp-mparam.h
--- a/mpn/ia64/gmp-mparam.h	Sat Oct 29 23:54:39 2011 +0200
+++ b/mpn/ia64/gmp-mparam.h	Mon Oct 31 23:17:17 2011 +0100
@@ -1,6 +1,6 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010 Free Software
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010, 2011 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
@@ -21,70 +21,90 @@
 #define GMP_LIMB_BITS 64
 #define BYTES_PER_MP_LIMB 8
 
-/* 1300MHz Itanium2 (babe.fsffrance.org) */
+/* 900MHz Itanium2 (titanic.gmplib.org) */
 
-
+#define MOD_1_1P_METHOD                      2
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
 #define MOD_1U_TO_MOD_1_1_THRESHOLD          8
 #define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
 #define MOD_1_2_TO_MOD_1_4_THRESHOLD        21
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
 #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD              12
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
 #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
 
 #define MUL_TOOM22_THRESHOLD                40
-#define MUL_TOOM33_THRESHOLD               122
-#define MUL_TOOM44_THRESHOLD               212
+#define MUL_TOOM33_THRESHOLD               129
+#define MUL_TOOM44_THRESHOLD               214
 #define MUL_TOOM6H_THRESHOLD               318
 #define MUL_TOOM8H_THRESHOLD               430
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     146
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     129
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     145
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     126
 #define MUL_TOOM42_TO_TOOM63_THRESHOLD     151
 
 #define SQR_BASECASE_THRESHOLD              11
 #define SQR_TOOM2_THRESHOLD                 84
-#define SQR_TOOM3_THRESHOLD                125
+#define SQR_TOOM3_THRESHOLD                135
 #define SQR_TOOM4_THRESHOLD                494
-#define SQR_TOOM6_THRESHOLD                  0  /* never toom4 */
-#define SQR_TOOM8_THRESHOLD                  0  /* never toom6 */
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
 
 #define MULMOD_BNM1_THRESHOLD               23
-#define SQRMOD_BNM1_THRESHOLD               25
+#define SQRMOD_BNM1_THRESHOLD               28
 
-#define MUL_FFT_MODF_THRESHOLD             444  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
-  { {    444, 5}, {     27, 6}, {     14, 5}, {     29, 6}, \
-    {     35, 7}, {     18, 6}, {     37, 7}, {     19, 6}, \
+  { {    476, 5}, {     27, 6}, {     14, 5}, {     29, 6}, \
+    {     33, 7}, {     17, 6}, {     37, 7}, {     19, 6}, \
     {     39, 7}, {     21, 6}, {     43, 7}, {     33, 8}, \
     {     17, 7}, {     37, 8}, {     19, 7}, {     39, 8}, \
-    {     21, 7}, {     43, 8}, {     29, 9}, {     15, 8}, \
-    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
-    {     49, 9}, {     27, 8}, {     57, 9}, {     31, 8}, \
-    {     63, 9}, {     35, 8}, {     71, 9}, {     43,10}, \
+    {     21, 7}, {     43, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     57, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
     {     23, 9}, {     59,10}, {     31, 9}, {     71,10}, \
-    {     39, 9}, {     87,10}, {     47, 9}, {     99,10}, \
+    {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
     {     55,11}, {     31,10}, {     87,11}, {     47,10}, \
     {    111,12}, {     31,11}, {     63,10}, {    143,11}, \
     {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
     {    111,12}, {     63,11}, {    143,10}, {    287, 9}, \
     {    575,10}, {    303,11}, {    159,10}, {    319,12}, \
     {     95,11}, {    191,10}, {    399,11}, {    207,10}, \
-    {    431,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 76
+    {    431,13}, {     63,12}, {    127,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
+    {    159,11}, {    335,10}, {    671,11}, {    367,12}, \
+    {    191,11}, {    399,10}, {    799,11}, {    431,12}, \
+    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    543,12}, {    287,11}, {    607,12}, {    319,11}, \
+    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
+    {    415,11}, {    863,12}, {    447,14}, {    127,13}, \
+    {    255,12}, {    607,13}, {    319,12}, {    735,13}, \
+    {    383,12}, {    799,11}, {   1599,12}, {    863,13}, \
+    {    447,12}, {    927,11}, {   1855,14}, {    255,13}, \
+    {    511,12}, {   1055,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1279,13}, {    703,14}, {    383,13}, \
+    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
+    {    895,12}, {   1791,15}, {    255,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1471,14}, {    767,13}, \
+    {   1599,12}, {   3199,13}, {   1663,14}, {    895,13}, \
+    {   1855,15}, {    511,14}, {   1023,13}, {   2175,14}, \
+    {   1151,13}, {   2431,14}, {   1279,13}, {   2687,14}, \
+    {   1407,15}, {    767,14}, {   1535,13}, {   3199,14}, \
+    {   1663,13}, {   3455,14}, {   1791,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 155
 #define MUL_FFT_THRESHOLD                 5760
 
-#define SQR_FFT_MODF_THRESHOLD             440  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             436  /* k = 5 */
 #define SQR_FFT_TABLE3                                      \
-  { {    440, 5}, {     14, 4}, {     29, 5}, {     29, 6}, \
-    {     15, 5}, {     31, 6}, {     35, 7}, {     18, 6}, \
-    {     37, 7}, {     33, 8}, {     17, 7}, {     37, 8}, \
+  { {    436, 5}, {     14, 4}, {     29, 5}, {     31, 6}, \
+    {     35, 7}, {     18, 6}, {     37, 7}, {     37, 8}, \
     {     19, 7}, {     40, 8}, {     37, 9}, {     19, 8}, \
     {     43, 9}, {     23, 8}, {     49, 9}, {     27, 8}, \
     {     57, 9}, {     43,10}, {     23, 9}, {     55,10}, \
@@ -93,45 +113,67 @@
     {     87,11}, {     47,10}, {    111,12}, {     31,11}, \
     {     63,10}, {    135,11}, {     79,10}, {    167,11}, \
     {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143,10}, {    303,11}, \
-    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
-    {    399,11}, {    207,10}, {    431,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 66
+    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
+    {    575,10}, {    303,11}, {    159,10}, {    319,12}, \
+    {     95,11}, {    191,10}, {    399,11}, {    207,10}, \
+    {    431,13}, {     63,12}, {    127,11}, {    271,10}, \
+    {    543,11}, {    303,12}, {    159,11}, {    335,10}, \
+    {    671,11}, {    367,10}, {    735,12}, {    191,11}, \
+    {    399,10}, {    799,11}, {    431,12}, {    223,11}, \
+    {    463,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    607,12}, {    319,11}, {    671,12}, \
+    {    351,11}, {    735,13}, {    191,12}, {    383,11}, \
+    {    799,12}, {    415,11}, {    863,12}, {    447,11}, \
+    {    895,14}, {    127,13}, {    255,12}, {    543,11}, \
+    {   1087,12}, {    607,13}, {    319,12}, {    735,13}, \
+    {    383,12}, {    863,13}, {    447,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1183,13}, {    639,12}, {   1279,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1535,13}, \
+    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1471,14}, {    767,13}, {   1663,14}, \
+    {    895,13}, {   1919,15}, {    511,14}, {   1023,13}, \
+    {   2175,14}, {   1151,13}, {   2431,14}, {   1279,13}, \
+    {   2687,14}, {   1407,15}, {    767,14}, {   1535,13}, \
+    {   3199,14}, {   1663,13}, {   3455,14}, {   1791,13}, \


More information about the gmp-commit mailing list