[Gmp-commit] /home/hgfiles/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Dec 28 21:22:25 CET 2010
details: /home/hgfiles/gmp/rev/7041aec2d51d
changeset: 13724:7041aec2d51d
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 27 10:17:51 2010 +0100
description:
Whitespace cleanup.
details: /home/hgfiles/gmp/rev/c52a8774abe4
changeset: 13725:c52a8774abe4
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 27 10:30:21 2010 +0100
description:
Whitespace cleanup.
details: /home/hgfiles/gmp/rev/24e39d57f1e7
changeset: 13726:24e39d57f1e7
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Dec 28 16:28:41 2010 +0100
description:
Update with several recent thresholds.
diffstat:
ChangeLog | 4 ++++
mpn/generic/matrix22_mul.c | 2 +-
mpn/minithres/gmp-mparam.h | 12 ++++++------
mpn/powerpc64/mode64/aorslsh1_n.asm | 12 ++++++------
mpn/powerpc64/mode64/aorslsh2_n.asm | 12 ++++++------
mpn/powerpc64/mode64/mul_basecase.asm | 2 +-
mpn/x86/k7/mmx/divrem_1.asm | 12 ++++++------
mpn/x86/k7/mod_1_1.asm | 2 +-
mpn/x86/p6/README | 2 +-
mpn/x86/p6/dive_1.asm | 2 +-
mpn/x86/p6/mode1o.asm | 2 +-
mpn/x86/pentium4/sse2/dive_1.asm | 14 +++++++-------
mpn/x86/pentium4/sse2/mode1o.asm | 14 +++++++-------
mpn/x86_64/aorrlsh1_n.asm | 2 +-
mpn/x86_64/aorrlshC_n.asm | 2 +-
mpn/x86_64/aors_n.asm | 2 +-
mpn/x86_64/bdiv_dbm1c.asm | 4 ++--
mpz/jacobi.c | 4 ++--
18 files changed, 55 insertions(+), 51 deletions(-)
diffs (300 lines):
diff -r 18356655f1c6 -r 24e39d57f1e7 ChangeLog
--- a/ChangeLog Sun Dec 19 15:56:09 2010 +0100
+++ b/ChangeLog Tue Dec 28 16:28:41 2010 +0100
@@ -1,3 +1,7 @@
+2010-12-28 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/minithres/gmp-mparam.h: Update with several recent thresholds.
+
2010-12-19 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86/k7/mod_1_1.asm: Canonicalise cmov forms.
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/generic/matrix22_mul.c
--- a/mpn/generic/matrix22_mul.c Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/generic/matrix22_mul.c Tue Dec 28 16:28:41 2010 +0100
@@ -149,7 +149,7 @@
{
s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
s0s = 1; /* s4 = -r0 + r1 - r2 + r3 */
- /* Reverse sign! */
+ /* Reverse sign! */
}
else
{
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/minithres/gmp-mparam.h
--- a/mpn/minithres/gmp-mparam.h Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/minithres/gmp-mparam.h Tue Dec 28 16:28:41 2010 +0100
@@ -23,14 +23,14 @@
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1_1_THRESHOLD 2
-#define MOD_1_2_THRESHOLD 3
-#define MOD_1_4_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 3
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 4
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 1
#define USE_PREINV_DIVREM_1 1 /* native */
-#define USE_PREINV_MOD_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 3
#define MUL_TOOM22_THRESHOLD 8
#define MUL_TOOM33_THRESHOLD 20
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/powerpc64/mode64/aorslsh1_n.asm
--- a/mpn/powerpc64/mode64/aorslsh1_n.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/powerpc64/mode64/aorslsh1_n.asm Tue Dec 28 16:28:41 2010 +0100
@@ -25,16 +25,16 @@
ifdef(`OPERATION_addlsh1_n',`
define(ADDSUBC, addc)
- define(ADDSUBE, adde)
- define(INITCY, `addic $1, r1, 0')
- define(RETVAL, `addze r3, $1')
+ define(ADDSUBE, adde)
+ define(INITCY, `addic $1, r1, 0')
+ define(RETVAL, `addze r3, $1')
define(func, mpn_addlsh1_n)
')
ifdef(`OPERATION_sublsh1_n',`
define(ADDSUBC, subfc)
- define(ADDSUBE, subfe)
- define(INITCY, `addic $1, r1, -1')
- define(RETVAL, `subfze r3, $1
+ define(ADDSUBE, subfe)
+ define(INITCY, `addic $1, r1, -1')
+ define(RETVAL, `subfze r3, $1
neg r3, r3')
define(func, mpn_sublsh1_n)
')
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/powerpc64/mode64/aorslsh2_n.asm
--- a/mpn/powerpc64/mode64/aorslsh2_n.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/powerpc64/mode64/aorslsh2_n.asm Tue Dec 28 16:28:41 2010 +0100
@@ -25,16 +25,16 @@
ifdef(`OPERATION_addlsh2_n',`
define(ADDSUBC, addc)
- define(ADDSUBE, adde)
- define(INITCY, `addic $1, r1, 0')
- define(RETVAL, `addze r3, $1')
+ define(ADDSUBE, adde)
+ define(INITCY, `addic $1, r1, 0')
+ define(RETVAL, `addze r3, $1')
define(func, mpn_addlsh2_n)
')
ifdef(`OPERATION_sublsh2_n',`
define(ADDSUBC, subfc)
- define(ADDSUBE, subfe)
- define(INITCY, `addic $1, r1, -1')
- define(RETVAL, `subfze r3, $1
+ define(ADDSUBE, subfe)
+ define(INITCY, `addic $1, r1, -1')
+ define(RETVAL, `subfze r3, $1
neg r3, r3')
define(func, mpn_sublsh2_n)
')
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/powerpc64/mode64/mul_basecase.asm
--- a/mpn/powerpc64/mode64/mul_basecase.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/powerpc64/mode64/mul_basecase.asm Tue Dec 28 16:28:41 2010 +0100
@@ -24,7 +24,7 @@
C POWER3/PPC630 6-18
C POWER4/PPC970 8
C POWER5 8
-C POWER6 24
+C POWER6 24
C INPUT PARAMETERS
define(`rp', `r3')
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/k7/mmx/divrem_1.asm
--- a/mpn/x86/k7/mmx/divrem_1.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/k7/mmx/divrem_1.asm Tue Dec 28 16:28:41 2010 +0100
@@ -724,12 +724,12 @@
C rnd() means rounding down to a multiple of d.
C
C m*n2 + b*n2 <= m*(d-1) + b*(d-1)
-C = m*d + b*d - m - b
-C = floor((b(b-d)-1)/d)*d + b*d - m - b
-C = rnd(b(b-d)-1) + b*d - m - b
-C = rnd(b(b-d)-1 + b*d) - m - b
-C = rnd(b*b-1) - m - b
-C <= (b-2)*b
+C = m*d + b*d - m - b
+C = floor((b(b-d)-1)/d)*d + b*d - m - b
+C = rnd(b(b-d)-1) + b*d - m - b
+C = rnd(b(b-d)-1 + b*d) - m - b
+C = rnd(b*b-1) - m - b
+C <= (b-2)*b
C
C Unchanged from the general case is that the final quotient limb q can be
C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/k7/mod_1_1.asm
--- a/mpn/x86/k7/mod_1_1.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/k7/mod_1_1.asm Tue Dec 28 16:28:41 2010 +0100
@@ -61,7 +61,7 @@
ALIGN(16)
L(top): mul %edi C 0
mov -12(%esi), %ebx C
- add %eax, %ebx C 4
+ add %eax, %ebx C 4
mov %ecx, %eax C 2
mov $0, %ecx C
adc %edx, %ecx C 6
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/p6/README
--- a/mpn/x86/p6/README Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/p6/README Tue Dec 28 16:28:41 2010 +0100
@@ -52,7 +52,7 @@
mpn_mul_basecase 8.2 cycles/crossproduct (approx)
mpn_sqr_basecase 4.0 cycles/crossproduct (approx)
- or 7.75 cycles/triangleproduct (approx)
+ or 7.75 cycles/triangleproduct (approx)
Pentium II and III have MMX and get the following improvements.
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/p6/dive_1.asm
--- a/mpn/x86/p6/dive_1.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/p6/dive_1.asm Tue Dec 28 16:28:41 2010 +0100
@@ -127,7 +127,7 @@
C imull %ebp, %eax 4
C mull PARAM_DIVISOR 5
C ----
-C total 10
+C total 10
C
C and this is the measured speed. No special scheduling is necessary, out
C of order execution hides the load latency.
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/p6/mode1o.asm
--- a/mpn/x86/p6/mode1o.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/p6/mode1o.asm Tue Dec 28 16:28:41 2010 +0100
@@ -113,7 +113,7 @@
C imull %edi, %eax 4
C mull PARAM_DIVISOR 5
C ----
-C total 10
+C total 10
C
C and this is the measured speed. No special scheduling is necessary, out
C of order execution hides the load latency.
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/pentium4/sse2/dive_1.asm
--- a/mpn/x86/pentium4/sse2/dive_1.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/pentium4/sse2/dive_1.asm Tue Dec 28 16:28:41 2010 +0100
@@ -139,13 +139,13 @@
C The dependent chain here is as follows.
C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
C
C Yet the loop measures 19.0 c/l, so obviously there's something gained
C there over a straight reading of the chip documentation.
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86/pentium4/sse2/mode1o.asm
--- a/mpn/x86/pentium4/sse2/mode1o.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86/pentium4/sse2/mode1o.asm Tue Dec 28 16:28:41 2010 +0100
@@ -113,13 +113,13 @@
C The dependent chain here is as follows.
C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
C
C Yet the loop measures 19.0 c/l, so obviously there's something gained
C there over a straight reading of the chip documentation.
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86_64/aorrlsh1_n.asm
--- a/mpn/x86_64/aorrlsh1_n.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86_64/aorrlsh1_n.asm Tue Dec 28 16:28:41 2010 +0100
@@ -25,7 +25,7 @@
C AMD K8,K9 2
C AMD K10 2
C Intel P4 13
-C Intel core2 3.45
+C Intel core2 3.45
C Intel corei 3.45
C Intel atom ?
C VIA nano ?
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86_64/aorrlshC_n.asm
--- a/mpn/x86_64/aorrlshC_n.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86_64/aorrlshC_n.asm Tue Dec 28 16:28:41 2010 +0100
@@ -23,7 +23,7 @@
C AMD K8,K9 2
C AMD K10 2
C Intel P4 ?
-C Intel core2 3
+C Intel core2 3
C Intel corei 2.75
C Intel atom ?
C VIA nano ?
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86_64/aors_n.asm Tue Dec 28 16:28:41 2010 +0100
@@ -24,7 +24,7 @@
C AMD K8,K9 1.5
C AMD K10 1.5
C Intel P4 ?
-C Intel core2 4.9
+C Intel core2 4.9
C Intel corei ?
C Intel atom 4
C VIA nano 3.25
diff -r 18356655f1c6 -r 24e39d57f1e7 mpn/x86_64/bdiv_dbm1c.asm
--- a/mpn/x86_64/bdiv_dbm1c.asm Sun Dec 19 15:56:09 2010 +0100
+++ b/mpn/x86_64/bdiv_dbm1c.asm Tue Dec 28 16:28:41 2010 +0100
@@ -23,8 +23,8 @@
C AMD K8,K9 2.25
C AMD K10 2.25
C Intel P4 12.5
-C Intel core2 4
-C Intel corei 3.8
+C Intel core2 4
+C Intel corei 3.8
C Intel atom 20
C VIA nano 4
diff -r 18356655f1c6 -r 24e39d57f1e7 mpz/jacobi.c
--- a/mpz/jacobi.c Sun Dec 19 15:56:09 2010 +0100
+++ b/mpz/jacobi.c Tue Dec 28 16:28:41 2010 +0100
@@ -88,7 +88,7 @@
/* The MPN jacobi functions requies positive a and b, and b odd. So
we must to handle the cases of a or b zero, then signs, and then
the case of even b.
-
+
In addition, to reduce the number of cases, we arrange so that a
is odd, and asize >= bsize. */
@@ -121,7 +121,7 @@
result_bit1 ^= JACOBI_N1B_BIT1(blow);
asize = -asize;
}
-
+
STRIP_TWOS(result_bit1, atwos, blow, asrcp, asize, alow);
/* Both numbers odd, so arrange so that asize >= bsize */
More information about the gmp-commit mailing list