[Gmp-commit] /home/hgfiles/gmp: 7 new changesets
mercurial at gmplib.org
Sun Dec 19 00:01:56 CET 2010
details: /home/hgfiles/gmp/rev/7ac1ad2e945a
changeset: 13715:7ac1ad2e945a
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 19:51:17 2010 +0100
description:
Add some comments.
details: /home/hgfiles/gmp/rev/79f7c30d2c34
changeset: 13716:79f7c30d2c34
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 19:58:03 2010 +0100
description:
Remove constant index.
details: /home/hgfiles/gmp/rev/42245b2a48f7
changeset: 13717:42245b2a48f7
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 20:05:26 2010 +0100
description:
*** empty log message ***
details: /home/hgfiles/gmp/rev/6428c419c582
changeset: 13718:6428c419c582
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 21:40:38 2010 +0100
description:
Tweak core2 divrem_1.asm slightly, correct cycle counts.
details: /home/hgfiles/gmp/rev/962f420b7e22
changeset: 13719:962f420b7e22
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 22:45:57 2010 +0100
description:
Amend last change.
details: /home/hgfiles/gmp/rev/7292f9e5b692
changeset: 13720:7292f9e5b692
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 23:47:59 2010 +0100
description:
Generalise code for putting THRESHOLDs in config.m4.
Add BMOD_1_TO_MOD_1_THRESHOLD to list.
details: /home/hgfiles/gmp/rev/7ff0fb66737f
changeset: 13721:7ff0fb66737f
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 19 00:01:46 2010 +0100
description:
Call mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs.
diffstat:
ChangeLog | 16 +++++++++++++
configure.in | 14 ++++++-----
mpn/x86_64/addmul_2.asm | 4 +-
mpn/x86_64/core2/divrem_1.asm | 49 ++++++++++++++++++++--------------------
mpn/x86_64/fat/fat.c | 6 ++--
mpn/x86_64/gcd_1.asm | 15 +++++++-----
mpn/x86_64/lshiftc.asm | 8 +++---
mpn/x86_64/mul_basecase.asm | 2 +-
mpn/x86_64/pentium4/lshift.asm | 8 +++---
mpn/x86_64/pentium4/lshiftc.asm | 8 +++---
mpn/x86_64/pentium4/rshift.asm | 8 +++---
11 files changed, 79 insertions(+), 59 deletions(-)
diffs (truncated from 337 to 300 lines):
diff -r 5995c8359734 -r 7ff0fb66737f ChangeLog
--- a/ChangeLog Thu Dec 16 21:31:10 2010 +0100
+++ b/ChangeLog Sun Dec 19 00:01:46 2010 +0100
@@ -1,3 +1,19 @@
+2010-12-18 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/gcd_1.asm: Call mpn_mod_1 for operands with more than
+ BMOD_1_TO_MOD_1_THRESHOLD limbs.
+
+ * configure.in: Generalise code for putting THRESHOLDs in config.m4.
+ Add BMOD_1_TO_MOD_1_THRESHOLD to list.
+
+ * mpn/x86_64/core2/divrem_1.asm: Tweak slightly, correct cycle counts.
+
+ * mpn/x86_64/addmul_2.asm: Remove constant index.
+ * mpn/x86_64/lshiftc.asm: Likewise.
+ * mpn/x86_64/pentium4/lshift.asm: Likewise.
+ * mpn/x86_64/pentium4/lshiftc.asm: Likewise.
+ * mpn/x86_64/pentium4/rshift.asm: Likewise.
+
2010-12-16 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/mod_34lsub1.asm: Complete rewrite.
diff -r 5995c8359734 -r 7ff0fb66737f configure.in
--- a/configure.in Thu Dec 16 21:31:10 2010 +0100
+++ b/configure.in Sun Dec 19 00:01:46 2010 +0100
@@ -3222,15 +3222,17 @@
[The gmp-mparam.h file (a string) the tune program should suggest updating.])
-# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
-# Some versions of sqr_basecase.asm use this.
+# Copy relevant THRESHOLD parameters from gmp-mparam.h to config.m4.
+# We only do this for THRESHOLDs that are used by some assembly files.
# Fat binaries do this on a per-file basis, so skip in that case.
#
if test -z "$fat_path"; then
- tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
- if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
- GMP_DEFINE_RAW(["define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
- fi
+ for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD; do
+ threshold=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+ if test -n "$threshold"; then
+ GMP_DEFINE_RAW(["define(<$i>,<$threshold>)"])
+ fi
+ done
fi
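
The net effect of the generalised loop, sketched with a hypothetical
threshold value of 20: a gmp-mparam.h line such as

  #define BMOD_1_TO_MOD_1_THRESHOLD    20

is picked out by the sed expression and re-emitted into config.m4 as

  define(<BMOD_1_TO_MOD_1_THRESHOLD>,<20>)

so an assembly file can use the value as an m4 symbol, as the gcd_1.asm
change further down does with

  cmp $BMOD_1_TO_MOD_1_THRESHOLD, %rsi
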
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/addmul_2.asm
--- a/mpn/x86_64/addmul_2.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/addmul_2.asm Sun Dec 19 00:01:46 2010 +0100
@@ -158,10 +158,10 @@
add $4, n
js L(top)
- add w3, (rp,n,8)
+ add w3, 24(rp)
adc %rax, w0
adc %rdx, w1
- mov w0, 8(rp,n,8)
+ mov w0, 32(rp)
mov w1, %rax
pop %rbp
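
A note on the "Remove constant index" changes, here and in the shift
files below: at these instructions the loop counter n has a known
constant value, so the scaled-index addressing can be folded into a
plain displacement, dropping the index register from the address
computation. The folded displacements in this diff imply n == 3 on
loop exit:

  C before: indexed form, though n is constant at this point
  add  w3, (rp,n,8)    C with n == 3, (rp,n,8) is 24(rp)
  C after: constant displacement
  add  w3, 24(rp)
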
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/core2/divrem_1.asm
--- a/mpn/x86_64/core2/divrem_1.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/core2/divrem_1.asm Sun Dec 19 00:01:46 2010 +0100
@@ -22,13 +22,13 @@
C norm unorm frac
-C AMD K8,K9 13 14 12
-C AMD K10 13 14 12
-C Intel P4 47 45 43
-C Intel core2 23 23 19.5
+C AMD K8,K9 15 15 12
+C AMD K10 15 15 12
+C Intel P4 44 44 43
+C Intel core2 24 24 19.5
C Intel corei 19 19 18
-C Intel atom 43 51 36
-C VIA nano 25 43 24
+C Intel atom 51 51 36
+C VIA nano 46 44 22.5
C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
@@ -76,7 +76,7 @@
mov 40(%rsp), R8(cnt)
shl R8(cnt), d
- jmp L(uent)
+ jmp L(ent)
EPILOGUE()
ALIGN(16)
@@ -127,38 +127,37 @@
mov %rax, dinv
mov %rbp, %rax
test un, un
- je L(87)
-L(uent):mov -8(up,un,8), %rbp
+ je L(frac)
+L(ent): mov -8(up,un,8), %rbp
shr R8(%rcx), %rax
shld R8(%rcx), %rbp, %rax
sub $2, un
- js L(uend)
+ js L(end)
ALIGN(16)
-L(utop):lea 1(%rax), %r11
+L(top): lea 1(%rax), %r11
mul dinv
mov (up,un,8), %r10
shld R8(%rcx), %r10, %rbp
- add %rbp, %rax
+ mov %rbp, %r13
+ add %rax, %r13
adc %r11, %rdx
- mov %rax, %r11
- mov %rdx, %r13
+ mov %rdx, %r11
imul d, %rdx
sub %rdx, %rbp
- mov d, %rax
- add %rbp, %rax
+ lea (d,%rbp), %rax
sub $8, qp
- cmp %r11, %rbp
- cmovb %rbp, %rax
- adc $-1, %r13
+ cmp %r13, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r11
cmp d, %rax
jae L(ufx)
L(uok): dec un
- mov %r13, 8(qp)
+ mov %r11, 8(qp)
mov %r10, %rbp
- jns L(utop)
+ jns L(top)
-L(uend):lea 1(%rax), %r11
+L(end): lea 1(%rax), %r11
sal R8(%rcx), %rbp
mul dinv
add %rbp, %rax
@@ -176,16 +175,16 @@
jae L(efx)
L(eok): mov %r13, (qp)
sub $8, qp
- jmp L(87)
+ jmp L(frac)
L(ufx): sub d, %rax
- inc %r13
+ inc %r11
jmp L(uok)
L(efx): sub d, %rax
inc %r13
jmp L(eok)
-L(87): mov d, %rbp
+L(frac):mov d, %rbp
neg %rbp
jmp L(fent)
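
The rewritten loop is one limb-step of 2-by-1 division by a single
precomputed inverse dinv, in the Moller-Granlund style; the unorm entry
merely shld-shifts limbs into normalized position first. A standalone C
sketch of the per-limb step, with illustrative names rather than GMP's
actual C code, assuming 64-bit limbs, d normalized (top bit set),
u1 < d, and dinv = floor((2^128 - 1)/d) - 2^64:

  #include <stdint.h>

  typedef uint64_t limb;

  /* One quotient limb of <u1,u0> / d with precomputed inverse dinv.
     Mirrors the lea-1 / mul-dinv / imul-d / cmovc / adc $-1 sequence
     in the loop above; sketch only. */
  static limb
  div_2by1 (limb *qp, limb u1, limb u0, limb d, limb dinv)
  {
    unsigned __int128 p = (unsigned __int128) dinv * u1;
    p += ((unsigned __int128) u1 << 64) | u0;  /* (q1,q0) = dinv*u1 + <u1,u0> */
    limb q1 = (limb) (p >> 64) + 1;            /* candidate quotient limb */
    limb q0 = (limb) p;
    limb r = u0 - q1 * d;                      /* wraps mod 2^64 by design */
    if (r > q0)                                /* off-by-one: adjust down */
      { q1--; r += d; }
    if (r >= d)                                /* rare case: adjust up */
      { q1++; r -= d; }
    *qp = q1;
    return r;                                  /* remainder, now < d */
  }
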
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/fat/fat.c Sun Dec 19 00:01:46 2010 +0100
@@ -168,12 +168,12 @@
case 0x25: /* WSM Clarkdale/Arrandale */
case 0x28:
case 0x29:
- case 0x2a:
+ case 0x2a: /* SB */
case 0x2b:
case 0x2c: /* WSM Gulftown */
- case 0x2d:
+ case 0x2d: /* SBC-EP */
case 0x2e: /* NHM Beckton */
- case 0x2f:
+ case 0x2f: /* WSM Eagleton */
CPUVEC_SETUP_core2;
CPUVEC_SETUP_corei;
break;
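
The case labels are the "effective" Intel model numbers that fat.c
derives from CPUID leaf 1 at startup, before filling in its table of
CPU-specific function pointers. A sketch of how such a model value is
computed, assuming GCC's <cpuid.h>; this follows the standard Intel
encoding and is not fat.c's exact code:

  #include <cpuid.h>

  /* Return the effective Intel model number (e.g. 0x2a for SB),
     as matched by the switch in fat.c.  Sketch only. */
  static unsigned
  intel_model (void)
  {
    unsigned eax, ebx, ecx, edx;
    if (! __get_cpuid (1, &eax, &ebx, &ecx, &edx))
      return 0;
    unsigned family = (eax >> 8) & 0xf;
    unsigned model  = (eax >> 4) & 0xf;
    if (family == 6 || family == 15)
      model += ((eax >> 16) & 0xf) << 4;  /* extended-model bits */
    return model;
  }
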
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/gcd_1.asm
--- a/mpn/x86_64/gcd_1.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/gcd_1.asm Sun Dec 19 00:01:46 2010 +0100
@@ -82,7 +82,13 @@
push %rdx
sub $8, %rsp C maintain ABI required rsp alignment
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, %rsi
+ jl L(bmod)
+ CALL( mpn_mod_1)
+ jmp L(reduced)
+L(bmod):
CALL( mpn_modexact_1_odd)
+L(reduced):
add $8, %rsp
pop %rdx
@@ -91,15 +97,12 @@
test %rax, %rax
mov %rax, %rcx
- jnz L(strip_x)
+ LEA( ctz_table, %r9)
+ jnz L(strip_x_top)
mov %rdx, %rax
jmp L(done)
-L(strip_x):
- LEA( ctz_table, %r9)
- jmp L(strip_x_top)
-
ALIGN(16)
L(top):
cmovc %r10, %rcx C if x-y gave carry, use x,y-x 0
@@ -109,7 +112,7 @@
mov %rcx, %rax C 1
and $MASK, R32(%rcx) C 1
- mov (%r9,%rcx), R8(%rcx) C 1
+ movzb (%r9,%rcx), R32(%rcx) C 2
shr R8(%rcx), %rax C 4
cmp $MAXSHIFT, R8(%rcx) C 4
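
In C terms, the new entry sequence in gcd_1.asm is a size-based choice
between two single-limb reductions. mpn_modexact_1_odd returns a 2-adic
("bmod") residue r with r*2^k congruent to U mod v rather than the true
remainder, but for odd v that still satisfies gcd(r, v) == gcd(U, v),
so either reduction is valid here. A sketch using the real internal
entry points but a hypothetical wrapper name:

  #include "gmp-impl.h"  /* internal: mpn_modexact_1_odd, threshold */

  /* Hypothetical helper showing the dispatch gcd_1.asm now performs. */
  static mp_limb_t
  reduce_for_gcd (mp_srcptr up, mp_size_t un, mp_limb_t v)
  {
    if (un < BMOD_1_TO_MOD_1_THRESHOLD)
      return mpn_modexact_1_odd (up, un, v);  /* bmod: cheaper per limb */
    return mpn_mod_1 (up, un, v);             /* true mod: wins for large un */
  }
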
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/lshiftc.asm
--- a/mpn/x86_64/lshiftc.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/lshiftc.asm Sun Dec 19 00:01:46 2010 +0100
@@ -146,16 +146,16 @@
jae L(top) C 2
L(end):
neg R32(%rcx) C put rsh count in cl
- mov 16(up,n,8), %r8
+ mov 8(up), %r8
shr R8(%rcx), %r8
or %r8, %r10
- mov 8(up,n,8), %r9
+ mov (up), %r9
shr R8(%rcx), %r9
or %r9, %r11
not %r10
not %r11
- mov %r10, 24(rp,n,8)
- mov %r11, 16(rp,n,8)
+ mov %r10, 16(rp)
+ mov %r11, 8(rp)
neg R32(%rcx) C put lsh count in cl
L(ast): mov (up), %r10
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/mul_basecase.asm
--- a/mpn/x86_64/mul_basecase.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/mul_basecase.asm Sun Dec 19 00:01:46 2010 +0100
@@ -294,7 +294,7 @@
mov w3, -32(rp,n,8)
js L(mul_2_top)
- mov -32(up,n,8), %rax
+ mov -32(up,n,8), %rax C FIXME: n is constant
mul v1
add %rax, w0
mov w0, (rp)
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/pentium4/lshift.asm
--- a/mpn/x86_64/pentium4/lshift.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/pentium4/lshift.asm Sun Dec 19 00:01:46 2010 +0100
@@ -133,14 +133,14 @@
jae L(top) C 2
L(end):
- movq 16(up,n,8), %mm0
+ movq 8(up), %mm0
psrlq %mm5, %mm0
por %mm0, %mm2
- movq 8(up,n,8), %mm1
+ movq (up), %mm1
psrlq %mm5, %mm1
por %mm1, %mm3
- movq %mm2, 24(rp,n,8)
- movq %mm3, 16(rp,n,8)
+ movq %mm2, 16(rp)
+ movq %mm3, 8(rp)
L(ast): movq (up), %mm2
psllq %mm4, %mm2
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/pentium4/lshiftc.asm
--- a/mpn/x86_64/pentium4/lshiftc.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/pentium4/lshiftc.asm Sun Dec 19 00:01:46 2010 +0100
@@ -143,15 +143,15 @@
jae L(top)
L(end): pxor %mm6, %mm2
- movq 16(up,n,8), %mm0