[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
Tue Feb 22 22:08:40 CET 2011
details:   /var/hg/gmp/rev/4dd77cf2faf2
changeset: 13886:4dd77cf2faf2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 22 21:44:07 2011 +0100
description:
(SHLD_SLOW, SHRD_SLOW): Define.
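SHLD_SLOW and SHRD_SLOW flag CPUs whose shld/shrd double-shift instructions are slow, so shift code can fall back on plain shifts. As a rough sketch of the trade-off (illustrative only, not code from this commit), the high output limb of a two-limb left shift can be produced either way:

	C High limb of (hi:lo) << cnt for 1 <= cnt <= 63; hi in %rax,
	C lo in %r10, cnt in %rcx, %r8 used as scratch.
	C With a fast shld:
	shld	R8(%rcx), %r10, %rax	C %rax = (hi<<cnt) | (lo>>(64-cnt))

	C With SHLD_SLOW, the same value from plain shifts:
	mov	%r10, %r8
	shl	R8(%rcx), %rax		C hi << cnt
	neg	R32(%rcx)		C cnt := 64-cnt (mod 64)
	shr	R8(%rcx), %r8		C lo >> (64-cnt)
	neg	R32(%rcx)		C restore cnt
	or	%r8, %rax
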
details:   /var/hg/gmp/rev/95bd581e32a5
changeset: 13887:95bd581e32a5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 22 21:51:12 2011 +0100
description:
Use R8 and R32 more regularly.
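R8 and R32 are GMP's m4 register-size macros: R32(%rdx) expands to %edx and R8(%rcx) to %cl, so the limb register is named once and the operand size is spelled out by the macro. The hunks below replace raw 32-bit aliases with the macro form, e.g.:

	mov	$0, R32(%r11)		C previously: mov $0, %r11d
	adc	R32(%rax), R32(%rax)	C previously: adc %eax, %eax
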
details:   /var/hg/gmp/rev/0c678f22f3f8
changeset: 13888:0c678f22f3f8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 22 22:08:27 2011 +0100
description:
Export SHLD_SLOW and SHRD_SLOW to config.m4, also fixing typo in exporting code.
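The typo in question: the old export loop tested the literal string "threshold" (test -n "threshold", which always succeeds) rather than the variable "$threshold", so the guard was a no-op; the rewritten loop uses "$value" and tests it properly. After this change, config.m4 carries a line such as define(<SHLD_SLOW>,<1>) whenever the selected gmp-mparam.h defines the parameter, and an assembly file could test it roughly like this (a sketch, not code from this commit):

ifdef(<SHLD_SLOW>,<
	C shld is slow on this cpu: compose double shifts from plain shifts
>,<
	C shld is fast: use it directly
	shld	R8(%rcx), %r10, %rax
>)
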
diffstat:
 ChangeLog                           |   8 +++++
 configure.in                        |  12 ++++----
 mpn/x86_64/addaddmul_1msb0.asm      |  12 ++++----
 mpn/x86_64/aors_n.asm               |   6 ++--
 mpn/x86_64/aorsmul_1.asm            |   8 ++--
 mpn/x86_64/atom/gmp-mparam.h        |   3 ++
 mpn/x86_64/atom/sublsh1_n.asm       |   8 ++--
 mpn/x86_64/com.asm                  |   6 ++--
 mpn/x86_64/copyd.asm                |   4 +-
 mpn/x86_64/copyi.asm                |   4 +-
 mpn/x86_64/core2/aors_n.asm         |  12 ++++----
 mpn/x86_64/core2/lshift.asm         |  42 ++++++++++++++--------------
 mpn/x86_64/core2/lshiftc.asm        |  42 ++++++++++++++--------------
 mpn/x86_64/core2/rshift.asm         |  42 ++++++++++++++--------------
 mpn/x86_64/logops_n.asm             |  23 ++++++++-------
 mpn/x86_64/lshsub_n.asm             |  54 ++++++++++++++++++------------------
 mpn/x86_64/mod_1_2.asm              |   5 ++-
 mpn/x86_64/mode1o.asm               |  28 +++++++++---------
 mpn/x86_64/mul_1.asm                |   8 ++--
 mpn/x86_64/nano/gmp-mparam.h        |   3 ++
 mpn/x86_64/pentium4/lshift.asm      |  18 ++++++------
 mpn/x86_64/pentium4/lshiftc.asm     |  18 ++++++------
 mpn/x86_64/pentium4/mod_34lsub1.asm |   4 +-
 mpn/x86_64/pentium4/rshift.asm      |  18 ++++++------
 mpn/x86_64/redc_1.asm               |  46 +++++++++++++++---------------
 mpn/x86_64/rsh1aors_n.asm           |   4 +-
 26 files changed, 227 insertions(+), 211 deletions(-)
diffs (truncated from 1270 to 300 lines):
diff -r f705ef6c2c10 -r 0c678f22f3f8 ChangeLog
--- a/ChangeLog Tue Feb 22 20:43:22 2011 +0100
+++ b/ChangeLog Tue Feb 22 22:08:27 2011 +0100
@@ -1,3 +1,11 @@
+2011-02-22 Torbjorn Granlund <tege at gmplib.org>
+
+ * configure.in: Export SHLD_SLOW and SHRD_SLOW to config.m4, also
+ fixing typo in exporting code.
+
+ * mpn/x86_64/nano/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+ * mpn/x86_64/atom/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+
2011-02-22 Niels Möller <nisse at lysator.liu.se>
* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Rewrite.
diff -r f705ef6c2c10 -r 0c678f22f3f8 configure.in
--- a/configure.in Tue Feb 22 20:43:22 2011 +0100
+++ b/configure.in Tue Feb 22 22:08:27 2011 +0100
@@ -3240,15 +3240,15 @@
[The gmp-mparam.h file (a string) the tune program should suggest updating.])
-# Copy relevant THRESHOLD parameters from gmp-mparam.h to config.m4.
-# We only do this for THRESHOLDs that are used by some assembly files.
+# Copy relevant parameters from gmp-mparam.h to config.m4.
+# We only do this for parameters that are used by some assembly files.
# Fat binaries do this on a per-file basis, so skip in that case.
#
if test -z "$fat_path"; then
- for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD; do
- threshold=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
- if test -n "threshold"; then
- GMP_DEFINE_RAW(["define(<$i>,<$threshold>)"])
+ for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
+ value=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+ if test -n "$value"; then
+ GMP_DEFINE_RAW(["define(<$i>,<$value>)"])
fi
done
fi
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/addaddmul_1msb0.asm
--- a/mpn/x86_64/addaddmul_1msb0.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/addaddmul_1msb0.asm Tue Feb 22 22:08:27 2011 +0100
@@ -71,7 +71,7 @@
mul %r8
add %rax, %r10
mov -16(bp,n,8), %rax
- mov $0, %r11d
+ mov $0, R32(%r11)
adc %rdx, %r11
mul %r9
add %rax, %r10
@@ -81,7 +81,7 @@
mul %r8
add %rax, %r11
mov -8(bp,n,8), %rax
- mov $0, %r12d
+ mov $0, R32(%r12)
adc %rdx, %r12
mul %r9
add %rax, %r11
@@ -91,7 +91,7 @@
add %rax, %r12
mov %r11, -8(rp,n,8)
mov (bp,n,8), %rax
- mov $0, %r10d
+ mov $0, R32(%r10)
adc %rdx, %r10
add $3, n
js L(top)
@@ -108,7 +108,7 @@
mul %r8
add %rax, %r10
mov -16(bp), %rax
- mov $0, %r11d
+ mov $0, R32(%r11)
adc %rdx, %r11
mul %r9
add %rax, %r10
@@ -118,7 +118,7 @@
mul %r8
add %rax, %r11
mov -8(bp), %rax
- mov $0, %r12d
+ mov $0, R32(%r12)
adc %rdx, %r12
mul %r9
add %rax, %r11
@@ -137,7 +137,7 @@
mul %r8
add %rax, %r10
mov -8(bp), %rax
- mov $0, %r11d
+ mov $0, R32(%r11)
adc %rdx, %r11
mul %r9
add %rax, %r10
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/aors_n.asm Tue Feb 22 22:08:27 2011 +0100
@@ -84,7 +84,7 @@
jnz L(2)
ADCSBB (vp), %r8
mov %r8, (rp)
- adc %eax, %eax
+ adc R32(%rax), R32(%rax)
ret
L(2): dec R32(%rax)
@@ -94,7 +94,7 @@
ADCSBB 8(vp), %r9
mov %r8, (rp)
mov %r9, 8(rp)
- adc %eax, %eax
+ adc R32(%rax), R32(%rax)
ret
L(3): mov 16(up), %r10
@@ -141,6 +141,6 @@
inc R32(%rax)
dec R32(%rax)
jnz L(lt4)
- adc %eax, %eax
+ adc R32(%rax), R32(%rax)
ret
EPILOGUE()
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/aorsmul_1.asm
--- a/mpn/x86_64/aorsmul_1.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/aorsmul_1.asm Tue Feb 22 22:08:27 2011 +0100
@@ -114,7 +114,7 @@
adc %rax, %r9
mov (up,n,8), %rax
adc %rdx, %r8
- mov $0, %r10d
+ mov $0, R32(%r10)
L(L1): mul vl
ADDSUB %r9, 8(rp,n,8)
adc %rax, %r8
@@ -127,11 +127,11 @@
L(L3): mov 16(up,n,8), %rax
mul vl
ADDSUB %rbx, 24(rp,n,8)
- mov $0, %r8d # zero
- mov %r8, %rbx # zero
+ mov $0, R32(%r8) C zero
+ mov %r8, %rbx C zero
adc %rax, %r10
mov 24(up,n,8), %rax
- mov %r8, %r9 # zero
+ mov %r8, %r9 C zero
adc %rdx, %r9
L(L2): mul vl
add $4, n
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/atom/gmp-mparam.h
--- a/mpn/x86_64/atom/gmp-mparam.h Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/atom/gmp-mparam.h Tue Feb 22 22:08:27 2011 +0100
@@ -21,6 +21,9 @@
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
/* These routines exists for all x86_64 chips, but they are slower on Atom
than separate add/sub and shift. Make sure they are not really used. */
#undef HAVE_NATIVE_mpn_rsh1add_n
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/atom/sublsh1_n.asm
--- a/mpn/x86_64/atom/sublsh1_n.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/atom/sublsh1_n.asm Tue Feb 22 22:08:27 2011 +0100
@@ -149,7 +149,7 @@
ALIGN(16)
L(top): mov (vp), %r8
- add %eax, %eax
+ add R32(%rax), R32(%rax)
lea 64(vp), vp
adc %r8, %r8
mov -56(vp), %r9
@@ -166,8 +166,8 @@
adc %r14, %r14
mov -8(vp), %r15
adc %r15, %r15
- sbb %eax, %eax
- add %ebp, %ebp
+ sbb R32(%rax), R32(%rax)
+ add R32(%rbp), R32(%rbp)
mov (up), %rbp
lea 64(rp), rp
mov 8(up), %rbx
@@ -194,7 +194,7 @@
sbb %r15, %rbx
lea 64(up), up
mov %rbx, 56(rp)
- sbb %ebp, %ebp
+ sbb R32(%rbp), R32(%rbp)
L(x): sub $8, n
jge L(top)
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/com.asm
--- a/mpn/x86_64/com.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/com.asm Tue Feb 22 22:08:27 2011 +0100
@@ -40,13 +40,13 @@
ALIGN(32)
PROLOGUE(mpn_com)
movq (up), %r8
- movl %edx, %eax
+ movl R32(%rdx), R32(%rax)
leaq (up,n,8), up
leaq (rp,n,8), rp
negq n
- andl $3, %eax
+ andl $3, R32(%rax)
je L(b00)
- cmpl $2, %eax
+ cmpl $2, R32(%rax)
jc L(b01)
je L(b10)
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/copyd.asm
--- a/mpn/x86_64/copyd.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/copyd.asm Tue Feb 22 22:08:27 2011 +0100
@@ -61,13 +61,13 @@
movq %r11, (rp)
jnc L(oop)
-L(end): shrl %edx C edx = lowpart(n)
+L(end): shrl R32(%rdx) C edx = lowpart(n)
jnc 1f
movq (up), %r8
movq %r8, -8(rp)
leaq -8(rp), rp
leaq -8(up), up
-1: shrl %edx C edx = lowpart(n)
+1: shrl R32(%rdx) C edx = lowpart(n)
jnc 1f
movq (up), %r8
movq -8(up), %r9
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/copyi.asm
--- a/mpn/x86_64/copyi.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/copyi.asm Tue Feb 22 22:08:27 2011 +0100
@@ -60,13 +60,13 @@
movq %r11, (rp)
jnc L(oop)
-L(end): shrl %edx C edx = lowpart(n)
+L(end): shrl R32(%rdx) C edx = lowpart(n)
jnc 1f
movq (up), %r8
movq %r8, 8(rp)
leaq 8(rp), rp
leaq 8(up), up
-1: shrl %edx C edx = lowpart(n)
+1: shrl R32(%rdx) C edx = lowpart(n)
jnc 1f
movq (up), %r8
movq 8(up), %r9
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/core2/aors_n.asm
--- a/mpn/x86_64/core2/aors_n.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/core2/aors_n.asm Tue Feb 22 22:08:27 2011 +0100
@@ -66,12 +66,12 @@
lea -8(up,n,8), up
lea -8(vp,n,8), vp
lea -16(rp,n,8), rp
- mov %ecx, %eax
+ mov R32(%rcx), R32(%rax)
neg n
- and $3, %eax
+ and $3, R32(%rax)
je L(b00)
- add %rax, n C clear low rcx bits for jrcxz
- cmp $2, %eax
+ add %rax, n C clear low rcx bits for jrcxz
+ cmp $2, R32(%rax)
jl L(b01)
je L(b10)
@@ -94,8 +94,8 @@
L(end): ADCSBB %r11, %r10
mov %r10, 8(rp)
- mov %ecx, %eax C clear eax, ecx contains 0
- adc %eax, %eax
+ mov R32(%rcx), R32(%rax) C clear eax, ecx contains 0
+ adc R32(%rax), R32(%rax)
ret
ALIGN(16)
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/core2/lshift.asm
--- a/mpn/x86_64/core2/lshift.asm Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/core2/lshift.asm Tue Feb 22 22:08:27 2011 +0100
@@ -43,25 +43,25 @@
lea -8(rp,n,8), rp
lea -8(up,n,8), up
- mov %edx, %eax
- and $3, %eax
+ mov R32(%rdx), R32(%rax)
+ and $3, R32(%rax)
jne L(nb00)
L(b00): C n = 4, 8, 12, ...