[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Feb 22 22:08:40 CET 2011


details:   /var/hg/gmp/rev/4dd77cf2faf2
changeset: 13886:4dd77cf2faf2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 22 21:44:07 2011 +0100
description:
(SHLD_SLOW, SHRD_SLOW): Define.

details:   /var/hg/gmp/rev/95bd581e32a5
changeset: 13887:95bd581e32a5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 22 21:51:12 2011 +0100
description:
Use R8 and R32 more regularly.

details:   /var/hg/gmp/rev/0c678f22f3f8
changeset: 13888:0c678f22f3f8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 22 22:08:27 2011 +0100
description:
Export SHLD_SLOW and SHRD_SLOW to config.m4, also fixing typo in exporting code.

diffstat:

 ChangeLog                           |   8 +++++
 configure.in                        |  12 ++++----
 mpn/x86_64/addaddmul_1msb0.asm      |  12 ++++----
 mpn/x86_64/aors_n.asm               |   6 ++--
 mpn/x86_64/aorsmul_1.asm            |   8 ++--
 mpn/x86_64/atom/gmp-mparam.h        |   3 ++
 mpn/x86_64/atom/sublsh1_n.asm       |   8 ++--
 mpn/x86_64/com.asm                  |   6 ++--
 mpn/x86_64/copyd.asm                |   4 +-
 mpn/x86_64/copyi.asm                |   4 +-
 mpn/x86_64/core2/aors_n.asm         |  12 ++++----
 mpn/x86_64/core2/lshift.asm         |  42 ++++++++++++++--------------
 mpn/x86_64/core2/lshiftc.asm        |  42 ++++++++++++++--------------
 mpn/x86_64/core2/rshift.asm         |  42 ++++++++++++++--------------
 mpn/x86_64/logops_n.asm             |  23 ++++++++-------
 mpn/x86_64/lshsub_n.asm             |  54 ++++++++++++++++++------------------
 mpn/x86_64/mod_1_2.asm              |   5 ++-
 mpn/x86_64/mode1o.asm               |  28 +++++++++---------
 mpn/x86_64/mul_1.asm                |   8 ++--
 mpn/x86_64/nano/gmp-mparam.h        |   3 ++
 mpn/x86_64/pentium4/lshift.asm      |  18 ++++++------
 mpn/x86_64/pentium4/lshiftc.asm     |  18 ++++++------
 mpn/x86_64/pentium4/mod_34lsub1.asm |   4 +-
 mpn/x86_64/pentium4/rshift.asm      |  18 ++++++------
 mpn/x86_64/redc_1.asm               |  46 +++++++++++++++---------------
 mpn/x86_64/rsh1aors_n.asm           |   4 +-
 26 files changed, 227 insertions(+), 211 deletions(-)

diffs (truncated from 1270 to 300 lines):

diff -r f705ef6c2c10 -r 0c678f22f3f8 ChangeLog
--- a/ChangeLog	Tue Feb 22 20:43:22 2011 +0100
+++ b/ChangeLog	Tue Feb 22 22:08:27 2011 +0100
@@ -1,3 +1,11 @@
+2011-02-22  Torbjorn Granlund  <tege at gmplib.org>
+
+	* configure.in: Export SHLD_SLOW and SHRD_SLOW to config.m4, also
+	fixing typo in exporting code.
+
+	* mpn/x86_64/nano/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+	* mpn/x86_64/atom/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+
 2011-02-22  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Rewrite.
diff -r f705ef6c2c10 -r 0c678f22f3f8 configure.in
--- a/configure.in	Tue Feb 22 20:43:22 2011 +0100
+++ b/configure.in	Tue Feb 22 22:08:27 2011 +0100
@@ -3240,15 +3240,15 @@
 [The gmp-mparam.h file (a string) the tune program should suggest updating.])
 
 
-# Copy relevant THRESHOLD parameters from gmp-mparam.h to config.m4.
-# We only do this for THRESHOLDs that are used by some assembly files.
+# Copy relevant parameters from gmp-mparam.h to config.m4.
+# We only do this for parameters that are used by some assembly files.
 # Fat binaries do this on a per-file basis, so skip in that case.
 #
 if test -z "$fat_path"; then
-  for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD; do
-    threshold=`sed -n 's/^#define '$i'[ 	]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
-    if test -n "threshold"; then
-      GMP_DEFINE_RAW(["define(<$i>,<$threshold>)"])
+  for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
+    value=`sed -n 's/^#define '$i'[ 	]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+    if test -n "$value"; then
+      GMP_DEFINE_RAW(["define(<$i>,<$value>)"])
     fi
   done
 fi
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/addaddmul_1msb0.asm
--- a/mpn/x86_64/addaddmul_1msb0.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/addaddmul_1msb0.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -71,7 +71,7 @@
 	mul	%r8
 	add	%rax, %r10
 	mov	-16(bp,n,8), %rax
-	mov	$0, %r11d
+	mov	$0, R32(%r11)
 	adc	%rdx, %r11
 	mul	%r9
 	add	%rax, %r10
@@ -81,7 +81,7 @@
 	mul	%r8
 	add	%rax, %r11
 	mov	-8(bp,n,8), %rax
-	mov	$0, %r12d
+	mov	$0, R32(%r12)
 	adc	%rdx, %r12
 	mul	%r9
 	add	%rax, %r11
@@ -91,7 +91,7 @@
 	add	%rax, %r12
 	mov	%r11, -8(rp,n,8)
 	mov	(bp,n,8), %rax
-	mov	$0, %r10d
+	mov	$0, R32(%r10)
 	adc	%rdx, %r10
 	add	$3, n
 	js	L(top)
@@ -108,7 +108,7 @@
 	mul	%r8
 	add	%rax, %r10
 	mov	-16(bp), %rax
-	mov	$0, %r11d
+	mov	$0, R32(%r11)
 	adc	%rdx, %r11
 	mul	%r9
 	add	%rax, %r10
@@ -118,7 +118,7 @@
 	mul	%r8
 	add	%rax, %r11
 	mov	-8(bp), %rax
-	mov	$0, %r12d
+	mov	$0, R32(%r12)
 	adc	%rdx, %r12
 	mul	%r9
 	add	%rax, %r11
@@ -137,7 +137,7 @@
 	mul	%r8
 	add	%rax, %r10
 	mov	-8(bp), %rax
-	mov	$0, %r11d
+	mov	$0, R32(%r11)
 	adc	%rdx, %r11
 	mul	%r9
 	add	%rax, %r10
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/aors_n.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -84,7 +84,7 @@
 	jnz	L(2)
 	ADCSBB	(vp), %r8
 	mov	%r8, (rp)
-	adc	%eax, %eax
+	adc	R32(%rax), R32(%rax)
 	ret
 
 L(2):	dec	R32(%rax)
@@ -94,7 +94,7 @@
 	ADCSBB	8(vp), %r9
 	mov	%r8, (rp)
 	mov	%r9, 8(rp)
-	adc	%eax, %eax
+	adc	R32(%rax), R32(%rax)
 	ret
 
 L(3):	mov	16(up), %r10
@@ -141,6 +141,6 @@
 	inc	R32(%rax)
 	dec	R32(%rax)
 	jnz	L(lt4)
-	adc	%eax, %eax
+	adc	R32(%rax), R32(%rax)
 	ret
 EPILOGUE()
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/aorsmul_1.asm
--- a/mpn/x86_64/aorsmul_1.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/aorsmul_1.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -114,7 +114,7 @@
 	adc	%rax, %r9
 	mov	(up,n,8), %rax
 	adc	%rdx, %r8
-	mov	$0, %r10d
+	mov	$0, R32(%r10)
 L(L1):	mul	vl
 	ADDSUB	%r9, 8(rp,n,8)
 	adc	%rax, %r8
@@ -127,11 +127,11 @@
 L(L3):	mov	16(up,n,8), %rax
 	mul	vl
 	ADDSUB	%rbx, 24(rp,n,8)
-	mov	$0, %r8d		# zero
-	mov	%r8, %rbx		# zero
+	mov	$0, R32(%r8)		C zero
+	mov	%r8, %rbx		C zero
 	adc	%rax, %r10
 	mov	24(up,n,8), %rax
-	mov	%r8, %r9		# zero
+	mov	%r8, %r9		C zero
 	adc	%rdx, %r9
 L(L2):	mul	vl
 	add	$4, n
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/atom/gmp-mparam.h
--- a/mpn/x86_64/atom/gmp-mparam.h	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/atom/gmp-mparam.h	Tue Feb 22 22:08:27 2011 +0100
@@ -21,6 +21,9 @@
 #define GMP_LIMB_BITS 64
 #define BYTES_PER_MP_LIMB 8
 
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
 /* These routines exists for all x86_64 chips, but they are slower on Atom
    than separate add/sub and shift.  Make sure they are not really used.  */
 #undef HAVE_NATIVE_mpn_rsh1add_n
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/atom/sublsh1_n.asm
--- a/mpn/x86_64/atom/sublsh1_n.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/atom/sublsh1_n.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -149,7 +149,7 @@
 
 	ALIGN(16)
 L(top):	mov	(vp), %r8
-	add	%eax, %eax
+	add	R32(%rax), R32(%rax)
 	lea	64(vp), vp
 	adc	%r8, %r8
 	mov	-56(vp), %r9
@@ -166,8 +166,8 @@
 	adc	%r14, %r14
 	mov	-8(vp), %r15
 	adc	%r15, %r15
-	sbb	%eax, %eax
-	add	%ebp, %ebp
+	sbb	R32(%rax), R32(%rax)
+	add	R32(%rbp), R32(%rbp)
 	mov	(up), %rbp
 	lea	64(rp), rp
 	mov	8(up), %rbx
@@ -194,7 +194,7 @@
 	sbb	%r15, %rbx
 	lea	64(up), up
 	mov	%rbx, 56(rp)
-	sbb	%ebp, %ebp
+	sbb	R32(%rbp), R32(%rbp)
 L(x):	sub	$8, n
 	jge	L(top)
 
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/com.asm
--- a/mpn/x86_64/com.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/com.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -40,13 +40,13 @@
 	ALIGN(32)
 PROLOGUE(mpn_com)
 	movq	(up), %r8
-	movl	%edx, %eax
+	movl	R32(%rdx), R32(%rax)
 	leaq	(up,n,8), up
 	leaq	(rp,n,8), rp
 	negq	n
-	andl	$3, %eax
+	andl	$3, R32(%rax)
 	je	L(b00)
-	cmpl	$2, %eax
+	cmpl	$2, R32(%rax)
 	jc	L(b01)
 	je	L(b10)
 
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/copyd.asm
--- a/mpn/x86_64/copyd.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/copyd.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -61,13 +61,13 @@
 	movq	%r11, (rp)
 	jnc	L(oop)
 
-L(end):	shrl	%edx			C edx = lowpart(n)
+L(end):	shrl	R32(%rdx)		C edx = lowpart(n)
 	jnc	1f
 	movq	(up), %r8
 	movq	%r8, -8(rp)
 	leaq	-8(rp), rp
 	leaq	-8(up), up
-1:	shrl	%edx			C edx = lowpart(n)
+1:	shrl	R32(%rdx)		C edx = lowpart(n)
 	jnc	1f
 	movq	(up), %r8
 	movq	-8(up), %r9
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/copyi.asm
--- a/mpn/x86_64/copyi.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/copyi.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -60,13 +60,13 @@
 	movq	%r11, (rp)
 	jnc	L(oop)
 
-L(end):	shrl	%edx			C edx = lowpart(n)
+L(end):	shrl	R32(%rdx)		C edx = lowpart(n)
 	jnc	1f
 	movq	(up), %r8
 	movq	%r8, 8(rp)
 	leaq	8(rp), rp
 	leaq	8(up), up
-1:	shrl	%edx			C edx = lowpart(n)
+1:	shrl	R32(%rdx)		C edx = lowpart(n)
 	jnc	1f
 	movq	(up), %r8
 	movq	8(up), %r9
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/core2/aors_n.asm
--- a/mpn/x86_64/core2/aors_n.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/core2/aors_n.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -66,12 +66,12 @@
 	lea	-8(up,n,8), up
 	lea	-8(vp,n,8), vp
 	lea	-16(rp,n,8), rp
-	mov	%ecx, %eax
+	mov	R32(%rcx), R32(%rax)
 	neg	n
-	and	$3, %eax
+	and	$3, R32(%rax)
 	je	L(b00)
-	add	%rax, n		C clear low rcx bits for jrcxz
-	cmp	$2, %eax
+	add	%rax, n			C clear low rcx bits for jrcxz
+	cmp	$2, R32(%rax)
 	jl	L(b01)
 	je	L(b10)
 
@@ -94,8 +94,8 @@
 
 L(end):	ADCSBB	%r11, %r10
 	mov	%r10, 8(rp)
-	mov	%ecx, %eax		C clear eax, ecx contains 0
-	adc	%eax, %eax
+	mov	R32(%rcx), R32(%rax)	C clear eax, ecx contains 0
+	adc	R32(%rax), R32(%rax)
 	ret
 
 	ALIGN(16)
diff -r f705ef6c2c10 -r 0c678f22f3f8 mpn/x86_64/core2/lshift.asm
--- a/mpn/x86_64/core2/lshift.asm	Tue Feb 22 20:43:22 2011 +0100
+++ b/mpn/x86_64/core2/lshift.asm	Tue Feb 22 22:08:27 2011 +0100
@@ -43,25 +43,25 @@
 	lea	-8(rp,n,8), rp
 	lea	-8(up,n,8), up
 
-	mov	%edx, %eax
-	and	$3, %eax
+	mov	R32(%rdx), R32(%rax)
+	and	$3, R32(%rax)
 	jne	L(nb00)
 L(b00):	C n = 4, 8, 12, ...


More information about the gmp-commit mailing list