[Gmp-commit] /home/hgfiles/gmp: 7 new changesets
mercurial at gmplib.org
Sun Dec 19 00:01:56 CET 2010
details: /home/hgfiles/gmp/rev/7ac1ad2e945a
changeset: 13715:7ac1ad2e945a
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 19:51:17 2010 +0100
description:
Add some comments.
details: /home/hgfiles/gmp/rev/79f7c30d2c34
changeset: 13716:79f7c30d2c34
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 19:58:03 2010 +0100
description:
Remove constant index.
details: /home/hgfiles/gmp/rev/42245b2a48f7
changeset: 13717:42245b2a48f7
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 20:05:26 2010 +0100
description:
*** empty log message ***
details: /home/hgfiles/gmp/rev/6428c419c582
changeset: 13718:6428c419c582
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 21:40:38 2010 +0100
description:
Tweak core2 divrem_1.asm slightly, correct cycle counts.
details: /home/hgfiles/gmp/rev/962f420b7e22
changeset: 13719:962f420b7e22
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 22:45:57 2010 +0100
description:
Amend last change.
details: /home/hgfiles/gmp/rev/7292f9e5b692
changeset: 13720:7292f9e5b692
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Dec 18 23:47:59 2010 +0100
description:
Generalise code for putting THRESHOLDs in config.m4.
Add BMOD_1_TO_MOD_1_THRESHOLD to list.
details: /home/hgfiles/gmp/rev/7ff0fb66737f
changeset: 13721:7ff0fb66737f
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 19 00:01:46 2010 +0100
description:
Call mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs.
diffstat:
ChangeLog | 16 +++++++++++++
configure.in | 14 ++++++-----
mpn/x86_64/addmul_2.asm | 4 +-
mpn/x86_64/core2/divrem_1.asm | 49 ++++++++++++++++++++--------------------
mpn/x86_64/fat/fat.c | 6 ++--
mpn/x86_64/gcd_1.asm | 15 +++++++-----
mpn/x86_64/lshiftc.asm | 8 +++---
mpn/x86_64/mul_basecase.asm | 2 +-
mpn/x86_64/pentium4/lshift.asm | 8 +++---
mpn/x86_64/pentium4/lshiftc.asm | 8 +++---
mpn/x86_64/pentium4/rshift.asm | 8 +++---
11 files changed, 79 insertions(+), 59 deletions(-)
diffs (truncated from 337 to 300 lines):
diff -r 5995c8359734 -r 7ff0fb66737f ChangeLog
--- a/ChangeLog Thu Dec 16 21:31:10 2010 +0100
+++ b/ChangeLog Sun Dec 19 00:01:46 2010 +0100
@@ -1,3 +1,19 @@
+2010-12-18 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/gcd_1.asm: Call mpn_mod_1 for operands with more than
+ BMOD_1_TO_MOD_1_THRESHOLD limbs.
+
+ * configure.in: Generalise code for putting THRESHOLDs in config.m4.
+ Add BMOD_1_TO_MOD_1_THRESHOLD to list.
+
+ * mpn/x86_64/core2/divrem_1.asm: Tweak slightly, correct cycle counts.
+
+ * mpn/x86_64/addmul_2.asm: Remove constant index.
+ * mpn/x86_64/lshiftc.asm: Likewise.
+ * mpn/x86_64/pentium4/lshift.asm: Likewise.
+ * mpn/x86_64/pentium4/lshiftc.asm: Likewise.
+ * mpn/x86_64/pentium4/rshift.asm: Likewise.
+
2010-12-16 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/mod_34lsub1.asm: Complete rewrite.
diff -r 5995c8359734 -r 7ff0fb66737f configure.in
--- a/configure.in Thu Dec 16 21:31:10 2010 +0100
+++ b/configure.in Sun Dec 19 00:01:46 2010 +0100
@@ -3222,15 +3222,17 @@
[The gmp-mparam.h file (a string) the tune program should suggest updating.])
-# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
-# Some versions of sqr_basecase.asm use this.
+# Copy relevant THRESHOLD parameters from gmp-mparam.h to config.m4.
+# We only do this for THRESHOLDs that are used by some assembly files.
# Fat binaries do this on a per-file basis, so skip in that case.
#
if test -z "$fat_path"; then
- tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
- if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
- GMP_DEFINE_RAW(["define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
- fi
+ for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD; do
+ threshold=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+ if test -n "$threshold"; then
+ GMP_DEFINE_RAW(["define(<$i>,<$threshold>)"])
+ fi
+ done
fi
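
The net effect of the generalised loop, sketched with a hypothetical
threshold value of 20: a gmp-mparam.h line such as

  #define BMOD_1_TO_MOD_1_THRESHOLD    20

is picked out by the sed expression and re-emitted into config.m4 as

  define(<BMOD_1_TO_MOD_1_THRESHOLD>,<20>)

so an assembly file can use the value as an m4 symbol, as the gcd_1.asm
change further down does with

  cmp $BMOD_1_TO_MOD_1_THRESHOLD, %rsi
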
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/addmul_2.asm
--- a/mpn/x86_64/addmul_2.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/addmul_2.asm Sun Dec 19 00:01:46 2010 +0100
@@ -158,10 +158,10 @@
add $4, n
js L(top)
- add w3, (rp,n,8)
+ add w3, 24(rp)
adc %rax, w0
adc %rdx, w1
- mov w0, 8(rp,n,8)
+ mov w0, 32(rp)
mov w1, %rax
pop %rbp
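
A note on the "Remove constant index" changes, here and in the shift
files below: at these instructions the loop counter n has a known
constant value, so the scaled-index addressing can be folded into a
plain displacement, dropping the index register from the address
computation. The folded displacements in this diff imply n == 3 on
loop exit:

  C before: indexed form, though n is constant at this point
  add  w3, (rp,n,8)    C with n == 3, (rp,n,8) is 24(rp)
  C after: constant displacement
  add  w3, 24(rp)
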
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/core2/divrem_1.asm
--- a/mpn/x86_64/core2/divrem_1.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/core2/divrem_1.asm Sun Dec 19 00:01:46 2010 +0100
@@ -22,13 +22,13 @@
C norm unorm frac
-C AMD K8,K9 13 14 12
-C AMD K10 13 14 12
-C Intel P4 47 45 43
-C Intel core2 23 23 19.5
+C AMD K8,K9 15 15 12
+C AMD K10 15 15 12
+C Intel P4 44 44 43
+C Intel core2 24 24 19.5
C Intel corei 19 19 18
-C Intel atom 43 51 36
-C VIA nano 25 43 24
+C Intel atom 51 51 36
+C VIA nano 46 44 22.5
C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
@@ -76,7 +76,7 @@
mov 40(%rsp), R8(cnt)
shl R8(cnt), d
- jmp L(uent)
+ jmp L(ent)
EPILOGUE()
ALIGN(16)
@@ -127,38 +127,37 @@
mov %rax, dinv
mov %rbp, %rax
test un, un
- je L(87)
-L(uent):mov -8(up,un,8), %rbp
+ je L(frac)
+L(ent): mov -8(up,un,8), %rbp
shr R8(%rcx), %rax
shld R8(%rcx), %rbp, %rax
sub $2, un
- js L(uend)
+ js L(end)
ALIGN(16)
-L(utop):lea 1(%rax), %r11
+L(top): lea 1(%rax), %r11
mul dinv
mov (up,un,8), %r10
shld R8(%rcx), %r10, %rbp
- add %rbp, %rax
+ mov %rbp, %r13
+ add %rax, %r13
adc %r11, %rdx
- mov %rax, %r11
- mov %rdx, %r13
+ mov %rdx, %r11
imul d, %rdx
sub %rdx, %rbp
- mov d, %rax
- add %rbp, %rax
+ lea (d,%rbp), %rax
sub $8, qp
- cmp %r11, %rbp
- cmovb %rbp, %rax
- adc $-1, %r13
+ cmp %r13, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r11
cmp d, %rax
jae L(ufx)
L(uok): dec un
- mov %r13, 8(qp)
+ mov %r11, 8(qp)
mov %r10, %rbp
- jns L(utop)
+ jns L(top)
-L(uend):lea 1(%rax), %r11
+L(end): lea 1(%rax), %r11
sal R8(%rcx), %rbp
mul dinv
add %rbp, %rax
@@ -176,16 +175,16 @@
jae L(efx)
L(eok): mov %r13, (qp)
sub $8, qp
- jmp L(87)
+ jmp L(frac)
L(ufx): sub d, %rax
- inc %r13
+ inc %r11
jmp L(uok)
L(efx): sub d, %rax
inc %r13
jmp L(eok)
-L(87): mov d, %rbp
+L(frac):mov d, %rbp
neg %rbp
jmp L(fent)
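
The rewritten loop is one limb-step of 2-by-1 division by a single
precomputed inverse dinv, in the Moller-Granlund style; the unorm entry
merely shld-shifts limbs into normalized position first. A standalone C
sketch of the per-limb step, with illustrative names rather than GMP's
actual C code, assuming 64-bit limbs, d normalized (top bit set),
u1 < d, and dinv = floor((2^128 - 1)/d) - 2^64:

  #include <stdint.h>

  typedef uint64_t limb;

  /* One quotient limb of <u1,u0> / d with precomputed inverse dinv.
     Mirrors the lea-1 / mul-dinv / imul-d / cmovc / adc $-1 sequence
     in the loop above; sketch only. */
  static limb
  div_2by1 (limb *qp, limb u1, limb u0, limb d, limb dinv)
  {
    unsigned __int128 p = (unsigned __int128) dinv * u1;
    p += ((unsigned __int128) u1 << 64) | u0;  /* (q1,q0) = dinv*u1 + <u1,u0> */
    limb q1 = (limb) (p >> 64) + 1;            /* candidate quotient limb */
    limb q0 = (limb) p;
    limb r = u0 - q1 * d;                      /* wraps mod 2^64 by design */
    if (r > q0)                                /* off-by-one: adjust down */
      { q1--; r += d; }
    if (r >= d)                                /* rare case: adjust up */
      { q1++; r -= d; }
    *qp = q1;
    return r;                                  /* remainder, now < d */
  }
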
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/fat/fat.c Sun Dec 19 00:01:46 2010 +0100
@@ -168,12 +168,12 @@
case 0x25: /* WSM Clarkdale/Arrandale */
case 0x28:
case 0x29:
- case 0x2a:
+ case 0x2a: /* SB */
case 0x2b:
case 0x2c: /* WSM Gulftown */
- case 0x2d:
+ case 0x2d: /* SBC-EP */
case 0x2e: /* NHM Beckton */
- case 0x2f:
+ case 0x2f: /* WSM Eagleton */
CPUVEC_SETUP_core2;
CPUVEC_SETUP_corei;
break;
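
The case labels are the "effective" Intel model numbers that fat.c
derives from CPUID leaf 1 at startup, before filling in its table of
CPU-specific function pointers. A sketch of how such a model value is
computed, assuming GCC's <cpuid.h>; this follows the standard Intel
encoding and is not fat.c's exact code:

  #include <cpuid.h>

  /* Return the effective Intel model number (e.g. 0x2a for SB),
     as matched by the switch in fat.c.  Sketch only. */
  static unsigned
  intel_model (void)
  {
    unsigned eax, ebx, ecx, edx;
    if (! __get_cpuid (1, &eax, &ebx, &ecx, &edx))
      return 0;
    unsigned family = (eax >> 8) & 0xf;
    unsigned model  = (eax >> 4) & 0xf;
    if (family == 6 || family == 15)
      model += ((eax >> 16) & 0xf) << 4;  /* extended-model bits */
    return model;
  }
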
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/gcd_1.asm
--- a/mpn/x86_64/gcd_1.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/gcd_1.asm Sun Dec 19 00:01:46 2010 +0100
@@ -82,7 +82,13 @@
push %rdx
sub $8, %rsp C maintain ABI required rsp alignment
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, %rsi
+ jl L(bmod)
+ CALL( mpn_mod_1)
+ jmp L(reduced)
+L(bmod):
CALL( mpn_modexact_1_odd)
+L(reduced):
add $8, %rsp
pop %rdx
@@ -91,15 +97,12 @@
test %rax, %rax
mov %rax, %rcx
- jnz L(strip_x)
+ LEA( ctz_table, %r9)
+ jnz L(strip_x_top)
mov %rdx, %rax
jmp L(done)
-L(strip_x):
- LEA( ctz_table, %r9)
- jmp L(strip_x_top)
-
ALIGN(16)
L(top):
cmovc %r10, %rcx C if x-y gave carry, use x,y-x 0
@@ -109,7 +112,7 @@
mov %rcx, %rax C 1
and $MASK, R32(%rcx) C 1
- mov (%r9,%rcx), R8(%rcx) C 1
+ movzb (%r9,%rcx), R32(%rcx) C 2
shr R8(%rcx), %rax C 4
cmp $MAXSHIFT, R8(%rcx) C 4
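
In C terms, the new entry sequence in gcd_1.asm is a size-based choice
between two single-limb reductions. mpn_modexact_1_odd returns a 2-adic
("bmod") residue r with r*2^k congruent to U mod v rather than the true
remainder, but for odd v that still satisfies gcd(r, v) == gcd(U, v),
so either reduction is valid here. A sketch using the real internal
entry points but a hypothetical wrapper name:

  #include "gmp-impl.h"  /* internal: mpn_modexact_1_odd, threshold */

  /* Hypothetical helper showing the dispatch gcd_1.asm now performs. */
  static mp_limb_t
  reduce_for_gcd (mp_srcptr up, mp_size_t un, mp_limb_t v)
  {
    if (un < BMOD_1_TO_MOD_1_THRESHOLD)
      return mpn_modexact_1_odd (up, un, v);  /* bmod: cheaper per limb */
    return mpn_mod_1 (up, un, v);             /* true mod: wins for large un */
  }
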
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/lshiftc.asm
--- a/mpn/x86_64/lshiftc.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/lshiftc.asm Sun Dec 19 00:01:46 2010 +0100
@@ -146,16 +146,16 @@
jae L(top) C 2
L(end):
neg R32(%rcx) C put rsh count in cl
- mov 16(up,n,8), %r8
+ mov 8(up), %r8
shr R8(%rcx), %r8
or %r8, %r10
- mov 8(up,n,8), %r9
+ mov (up), %r9
shr R8(%rcx), %r9
or %r9, %r11
not %r10
not %r11
- mov %r10, 24(rp,n,8)
- mov %r11, 16(rp,n,8)
+ mov %r10, 16(rp)
+ mov %r11, 8(rp)
neg R32(%rcx) C put lsh count in cl
L(ast): mov (up), %r10
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/mul_basecase.asm
--- a/mpn/x86_64/mul_basecase.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/mul_basecase.asm Sun Dec 19 00:01:46 2010 +0100
@@ -294,7 +294,7 @@
mov w3, -32(rp,n,8)
js L(mul_2_top)
- mov -32(up,n,8), %rax
+ mov -32(up,n,8), %rax C FIXME: n is constant
mul v1
add %rax, w0
mov w0, (rp)
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/pentium4/lshift.asm
--- a/mpn/x86_64/pentium4/lshift.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/pentium4/lshift.asm Sun Dec 19 00:01:46 2010 +0100
@@ -133,14 +133,14 @@
jae L(top) C 2
L(end):
- movq 16(up,n,8), %mm0
+ movq 8(up), %mm0
psrlq %mm5, %mm0
por %mm0, %mm2
- movq 8(up,n,8), %mm1
+ movq (up), %mm1
psrlq %mm5, %mm1
por %mm1, %mm3
- movq %mm2, 24(rp,n,8)
- movq %mm3, 16(rp,n,8)
+ movq %mm2, 16(rp)
+ movq %mm3, 8(rp)
L(ast): movq (up), %mm2
psllq %mm4, %mm2
diff -r 5995c8359734 -r 7ff0fb66737f mpn/x86_64/pentium4/lshiftc.asm
--- a/mpn/x86_64/pentium4/lshiftc.asm Thu Dec 16 21:31:10 2010 +0100
+++ b/mpn/x86_64/pentium4/lshiftc.asm Sun Dec 19 00:01:46 2010 +0100
@@ -143,15 +143,15 @@
jae L(top)
L(end): pxor %mm6, %mm2
- movq 16(up,n,8), %mm0