[Gmp-commit] /var/hg/gmp: Shorten critical path.

mercurial at gmplib.org mercurial at gmplib.org
Tue Mar 13 20:24:43 CET 2012


details:   /var/hg/gmp/rev/d16912b36656
changeset: 14756:d16912b36656
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Mar 13 20:24:40 2012 +0100
description:
Shorten critical path.

diffstat:

 ChangeLog                  |   2 ++
 mpn/x86_64/core2/gcd_1.asm |  41 +++++++++++++++++++++--------------------
 2 files changed, 23 insertions(+), 20 deletions(-)

diffs (81 lines):

diff -r 090afe7a7fcd -r d16912b36656 ChangeLog
--- a/ChangeLog	Tue Mar 13 15:57:10 2012 +0100
+++ b/ChangeLog	Tue Mar 13 20:24:40 2012 +0100
@@ -2,6 +2,8 @@
 
 	* mpn/x86_64/core2/gcd_1.asm: Add hack to support fat builds.
 
+	* mpn/x86_64/core2/gcd_1.asm: Shorten critical path.
+
 2012-03-12  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/core2/gcd_1.asm: New file.
diff -r 090afe7a7fcd -r d16912b36656 mpn/x86_64/core2/gcd_1.asm
--- a/mpn/x86_64/core2/gcd_1.asm	Tue Mar 13 15:57:10 2012 +0100
+++ b/mpn/x86_64/core2/gcd_1.asm	Tue Mar 13 20:24:40 2012 +0100
@@ -28,16 +28,18 @@
 C AMD K8,K9	 8.5
 C AMD K10	 5
 C AMD bd1	 5
-C AMD bobcat	11
-C Intel P4	24 
-C Intel core2	 5.5
-C Intel NHM	 6
-C Intel SBR	 6
+C AMD bobcat	10
+C Intel P4	18 
+C Intel core2	 4.3
+C Intel NHM	 5
+C Intel SBR	 5
 C Intel atom	17
-C VIA nano	 6.5
-
+C VIA nano	 5.3
 C Numbers measured with: speed -CD -s1-64 mpn_gcd_1
 
+C TODO
+C  * Optimise inner-loop for specific CPUs.  The code relies too much on OoO
+C    execution.
 
 C INPUT PARAMETERS
 define(`up',    `%rdi')
@@ -93,27 +95,26 @@
 	pop	%rdx
 	pop	%r8
 
+	bsf	%rax, %rcx
+
 	test	%rax, %rax
-
-	mov	%rax, %rcx
 	jnz	L(mid)
 
-	mov	%rdx, %rax
 	jmp	L(done)
 
-	ALIGN(16)		C			K10	C2	NHM	SBR
-L(top):	cmovc	%r10, %rax	C if x-y carried	0,7	0,6	0,7	0
-	cmovc	%rcx, %rdx	C use x,y-x		0	1	1	1
-L(mid):	bsf	%rax, %rcx	C			1	2	2	2
-	mov	%rdx, %r10	C			1	3	3	3
-	shr	R8(%rcx), %rax	C			5	4	5	5
-	mov	%rax, %rcx	C			6	5	6	7
-	sub	%rax, %r10	C			6	5	6	7
-	sub	%rdx, %rax	C			6	5	6	7
+	ALIGN(16)		C		K10	BD	C2	NHM	SBR
+L(top):	cmovc	%r10, %rax	C if x-y < 0	0,6	0,5	0,6	0,5	0,6
+	cmovc	%r9, %rdx	C use x,y-x	0	0	2	1	1
+	bsf	%r10, %rcx	C		0	0	0	0	0
+L(mid):	mov	%rdx, %r10	C		1	1	4	3	3
+	shr	R8(%rcx), %rax	C		4	3	2	3	3
+	mov	%rax, %r9	C		5	4	3	4	5
+	sub	%rax, %r10	C		5	4	5	4	5
+	sub	%rdx, %rax	C		5	4	4	4	5
 	jnz	L(top)		C
 
-	mov	%rcx, %rax
 L(done):
+	mov	%rdx, %rax
 	mov	%r8, %rcx
 	shl	R8(%rcx), %rax
 	DOS64_EXIT()


More information about the gmp-commit mailing list