[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Mar 5 23:16:19 CET 2011


details:   /var/hg/gmp/rev/c260881363d4
changeset: 13999:c260881363d4
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Mar 05 23:16:07 2011 +0100
description:
Write proper feed-in code.

details:   /var/hg/gmp/rev/f6e322dd8330
changeset: 14000:f6e322dd8330
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Mar 05 23:16:16 2011 +0100
description:
*** empty log message ***

diffstat:

 ChangeLog                 |    4 +
 mpn/x86_64/bdiv_dbm1c.asm |  105 ++++++++++++++++++++-------------------------
 2 files changed, 51 insertions(+), 58 deletions(-)

diffs (146 lines):

diff -r 81e561509e1a -r f6e322dd8330 ChangeLog
--- a/ChangeLog	Fri Mar 04 22:38:26 2011 +0100
+++ b/ChangeLog	Sat Mar 05 23:16:16 2011 +0100
@@ -1,3 +1,7 @@
+2011-03-05  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/bdiv_dbm1c.asm: Write proper feed-in code.
+
 2011-03-04  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/addmul_2.asm: Rewrite for linear performance.
diff -r 81e561509e1a -r f6e322dd8330 mpn/x86_64/bdiv_dbm1c.asm
--- a/mpn/x86_64/bdiv_dbm1c.asm	Fri Mar 04 22:38:26 2011 +0100
+++ b/mpn/x86_64/bdiv_dbm1c.asm	Sat Mar 05 23:16:16 2011 +0100
@@ -1,6 +1,6 @@
 dnl  x86_64 mpn_bdiv_dbm1.
 
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -24,77 +24,66 @@
 C AMD K10	 2.25
 C Intel P4	12.5
 C Intel core2	 4
-C Intel corei	 3.8
+C Intel NHM	 3.75
+C Intel SBR	 3.6
 C Intel atom	20
 C VIA nano	 4
 
 C TODO
-C  * Do proper 4-way feed-in instead of the current epilogue
+C  * Optimise feed-in code.
 
-C INPUT PARAMETERS shared
-define(`qp',	`%rdi')
-define(`up',	`%rsi')
-define(`n',	`%rdx')
-define(`bd',	`%rcx')
-define(`cy',	`%r8')
+C INPUT PARAMETERS
+define(`qp',	  `%rdi')
+define(`up',	  `%rsi')
+define(`n_param', `%rdx')
+define(`bd',	  `%rcx')
+define(`cy',	  `%r8')
 
+define(`n',       `%r9')
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_bdiv_dbm1c)
-	mov	(%rsi), %rax
-	mov	%rdx, %r9		C n
+	mov	(up), %rax
+	mov	n_param, n
+	mov	R32(n_param), R32(%r11)
+	mul	%rcx
+	lea	(up,n,8), up
+	lea	(qp,n,8), qp
+	neg	n
+	and	$3, R32(%r11)
+	jz	L(lo0)
+	lea	-4(n,%r11), n
+	cmp	$2, R32(%r11)
+	jc	L(lo1)
+	jz	L(lo2)
+	jmp	L(lo3)
 
+	ALIGN(16)
+L(top):	mov	(up,n,8), %rax
 	mul	%rcx
-	sub	%rax, %r8
-	mov	%r8, (%rdi)
+L(lo0):	sub	%rax, %r8
+	mov	%r8, (qp,n,8)
 	sbb	%rdx, %r8
+	mov	8(up,n,8), %rax
+	mul	%rcx
+L(lo3):	sub	%rax, %r8
+	mov	%r8, 8(qp,n,8)
+	sbb	%rdx, %r8
+	mov	16(up,n,8), %rax
+	mul	%rcx
+L(lo2):	sub	%rax, %r8
+	mov	%r8, 16(qp,n,8)
+	sbb	%rdx, %r8
+	mov	24(up,n,8), %rax
+	mul	%rcx
+L(lo1):	sub	%rax, %r8
+	mov	%r8, 24(qp,n,8)
+	sbb	%rdx, %r8
+	add	$4, n
+	jnz	L(top)
 
-	lea	(%rsi,%r9,8), %rsi
-	lea	(%rdi,%r9,8), %rdi
-	neg	%r9
-	add	$4, %r9
-	jns	L(end)
-	ALIGN(16)
-L(top):
-	mov	-24(%rsi,%r9,8), %rax
-	mul	%rcx
-	sub	%rax, %r8
-	mov	%r8, -24(%rdi,%r9,8)
-	sbb	%rdx, %r8
-L(3):
-	mov	-16(%rsi,%r9,8), %rax
-	mul	%rcx
-	sub	%rax, %r8
-	mov	%r8, -16(%rdi,%r9,8)
-	sbb	%rdx, %r8
-L(2):
-	mov	-8(%rsi,%r9,8), %rax
-	mul	%rcx
-	sub	%rax, %r8
-	mov	%r8, -8(%rdi,%r9,8)
-	sbb	%rdx, %r8
-L(1):
-	mov	(%rsi,%r9,8), %rax
-	mul	%rcx
-	sub	%rax, %r8
-	mov	%r8, (%rdi,%r9,8)
-	sbb	%rdx, %r8
-
-	add	$4, %r9
-	js	L(top)
-L(end):
-	je	L(3x)
-	cmp	$2, %r9
-	jg	L(ret)
-	mov	$-1, %r9
-	je	L(1)
-	jmp	L(2)
-L(3x):
-	dec	%r9
-	jmp	L(3)
-
-L(ret):	mov	%r8, %rax
+L(end):	mov	%r8, %rax
 	ret
 EPILOGUE()


More information about the gmp-commit mailing list