[Gmp-commit] /var/hg/gmp: Rewrite default x86_64 copying code.

mercurial at gmplib.org mercurial at gmplib.org
Tue Apr 17 00:35:59 CEST 2012


details:   /var/hg/gmp/rev/f8e3d3517187
changeset: 14841:f8e3d3517187
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Apr 17 00:35:57 2012 +0200
description:
Rewrite default x86_64 copying code.

diffstat:

 ChangeLog            |   5 ++
 mpn/x86_64/copyd.asm |  86 ++++++++++++++++++++++++------------------------
 mpn/x86_64/copyi.asm |  92 ++++++++++++++++++++++++++--------------------------
 3 files changed, 94 insertions(+), 89 deletions(-)

diffs (250 lines):

diff -r dc13f686713f -r f8e3d3517187 ChangeLog
--- a/ChangeLog	Mon Apr 16 21:52:55 2012 +0200
+++ b/ChangeLog	Tue Apr 17 00:35:57 2012 +0200
@@ -1,3 +1,8 @@
+2012-04-17  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/copyd.asm: Rewrite.
+	* mpn/x86_64/copyi.asm: Rewrite.
+
 2012-04-16  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/fastsse/lshift-movdqu2.asm: Add DOS entry/exit sequences.
diff -r dc13f686713f -r f8e3d3517187 mpn/x86_64/copyd.asm
--- a/mpn/x86_64/copyd.asm	Mon Apr 16 21:52:55 2012 +0200
+++ b/mpn/x86_64/copyd.asm	Tue Apr 17 00:35:57 2012 +0200
@@ -1,6 +1,6 @@
 dnl  AMD64 mpn_copyd -- copy limb vector, decrementing.
 
-dnl  Copyright 2003, 2005, 2007, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -19,64 +19,64 @@
 
 include(`../config.m4')
 
-
-C	    cycles/limb
+C	     cycles/limb
 C AMD K8,K9	 1
 C AMD K10	 1
-C Intel P4	 2.8
+C AMD bd1	 1.36
+C AMD bobcat	 1.71
+C Intel P4	 2-3
 C Intel core2	 1
-C Intel corei	 1
+C Intel NHM	 1
+C Intel SBR	 1
 C Intel atom	 2
-C VIA nano	 2.14
+C VIA nano	 2
 
 
-C INPUT PARAMETERS
-C rp	rdi
-C up	rsi
-C n	rdx
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
 
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
 
 ABI_SUPPORT(DOS64)
 ABI_SUPPORT(STD64)
 
 ASM_START()
 	TEXT
-	ALIGN(16)
+	ALIGN(64)
 PROLOGUE(mpn_copyd)
-	DOS64_ENTRY(3)
-	leaq	-8(up,n,8), up
-	leaq	(rp,n,8), rp
-	subq	$4, n
+	lea	-8(up,n,8), up
+	lea	(rp,n,8), rp
+	sub	$4, n
 	jc	L(end)
-	ALIGN(16)
-L(oop):	movq	(up), %r8
-	movq	-8(up), %r9
-	leaq	-32(rp), rp
-	movq	-16(up), %r10
-	movq	-24(up), %r11
-	leaq	-32(up), up
-	movq	%r8, 24(rp)
-	movq	%r9, 16(rp)
-	subq	$4, n
-	movq	%r10, 8(rp)
-	movq	%r11, (rp)
-	jnc	L(oop)
+	nop
 
-L(end):	shrl	R32(%rdx)		C edx = lowpart(n)
+L(top):	mov	(up), %rax
+	mov	-8(up), %r9
+	lea	-32(rp), rp
+	mov	-16(up), %r10
+	mov	-24(up), %r11
+	lea	-32(up), up
+	mov	%rax, 24(rp)
+	mov	%r9, 16(rp)
+	sub	$4, n
+	mov	%r10, 8(rp)
+	mov	%r11, (rp)
+	jnc	L(top)
+
+L(end):	shr	R32(n)
 	jnc	1f
-	movq	(up), %r8
-	movq	%r8, -8(rp)
-	leaq	-8(rp), rp
-	leaq	-8(up), up
-1:	shrl	R32(%rdx)		C edx = lowpart(n)
+	mov	(up), %rax
+	mov	%rax, -8(rp)
+	lea	-8(rp), rp
+	lea	-8(up), up
+1:	shr	R32(n)
 	jnc	1f
-	movq	(up), %r8
-	movq	-8(up), %r9
-	movq	%r8, -8(rp)
-	movq	%r9, -16(rp)
-1:	DOS64_EXIT()
-	ret
+	mov	(up), %rax
+	mov	-8(up), %r9
+	mov	%rax, -8(rp)
+	mov	%r9, -16(rp)
+1:	ret
 EPILOGUE()
diff -r dc13f686713f -r f8e3d3517187 mpn/x86_64/copyi.asm
--- a/mpn/x86_64/copyi.asm	Mon Apr 16 21:52:55 2012 +0200
+++ b/mpn/x86_64/copyi.asm	Tue Apr 17 00:35:57 2012 +0200
@@ -1,6 +1,6 @@
 dnl  AMD64 mpn_copyi -- copy limb vector, incrementing.
 
-dnl  Copyright 2003, 2005, 2007, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -19,63 +19,63 @@
 
 include(`../config.m4')
 
+C	     cycles/limb
+C AMD K8,K9	 1
+C AMD K10	 1
+C AMD bd1	 1.36
+C AMD bobcat	 1.71
+C Intel P4	 2-3
+C Intel core2	 1
+C Intel NHM	 1
+C Intel SBR	 1
+C Intel atom	 2
+C VIA nano	 2
 
-C	    cycles/limb
-C AMD K8,K9	 1.25
-C AMD K10	 1.5
-C Intel P4	 2.8
-C Intel core2	 1
-C Intel corei	 1
-C Intel atom	 2
-C VIA nano	 2.14
 
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
 
-C INPUT PARAMETERS
-C rp	rdi
-C up	rsi
-C n	rdx
-
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
 
 ABI_SUPPORT(DOS64)
 ABI_SUPPORT(STD64)
 
 ASM_START()
 	TEXT
-	ALIGN(16)
+	ALIGN(64)
+	.byte	0,0,0,0,0,0
 PROLOGUE(mpn_copyi)
-	DOS64_ENTRY(3)
-	leaq	-8(rp), rp
-	subq	$4, n
+	lea	-8(rp), rp
+	sub	$4, n
 	jc	L(end)
-	ALIGN(16)
-L(oop):	movq	(up), %r8
-	movq	8(up), %r9
-	leaq	32(rp), rp
-	movq	16(up), %r10
-	movq	24(up), %r11
-	leaq	32(up), up
-	movq	%r8, -24(rp)
-	movq	%r9, -16(rp)
-	subq	$4, n
-	movq	%r10, -8(rp)
-	movq	%r11, (rp)
-	jnc	L(oop)
 
-L(end):	shrl	R32(%rdx)		C edx = lowpart(n)
+L(top):	mov	(up), %rax
+	mov	8(up), %r9
+	lea	32(rp), rp
+	mov	16(up), %r10
+	mov	24(up), %r11
+	lea	32(up), up
+	mov	%rax, -24(rp)
+	mov	%r9, -16(rp)
+	sub	$4, n
+	mov	%r10, -8(rp)
+	mov	%r11, (rp)
+	jnc	L(top)
+
+L(end):	shr	R32(n)
 	jnc	1f
-	movq	(up), %r8
-	movq	%r8, 8(rp)
-	leaq	8(rp), rp
-	leaq	8(up), up
-1:	shrl	R32(%rdx)		C edx = lowpart(n)
+	mov	(up), %rax
+	mov	%rax, 8(rp)
+	lea	8(rp), rp
+	lea	8(up), up
+1:	shr	R32(n)
 	jnc	1f
-	movq	(up), %r8
-	movq	8(up), %r9
-	movq	%r8, 8(rp)
-	movq	%r9, 16(rp)
-1:	DOS64_EXIT()
-	ret
+	mov	(up), %rax
+	mov	8(up), %r9
+	mov	%rax, 8(rp)
+	mov	%r9, 16(rp)
+1:	ret
 EPILOGUE()


More information about the gmp-commit mailing list