[Gmp-commit] /var/hg/gmp: Rewrite default x86_64 copying code.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Apr 17 00:35:59 CEST 2012
details: /var/hg/gmp/rev/f8e3d3517187
changeset: 14841:f8e3d3517187
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Apr 17 00:35:57 2012 +0200
description:
Rewrite default x86_64 copying code.
diffstat:
ChangeLog | 5 ++
mpn/x86_64/copyd.asm | 86 ++++++++++++++++++++++++------------------------
mpn/x86_64/copyi.asm | 92 ++++++++++++++++++++++++++--------------------------
3 files changed, 94 insertions(+), 89 deletions(-)
diffs (250 lines):
diff -r dc13f686713f -r f8e3d3517187 ChangeLog
--- a/ChangeLog Mon Apr 16 21:52:55 2012 +0200
+++ b/ChangeLog Tue Apr 17 00:35:57 2012 +0200
@@ -1,3 +1,8 @@
+2012-04-17 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/copyd.asm: Rewrite.
+ * mpn/x86_64/copyi.asm: Rewrite.
+
2012-04-16 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/fastsse/lshift-movdqu2.asm: Add DOS entry/exit sequences.
diff -r dc13f686713f -r f8e3d3517187 mpn/x86_64/copyd.asm
--- a/mpn/x86_64/copyd.asm Mon Apr 16 21:52:55 2012 +0200
+++ b/mpn/x86_64/copyd.asm Tue Apr 17 00:35:57 2012 +0200
@@ -1,6 +1,6 @@
dnl AMD64 mpn_copyd -- copy limb vector, decrementing.
-dnl Copyright 2003, 2005, 2007, 2011 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -19,64 +19,64 @@
include(`../config.m4')
-
-C cycles/limb
+C cycles/limb
C AMD K8,K9 1
C AMD K10 1
-C Intel P4 2.8
+C AMD bd1 1.36
+C AMD bobcat 1.71
+C Intel P4 2-3
C Intel core2 1
-C Intel corei 1
+C Intel NHM 1
+C Intel SBR 1
C Intel atom 2
-C VIA nano 2.14
+C VIA nano 2
-C INPUT PARAMETERS
-C rp rdi
-C up rsi
-C n rdx
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
PROLOGUE(mpn_copyd)
- DOS64_ENTRY(3)
- leaq -8(up,n,8), up
- leaq (rp,n,8), rp
- subq $4, n
+ lea -8(up,n,8), up
+ lea (rp,n,8), rp
+ sub $4, n
jc L(end)
- ALIGN(16)
-L(oop): movq (up), %r8
- movq -8(up), %r9
- leaq -32(rp), rp
- movq -16(up), %r10
- movq -24(up), %r11
- leaq -32(up), up
- movq %r8, 24(rp)
- movq %r9, 16(rp)
- subq $4, n
- movq %r10, 8(rp)
- movq %r11, (rp)
- jnc L(oop)
+ nop
-L(end): shrl R32(%rdx) C edx = lowpart(n)
+L(top): mov (up), %rax
+ mov -8(up), %r9
+ lea -32(rp), rp
+ mov -16(up), %r10
+ mov -24(up), %r11
+ lea -32(up), up
+ mov %rax, 24(rp)
+ mov %r9, 16(rp)
+ sub $4, n
+ mov %r10, 8(rp)
+ mov %r11, (rp)
+ jnc L(top)
+
+L(end): shr R32(n)
jnc 1f
- movq (up), %r8
- movq %r8, -8(rp)
- leaq -8(rp), rp
- leaq -8(up), up
-1: shrl R32(%rdx) C edx = lowpart(n)
+ mov (up), %rax
+ mov %rax, -8(rp)
+ lea -8(rp), rp
+ lea -8(up), up
+1: shr R32(n)
jnc 1f
- movq (up), %r8
- movq -8(up), %r9
- movq %r8, -8(rp)
- movq %r9, -16(rp)
-1: DOS64_EXIT()
- ret
+ mov (up), %rax
+ mov -8(up), %r9
+ mov %rax, -8(rp)
+ mov %r9, -16(rp)
+1: ret
EPILOGUE()
diff -r dc13f686713f -r f8e3d3517187 mpn/x86_64/copyi.asm
--- a/mpn/x86_64/copyi.asm Mon Apr 16 21:52:55 2012 +0200
+++ b/mpn/x86_64/copyi.asm Tue Apr 17 00:35:57 2012 +0200
@@ -1,6 +1,6 @@
dnl AMD64 mpn_copyi -- copy limb vector, incrementing.
-dnl Copyright 2003, 2005, 2007, 2011 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -19,63 +19,63 @@
include(`../config.m4')
+C cycles/limb
+C AMD K8,K9 1
+C AMD K10 1
+C AMD bd1 1.36
+C AMD bobcat 1.71
+C Intel P4 2-3
+C Intel core2 1
+C Intel NHM 1
+C Intel SBR 1
+C Intel atom 2
+C VIA nano 2
-C cycles/limb
-C AMD K8,K9 1.25
-C AMD K10 1.5
-C Intel P4 2.8
-C Intel core2 1
-C Intel corei 1
-C Intel atom 2
-C VIA nano 2.14
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
-C INPUT PARAMETERS
-C rp rdi
-C up rsi
-C n rdx
-
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
+ .byte 0,0,0,0,0,0
PROLOGUE(mpn_copyi)
- DOS64_ENTRY(3)
- leaq -8(rp), rp
- subq $4, n
+ lea -8(rp), rp
+ sub $4, n
jc L(end)
- ALIGN(16)
-L(oop): movq (up), %r8
- movq 8(up), %r9
- leaq 32(rp), rp
- movq 16(up), %r10
- movq 24(up), %r11
- leaq 32(up), up
- movq %r8, -24(rp)
- movq %r9, -16(rp)
- subq $4, n
- movq %r10, -8(rp)
- movq %r11, (rp)
- jnc L(oop)
-L(end): shrl R32(%rdx) C edx = lowpart(n)
+L(top): mov (up), %rax
+ mov 8(up), %r9
+ lea 32(rp), rp
+ mov 16(up), %r10
+ mov 24(up), %r11
+ lea 32(up), up
+ mov %rax, -24(rp)
+ mov %r9, -16(rp)
+ sub $4, n
+ mov %r10, -8(rp)
+ mov %r11, (rp)
+ jnc L(top)
+
+L(end): shr R32(n)
jnc 1f
- movq (up), %r8
- movq %r8, 8(rp)
- leaq 8(rp), rp
- leaq 8(up), up
-1: shrl R32(%rdx) C edx = lowpart(n)
+ mov (up), %rax
+ mov %rax, 8(rp)
+ lea 8(rp), rp
+ lea 8(up), up
+1: shr R32(n)
jnc 1f
- movq (up), %r8
- movq 8(up), %r9
- movq %r8, 8(rp)
- movq %r9, 16(rp)
-1: DOS64_EXIT()
- ret
+ mov (up), %rax
+ mov 8(up), %r9
+ mov %rax, 8(rp)
+ mov %r9, 16(rp)
+1: ret
EPILOGUE()
More information about the gmp-commit mailing list