[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Mar 5 23:16:19 CET 2011
details: /var/hg/gmp/rev/c260881363d4
changeset: 13999:c260881363d4
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Mar 05 23:16:07 2011 +0100
description:
Write proper feed-in code.
details: /var/hg/gmp/rev/f6e322dd8330
changeset: 14000:f6e322dd8330
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Mar 05 23:16:16 2011 +0100
description:
*** empty log message ***
diffstat:
ChangeLog | 4 +
mpn/x86_64/bdiv_dbm1c.asm | 105 ++++++++++++++++++++-------------------------
2 files changed, 51 insertions(+), 58 deletions(-)
diffs (146 lines):
diff -r 81e561509e1a -r f6e322dd8330 ChangeLog
--- a/ChangeLog Fri Mar 04 22:38:26 2011 +0100
+++ b/ChangeLog Sat Mar 05 23:16:16 2011 +0100
@@ -1,3 +1,7 @@
+2011-03-05 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/bdiv_dbm1c.asm: Write proper feed-in code.
+
2011-03-04 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/addmul_2.asm: Rewrite for linear performance.
diff -r 81e561509e1a -r f6e322dd8330 mpn/x86_64/bdiv_dbm1c.asm
--- a/mpn/x86_64/bdiv_dbm1c.asm Fri Mar 04 22:38:26 2011 +0100
+++ b/mpn/x86_64/bdiv_dbm1c.asm Sat Mar 05 23:16:16 2011 +0100
@@ -1,6 +1,6 @@
dnl x86_64 mpn_bdiv_dbm1.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -24,77 +24,66 @@
C AMD K10 2.25
C Intel P4 12.5
C Intel core2 4
-C Intel corei 3.8
+C Intel NHM 3.75
+C Intel SBR 3.6
C Intel atom 20
C VIA nano 4
C TODO
-C * Do proper 4-way feed-in instead of the current epilogue
+C * Optimise feed-in code.
-C INPUT PARAMETERS shared
-define(`qp', `%rdi')
-define(`up', `%rsi')
-define(`n', `%rdx')
-define(`bd', `%rcx')
-define(`cy', `%r8')
+C INPUT PARAMETERS
+define(`qp', `%rdi')
+define(`up', `%rsi')
+define(`n_param', `%rdx')
+define(`bd', `%rcx')
+define(`cy', `%r8')
+define(`n', `%r9')
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_bdiv_dbm1c)
- mov (%rsi), %rax
- mov %rdx, %r9 C n
+ mov (up), %rax
+ mov n_param, n
+ mov R32(n_param), R32(%r11)
+ mul %rcx
+ lea (up,n,8), up
+ lea (qp,n,8), qp
+ neg n
+ and $3, R32(%r11)
+ jz L(lo0)
+ lea -4(n,%r11), n
+ cmp $2, R32(%r11)
+ jc L(lo1)
+ jz L(lo2)
+ jmp L(lo3)
+ ALIGN(16)
+L(top): mov (up,n,8), %rax
mul %rcx
- sub %rax, %r8
- mov %r8, (%rdi)
+L(lo0): sub %rax, %r8
+ mov %r8, (qp,n,8)
sbb %rdx, %r8
+ mov 8(up,n,8), %rax
+ mul %rcx
+L(lo3): sub %rax, %r8
+ mov %r8, 8(qp,n,8)
+ sbb %rdx, %r8
+ mov 16(up,n,8), %rax
+ mul %rcx
+L(lo2): sub %rax, %r8
+ mov %r8, 16(qp,n,8)
+ sbb %rdx, %r8
+ mov 24(up,n,8), %rax
+ mul %rcx
+L(lo1): sub %rax, %r8
+ mov %r8, 24(qp,n,8)
+ sbb %rdx, %r8
+ add $4, n
+ jnz L(top)
- lea (%rsi,%r9,8), %rsi
- lea (%rdi,%r9,8), %rdi
- neg %r9
- add $4, %r9
- jns L(end)
- ALIGN(16)
-L(top):
- mov -24(%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, -24(%rdi,%r9,8)
- sbb %rdx, %r8
-L(3):
- mov -16(%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, -16(%rdi,%r9,8)
- sbb %rdx, %r8
-L(2):
- mov -8(%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, -8(%rdi,%r9,8)
- sbb %rdx, %r8
-L(1):
- mov (%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, (%rdi,%r9,8)
- sbb %rdx, %r8
-
- add $4, %r9
- js L(top)
-L(end):
- je L(3x)
- cmp $2, %r9
- jg L(ret)
- mov $-1, %r9
- je L(1)
- jmp L(2)
-L(3x):
- dec %r9
- jmp L(3)
-
-L(ret): mov %r8, %rax
+L(end): mov %r8, %rax
ret
EPILOGUE()
More information about the gmp-commit mailing list