[Gmp-commit] /home/hgfiles/gmp-5.0: Rewrite not to rely on ZF after 'bt' insn.
mercurial at gmplib.org
mercurial at gmplib.org
Sat Nov 6 19:36:23 CET 2010
details: /home/hgfiles/gmp-5.0/rev/d38bf82a975e
changeset: 13435:d38bf82a975e
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Nov 06 19:36:19 2010 +0100
description:
Rewrite not to rely on ZF after 'bt' insn.
diffstat:
ChangeLog | 4 ++
mpn/x86_64/aors_n.asm | 90 +++++++++++++++++++++++++++-----------------------
2 files changed, 52 insertions(+), 42 deletions(-)
diffs (144 lines):
diff -r 1878a7b7e66e -r d38bf82a975e ChangeLog
--- a/ChangeLog Fri May 14 11:40:09 2010 +0200
+++ b/ChangeLog Sat Nov 06 19:36:19 2010 +0100
@@ -1,3 +1,7 @@
+2010-11-06 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
+
2010-05-14 Torbjorn Granlund <tege at gmplib.org>
* mpn/generic/redc_2.c: Use asm code just for GNU C.
diff -r 1878a7b7e66e -r d38bf82a975e mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm Fri May 14 11:40:09 2010 +0200
+++ b/mpn/x86_64/aors_n.asm Sat Nov 06 19:36:19 2010 +0100
@@ -1,6 +1,7 @@
dnl AMD64 mpn_add_n, mpn_sub_n
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
+dnl Inc.
dnl This file is part of the GNU MP Library.
@@ -20,12 +21,13 @@
include(`../config.m4')
C cycles/limb
-C K8,K9: 1.5
-C K10: 1.5
-C P4: ?
-C P6 core2: 4.9
-C P6 corei7:
-C P6 atom: 4
+C AMD K8,K9 1.5
+C AMD K10 1.5
+C Intel P4 ?
+C Intel core2 4.9
+C Intel corei ?
+C Intel atom 4
+C VIA nano 3.25
C The inner loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
@@ -53,24 +55,57 @@
ALIGN(16)
PROLOGUE(func_nc)
mov R32(n), R32(%rax)
+ shr $2, n
and $3, R32(%rax)
- shr $2, n
bt $0, %r8 C cy flag <- carry parameter
- jz L(1)
- jmp L(ent)
+ jrcxz L(lt4)
+
+ mov (up), %r8
+ mov 8(up), %r9
+ dec n
+ jmp L(mid)
+
EPILOGUE()
ALIGN(16)
PROLOGUE(func)
mov R32(n), R32(%rax)
shr $2, n
- jz L(0)
and $3, R32(%rax)
+ jrcxz L(lt4)
-L(ent): mov (up), %r8
+ mov (up), %r8
mov 8(up), %r9
dec n
jmp L(mid)
+L(lt4): dec R32(%rax)
+ mov (up), %r8
+ jnz L(2)
+ ADCSBB (vp), %r8
+ mov %r8, (rp)
+ adc %eax, %eax
+ ret
+
+L(2): dec R32(%rax)
+ mov 8(up), %r9
+ jnz L(3)
+ ADCSBB (vp), %r8
+ ADCSBB 8(vp), %r9
+ mov %r8, (rp)
+ mov %r9, 8(rp)
+ adc %eax, %eax
+ ret
+
+L(3): mov 16(up), %r10
+ ADCSBB (vp), %r8
+ ADCSBB 8(vp), %r9
+ ADCSBB 16(vp), %r10
+ mov %r8, (rp)
+ mov %r9, 8(rp)
+ mov %r10, 16(rp)
+ setc R8(%rax)
+ ret
+
ALIGN(16)
L(top): ADCSBB (vp), %r8
ADCSBB 8(vp), %r9
@@ -104,36 +139,7 @@
inc R32(%rax)
dec R32(%rax)
- jnz L(1)
+ jnz L(lt4)
adc %eax, %eax
ret
-
-L(0): test R32(%rax), R32(%rax)
-L(1): dec R32(%rax)
- mov (up), %r8
- jnz L(2)
- ADCSBB (vp), %r8
- mov %r8, (rp)
- adc %eax, %eax
- ret
-
-L(2): dec R32(%rax)
- mov 8(up), %r9
- jnz L(3)
- ADCSBB (vp), %r8
- ADCSBB 8(vp), %r9
- mov %r8, (rp)
- mov %r9, 8(rp)
- adc %eax, %eax
- ret
-
-L(3): mov 16(up), %r10
- ADCSBB (vp), %r8
- ADCSBB 8(vp), %r9
- ADCSBB 16(vp), %r10
- mov %r8, (rp)
- mov %r9, 8(rp)
- mov %r10, 16(rp)
- setc %al
- ret
EPILOGUE()
More information about the gmp-commit mailing list