[Gmp-commit] /home/hgfiles/gmp-4.3: Don't rely on ZF after 'bt' insn.
mercurial at gmplib.org
mercurial at gmplib.org
Thu Nov 11 14:10:03 CET 2010
details: /home/hgfiles/gmp-4.3/rev/1a2742d53add
changeset: 12553:1a2742d53add
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Nov 11 14:09:58 2010 +0100
description:
Don't rely on ZF after 'bt' insn.
diffstat:
ChangeLog | 7 ++++
mpn/x86_64/aors_n.asm | 77 ++++++++++++++++++++++++---------------------
mpn/x86_64/atom/aors_n.asm | 23 +++++++-----
3 files changed, 61 insertions(+), 46 deletions(-)
diffs (197 lines):
diff -r 23bc00ec2a06 -r 1a2742d53add ChangeLog
--- a/ChangeLog Thu Nov 11 12:02:20 2010 +0100
+++ b/ChangeLog Thu Nov 11 14:09:58 2010 +0100
@@ -1,8 +1,15 @@
2010-11-11 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86_64/atom/aors_n.asm: Don't rely on ZF after 'bt' insn.
+ Use 64-bit 'test' to support operands of 2^32 limbs and more.
+
* doc/gmp.texi (Low-level Functions): Remove documentation for
non-existing function mpn_sqr_n.
+2010-11-06 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
+
2010-02-08 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/atom/gmp-mparam.h: New file.
diff -r 23bc00ec2a06 -r 1a2742d53add mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm Thu Nov 11 12:02:20 2010 +0100
+++ b/mpn/x86_64/aors_n.asm Thu Nov 11 14:09:58 2010 +0100
@@ -1,6 +1,7 @@
dnl AMD64 mpn_add_n, mpn_sub_n
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
+dnl Inc.
dnl This file is part of the GNU MP Library.
@@ -52,24 +53,57 @@
ALIGN(16)
PROLOGUE(func_nc)
mov R32(n), R32(%rax)
+ shr $2, n
and $3, R32(%rax)
- shr $2, n
bt $0, %r8 C cy flag <- carry parameter
- jz L(1)
- jmp L(ent)
+ jrcxz L(lt4)
+
+ mov (up), %r8
+ mov 8(up), %r9
+ dec n
+ jmp L(mid)
+
EPILOGUE()
ALIGN(16)
PROLOGUE(func)
mov R32(n), R32(%rax)
shr $2, n
- jz L(0)
and $3, R32(%rax)
+ jrcxz L(lt4)
-L(ent): mov (up), %r8
+ mov (up), %r8
mov 8(up), %r9
dec n
jmp L(mid)
+L(lt4): dec R32(%rax)
+ mov (up), %r8
+ jnz L(2)
+ ADCSBB (vp), %r8
+ mov %r8, (rp)
+ adc %eax, %eax
+ ret
+
+L(2): dec R32(%rax)
+ mov 8(up), %r9
+ jnz L(3)
+ ADCSBB (vp), %r8
+ ADCSBB 8(vp), %r9
+ mov %r8, (rp)
+ mov %r9, 8(rp)
+ adc %eax, %eax
+ ret
+
+L(3): mov 16(up), %r10
+ ADCSBB (vp), %r8
+ ADCSBB 8(vp), %r9
+ ADCSBB 16(vp), %r10
+ mov %r8, (rp)
+ mov %r9, 8(rp)
+ mov %r10, 16(rp)
+ setc R8(%rax)
+ ret
+
ALIGN(16)
L(top): ADCSBB (vp), %r8
ADCSBB 8(vp), %r9
@@ -103,36 +137,7 @@
inc R32(%rax)
dec R32(%rax)
- jnz L(1)
+ jnz L(lt4)
adc %eax, %eax
ret
-
-L(0): test R32(%rax), R32(%rax)
-L(1): dec R32(%rax)
- mov (up), %r8
- jnz L(2)
- ADCSBB (vp), %r8
- mov %r8, (rp)
- adc %eax, %eax
- ret
-
-L(2): dec R32(%rax)
- mov 8(up), %r9
- jnz L(3)
- ADCSBB (vp), %r8
- ADCSBB 8(vp), %r9
- mov %r8, (rp)
- mov %r9, 8(rp)
- adc %eax, %eax
- ret
-
-L(3): mov 16(up), %r10
- ADCSBB (vp), %r8
- ADCSBB 8(vp), %r9
- ADCSBB 16(vp), %r10
- mov %r8, (rp)
- mov %r9, 8(rp)
- mov %r10, 16(rp)
- setc %al
- ret
EPILOGUE()
diff -r 23bc00ec2a06 -r 1a2742d53add mpn/x86_64/atom/aors_n.asm
--- a/mpn/x86_64/atom/aors_n.asm Thu Nov 11 12:02:20 2010 +0100
+++ b/mpn/x86_64/atom/aors_n.asm Thu Nov 11 14:09:58 2010 +0100
@@ -1,6 +1,6 @@
dnl X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -63,15 +63,16 @@
jg L(b3)
L(b1): mov (%rsi), %r10
- test R32(%rcx), R32(%rcx)
- bt $0, R32(%r8)
+ test %rcx, %rcx
jnz L(gt1)
+ shr R32(%r8) C Set CF from argument
ADCSBB (%rdx), %r10
mov %r10, (%rdi)
mov R32(%rcx), R32(%rax) C zero rax
adc R32(%rax), R32(%rax)
ret
-L(gt1): ADCSBB (%rdx), %r10
+L(gt1): shr R32(%r8)
+ ADCSBB (%rdx), %r10
mov 8(%rsi), %r11
lea 16(%rsi), %rsi
lea -16(%rdx), %rdx
@@ -81,12 +82,13 @@
L(b2): mov (%rsi), %r9
mov 8(%rsi), %r10
lea -8(%rdx), %rdx
- test R32(%rcx), R32(%rcx)
- bt $0, R32(%r8)
+ test %rcx, %rcx
jnz L(gt2)
+ shr R32(%r8)
lea -40(%rdi), %rdi
jmp L(e2)
-L(gt2): ADCSBB 8(%rdx), %r9
+L(gt2): shr R32(%r8)
+ ADCSBB 8(%rdx), %r9
mov 16(%rsi), %r11
lea -8(%rsi), %rsi
lea -8(%rdi), %rdi
@@ -95,12 +97,13 @@
L(b3): mov (%rsi), %rax
mov 8(%rsi), %r9
mov 16(%rsi), %r10
- test R32(%rcx), R32(%rcx)
- bt $0, %r8
+ test %rcx, %rcx
jnz L(gt3)
+ shr R32(%r8)
lea -32(%rdi), %rdi
jmp L(e3)
-L(gt3): ADCSBB (%rdx), %rax
+L(gt3): shr R32(%r8)
+ ADCSBB (%rdx), %rax
jmp L(m3)
L(b0): mov (%rsi), %r11
More information about the gmp-commit mailing list