[Gmp-commit] /home/hgfiles/gmp: Move mpn/x86_64/atom/aors_n.asm to mpn/x86_64/coreisbr, use it for both CPUs.
mercurial at gmplib.org
mercurial at gmplib.org
Sat Feb 5 14:51:46 CET 2011
details: /home/hgfiles/gmp/rev/7cba880ba762
changeset: 13827:7cba880ba762
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 05 14:51:43 2011 +0100
description:
Move mpn/x86_64/atom/aors_n.asm to mpn/x86_64/coreisbr, use it for both CPUs.
diffstat:
ChangeLog | 10 ++-
mpn/x86_64/atom/aors_n.asm | 129 +----------------------------------
mpn/x86_64/coreisbr/aors_n.asm | 148 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 159 insertions(+), 128 deletions(-)
diffs (truncated from 315 to 300 lines):
diff -r 1f58740e2052 -r 7cba880ba762 ChangeLog
--- a/ChangeLog Sat Feb 05 09:45:46 2011 +0100
+++ b/ChangeLog Sat Feb 05 14:51:43 2011 +0100
@@ -1,8 +1,14 @@
+2011-02-05 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/coreisbr/aors_n.asm: New file, based on old
+ atom/aors_n.asm.
+ * mpn/x86_64/atom/aors_n.asm: Grab coreisbr/aors_n.asm.
+
2011-02-05 Marco Bodrato <bodrato at mail.dm.unipi.it>
* gmp-impl.h (mpn_toom6_mul_n_itch): Handle threshold == zero.
- (mpn_toom8_mul_n_itch): Likewise.
- (MPN_TOOM6H_MIN, MPN_TOOM8H_MIN): Define.
+ (mpn_toom8_mul_n_itch): Likewise.
+ (MPN_TOOM6H_MIN, MPN_TOOM8H_MIN): Define.
* tests/mpn/t-toom6h.c: No tests below MPN_TOOM6H_MIN.
* tests/mpn/t-toom8h.c: No tests below MPN_TOOM8H_MIN.
diff -r 1f58740e2052 -r 7cba880ba762 mpn/x86_64/atom/aors_n.asm
--- a/mpn/x86_64/atom/aors_n.asm Sat Feb 05 09:45:46 2011 +0100
+++ b/mpn/x86_64/atom/aors_n.asm Sat Feb 05 14:51:43 2011 +0100
@@ -1,6 +1,7 @@
dnl X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
-dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -19,129 +20,5 @@
include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9 1.85
-C AMD K10 ?
-C Intel P4 ?
-C Intel core2 ?
-C Intel corei ?
-C Intel atom 3
-C VIA nano ?
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n', `%rcx')
-define(`cy', `%r8') C (only for mpn_add_nc)
-
-ifdef(`OPERATION_add_n', `
- define(ADCSBB, adc)
- define(func, mpn_add_n)
- define(func_nc, mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
- define(ADCSBB, sbb)
- define(func, mpn_sub_n)
- define(func_nc, mpn_sub_nc)')
-
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func_nc)
- jmp L(ent)
-EPILOGUE()
-PROLOGUE(func)
- xor %r8, %r8
-L(ent):
- mov R32(%rcx), R32(%rax)
- shr $2, %rcx
- and $3, R32(%rax)
- jz L(b0)
- cmp $2, R32(%rax)
- jz L(b2)
- jg L(b3)
-
-L(b1): mov (%rsi), %r10
- test %rcx, %rcx
- jnz L(gt1)
- shr R32(%r8) C Set CF from argument
- ADCSBB (%rdx), %r10
- mov %r10, (%rdi)
- mov R32(%rcx), R32(%rax) C zero rax
- adc R32(%rax), R32(%rax)
- ret
-L(gt1): shr R32(%r8)
- ADCSBB (%rdx), %r10
- mov 8(%rsi), %r11
- lea 16(%rsi), %rsi
- lea -16(%rdx), %rdx
- lea -16(%rdi), %rdi
- jmp L(m1)
-
-L(b2): mov (%rsi), %r9
- mov 8(%rsi), %r10
- lea -8(%rdx), %rdx
- test %rcx, %rcx
- jnz L(gt2)
- shr R32(%r8)
- lea -40(%rdi), %rdi
- jmp L(e2)
-L(gt2): shr R32(%r8)
- ADCSBB 8(%rdx), %r9
- mov 16(%rsi), %r11
- lea -8(%rsi), %rsi
- lea -8(%rdi), %rdi
- jmp L(m2)
-
-L(b3): mov (%rsi), %rax
- mov 8(%rsi), %r9
- mov 16(%rsi), %r10
- test %rcx, %rcx
- jnz L(gt3)
- shr R32(%r8)
- lea -32(%rdi), %rdi
- jmp L(e3)
-L(gt3): shr R32(%r8)
- ADCSBB (%rdx), %rax
- jmp L(m3)
-
-L(b0): mov (%rsi), %r11
- neg R32(%r8)
- lea -24(%rdx), %rdx
- lea -24(%rdi), %rdi
- lea 8(%rsi), %rsi
- jmp L(m0)
-
- ALIGN(8)
-L(top): mov %r11, 24(%rdi)
- ADCSBB (%rdx), %rax
- lea 32(%rdi), %rdi
-L(m3): mov %rax, (%rdi)
- ADCSBB 8(%rdx), %r9
- mov 24(%rsi), %r11
-L(m2): mov %r9, 8(%rdi)
- ADCSBB 16(%rdx), %r10
- lea 32(%rsi), %rsi
-L(m1): mov %r10, 16(%rdi)
-L(m0): ADCSBB 24(%rdx), %r11
- mov (%rsi), %rax
- mov 8(%rsi), %r9
- lea 32(%rdx), %rdx
- dec %rcx
- mov 16(%rsi), %r10
- jnz L(top)
-
- mov %r11, 24(%rdi)
-L(e3): ADCSBB (%rdx), %rax
- mov %rax, 32(%rdi)
-L(e2): ADCSBB 8(%rdx), %r9
- mov %r9, 40(%rdi)
-L(e1): ADCSBB 16(%rdx), %r10
- mov %r10, 48(%rdi)
- mov R32(%rcx), R32(%rax) C zero rax
- adc R32(%rax), R32(%rax)
- ret
-EPILOGUE()
+include_mpn(`x86_64/coreisbr/aors_n.asm')
diff -r 1f58740e2052 -r 7cba880ba762 mpn/x86_64/coreisbr/aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aors_n.asm Sat Feb 05 14:51:43 2011 +0100
@@ -0,0 +1,148 @@
+dnl X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Sandy Bridge.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 1.85
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 5
+C Intel NHM 5.5
+C Intel SBR 1.61
+C Intel atom 3
+C VIA nano 3
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8') C (only for mpn_add_nc and mpn_sub_nc)
+
+ifdef(`OPERATION_add_n', `
+ define(ADCSBB, adc)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADCSBB, sbb)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(func)
+ xor %r8, %r8
+L(ent): mov R32(n), R32(%rax)
+ shr $2, n
+ and $3, R32(%rax)
+ jz L(b0)
+ cmp $2, R32(%rax)
+ jz L(b2)
+ jg L(b3)
+
+L(b1): mov (up), %r10
+ test n, n
+ jnz L(gt1)
+ neg R32(%r8) C set CF from argument
+ ADCSBB (vp), %r10
+ mov %r10, (rp)
+ mov R32(n), R32(%rax) C zero rax
+ adc R32(%rax), R32(%rax)
+ ret
+L(gt1): neg R32(%r8)
+ ADCSBB (vp), %r10
+ mov 8(up), %r11
+ lea 16(up), up
+ lea -16(vp), vp
+ lea -16(rp), rp
+ jmp L(m1)
+
+L(b3): mov (up), %rax
+ mov 8(up), %r9
+ mov 16(up), %r10
+ test n, n
+ jnz L(gt3)
+ neg R32(%r8)
+ lea -32(rp), rp
+ jmp L(e3)
+L(gt3): neg R32(%r8)
+ ADCSBB (vp), %rax
+ jmp L(m3)
+
+ nop C alignment
+ nop C alignment
+L(b0): mov (up), %r11
+ neg R32(%r8)
+ lea -24(vp), vp
+ lea -24(rp), rp
+ lea 8(up), up
+ jmp L(m0)
+
+L(b2): mov (up), %r9
+ mov 8(up), %r10
+ lea -8(vp), vp
+ test n, n
+ jnz L(gt2)
+ neg R32(%r8)
+ lea -40(rp), rp
+ jmp L(e2)
+L(gt2): neg R32(%r8)
+ lea -8(up), up
+ lea -8(rp), rp
+ jmp L(m2)
+
+ ALIGN(8)
+L(top): mov %r11, 24(rp)
+ ADCSBB (vp), %rax
+ lea 32(rp), rp
+L(m3): mov %rax, (rp)
+L(m2): ADCSBB 8(vp), %r9
+ mov 24(up), %r11
+ mov %r9, 8(rp)
+ ADCSBB 16(vp), %r10
+ lea 32(up), up
+L(m1): mov %r10, 16(rp)
+L(m0): ADCSBB 24(vp), %r11
+ mov (up), %rax
+ mov 8(up), %r9
+ lea 32(vp), vp
+ dec n
+ mov 16(up), %r10
+ jnz L(top)
More information about the gmp-commit mailing list