[Gmp-commit] /var/hg/gmp: Provide basic x86_64 sqr_diag_addlsh1 for sqr_basec...
mercurial at gmplib.org
mercurial at gmplib.org
Sat Aug 31 19:32:42 CEST 2013
details: /var/hg/gmp/rev/a560ed2fc6af
changeset: 15959:a560ed2fc6af
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Aug 31 18:46:59 2013 +0200
description:
Provide basic x86_64 sqr_diag_addlsh1 for sqr_basecase-less CPUs.
diffstat:
mpn/x86_64/sqr_diag_addlsh1.asm | 103 ++++++++++++++++++++++++++++++++++++++++
1 files changed, 103 insertions(+), 0 deletions(-)
diffs (107 lines):
diff -r 76038a65601f -r a560ed2fc6af mpn/x86_64/sqr_diag_addlsh1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/sqr_diag_addlsh1.asm Sat Aug 31 18:46:59 2013 +0200
@@ -0,0 +1,103 @@
+dnl AMD64 mpn_sqr_diag_addlsh1
+
+dnl Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C AMD bull 3.5
+C AMD pile 3.5
+C AMD steam ?
+C AMD bobcat 4
+C AMD jaguar ?
+C Intel P4 ?
+C Intel core 4
+C Intel NHM 4.75
+C Intel SBR 3.13
+C Intel IBR 3.1
+C Intel HWL 2.5
+C Intel BWL ?
+C Intel atom 15
+C VIA nano 4
+
+C When playing with pointers, set this to $2 to fall back to conservative
+C indexing in wind-down code.
+define(`I',`$1')  C I(a,b) expands to a, the non-indexed operand form, in the L(end) stores
+C Incoming arguments, held in SysV integer argument registers 1-4:
+define(`rp', `%rdi')  C written: result limbs
+define(`tp', `%rsi')  C read: limbs that get doubled and added in
+define(`up_arg', `%rdx')  C read: limbs that get squared
+define(`n', `%rcx')  C limb count of up on entry; reused as a negative loop index
+C up_arg lives in rdx, which mul overwrites, so the code addresses up via r11
+define(`up', `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_sqr_diag_addlsh1)  C rp[0..2N-1] = sum of up[i]^2 at limb 2i, plus 2*tp[0..2N-3] at limb 1 (N = entry value of n)
+ FUNC_ENTRY(4)  C NOTE(review): macro defined elsewhere; presumably sets up the 4 args under DOS64 - confirm
+ push %rbx  C rbx is callee-saved; it holds the saved shift-out carry bit and is restored before ret
+C Turn n into 2*(N-1): the index counts rp/tp limbs, two per remaining up limb
+ dec n
+ shl n
+C NOTE(review): the code below always reads up[1] and tp[0..1], so it appears to need N >= 2 - confirm with callers
+ mov (up_arg), %rax  C rax = up[0]
+C Point rp, tp and up at their ends so that a negative index can count up to zero
+ lea (rp,n,8), rp  C rp -> rp[2N-2]
+ lea (tp,n,8), tp  C tp -> tp[2N-2], one past the last tp limb read
+ lea (up_arg,n,4), up  C up -> up[N-1]; scale 4 because the index advances 2 per up limb
+ neg n  C n = -2*(N-1)
+C The lowest result limb is just the low half of up[0]^2
+ mul %rax  C rdx:rax = up[0]^2
+ mov %rax, (rp,n,8)  C rp[0] = low half; high half stays in rdx for the loop
+C R32/R8 presumably select the 32-bit and 8-bit views of rbx (macros defined elsewhere)
+ xor R32(%rbx), R32(%rbx)  C saved carry = 0; xor also clears CF for the first adc at L(mid)
+ jmp L(mid)
+C Each iteration consumes two tp limbs and one up limb and stores two rp limbs
+ ALIGN(16)
+L(top): add %r10, %r8  C doubled tp low limb + (high half of previous square + saved carry)
+ adc %rax, %r9  C doubled tp high limb + low half of current square; CF out feeds the next adc
+ mov %r8, -8(rp,n,8)  C mov leaves CF untouched
+ mov %r9, (rp,n,8)
+L(mid): mov (tp,n,8), %r8  C load the next two tp limbs
+ mov 8(tp,n,8), %r9
+ adc %r8, %r8  C double with carry-in: CF is 0 on first entry, otherwise the carry out of L(top)
+ adc %r9, %r9  C CF out = bit shifted out of this tp pair
+ lea (%rdx,%rbx), %r10  C r10 = high half of previous square + previous shift-out bit; lea keeps flags
+ setc R8(%rbx)  C save the shift-out bit before mul clobbers CF
+ mov 8(up,n,4), %rax  C next up limb
+ mul %rax  C rdx:rax = its square
+ add $2, n
+ js L(top)  C loop while the index is still negative
+C Wind-down: n is 0 here, so both operand forms given to I() address the same limbs
+L(end): add %r10, %r8
+ adc %rax, %r9
+ mov %r8, I(-8(rp),-8(rp,n,8))  C rp[2N-3]
+ mov %r9, I((rp),(rp,n,8))  C rp[2N-2]
+ adc %rbx, %rdx  C top limb = high half of last square + last shift-out bit + CF
+ mov %rdx, I(8(rp),8(rp,n,8))  C rp[2N-1]
+
+ pop %rbx
+ FUNC_EXIT()  C NOTE(review): counterpart of FUNC_ENTRY; defined elsewhere
+ ret
+EPILOGUE()
More information about the gmp-commit
mailing list