[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Jan 19 17:22:59 CET 2011
details: /home/hgfiles/gmp/rev/cae88d21df37
changeset: 13747:cae88d21df37
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Jan 19 17:19:48 2011 +0100
description:
Use R32 throughout.
details: /home/hgfiles/gmp/rev/fb6fddc552e2
changeset: 13748:fb6fddc552e2
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Jan 19 17:22:52 2011 +0100
description:
mpn/x86_64/core2/rsh1aors_n.asm: New file.
diffstat:
ChangeLog | 4 +
mpn/x86_64/core2/rsh1aors_n.asm | 173 ++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/rsh1aors_n.asm | 11 +-
3 files changed, 182 insertions(+), 6 deletions(-)
diffs (235 lines):
diff -r 0bf61597048d -r fb6fddc552e2 ChangeLog
--- a/ChangeLog Tue Jan 18 07:59:02 2011 +0100
+++ b/ChangeLog Wed Jan 19 17:22:52 2011 +0100
@@ -1,3 +1,7 @@
+2011-01-19 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/core2/rsh1aors_n.asm: New file.
+
2011-01-18 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/bdiv_q_1.asm: New file (same core alg. as dive_1).
diff -r 0bf61597048d -r fb6fddc552e2 mpn/x86_64/core2/rsh1aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/core2/rsh1aors_n.asm Wed Jan 19 17:22:52 2011 +0100
@@ -0,0 +1,173 @@
+dnl Intel P6/64 mpn_rsh1add_n and mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1
+
+dnl Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 4.25
+C Intel P4 21.5
+C Intel core2 3.2
+C Intel corei 3.87
+C Intel atom ?
+C VIA nano 44.9
+
+C INPUT PARAMETERS
+define(`rp', `%rdi') C destination limb pointer
+define(`up', `%rsi') C first source limb pointer
+define(`vp', `%rdx') C second source limb pointer
+define(`n', `%rcx') C limb count, consumed four per loop pass
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_rsh1add_n)
+ define(func_nc, mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsh1sub_n)
+ define(func_nc, mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ASM_START()
+ TEXT
+
+ ALIGN(16)
+PROLOGUE(func_nc)
+ push %rbx C callee-saved, used below to hold the saved carry
+ push %rbp C callee-saved, must pair with the two pops at the shared exit
+
+ neg %r8 C set C flag from parameter
+ mov (up), %rbp C low limb of up
+ ADCSBB (vp), %rbp C low limb of up +- vp, including carry-in
+
+ jmp L(ent) C continue in the code shared with func_n
+EPILOGUE()
+
+ ALIGN(16)
+PROLOGUE(func_n)
+ push %rbx C callee-saved, holds the saved carry as 0 or -1
+ push %rbp C callee-saved, holds the limb waiting to be shifted
+
+ mov (up), %rbp C low limb of up
+ ADDSUB (vp), %rbp C low limb of up +- vp
+L(ent):
+ sbb R32(%rbx), R32(%rbx) C save cy
+ mov R32(%rbp), R32(%rax) C bit 0 of the low limb is the bit shifted out
+ and $1, R32(%rax) C return value
+
+ mov R32(n), R32(%r11)
+ and $3, R32(%r11) C n mod 4 picks the wind-up code
+
+ cmp $1, R32(%r11)
+ je L(do) C jump if n = 1 5 9 ...
+
+L(n1): cmp $2, R32(%r11)
+ jne L(n2) C jump unless n = 2 6 10 ...
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up), %r10
+ ADCSBB 8(vp), %r10
+ lea 8(up), up C lea advances pointers without touching flags
+ lea 8(vp), vp
+ lea 8(rp), rp
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r10, %rbp C shift right, low bit of next limb enters at top
+ mov %rbp, -8(rp)
+ jmp L(cj1)
+
+L(n2): cmp $3, R32(%r11)
+ jne L(n3) C jump unless n = 3 7 11 ...
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up), %r9
+ mov 16(up), %r10
+ ADCSBB 8(vp), %r9
+ ADCSBB 16(vp), %r10
+ lea 16(up), up
+ lea 16(vp), vp
+ lea 16(rp), rp
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r9, %rbp
+ mov %rbp, -16(rp)
+ jmp L(cj2)
+
+L(n3): dec n C come here for n = 4 8 12 ...
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up), %r8
+ mov 16(up), %r9
+ ADCSBB 8(vp), %r8
+ ADCSBB 16(vp), %r9
+ mov 24(up), %r10
+ ADCSBB 24(vp), %r10
+ lea 24(up), up
+ lea 24(vp), vp
+ lea 24(rp), rp
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r8, %rbp
+ mov %rbp, -24(rp)
+ shrd $1, %r9, %r8
+ mov %r8, -16(rp)
+L(cj2): shrd $1, %r10, %r9
+ mov %r9, -8(rp)
+L(cj1): mov %r10, %rbp C newest limb becomes the pending limb
+
+L(do):
+ shr $2, n C 4
+ je L(end) C 2
+ ALIGN(16)
+L(top): add R32(%rbx), R32(%rbx) C restore cy
+
+ mov 8(up), %r8
+ mov 16(up), %r9
+ ADCSBB 8(vp), %r8
+ ADCSBB 16(vp), %r9
+ mov 24(up), %r10
+ mov 32(up), %r11
+ ADCSBB 24(vp), %r10
+ ADCSBB 32(vp), %r11
+
+ lea 32(up), up
+ lea 32(vp), vp
+
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r8, %rbp C pending limb gets low bit of the next limb
+ mov %rbp, (rp)
+ shrd $1, %r9, %r8
+ mov %r8, 8(rp)
+ shrd $1, %r10, %r9
+ mov %r9, 16(rp)
+ shrd $1, %r11, %r10
+ mov %r10, 24(rp)
+
+ dec n
+ mov %r11, %rbp C top limb of this pass is pending for the next
+ lea 32(rp), rp
+ jne L(top)
+
+L(end): shrd $1, %rbx, %rbp C saved carry enters as the top bit
+ mov %rbp, (rp) C store the most significant result limb
+ pop %rbp
+ pop %rbx
+ ret
+EPILOGUE()
diff -r 0bf61597048d -r fb6fddc552e2 mpn/x86_64/rsh1aors_n.asm
--- a/mpn/x86_64/rsh1aors_n.asm Tue Jan 18 07:59:02 2011 +0100
+++ b/mpn/x86_64/rsh1aors_n.asm Wed Jan 19 17:22:52 2011 +0100
@@ -23,7 +23,7 @@
C cycles/limb
C AMD K8,K9 2.14 (mpn_add_n + mpn_rshift need 4.125)
C AMD K10 2.14 (mpn_add_n + mpn_rshift need 4.125)
-C AMD P4 12.75
+C Intel P4 12.75
C Intel core2 3.75
C Intel corei 4.4
C Intel atom ?
@@ -38,7 +38,6 @@
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n',`%rcx')
-define(`n32',`%ecx')
ifdef(`OPERATION_rsh1add_n', `
define(ADDSUB, add)
@@ -60,7 +59,7 @@
PROLOGUE(func_nc)
push %rbx
- xor %eax, %eax
+ xor R32(%rax), R32(%rax)
neg %r8 C set C flag from parameter
mov (up), %rbx
ADCSBB (vp), %rbx
@@ -71,14 +70,14 @@
PROLOGUE(func_n)
push %rbx
- xor %eax, %eax
+ xor R32(%rax), R32(%rax)
mov (up), %rbx
ADDSUB (vp), %rbx
L(ent):
rcr %rbx C rotate, save acy
- adc %eax, %eax C return value
+ adc R32(%rax), R32(%rax) C return value
- mov n32, R32(%r11)
+ mov R32(n), R32(%r11)
and $3, R32(%r11)
cmp $1, R32(%r11)
More information about the gmp-commit
mailing list