[Gmp-commit] /home/hgfiles/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon May 10 11:22:15 CEST 2010
details: /home/hgfiles/gmp/rev/8e1260bce208
changeset: 13618:8e1260bce208
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun May 09 23:03:15 2010 +0200
description:
Clean up comments.
details: /home/hgfiles/gmp/rev/c767c27a18d6
changeset: 13619:c767c27a18d6
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 10 00:11:45 2010 +0200
description:
Misc cleanup.
details: /home/hgfiles/gmp/rev/7c6fd72092fb
changeset: 13620:7c6fd72092fb
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 10 00:12:14 2010 +0200
description:
Remove redundant include.
details: /home/hgfiles/gmp/rev/64228ef81450
changeset: 13621:64228ef81450
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 10 00:14:16 2010 +0200
description:
Overhaul of x86_64 {add,sub,rsb}lshC_n.asm code.
details: /home/hgfiles/gmp/rev/563e3c3cf26a
changeset: 13622:563e3c3cf26a
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 10 11:22:02 2010 +0200
description:
Fix typo.
diffstat:
ChangeLog | 18 ++++-
mpn/ia64/aorslshC_n.asm | 2 -
mpn/x86_64/aorrlsh1_n.asm | 12 +-
mpn/x86_64/aorrlsh2_n.asm | 144 +++-------------------------------
mpn/x86_64/aorrlshC_n.asm | 143 ++++++++++++++++++++++++++++++++++
mpn/x86_64/core2/aorrlsh1_n.asm | 39 +++++++++
mpn/x86_64/core2/aorrlsh2_n.asm | 39 +++++++++
mpn/x86_64/core2/aorslsh1_n.asm | 154 -------------------------------------
mpn/x86_64/core2/sublsh1_n.asm | 33 +++++++
mpn/x86_64/core2/sublsh2_n.asm | 33 +++++++
mpn/x86_64/core2/sublshC_n.asm | 144 ++++++++++++++++++++++++++++++++++
mpn/x86_64/pentium4/aorslsh1_n.asm | 10 +-
mpn/x86_64/pentium4/aorslsh2_n.asm | 10 +-
13 files changed, 478 insertions(+), 303 deletions(-)
diffs (truncated from 892 to 300 lines):
diff -r d9d303fcd120 -r 563e3c3cf26a ChangeLog
--- a/ChangeLog Sat May 08 00:50:30 2010 +0200
+++ b/ChangeLog Mon May 10 11:22:02 2010 +0200
@@ -1,9 +1,25 @@
+2010-05-10 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/aorrlsh2_n.asm: Fix typo.
+
+2010-05-09 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/aorrlshC_n.asm: New file based on aorrlsh2_n.asm.
+ * mpn/x86_64/aorrlsh2_n.asm: Now just include aorrlshC_n.asm.
+ * mpn/x86_64/core2/aorrlsh1_n.asm: New file, include ../aorrlshC_n.asm.
+ * mpn/x86_64/core2/aorrlsh2_n.asm: Likewise.
+
+ * mpn/x86_64/core2/sublshC_n.asm: New file based on aorslsh1_n.asm.
+ * mpn/x86_64/core2/aorslsh1_n.asm: Remove.
+ * mpn/x86_64/core2/sublsh1_n.asm: Just include sublshC_n.asm.
+ * mpn/x86_64/core2/sublsh2_n.asm: Likewise.
+
2010-05-08 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/atom/gmp-mparam.h: Disable mpn_rsh1add_n, mpn_rsh1sub_n.
* mpn/x86_64/pentium4/aorslshC_n.asm: New file based on aorslsh1_n.asm.
- * mpn/x86_64/pentium4/aorslsh1_n.asm: Just include aorslshC_n.asm.
+ * mpn/x86_64/pentium4/aorslsh1_n.asm: Now just include aorslshC_n.asm.
* mpn/x86_64/pentium4/aorslsh2_n.asm: New file.
2010-05-07 Torbjorn Granlund <tege at gmplib.org>
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/ia64/aorslshC_n.asm
--- a/mpn/ia64/aorslshC_n.asm Sat May 08 00:50:30 2010 +0200
+++ b/mpn/ia64/aorslshC_n.asm Mon May 10 11:22:02 2010 +0200
@@ -19,8 +19,6 @@
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-include(`../config.m4')
-
C cycles/limb
C Itanium: ?
C Itanium 2: 1.5
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/x86_64/aorrlsh1_n.asm
--- a/mpn/x86_64/aorrlsh1_n.asm Sat May 08 00:50:30 2010 +0200
+++ b/mpn/x86_64/aorrlsh1_n.asm Mon May 10 11:22:02 2010 +0200
@@ -44,13 +44,13 @@
define(`n', `%rcx')
ifdef(`OPERATION_addlsh1_n', `
- define(ADDSUB, add)
- define(ADCSBB, adc)
- define(func, mpn_addlsh1_n)')
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addlsh1_n)')
ifdef(`OPERATION_rsblsh1_n', `
- define(ADDSUB, sub)
- define(ADCSBB, sbb)
- define(func, mpn_rsblsh1_n)')
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_rsblsh1_n)')
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/x86_64/aorrlsh2_n.asm
--- a/mpn/x86_64/aorrlsh2_n.asm Sat May 08 00:50:30 2010 +0200
+++ b/mpn/x86_64/aorrlsh2_n.asm Mon May 10 11:22:02 2010 +0200
@@ -1,8 +1,9 @@
-dnl AMD64 mpn_addlsh2_n and mpn_rsblsh2_n. R = 2*V +- U.
-dnl ("rsb" means reversed subtract, name mandated by mpn_sublsh2_n which
-dnl subtacts the shifted operand from the unshifted operand.)
+dnl AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
-dnl Copyright 2009 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2009, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -21,135 +22,18 @@
include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9 2
-C AMD K10 2
-C Intel P4 ?
-C Intel core2 3
-C Intel corei 2.75
-C Intel atom ?
-C VIA nano ?
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n', `%rcx')
+define(LSH, 2)
+define(RSH, 62)
ifdef(`OPERATION_addlsh2_n',`
- define(ADDSUB, `add')
- define(ADCSBB, `adc')
- define(func, mpn_addlsh2_n)')
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addlsh2_n)')
ifdef(`OPERATION_rsblsh2_n',`
- define(ADDSUB, `sub')
- define(ADCSBB, `sbb')
- define(func, mpn_rsblsh2_n)')
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_rsblsh2_n)')
MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func)
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov (vp), %r8
- lea (,%r8,4), %r12
- shr $62, %r8
-
- mov R32(n), R32(%rax)
- lea (rp,n,8), rp
- lea (up,n,8), up
- lea (vp,n,8), vp
- neg n
- and $3, R8(%rax)
- je L(b00)
- cmp $2, R8(%rax)
- jc L(b01)
- je L(b10)
-
-L(b11): mov 8(vp,n,8), %r10
- lea (%r8,%r10,4), %r14
- shr $62, %r10
- mov 16(vp,n,8), %r11
- lea (%r10,%r11,4), %r15
- shr $62, %r11
- ADDSUB (up,n,8), %r12
- ADCSBB 8(up,n,8), %r14
- ADCSBB 16(up,n,8), %r15
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r12, (rp,n,8)
- mov %r14, 8(rp,n,8)
- mov %r15, 16(rp,n,8)
- add $3, n
- js L(top)
- jmp L(end)
-
-L(b01): mov %r8, %r11
- ADDSUB (up,n,8), %r12
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r12, (rp,n,8)
- add $1, n
- js L(top)
- jmp L(end)
-
-L(b10): mov 8(vp,n,8), %r11
- lea (%r8,%r11,4), %r15
- shr $62, %r11
- ADDSUB (up,n,8), %r12
- ADCSBB 8(up,n,8), %r15
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r12, (rp,n,8)
- mov %r15, 8(rp,n,8)
- add $2, n
- js L(top)
- jmp L(end)
-
-L(b00): mov 8(vp,n,8), %r9
- mov 16(vp,n,8), %r10
- jmp L(e00)
-
- ALIGN(16)
-L(top): mov 16(vp,n,8), %r10
- mov (vp,n,8), %r8
- mov 8(vp,n,8), %r9
- lea (%r11,%r8,4), %r12
- shr $62, %r8
-L(e00): lea (%r8,%r9,4), %r13
- shr $62, %r9
- mov 24(vp,n,8), %r11
- lea (%r9,%r10,4), %r14
- shr $62, %r10
- lea (%r10,%r11,4), %r15
- shr $62, %r11
- add R32(%rax), R32(%rax) C restore carry
- ADCSBB (up,n,8), %r12
- ADCSBB 8(up,n,8), %r13
- ADCSBB 16(up,n,8), %r14
- ADCSBB 24(up,n,8), %r15
- mov %r12, (rp,n,8)
- mov %r13, 8(rp,n,8)
- mov %r14, 16(rp,n,8)
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r15, 24(rp,n,8)
- add $4, n
- js L(top)
-L(end):
-
-ifdef(`OPERATION_addlsh2_n',`
- sub R32(%r11), R32(%rax)
- neg R32(%rax)')
-ifdef(`OPERATION_rsblsh2_n',`
- add R32(%r11), R32(%rax)
- movslq R32(%rax), %rax')
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- ret
-EPILOGUE()
+include_mpn(`x86_64/aorrlshC_n.asm')
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/x86_64/aorrlshC_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/aorrlshC_n.asm Mon May 10 11:22:02 2010 +0200
@@ -0,0 +1,143 @@
+dnl AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl Copyright 2009, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+C cycles/limb
+C AMD K8,K9 2
+C AMD K10 2
+C Intel P4 ?
+C Intel core2 3
+C Intel corei 2.75
+C Intel atom ?
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+
+define(M, eval(1<<LSH))
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(func)
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov (vp), %r8
+ lea (,%r8,M), %r12
+ shr $RSH, %r8
+
+ mov R32(n), R32(%rax)
+ lea (rp,n,8), rp
+ lea (up,n,8), up
+ lea (vp,n,8), vp
+ neg n
+ and $3, R8(%rax)
+ je L(b00)
+ cmp $2, R8(%rax)
+ jc L(b01)
+ je L(b10)
+
+L(b11): mov 8(vp,n,8), %r10
+ lea (%r8,%r10,M), %r14
+ shr $RSH, %r10
+ mov 16(vp,n,8), %r11
+ lea (%r10,%r11,M), %r15
+ shr $RSH, %r11
+ ADDSUB (up,n,8), %r12
More information about the gmp-commit
mailing list