[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Feb 5 20:22:48 CET 2011
details: /home/hgfiles/gmp/rev/19d332f71577
changeset: 13829:19d332f71577
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 05 20:14:57 2011 +0100
description:
New files for Intel SBR.
details: /home/hgfiles/gmp/rev/e243a052828b
changeset: 13830:e243a052828b
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 05 20:21:37 2011 +0100
description:
(mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc, mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare
details: /home/hgfiles/gmp/rev/f744ec32bf32
changeset: 13831:f744ec32bf32
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 05 20:22:04 2011 +0100
description:
(mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc, mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare
details: /home/hgfiles/gmp/rev/1712752e4f47
changeset: 13832:1712752e4f47
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 05 20:22:41 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 10 ++
gmp-impl.h | 12 ++
mpn/asm-defs.m4 | 6 +
mpn/x86_64/coreisbr/aorrlsh1_n.asm | 40 +++++++++
mpn/x86_64/coreisbr/aorrlsh2_n.asm | 40 +++++++++
mpn/x86_64/coreisbr/aorrlshC_n.asm | 157 +++++++++++++++++++++++++++++++++++++
mpz/lucnum_ui.c | 5 +-
7 files changed, 268 insertions(+), 2 deletions(-)
diffs (truncated from 373 to 300 lines):
diff -r 7cba880ba762 -r 1712752e4f47 ChangeLog
--- a/ChangeLog Sat Feb 05 14:51:43 2011 +0100
+++ b/ChangeLog Sat Feb 05 20:22:41 2011 +0100
@@ -1,5 +1,13 @@
2011-02-05 Torbjorn Granlund <tege at gmplib.org>
+ * gmp-impl.h (mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc,
+ mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare.
+ * mpn/asm-defs.m4: Likewise.
+
+ * mpn/x86_64/coreisbr/aorrlshC_n.asm: New file.
+ * mpn/x86_64/coreisbr/aorrlsh1_n.asm: New file.
+ * mpn/x86_64/coreisbr/aorrlsh2_n.asm: New file.
+
* mpn/x86_64/coreisbr/aors_n.asm: New file, based on old
atom/aors_n.asm.
* mpn/x86_64/atom/aors_n.asm: Grab coreisbr/aors_n.asm.
@@ -12,6 +20,8 @@
* tests/mpn/t-toom6h.c: No tests below MPN_TOOM6H_MIN.
* tests/mpn/t-toom8h.c: No tests below MPN_TOOM8H_MIN.
+ * mpz/lucnum_ui.c: Use mpn_addlsh2_n.
+
2011-02-04 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/atom/rsh1aors_n.asm: Add a MULFUNC_PROLOGUE.
diff -r 7cba880ba762 -r 1712752e4f47 gmp-impl.h
--- a/gmp-impl.h Sat Feb 05 14:51:43 2011 +0100
+++ b/gmp-impl.h Sat Feb 05 20:22:41 2011 +0100
@@ -800,11 +800,15 @@
returns the carry out (0, 1 or 2). */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_addlsh1_nc __MPN(addlsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
returns the carry out (0, ..., 4). */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_addlsh2_nc __MPN(addlsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
returns the carry out (0, ..., 2^k). */
@@ -815,21 +819,29 @@
returns the borrow out (0, 1 or 2). */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_sublsh1_nc __MPN(sublsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
returns the carry out (-1, 0, 1). */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
returns the borrow out (FIXME 0, 1, 2 or 3). */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_sublsh2_nc __MPN(sublsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
returns the carry out (-1, ..., 3). */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
returns the carry out (-1, 0, ..., 2^k-1). */
diff -r 7cba880ba762 -r 1712752e4f47 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Sat Feb 05 14:51:43 2011 +0100
+++ b/mpn/asm-defs.m4 Sat Feb 05 20:22:41 2011 +0100
@@ -1309,7 +1309,9 @@
define_mpn(add_n)
define_mpn(add_nc)
define_mpn(addlsh1_n)
+define_mpn(addlsh1_nc)
define_mpn(addlsh2_n)
+define_mpn(addlsh2_nc)
define_mpn(addlsh_n)
define_mpn(addmul_1)
define_mpn(addmul_1c)
@@ -1392,7 +1394,9 @@
define_mpn(redc_1)
define_mpn(redc_2)
define_mpn(rsblsh1_n)
+define_mpn(rsblsh1_nc)
define_mpn(rsblsh2_n)
+define_mpn(rsblsh2_nc)
define_mpn(rsblsh_n)
define_mpn(rsh1add_n)
define_mpn(rsh1add_nc)
@@ -1408,7 +1412,9 @@
define_mpn(sqr_diag_addlsh1)
define_mpn(sub_n)
define_mpn(sublsh1_n)
+define_mpn(sublsh1_nc)
define_mpn(sublsh2_n)
+define_mpn(sublsh2_nc)
define_mpn(sqrtrem)
define_mpn(sub)
define_mpn(sub_1)
diff -r 7cba880ba762 -r 1712752e4f47 mpn/x86_64/coreisbr/aorrlsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aorrlsh1_n.asm Sat Feb 05 20:22:41 2011 +0100
@@ -0,0 +1,40 @@
+dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_addlsh1_n)
+ define(func_nc, mpn_addlsh1_nc)')
+ifdef(`OPERATION_rsblsh1_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsblsh1_n)
+ define(func_nc, mpn_rsblsh1_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff -r 7cba880ba762 -r 1712752e4f47 mpn/x86_64/coreisbr/aorrlsh2_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aorrlsh2_n.asm Sat Feb 05 20:22:41 2011 +0100
@@ -0,0 +1,40 @@
+dnl AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+ifdef(`OPERATION_addlsh2_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_addlsh2_n)
+ define(func_nc, mpn_addlsh2_nc)')
+ifdef(`OPERATION_rsblsh2_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsblsh2_n)
+ define(func_nc, mpn_rsblsh2_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff -r 7cba880ba762 -r 1712752e4f47 mpn/x86_64/coreisbr/aorrlshC_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aorrlshC_n.asm Sat Feb 05 20:22:41 2011 +0100
@@ -0,0 +1,157 @@
+dnl AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 3.25
+C Intel NHM 4
+C Intel SBR 2 C (or 1.95 when L(top)'s alignment = 16 (mod 32))
+C Intel atom ?
+C VIA nano ?
+
+C This code probably runs close to optimally on Sandy Bridge, and reasonably
+C well on Core 2, but it runs poorly on all other processors, including Nehalem
+C (NHM).
+C
+C The carry handling is prepared for _nc variants. If we choose to
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(func_nc)
+ push %rbp
+ mov cy, %rax
+ neg %rax C set msb on carry
+ xor R32(%rbp), R32(%rbp) C limb carry
+ mov (vp), %r8
+ shrd $RSH, %r8, %rbp
+ mov R32(n), R32(%r9)
+ and $3, R32(%r9)
+ je L(b00)
+ cmp $2, R32(%r9)
+ jc L(b01)
+ je L(b10)
+ jmp L(b11)
+EPILOGUE()
+
+ ALIGN(16)
+PROLOGUE(func_n)
+ push %rbp
+ xor R32(%rbp), R32(%rbp) C limb carry
+ mov (vp), %r8
+ shrd $RSH, %r8, %rbp
+ mov R32(n), R32(%rax)
+ and $3, R32(%rax)
+ je L(b00)
+ cmp $2, R32(%rax)
+ jc L(b01)
+ je L(b10)
+
+L(b11): mov 8(vp), %r9
+ shrd $RSH, %r9, %r8
+ mov 16(vp), %r10
+ shrd $RSH, %r10, %r9
+ add R32(%rax), R32(%rax) C init carry flag
+ ADCSBB (up), %rbp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ mov %rbp, (rp)
+ mov %r8, 8(rp)
+ mov %r9, 16(rp)
+ mov %r10, %rbp
+ lea 24(up), up
+ lea 24(vp), vp
+ lea 24(rp), rp
+ sbb R32(%rax), R32(%rax) C save carry flag
+ sub $3, n
+ ja L(top)
+ jmp L(end)
+
+L(b01): add R32(%rax), R32(%rax) C init carry flag
+ ADCSBB (up), %rbp
+ mov %rbp, (rp)
+ mov %r8, %rbp
More information about the gmp-commit
mailing list