[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Feb 11 09:06:34 CET 2011
details: /home/hgfiles/gmp/rev/75a98eb56b82
changeset: 13840:75a98eb56b82
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Feb 10 21:05:35 2011 +0100
description:
Tune.
details: /home/hgfiles/gmp/rev/03139b9b6e28
changeset: 13841:03139b9b6e28
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Feb 11 09:06:29 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 13 +++
configure.in | 9 ++
gmp-impl.h | 4 +-
mpn/asm-defs.m4 | 4 +-
mpn/generic/toom_interpolate_8pts.c | 4 +-
mpn/powerpc64/mode64/mod_1_4.asm | 12 +-
mpn/x86/atom/aorrlsh2_n.asm | 42 +++++++++++
mpn/x86/atom/rsblsh1_n.asm | 42 +++++++++++
mpn/x86/k7/addlsh1_n.asm | 56 ++++++---------
mpn/x86/k7/aorrlshC_n.asm | 129 ++++++++++++++++++++++++++++++++++++
mpz/lucnum_ui.c | 2 +-
tests/devel/try.c | 87 ++++++++++++++++++++++++
tests/refmpn.c | 69 +++++++++++++++++++
tests/tests.h | 11 +++
14 files changed, 437 insertions(+), 47 deletions(-)
diffs (truncated from 751 to 300 lines):
diff -r a375236a1e6b -r 03139b9b6e28 ChangeLog
--- a/ChangeLog Sun Feb 06 20:27:28 2011 +0100
+++ b/ChangeLog Fri Feb 11 09:06:29 2011 +0100
@@ -1,3 +1,16 @@
+2011-02-11 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/k7/addlsh1_n.asm: Faster core loop (Torbjorn's).
+
+ * configure.in: Add HAVE_NATIVE_{add,sub,rsb}lsh{,1,2}_nc.
+ * tests/tests.h: refmpn_{add,sub,rsb}lsh{,1,2}_nc prototypes.
+ * tests/refmpn.c: New refmpn_{add,sub,rsb}lsh{,1,2}_nc.
+ * tests/devel/try.c: Tests for mpn_{add,sub,rsb}lsh{,1,2}_nc.
+
+ * mpn/x86/k7/aorrlshC_n.asm: New file.
+ * mpn/x86/atom/aorrlsh2_n.asm: Grab k7/aorrlshC_n.asm.
+ * mpn/x86/atom/rsblsh1_n.asm: Grab k7/aorrlshC_n.asm.
+
2011-02-06 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/k7/addlsh1_n.asm: New file.
diff -r a375236a1e6b -r 03139b9b6e28 configure.in
--- a/configure.in Sun Feb 06 20:27:28 2011 +0100
+++ b/configure.in Fri Feb 11 09:06:29 2011 +0100
@@ -3006,6 +3006,9 @@
#undef HAVE_NATIVE_mpn_addlsh1_n
#undef HAVE_NATIVE_mpn_addlsh2_n
#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
@@ -3062,6 +3065,9 @@
#undef HAVE_NATIVE_mpn_rsblsh1_n
#undef HAVE_NATIVE_mpn_rsblsh2_n
#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
#undef HAVE_NATIVE_mpn_rsh1add_n
#undef HAVE_NATIVE_mpn_rsh1add_nc
#undef HAVE_NATIVE_mpn_rsh1sub_n
@@ -3075,6 +3081,9 @@
#undef HAVE_NATIVE_mpn_sublsh1_n
#undef HAVE_NATIVE_mpn_sublsh2_n
#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_udiv_qrnnd
#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
diff -r a375236a1e6b -r 03139b9b6e28 gmp-impl.h
--- a/gmp-impl.h Sun Feb 06 20:27:28 2011 +0100
+++ b/gmp-impl.h Fri Feb 11 09:06:29 2011 +0100
@@ -4,7 +4,7 @@
BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -830,7 +830,7 @@
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
- returns the borrow out (FIXME 0, 1, 2 or 3). */
+ returns the borrow out (0, ..., 4). */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
diff -r a375236a1e6b -r 03139b9b6e28 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/asm-defs.m4 Fri Feb 11 09:06:29 2011 +0100
@@ -2,8 +2,8 @@
dnl
dnl m4 macros for gmp assembly code, shared by all CPUs.
-dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
-dnl Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2011 Free
+dnl Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
diff -r a375236a1e6b -r 03139b9b6e28 mpn/generic/toom_interpolate_8pts.c
--- a/mpn/generic/toom_interpolate_8pts.c Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/generic/toom_interpolate_8pts.c Fri Feb 11 09:06:29 2011 +0100
@@ -6,7 +6,7 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -61,7 +61,7 @@
#endif
#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s)
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
diff -r a375236a1e6b -r 03139b9b6e28 mpn/powerpc64/mode64/mod_1_4.asm
--- a/mpn/powerpc64/mode64/mod_1_4.asm Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/powerpc64/mode64/mod_1_4.asm Fri Feb 11 09:06:29 2011 +0100
@@ -1,6 +1,6 @@
dnl PowerPC-64 mpn_mod_1s_4p
-dnl Copyright 2010 Free Software Foundation, Inc.
+dnl Copyright 2010, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -21,7 +21,7 @@
C cycles/limb
C POWER3/PPC630 ?
-C POWER4/PPC970 ?
+C POWER4/PPC970 9
C POWER5 9
C POWER6 13
@@ -115,7 +115,7 @@
mtctr r7
bdz L(end)
- ALIGN(32)
+ ALIGN(16)
L(top): ld r31, -16(ap)
ld r10, -8(ap)
ld r11, 8(ap)
@@ -130,9 +130,9 @@
mulld r11, r11, r24
mulhdu r4, r12, r25
mulld r12, r12, r25
- addi ap, ap, -32
addc r8, r10, r31
addze r10, r27
+ addi ap, ap, -32
addc r27, r8, r12
adde r12, r10, r4
addc r11, r27, r11
@@ -142,8 +142,8 @@
addc r0, r9, r12
adde r9, r7, r4
bdnz L(top)
-L(end):
- lwz r3, 12(cps)
+
+L(end): lwz r3, 12(cps)
mulld r10, r9, r26
mulhdu r9, r9, r26
addc r11, r0, r10
diff -r a375236a1e6b -r 03139b9b6e28 mpn/x86/atom/aorrlsh2_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorrlsh2_n.asm Fri Feb 11 09:06:29 2011 +0100
@@ -0,0 +1,42 @@
+dnl Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+ define(M4_inst, adcl)
+ define(M4_opp, subl)
+ define(M4_function, mpn_addlsh2_n)
+ define(M4_function_c, mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+ define(M4_inst, sbbl)
+ define(M4_opp, addl)
+ define(M4_function, mpn_rsblsh2_n)
+ define(M4_function_c, mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r a375236a1e6b -r 03139b9b6e28 mpn/x86/atom/rsblsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/rsblsh1_n.asm Fri Feb 11 09:06:29 2011 +0100
@@ -0,0 +1,42 @@
+dnl Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(M4_inst, adcl)
+ define(M4_opp, subl)
+ define(M4_function, mpn_addlsh1_n)
+ define(M4_function_c, mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+ define(M4_inst, sbbl)
+ define(M4_opp, addl)
+ define(M4_function, mpn_rsblsh1_n)
+ define(M4_function_c, mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r a375236a1e6b -r 03139b9b6e28 mpn/x86/k7/addlsh1_n.asm
--- a/mpn/x86/k7/addlsh1_n.asm Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/x86/k7/addlsh1_n.asm Fri Feb 11 09:06:29 2011 +0100
@@ -75,7 +75,7 @@
define(`up', `%esi')
define(`vp', `%ebp')
- mov $0x20000000, %eax
+ mov $0x2aaaaaab, %eax
push %ebx FRAME_pushl()
movl PARAM_SIZE, %ebx C size
@@ -94,9 +94,9 @@
push vp FRAME_pushl()
movl PARAM_DBLD, vp
- leal 4(,%edx,4), %ecx C count*4+4 = -(size\8)*4
+ leal 3(%edx,%edx,2), %ecx C count*3+3 = -(size\6)*3
xorl %edx, %edx
- leal (%ebx,%ecx,2), %ebx C size + (count*4+4)*2 = size % 8
+ leal (%ebx,%ecx,2), %ebx C size + (count*3+3)*2 = size % 6
orl %ebx, %ebx
jz L(exact)
@@ -106,15 +106,15 @@
mov (vp), %eax
adc %eax, %eax
rcr %edx C restore 1st saved carry bit
+ lea 4(vp), vp
adc (up), %eax
- mov %eax, (rp)
+ lea 4(up), up
adc %edx, %edx C save a carry bit in edx
- lea 4(rp), rp
- lea 4(up), up
- lea 4(vp), vp
ifdef(`CPU_P6',`
adc %edx, %edx ') C save another carry bit in edx
decl %ebx
+ mov %eax, (rp)
+ lea 4(rp), rp
jnz L(oop)
movl vp, VAR_TMP
L(exact):
@@ -131,8 +131,6 @@
adc %ebx, %ebx
mov 8(vp), %ecx
adc %ecx, %ecx
More information about the gmp-commit mailing list