[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Feb 11 09:06:34 CET 2011


details:   /home/hgfiles/gmp/rev/75a98eb56b82
changeset: 13840:75a98eb56b82
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Feb 10 21:05:35 2011 +0100
description:
Tune.

details:   /home/hgfiles/gmp/rev/03139b9b6e28
changeset: 13841:03139b9b6e28
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Feb 11 09:06:29 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                           |   13 +++
 configure.in                        |    9 ++
 gmp-impl.h                          |    4 +-
 mpn/asm-defs.m4                     |    4 +-
 mpn/generic/toom_interpolate_8pts.c |    4 +-
 mpn/powerpc64/mode64/mod_1_4.asm    |   12 +-
 mpn/x86/atom/aorrlsh2_n.asm         |   42 +++++++++++
 mpn/x86/atom/rsblsh1_n.asm          |   42 +++++++++++
 mpn/x86/k7/addlsh1_n.asm            |   56 ++++++---------
 mpn/x86/k7/aorrlshC_n.asm           |  129 ++++++++++++++++++++++++++++++++++++
 mpz/lucnum_ui.c                     |    2 +-
 tests/devel/try.c                   |   87 ++++++++++++++++++++++++
 tests/refmpn.c                      |   69 +++++++++++++++++++
 tests/tests.h                       |   11 +++
 14 files changed, 437 insertions(+), 47 deletions(-)

diffs (truncated from 751 to 300 lines):

diff -r a375236a1e6b -r 03139b9b6e28 ChangeLog
--- a/ChangeLog	Sun Feb 06 20:27:28 2011 +0100
+++ b/ChangeLog	Fri Feb 11 09:06:29 2011 +0100
@@ -1,3 +1,16 @@
+2011-02-11 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86/k7/addlsh1_n.asm: Faster core loop (Torbjorn's).
+
+	* configure.in: Add HAVE_NATIVE_{add,sub,rsb}lsh{,1,2}_nc.
+	* tests/tests.h: refmpn_{add,sub,rsb}lsh{,1,2}_nc prototypes.
+	* tests/refmpn.c: New refmpn_{add,sub,rsb}lsh{,1,2}_nc.
+	* tests/devel/try.c: Tests for mpn_{add,sub,rsb}lsh{,1,2}_nc.
+
+	* mpn/x86/k7/aorrlshC_n.asm: New file.
+	* mpn/x86/atom/aorrlsh2_n.asm: Grab k7/aorrlshC_n.asm.
+	* mpn/x86/atom/rsblsh1_n.asm: Grab k7/aorrlshC_n.asm.
+
 2011-02-06 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/x86/k7/addlsh1_n.asm: New file.
diff -r a375236a1e6b -r 03139b9b6e28 configure.in
--- a/configure.in	Sun Feb 06 20:27:28 2011 +0100
+++ b/configure.in	Fri Feb 11 09:06:29 2011 +0100
@@ -3006,6 +3006,9 @@
 #undef HAVE_NATIVE_mpn_addlsh1_n
 #undef HAVE_NATIVE_mpn_addlsh2_n
 #undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
 #undef HAVE_NATIVE_mpn_addmul_1c
 #undef HAVE_NATIVE_mpn_addmul_2
 #undef HAVE_NATIVE_mpn_addmul_3
@@ -3062,6 +3065,9 @@
 #undef HAVE_NATIVE_mpn_rsblsh1_n
 #undef HAVE_NATIVE_mpn_rsblsh2_n
 #undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
 #undef HAVE_NATIVE_mpn_rsh1add_n
 #undef HAVE_NATIVE_mpn_rsh1add_nc
 #undef HAVE_NATIVE_mpn_rsh1sub_n
@@ -3075,6 +3081,9 @@
 #undef HAVE_NATIVE_mpn_sublsh1_n
 #undef HAVE_NATIVE_mpn_sublsh2_n
 #undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
 #undef HAVE_NATIVE_mpn_submul_1c
 #undef HAVE_NATIVE_mpn_udiv_qrnnd
 #undef HAVE_NATIVE_mpn_udiv_qrnnd_r
diff -r a375236a1e6b -r 03139b9b6e28 gmp-impl.h
--- a/gmp-impl.h	Sun Feb 06 20:27:28 2011 +0100
+++ b/gmp-impl.h	Fri Feb 11 09:06:29 2011 +0100
@@ -4,7 +4,7 @@
    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
 
 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -830,7 +830,7 @@
 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
-   returns the borrow out (FIXME 0, 1, 2 or 3).  */
+   returns the borrow out (0, ..., 4).  */
 #define mpn_sublsh2_n __MPN(sublsh2_n)
 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 #define mpn_sublsh2_nc __MPN(sublsh2_nc)
diff -r a375236a1e6b -r 03139b9b6e28 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4	Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/asm-defs.m4	Fri Feb 11 09:06:29 2011 +0100
@@ -2,8 +2,8 @@
 dnl
 dnl  m4 macros for gmp assembly code, shared by all CPUs.
 
-dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
-dnl  Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2011 Free
+dnl  Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
diff -r a375236a1e6b -r 03139b9b6e28 mpn/generic/toom_interpolate_8pts.c
--- a/mpn/generic/toom_interpolate_8pts.c	Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/generic/toom_interpolate_8pts.c	Fri Feb 11 09:06:29 2011 +0100
@@ -6,7 +6,7 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -61,7 +61,7 @@
 #endif
 
 #if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s)
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
 #else
 static mp_limb_t
 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
diff -r a375236a1e6b -r 03139b9b6e28 mpn/powerpc64/mode64/mod_1_4.asm
--- a/mpn/powerpc64/mode64/mod_1_4.asm	Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/powerpc64/mode64/mod_1_4.asm	Fri Feb 11 09:06:29 2011 +0100
@@ -1,6 +1,6 @@
 dnl  PowerPC-64 mpn_mod_1s_4p
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -21,7 +21,7 @@
 
 C		    cycles/limb
 C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
+C POWER4/PPC970		 9
 C POWER5		 9
 C POWER6		13
 
@@ -115,7 +115,7 @@
 	mtctr	r7
 	bdz	L(end)
 
-	ALIGN(32)
+	ALIGN(16)
 L(top):	ld	r31, -16(ap)
 	ld	r10, -8(ap)
 	ld	r11, 8(ap)
@@ -130,9 +130,9 @@
 	mulld	r11, r11, r24
 	mulhdu	r4, r12, r25
 	mulld	r12, r12, r25
-	addi	ap, ap, -32
 	addc	r8, r10, r31
 	addze	r10, r27
+	addi	ap, ap, -32
 	addc	r27, r8, r12
 	adde	r12, r10, r4
 	addc	r11, r27, r11
@@ -142,8 +142,8 @@
 	addc	r0, r9, r12
 	adde	r9, r7, r4
 	bdnz	L(top)
-L(end):
-	lwz	r3, 12(cps)
+
+L(end):	lwz	r3, 12(cps)
 	mulld	r10, r9, r26
 	mulhdu	r9, r9, r26
 	addc	r11, r0, r10
diff -r a375236a1e6b -r 03139b9b6e28 mpn/x86/atom/aorrlsh2_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorrlsh2_n.asm	Fri Feb 11 09:06:29 2011 +0100
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+	define(M4_inst,        adcl)
+	define(M4_opp,         subl)
+	define(M4_function,    mpn_addlsh2_n)
+	define(M4_function_c,  mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+	define(M4_inst,        sbbl)
+	define(M4_opp,         addl)
+	define(M4_function,    mpn_rsblsh2_n)
+	define(M4_function_c,  mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r a375236a1e6b -r 03139b9b6e28 mpn/x86/atom/rsblsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/rsblsh1_n.asm	Fri Feb 11 09:06:29 2011 +0100
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+	define(M4_inst,        adcl)
+	define(M4_opp,         subl)
+	define(M4_function,    mpn_addlsh1_n)
+	define(M4_function_c,  mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+	define(M4_inst,        sbbl)
+	define(M4_opp,         addl)
+	define(M4_function,    mpn_rsblsh1_n)
+	define(M4_function_c,  mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r a375236a1e6b -r 03139b9b6e28 mpn/x86/k7/addlsh1_n.asm
--- a/mpn/x86/k7/addlsh1_n.asm	Sun Feb 06 20:27:28 2011 +0100
+++ b/mpn/x86/k7/addlsh1_n.asm	Fri Feb 11 09:06:29 2011 +0100
@@ -75,7 +75,7 @@
 define(`up',  `%esi')
 define(`vp',  `%ebp')
 
-	mov	$0x20000000, %eax
+	mov	$0x2aaaaaab, %eax
 
 	push	%ebx			FRAME_pushl()
 	movl	PARAM_SIZE, %ebx	C size
@@ -94,9 +94,9 @@
 	push	vp			FRAME_pushl()
 	movl	PARAM_DBLD, vp
 
-	leal	4(,%edx,4), %ecx	C count*4+4 = -(size\8)*4
+	leal	3(%edx,%edx,2), %ecx	C count*3+3 = -(size\6)*3
 	xorl	%edx, %edx
-	leal	(%ebx,%ecx,2), %ebx	C size + (count*4+4)*2 = size % 8
+	leal	(%ebx,%ecx,2), %ebx	C size + (count*3+3)*2 = size % 6
 	orl	%ebx, %ebx
 	jz	L(exact)
 
@@ -106,15 +106,15 @@
 	mov	(vp), %eax
 	adc	%eax, %eax
 	rcr	%edx			C restore 1st saved carry bit
+	lea	4(vp), vp
 	adc	(up), %eax
-	mov	%eax, (rp)
+	lea	4(up), up
 	adc	%edx, %edx		C save a carry bit in edx
-	lea	4(rp), rp
-	lea	4(up), up
-	lea	4(vp), vp
 ifdef(`CPU_P6',`
 	adc	%edx, %edx ')		C save another carry bit in edx
 	decl	%ebx
+	mov	%eax, (rp)
+	lea	4(rp), rp
 	jnz	L(oop)
 	movl	vp, VAR_TMP
 L(exact):
@@ -131,8 +131,6 @@
 	adc	%ebx, %ebx
 	mov	8(vp), %ecx
 	adc	%ecx, %ecx


More information about the gmp-commit mailing list