[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Feb 11 07:52:12 CET 2011


details:   /home/hgfiles/gmp/rev/d7088b155a80
changeset: 13836:d7088b155a80
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Feb 10 23:25:42 2011 +0100
description:
Fixed a FIXME comment.

details:   /home/hgfiles/gmp/rev/baa61caf15ad
changeset: 13837:baa61caf15ad
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Feb 11 07:12:00 2011 +0100
description:
Faster inner loop (by tege).

details:   /home/hgfiles/gmp/rev/5cbb42265900
changeset: 13838:5cbb42265900
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Feb 11 07:26:20 2011 +0100
description:
Tests for mpn_{add,sub,rsb}lsh{,1,2}.

details:   /home/hgfiles/gmp/rev/4e3f13fe4c88
changeset: 13839:4e3f13fe4c88
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Feb 11 07:52:05 2011 +0100
description:
aorrlshC_n.asm for k7 and atom.

diffstat:

 ChangeLog                   |   13 ++++
 configure.in                |    9 +++
 gmp-impl.h                  |    2 +-
 mpn/x86/atom/aorrlsh2_n.asm |   42 ++++++++++++++
 mpn/x86/atom/rsblsh1_n.asm  |   42 ++++++++++++++
 mpn/x86/k7/addlsh1_n.asm    |   56 +++++++-----------
 mpn/x86/k7/aorrlshC_n.asm   |  129 ++++++++++++++++++++++++++++++++++++++++++++
 tests/devel/try.c           |   87 +++++++++++++++++++++++++++++
 tests/refmpn.c              |   69 +++++++++++++++++++++++
 tests/tests.h               |   11 +++
 10 files changed, 425 insertions(+), 35 deletions(-)

diffs (truncated from 645 to 300 lines):

diff -r 5d639287e647 -r 4e3f13fe4c88 ChangeLog
--- a/ChangeLog	Mon Feb 07 08:05:27 2011 +0100
+++ b/ChangeLog	Fri Feb 11 07:52:05 2011 +0100
@@ -1,3 +1,16 @@
+2011-02-11 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86/k7/addlsh1_n.asm: Faster core loop (Torbjorn's).
+
+	* configure.in: Add HAVE_NATIVE_{add,sub,rsb}lsh{,1,2}_nc.
+	* tests/tests.h: refmpn_{add,sub,rsb}lsh{,1,2}_nc prototypes.
+	* tests/refmpn.c: New refmpn_{add,sub,rsb}lsh{,1,2}_nc.
+	* tests/devel/try.c: Tests for mpn_{add,sub,rsb}lsh{,1,2}_nc.
+
+	* mpn/x86/k7/aorrlshC_n.asm: New file.
+	* mpn/x86/atom/aorrlsh2_n.asm: Grab k7/aorrlshC_n.asm.
+	* mpn/x86/atom/rsblsh1_n.asm: Grab k7/aorrlshC_n.asm.
+
 2011-02-06 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/x86/k7/addlsh1_n.asm: New file.
diff -r 5d639287e647 -r 4e3f13fe4c88 configure.in
--- a/configure.in	Mon Feb 07 08:05:27 2011 +0100
+++ b/configure.in	Fri Feb 11 07:52:05 2011 +0100
@@ -3006,6 +3006,9 @@
 #undef HAVE_NATIVE_mpn_addlsh1_n
 #undef HAVE_NATIVE_mpn_addlsh2_n
 #undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
 #undef HAVE_NATIVE_mpn_addmul_1c
 #undef HAVE_NATIVE_mpn_addmul_2
 #undef HAVE_NATIVE_mpn_addmul_3
@@ -3062,6 +3065,9 @@
 #undef HAVE_NATIVE_mpn_rsblsh1_n
 #undef HAVE_NATIVE_mpn_rsblsh2_n
 #undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
 #undef HAVE_NATIVE_mpn_rsh1add_n
 #undef HAVE_NATIVE_mpn_rsh1add_nc
 #undef HAVE_NATIVE_mpn_rsh1sub_n
@@ -3075,6 +3081,9 @@
 #undef HAVE_NATIVE_mpn_sublsh1_n
 #undef HAVE_NATIVE_mpn_sublsh2_n
 #undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
 #undef HAVE_NATIVE_mpn_submul_1c
 #undef HAVE_NATIVE_mpn_udiv_qrnnd
 #undef HAVE_NATIVE_mpn_udiv_qrnnd_r
diff -r 5d639287e647 -r 4e3f13fe4c88 gmp-impl.h
--- a/gmp-impl.h	Mon Feb 07 08:05:27 2011 +0100
+++ b/gmp-impl.h	Fri Feb 11 07:52:05 2011 +0100
@@ -830,7 +830,7 @@
 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
-   returns the borrow out (FIXME 0, 1, 2 or 3).  */
+   returns the borrow out (0, ..., 4).  */
 #define mpn_sublsh2_n __MPN(sublsh2_n)
 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 #define mpn_sublsh2_nc __MPN(sublsh2_nc)
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/atom/aorrlsh2_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorrlsh2_n.asm	Fri Feb 11 07:52:05 2011 +0100
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+	define(M4_inst,        adcl)
+	define(M4_opp,         subl)
+	define(M4_function,    mpn_addlsh2_n)
+	define(M4_function_c,  mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+	define(M4_inst,        sbbl)
+	define(M4_opp,         addl)
+	define(M4_function,    mpn_rsblsh2_n)
+	define(M4_function_c,  mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/atom/rsblsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/rsblsh1_n.asm	Fri Feb 11 07:52:05 2011 +0100
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+	define(M4_inst,        adcl)
+	define(M4_opp,         subl)
+	define(M4_function,    mpn_addlsh1_n)
+	define(M4_function_c,  mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+	define(M4_inst,        sbbl)
+	define(M4_opp,         addl)
+	define(M4_function,    mpn_rsblsh1_n)
+	define(M4_function_c,  mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/k7/addlsh1_n.asm
--- a/mpn/x86/k7/addlsh1_n.asm	Mon Feb 07 08:05:27 2011 +0100
+++ b/mpn/x86/k7/addlsh1_n.asm	Fri Feb 11 07:52:05 2011 +0100
@@ -75,7 +75,7 @@
 define(`up',  `%esi')
 define(`vp',  `%ebp')
 
-	mov	$0x20000000, %eax
+	mov	$0x2aaaaaab, %eax
 
 	push	%ebx			FRAME_pushl()
 	movl	PARAM_SIZE, %ebx	C size
@@ -94,9 +94,9 @@
 	push	vp			FRAME_pushl()
 	movl	PARAM_DBLD, vp
 
-	leal	4(,%edx,4), %ecx	C count*4+4 = -(size\8)*4
+	leal	3(%edx,%edx,2), %ecx	C count*3+3 = -(size\6)*3
 	xorl	%edx, %edx
-	leal	(%ebx,%ecx,2), %ebx	C size + (count*4+4)*2 = size % 8
+	leal	(%ebx,%ecx,2), %ebx	C size + (count*3+3)*2 = size % 6
 	orl	%ebx, %ebx
 	jz	L(exact)
 
@@ -106,15 +106,15 @@
 	mov	(vp), %eax
 	adc	%eax, %eax
 	rcr	%edx			C restore 1st saved carry bit
+	lea	4(vp), vp
 	adc	(up), %eax
-	mov	%eax, (rp)
+	lea	4(up), up
 	adc	%edx, %edx		C save a carry bit in edx
-	lea	4(rp), rp
-	lea	4(up), up
-	lea	4(vp), vp
 ifdef(`CPU_P6',`
 	adc	%edx, %edx ')		C save another carry bit in edx
 	decl	%ebx
+	mov	%eax, (rp)
+	lea	4(rp), rp
 	jnz	L(oop)
 	movl	vp, VAR_TMP
 L(exact):
@@ -131,8 +131,6 @@
 	adc	%ebx, %ebx
 	mov	8(vp), %ecx
 	adc	%ecx, %ecx
-	mov	12(vp), vp
-	adc	vp, vp
 
 	rcr	%edx			C restore 1st saved carry bit
 
@@ -142,40 +140,30 @@
 	mov	%ebx, 4(rp)
 	adc	8(up), %ecx
 	mov	%ecx, 8(rp)
-	adc	12(up), vp
-	mov	vp, 12(rp)
 
-	movl	VAR_TMP, vp
+	mov	12(vp), %eax
+	adc	%eax, %eax
+	mov	16(vp), %ebx
+	adc	%ebx, %ebx
+	mov	20(vp), %ecx
+	adc	%ecx, %ecx
 
-	mov	16(vp), %eax
-	adc	%eax, %eax
-	mov	20(vp), %ebx
-	adc	%ebx, %ebx
-	mov	24(vp), %ecx
-	adc	%ecx, %ecx
-	mov	28(vp), vp
-	adc	vp, vp
-
+	lea	24(vp), vp
 	adc	%edx, %edx		C save a carry bit in edx
 
-	adc	16(up), %eax
-	mov	%eax, 16(rp)
-	adc	20(up), %ebx
-	mov	%ebx, 20(rp)
-	adc	24(up), %ecx
-	mov	%ecx, 24(rp)
-	adc	28(up), vp
-	mov	vp, 28(rp)
+	adc	12(up), %eax
+	mov	%eax, 12(rp)
+	adc	16(up), %ebx
+	mov	%ebx, 16(rp)
+	adc	20(up), %ecx
 
-	movl	VAR_TMP, vp
-	lea	32(rp), rp
-	lea	32(up), up
-	lea	32(vp), vp
+	lea	24(up), up
 
 ifdef(`CPU_P6',`
 	adc	%edx, %edx ')		C save another carry bit in edx
+	mov	%ecx, 20(rp)
 	incl	VAR_COUNT
-	movl	vp, VAR_TMP
+	lea	24(rp), rp
 	jne	L(top)
 
 L(end):
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/k7/aorrlshC_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/k7/aorrlshC_n.asm	Fri Feb 11 07:52:05 2011 +0100
@@ -0,0 +1,129 @@
+dnl  AMD K7 mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,


More information about the gmp-commit mailing list