[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Feb 21 22:18:05 CET 2011


details:   /var/hg/gmp/rev/f273f3687988
changeset: 13874:f273f3687988
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Feb 20 11:15:32 2011 +0100
description:
sublsh2 for Atom/32.

details:   /var/hg/gmp/rev/8da5a004ff8f
changeset: 13875:8da5a004ff8f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Feb 20 11:31:54 2011 +0100
description:
lshift for atom/32.

details:   /var/hg/gmp/rev/d82d695896cd
changeset: 13876:d82d695896cd
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Feb 21 22:06:51 2011 +0100
description:
Special lshift.1 for atom/32.

details:   /var/hg/gmp/rev/c5b80d4df0ed
changeset: 13877:c5b80d4df0ed
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Feb 21 22:17:00 2011 +0100
description:
Special lshift1 for atom/32

details:   /var/hg/gmp/rev/a8c2fdcab134
changeset: 13878:a8c2fdcab134
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Feb 21 22:17:58 2011 +0100
description:
Merge

diffstat:

 ChangeLog                           |   28 ++++
 gmp-impl.h                          |    2 +-
 mpn/x86/aors_n.asm                  |   10 +-
 mpn/x86/aorsmul_1.asm               |   20 +-
 mpn/x86/atom/addlsh1_n.asm          |   23 ---
 mpn/x86/atom/aorrlsh1_n.asm         |   42 ++++++
 mpn/x86/atom/aorslshC_n.asm         |  219 ++++++++++++++++++++++++++++++++++++
 mpn/x86/atom/aorsmul_1.asm          |   14 +-
 mpn/x86/atom/lshift.asm             |  170 +++++++++++++++++++++++++++-
 mpn/x86/atom/rsblsh1_n.asm          |   42 ------
 mpn/x86/atom/rshift.asm             |   26 ++--
 mpn/x86/atom/sublsh2_n.asm          |   46 +++++++
 mpn/x86/bdiv_dbm1c.asm              |   40 +++--
 mpn/x86/copyd.asm                   |   10 +-
 mpn/x86/copyi.asm                   |   10 +-
 mpn/x86/k6/aorsmul_1.asm            |   14 +-
 mpn/x86/k6/mul_1.asm                |   14 +-
 mpn/x86/k7/addlsh1_n.asm            |   10 +-
 mpn/x86/k7/aorsmul_1.asm            |   12 +-
 mpn/x86/k7/invert_limb.asm          |   10 +-
 mpn/x86/k7/mod_1_1.asm              |   10 +-
 mpn/x86/k7/mod_1_4.asm              |   10 +-
 mpn/x86/k7/mul_1.asm                |   10 +-
 mpn/x86/k7/sublsh1_n.asm            |   10 +-
 mpn/x86/lshift.asm                  |   12 +-
 mpn/x86/mod_34lsub1.asm             |   10 +-
 mpn/x86/mul_1.asm                   |   20 +-
 mpn/x86/mul_basecase.asm            |   10 +-
 mpn/x86/p6/aors_n.asm               |    8 +-
 mpn/x86/p6/aorsmul_1.asm            |   14 +-
 mpn/x86/pentium4/sse2/add_n.asm     |    8 +-
 mpn/x86/pentium4/sse2/addlsh1_n.asm |   10 +-
 mpn/x86/pentium4/sse2/addmul_1.asm  |   16 +-
 mpn/x86/pentium4/sse2/mod_1_1.asm   |   12 +-
 mpn/x86/pentium4/sse2/mod_1_4.asm   |   14 +-
 mpn/x86/pentium4/sse2/mul_1.asm     |   10 +-
 mpn/x86/pentium4/sse2/popcount.asm  |   34 ++--
 mpn/x86/pentium4/sse2/sub_n.asm     |    8 +-
 mpn/x86/pentium4/sse2/submul_1.asm  |   14 +-
 mpn/x86/rshift.asm                  |   12 +-
 mpn/x86/sqr_basecase.asm            |   10 +-
 mpq/aors.c                          |   12 +-
 mpz/divegcd.c                       |   55 ++++++++-
 43 files changed, 789 insertions(+), 302 deletions(-)

diffs (truncated from 1699 to 300 lines):

diff -r 153ddf846890 -r a8c2fdcab134 ChangeLog
--- a/ChangeLog	Sun Feb 20 10:59:12 2011 +0100
+++ b/ChangeLog	Mon Feb 21 22:17:58 2011 +0100
@@ -1,7 +1,35 @@
+2011-02-21  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86/atom/aorrlsh1_n.asm: New file (code from rsblsh1_n, slightly
+	slower for addlsh1_n for large operands, but much faster for small).
+	* mpn/x86/atom/addlsh1_n.asm: Remove.
+	* mpn/x86/atom/rsblsh1_n.asm: Remove.
+
+2011-02-20  Marc Glisse  <marc.glisse at inria.fr>
+
+	* mpq/aors.c: Rewrite to remove redundant division.
+
+2011-02-20  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86/atom/lshift.asm: New file.
+	* mpn/x86/atom/rshift.asm: Normalise mnemonic usage.
+
+	* gmp-impl.h (mpn_divexact_by7): Relax inclusion condition.
+
+	* mpz/divegcd.c (mpz_divexact_by5): New conditionally enabled function.
+	(mpz_divexact_by3): Wrap inside appropriate conditions.
+	(mpz_divexact_gcd): Rewrite.
+
+	* mpn/x86/bdiv_dbm1c.asm: Save a jump.
+
 2011-02-20 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
+	* mpn/x86/atom/aorslshC_n.asm: New file.
+	* mpn/x86/atom/sublsh2_n.asm: New file.
+
 	* mpn/x86/atom/aors_n.asm: New code.
 	* mpn/x86/atom/rshift.asm: Atom64 code adapted to 32-bit.
+	* mpn/x86/atom/lshift.asm: Likewise.
 
 2011-02-19  Torbjorn Granlund  <tege at gmplib.org>
 
diff -r 153ddf846890 -r a8c2fdcab134 gmp-impl.h
--- a/gmp-impl.h	Sun Feb 20 10:59:12 2011 +0100
+++ b/gmp-impl.h	Mon Feb 21 22:17:58 2011 +0100
@@ -1371,7 +1371,7 @@
   (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
 #endif
 
-#if GMP_NUMB_BITS % 6 == 0
+#if GMP_NUMB_BITS % 3 == 0
 #define mpn_divexact_by7(dst,src,size) \
   (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
 #endif
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/aors_n.asm
--- a/mpn/x86/aors_n.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aors_n.asm	Mon Feb 21 22:17:58 2011 +0100
@@ -22,11 +22,11 @@
 
 
 C     cycles/limb
-C P5:   3.375
-C P6:   3.125
-C K6:   3.5
-C K7:   2.25
-C P4:   8.75
+C P5	3.375
+C P6	3.125
+C K6	3.5
+C K7	2.25
+C P4	8.75
 
 
 ifdef(`OPERATION_add_n',`
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/aorsmul_1.asm
--- a/mpn/x86/aorsmul_1.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aorsmul_1.asm	Mon Feb 21 22:17:58 2011 +0100
@@ -22,19 +22,19 @@
 include(`../config.m4')
 
 
-C                           cycles/limb
-C P5:                           14.75
-C P6 model 0-8,10-12)            7.5
+C			    cycles/limb
+C P5				14.75
+C P6 model 0-8,10-12		 7.5
 C P6 model 9  (Banias)		 6.7
-C P6 model 13 (Dothan)           6.75
-C P4 model 0  (Willamette)      24.0
-C P4 model 1  (?)               24.0
-C P4 model 2  (Northwood)       24.0
+C P6 model 13 (Dothan)		 6.75
+C P4 model 0  (Willamette)	24.0
+C P4 model 1  (?)		24.0
+C P4 model 2  (Northwood)	24.0
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C K6:                           12.5
-C K7:                            5.25
-C K8:
+C AMD K6			12.5
+C AMD K7			 5.25
+C AMD K8
 
 
 ifdef(`OPERATION_addmul_1',`
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/atom/addlsh1_n.asm
--- a/mpn/x86/atom/addlsh1_n.asm	Sun Feb 20 10:59:12 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-dnl  Intel Atom mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n)
-include_mpn(`x86/k7/addlsh1_n.asm')
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/atom/aorrlsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorrlsh1_n.asm	Mon Feb 21 22:17:58 2011 +0100
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_addlsh1_n/mpn_rsblsh1_n -- rp[] = (vp[] << 1) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+	define(M4_inst,        adc)
+	define(M4_opp,         sub)
+	define(M4_function,    mpn_addlsh1_n)
+	define(M4_function_c,  mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+	define(M4_inst,        sbb)
+	define(M4_opp,         add)
+	define(M4_function,    mpn_rsblsh1_n)
+	define(M4_function_c,  mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/atom/aorrlshC_n.asm')
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/atom/aorslshC_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorslshC_n.asm	Mon Feb 21 22:17:58 2011 +0100
@@ -0,0 +1,219 @@
+dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_ip1_addlshC_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_ip1_addlshC_nc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C				mp_limb_t carry);
+C mp_limb_t mpn_ip1_sublshC_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_ip1_sublshC_nc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C				mp_signed_limb_t borrow);
+
+defframe(PARAM_CORB,	16)
+defframe(PARAM_SIZE,	12)
+defframe(PARAM_SRC,	 8)
+defframe(PARAM_DST,	 4)
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t borrow);
+
+C if src1 == dst, _ip1 is used
+
+defframe(GPARAM_CORB,	20)
+defframe(GPARAM_SIZE,	16)
+defframe(GPARAM_SRC2,	12)
+
+dnl  re-use parameter space
+define(SAVE_EBP,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp',  `%edi')
+define(`up',  `%esi')
+
+ASM_START()
+	TEXT
+	ALIGN(8)
+
+PROLOGUE(M4_ip_function_c)
+deflit(`FRAME',0)
+	movl	PARAM_CORB, %ecx
+	movl	%ecx, %edx
+	shr	$LSH, %edx
+	andl	$1, %edx
+	M4_opp	%edx, %ecx
+	jmp	L(start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_ip_function)
+deflit(`FRAME',0)
+
+	xor	%ecx, %ecx
+	xor	%edx, %edx
+L(start_nc):
+	push	rp			FRAME_pushl()
+	mov	PARAM_DST, rp
+	mov	up, SAVE_UP
+	mov	PARAM_SRC, up
+	mov	%ebx, SAVE_EBX
+	mov	PARAM_SIZE, %ebx	C size
+L(inplace):
+	incl	%ebx			C size + 1
+	shr	%ebx			C (size+1)\2
+	mov	%ebp, SAVE_EBP
+	jnc	L(entry)		C size odd
+
+	add	%edx, %edx		C size even
+	mov	%ecx, %ebp
+	mov	(up), %ecx
+	lea	-4(rp), rp
+	lea	(%ebp,%ecx,M), %eax
+	lea	4(up), up
+	jmp	L(enteven)
+
+	ALIGN(16)
+L(oop):
+	lea	(%ecx,%eax,M), %ebp
+	shr	$RSH, %eax
+	mov	4(up), %ecx
+	add	%edx, %edx
+	lea 	8(up), up
+	M4_inst	%ebp, (rp)
+	lea	(%eax,%ecx,M), %eax
+
+L(enteven):
+	M4_inst	%eax, 4(rp)
+	lea	8(rp), rp
+
+	sbb	%edx, %edx
+	shr	$RSH, %ecx
+
+L(entry):
+	mov	(up), %eax
+	decl	%ebx


More information about the gmp-commit mailing list