[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Feb 5 20:22:48 CET 2011


details:   /home/hgfiles/gmp/rev/19d332f71577
changeset: 13829:19d332f71577
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 05 20:14:57 2011 +0100
description:
New files for Intel SBR.

details:   /home/hgfiles/gmp/rev/e243a052828b
changeset: 13830:e243a052828b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 05 20:21:37 2011 +0100
description:
(mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc, mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare.

details:   /home/hgfiles/gmp/rev/f744ec32bf32
changeset: 13831:f744ec32bf32
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 05 20:22:04 2011 +0100
description:
(mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc, mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare.

details:   /home/hgfiles/gmp/rev/1712752e4f47
changeset: 13832:1712752e4f47
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 05 20:22:41 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                          |   10 ++
 gmp-impl.h                         |   12 ++
 mpn/asm-defs.m4                    |    6 +
 mpn/x86_64/coreisbr/aorrlsh1_n.asm |   40 +++++++++
 mpn/x86_64/coreisbr/aorrlsh2_n.asm |   40 +++++++++
 mpn/x86_64/coreisbr/aorrlshC_n.asm |  157 +++++++++++++++++++++++++++++++++++++
 mpz/lucnum_ui.c                    |    5 +-
 7 files changed, 268 insertions(+), 2 deletions(-)

diffs (truncated from 373 to 300 lines):

diff -r 7cba880ba762 -r 1712752e4f47 ChangeLog
--- a/ChangeLog	Sat Feb 05 14:51:43 2011 +0100
+++ b/ChangeLog	Sat Feb 05 20:22:41 2011 +0100
@@ -1,5 +1,13 @@
 2011-02-05  Torbjorn Granlund  <tege at gmplib.org>
 
+	* gmp-impl.h (mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc,
+	mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare.
+	* mpn/asm-defs.m4: Likewise.
+
+	* mpn/x86_64/coreisbr/aorrlshC_n.asm: New file.
+	* mpn/x86_64/coreisbr/aorrlsh1_n.asm: New file.
+	* mpn/x86_64/coreisbr/aorrlsh2_n.asm: New file.
+
 	* mpn/x86_64/coreisbr/aors_n.asm: New file, based on old
 	atom/aors_n.asm.
 	* mpn/x86_64/atom/aors_n.asm: Grab coreisbr/aors_n.asm.
@@ -12,6 +20,8 @@
 	* tests/mpn/t-toom6h.c: No tests below MPN_TOOM6H_MIN.
 	* tests/mpn/t-toom8h.c: No tests below MPN_TOOM8H_MIN.
 
+	* mpz/lucnum_ui.c: Use mpn_addlsh2_n.
+	
 2011-02-04  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/atom/rsh1aors_n.asm: Add a MULFUNC_PROLOGUE.
diff -r 7cba880ba762 -r 1712752e4f47 gmp-impl.h
--- a/gmp-impl.h	Sat Feb 05 14:51:43 2011 +0100
+++ b/gmp-impl.h	Sat Feb 05 20:22:41 2011 +0100
@@ -800,11 +800,15 @@
    returns the carry out (0, 1 or 2).  */
 #define mpn_addlsh1_n __MPN(addlsh1_n)
 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_addlsh1_nc __MPN(addlsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
    returns the carry out (0, ..., 4).  */
 #define mpn_addlsh2_n __MPN(addlsh2_n)
 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_addlsh2_nc __MPN(addlsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
    returns the carry out (0, ..., 2^k).  */
@@ -815,21 +819,29 @@
    returns the borrow out (0, 1 or 2).  */
 #define mpn_sublsh1_n __MPN(sublsh1_n)
 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_sublsh1_nc __MPN(sublsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
    returns the carry out (-1, 0, 1).  */
 #define mpn_rsblsh1_n __MPN(rsblsh1_n)
 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
    returns the borrow out (FIXME 0, 1, 2 or 3).  */
 #define mpn_sublsh2_n __MPN(sublsh2_n)
 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_sublsh2_nc __MPN(sublsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
    returns the carry out (-1, ..., 3).  */
 #define mpn_rsblsh2_n __MPN(rsblsh2_n)
 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 /* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
    returns the carry out (-1, 0, ..., 2^k-1).  */
diff -r 7cba880ba762 -r 1712752e4f47 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4	Sat Feb 05 14:51:43 2011 +0100
+++ b/mpn/asm-defs.m4	Sat Feb 05 20:22:41 2011 +0100
@@ -1309,7 +1309,9 @@
 define_mpn(add_n)
 define_mpn(add_nc)
 define_mpn(addlsh1_n)
+define_mpn(addlsh1_nc)
 define_mpn(addlsh2_n)
+define_mpn(addlsh2_nc)
 define_mpn(addlsh_n)
 define_mpn(addmul_1)
 define_mpn(addmul_1c)
@@ -1392,7 +1394,9 @@
 define_mpn(redc_1)
 define_mpn(redc_2)
 define_mpn(rsblsh1_n)
+define_mpn(rsblsh1_nc)
 define_mpn(rsblsh2_n)
+define_mpn(rsblsh2_nc)
 define_mpn(rsblsh_n)
 define_mpn(rsh1add_n)
 define_mpn(rsh1add_nc)
@@ -1408,7 +1412,9 @@
 define_mpn(sqr_diag_addlsh1)
 define_mpn(sub_n)
 define_mpn(sublsh1_n)
+define_mpn(sublsh1_nc)
 define_mpn(sublsh2_n)
+define_mpn(sublsh2_nc)
 define_mpn(sqrtrem)
 define_mpn(sub)
 define_mpn(sub_1)
diff -r 7cba880ba762 -r 1712752e4f47 mpn/x86_64/coreisbr/aorrlsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aorrlsh1_n.asm	Sat Feb 05 20:22:41 2011 +0100
@@ -0,0 +1,40 @@
+dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+ifdef(`OPERATION_addlsh1_n', `
+	define(ADDSUB,	add)
+	define(ADCSBB,	adc)
+	define(func_n,	mpn_addlsh1_n)
+	define(func_nc,	mpn_addlsh1_nc)')
+ifdef(`OPERATION_rsblsh1_n', `
+	define(ADDSUB,	sub)
+	define(ADCSBB,	sbb)
+	define(func_n,	mpn_rsblsh1_n)
+	define(func_nc,	mpn_rsblsh1_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff -r 7cba880ba762 -r 1712752e4f47 mpn/x86_64/coreisbr/aorrlsh2_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aorrlsh2_n.asm	Sat Feb 05 20:22:41 2011 +0100
@@ -0,0 +1,40 @@
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+ifdef(`OPERATION_addlsh2_n', `
+	define(ADDSUB,	add)
+	define(ADCSBB,	adc)
+	define(func_n,	mpn_addlsh2_n)
+	define(func_nc,	mpn_addlsh2_nc)')
+ifdef(`OPERATION_rsblsh2_n', `
+	define(ADDSUB,	sub)
+	define(ADCSBB,	sbb)
+	define(func_n,	mpn_rsblsh2_n)
+	define(func_nc,	mpn_rsblsh2_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff -r 7cba880ba762 -r 1712752e4f47 mpn/x86_64/coreisbr/aorrlshC_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/coreisbr/aorrlshC_n.asm	Sat Feb 05 20:22:41 2011 +0100
@@ -0,0 +1,157 @@
+dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl  Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+C	     cycles/limb
+C AMD K8,K9	 ?
+C AMD K10	 ?
+C Intel P4	 ?
+C Intel core2	 3.25
+C Intel NHM	 4
+C Intel SBR	 2  C (or 1.95 when L(top)'s alignment = 16 (mod 32))
+C Intel atom	 ?
+C VIA nano	 ?
+
+C This code probably runs close to optimally on Sandy Bridge, and reasonably
+C well on Core 2, but it runs poorly on all other processors, including Nehalem
+C (NHM).
+C
+C The carry handling is prepared for _nc variants.  If we choose to 
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+define(`cy',	`%r8')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func_nc)
+	push	%rbp
+	mov	cy, %rax
+	neg	%rax			C set msb on carry
+	xor	R32(%rbp), R32(%rbp)	C limb carry
+	mov	(vp), %r8
+	shrd	$RSH, %r8, %rbp
+	mov	R32(n), R32(%r9)
+	and	$3, R32(%r9)
+	je	L(b00)
+	cmp	$2, R32(%r9)
+	jc	L(b01)
+	je	L(b10)
+	jmp	L(b11)
+EPILOGUE()
+
+	ALIGN(16)
+PROLOGUE(func_n)
+	push	%rbp
+	xor	R32(%rbp), R32(%rbp)	C limb carry
+	mov	(vp), %r8
+	shrd	$RSH, %r8, %rbp
+	mov	R32(n), R32(%rax)
+	and	$3, R32(%rax)
+	je	L(b00)
+	cmp	$2, R32(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	mov	8(vp), %r9
+	shrd	$RSH, %r9, %r8
+	mov	16(vp), %r10
+	shrd	$RSH, %r10, %r9
+	add	R32(%rax), R32(%rax)	C init carry flag
+	ADCSBB	(up), %rbp
+	ADCSBB	8(up), %r8
+	ADCSBB	16(up), %r9
+	mov	%rbp, (rp)
+	mov	%r8, 8(rp)
+	mov	%r9, 16(rp)
+	mov	%r10, %rbp
+	lea	24(up), up
+	lea	24(vp), vp
+	lea	24(rp), rp
+	sbb	R32(%rax), R32(%rax)	C save carry flag
+	sub	$3, n
+	ja	L(top)
+	jmp	L(end)
+
+L(b01):	add	R32(%rax), R32(%rax)	C init carry flag
+	ADCSBB	(up), %rbp
+	mov	%rbp, (rp)
+	mov	%r8, %rbp


More information about the gmp-commit mailing list