[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Mar 3 09:29:07 CET 2011


details:   /var/hg/gmp/rev/524c209c9e6a
changeset: 13985:524c209c9e6a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 03 09:07:07 2011 +0100
description:
Comment fixes.

details:   /var/hg/gmp/rev/9fde52331a39
changeset: 13986:9fde52331a39
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 03 09:11:07 2011 +0100
description:
Disable mpn_rsblsh_nc due to carry-in issues.

details:   /var/hg/gmp/rev/16a9ccb257a7
changeset: 13987:16a9ccb257a7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 03 09:29:02 2011 +0100
description:
(mpn_addlsh_nc, mpn_rsblsh_nc): Declare.

diffstat:

 ChangeLog                          |  13 ++++++++++---
 gmp-impl.h                         |   8 ++++++--
 mpn/asm-defs.m4                    |   2 ++
 mpn/x86_64/coreinhm/aorrlsh_n.asm  |   8 +++++---
 mpn/x86_64/coreisbr/aorrlsh2_n.asm |   4 +++-
 mpn/x86_64/coreisbr/aorrlshC_n.asm |   8 +++-----
 mpn/x86_64/coreisbr/aorrlsh_n.asm  |   8 +++++---
 7 files changed, 34 insertions(+), 17 deletions(-)

diffs (154 lines):

diff -r 58bf25d0d0a1 -r 16a9ccb257a7 ChangeLog
--- a/ChangeLog	Thu Mar 03 00:10:19 2011 +0100
+++ b/ChangeLog	Thu Mar 03 09:29:02 2011 +0100
@@ -1,9 +1,16 @@
 2011-03-03  Torbjorn Granlund  <tege at gmplib.org>
 
+	* gmp-impl.h (mpn_addlsh_nc, mpn_rsblsh_nc): Declare.
+	* mpn/asm-defs.m4: Likewise.
+
+	* mpn/x86_64/coreisbr/aorrlsh_n.asm: Disable mpn_rsblsh_nc due to
+	carry-in issues.
+	* mpn/x86_64/coreinhm/aorrlsh_n.asm: Likewise.
+	* mpn/x86_64/coreisbr/aorrlsh2_n.asm: Likewise.
+
+2011-03-02  Torbjorn Granlund  <tege at gmplib.org>
+
 	* mpn/x86_64/coreinhm/aorrlsh_n.asm: New file.
-
-2011-03-02  Torbjorn Granlund  <tege at gmplib.org>
-
 	* mpn/x86_64/coreisbr/aorrlsh_n.asm: New file.
 
 2011-03-01  Niels Möller  <nisse at lysator.liu.se>
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 gmp-impl.h
--- a/gmp-impl.h	Thu Mar 03 00:10:19 2011 +0100
+++ b/gmp-impl.h	Thu Mar 03 09:29:02 2011 +0100
@@ -813,7 +813,9 @@
 /* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
    returns the carry out (0, ..., 2^k).  */
 #define mpn_addlsh_n __MPN(addlsh_n)
-  __GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+#define mpn_addlsh_nc __MPN(addlsh_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
 
 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
    returns the borrow out (0, 1 or 2).  */
@@ -839,7 +841,7 @@
 /* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
    returns the carry out (0, ..., 2^k).  */
 #define mpn_sublsh_n __MPN(sublsh_n)
-  __GMP_DECLSPEC mp_limb_t mpn_sublsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
 
 /* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
    returns the carry out (-1, ..., 3).  */
@@ -852,6 +854,8 @@
    returns the carry out (-1, 0, ..., 2^k-1).  */
 #define mpn_rsblsh_n __MPN(rsblsh_n)
 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+#define mpn_rsblsh_nc __MPN(rsblsh_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
 
 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
    and returns the bit rshifted out (0 or 1).  */
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4	Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/asm-defs.m4	Thu Mar 03 09:29:02 2011 +0100
@@ -1313,6 +1313,7 @@
 define_mpn(addlsh2_n)
 define_mpn(addlsh2_nc)
 define_mpn(addlsh_n)
+define_mpn(addlsh_nc)
 define_mpn(addmul_1)
 define_mpn(addmul_1c)
 define_mpn(addmul_2)
@@ -1398,6 +1399,7 @@
 define_mpn(rsblsh2_n)
 define_mpn(rsblsh2_nc)
 define_mpn(rsblsh_n)
+define_mpn(rsblsh_nc)
 define_mpn(rsh1add_n)
 define_mpn(rsh1add_nc)
 define_mpn(rsh1sub_n)
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreinhm/aorrlsh_n.asm
--- a/mpn/x86_64/coreinhm/aorrlsh_n.asm	Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreinhm/aorrlsh_n.asm	Thu Mar 03 09:29:02 2011 +0100
@@ -33,8 +33,8 @@
 C Intel atom	 ?
 C VIA nano	 ?
 
-C The inner-loop probably runs close to optimally on Nehalem using 4-way
-C unrolling.  The rest of the code is quite crude, and could perhaps be made
+C The inner-loop probably runs close to optimally on Nehalem (using 4-way
+C unrolling).  The rest of the code is quite crude, and could perhaps be made
 C both smaller and faster.
 
 C INPUT PARAMETERS
@@ -58,7 +58,9 @@
 	define(func_n,	mpn_rsblsh_n)
 	define(func_nc,	mpn_rsblsh_nc)')
 
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n mpn_rsblsh_nc)
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
 
 ASM_START()
 	TEXT
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreisbr/aorrlsh2_n.asm
--- a/mpn/x86_64/coreisbr/aorrlsh2_n.asm	Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlsh2_n.asm	Thu Mar 03 09:29:02 2011 +0100
@@ -36,5 +36,7 @@
 	define(func_n,	mpn_rsblsh2_n)
 	define(func_nc,	mpn_rsblsh2_nc)')
 
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+C mpn_rsblsh2_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh2_nc
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n)
 include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreisbr/aorrlshC_n.asm
--- a/mpn/x86_64/coreisbr/aorrlshC_n.asm	Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlshC_n.asm	Thu Mar 03 09:29:02 2011 +0100
@@ -29,11 +29,9 @@
 C Intel atom	 ?
 C VIA nano	 ?
 
-C This code probably runs close to optimally on Sandy Bridge, and reasonably
-C well on Core 2, but it runs poorly on all other processors, including Nehalem
-C (NHM).
-C
-C The carry handling is prepared for _nc variants.  If we choose to 
+C This code probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling).  It also runs reasonably well on Core 2, but it runs poorly on
+C all other processors, including Nehalem.
 
 C INPUT PARAMETERS
 define(`rp',	`%rdi')
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreisbr/aorrlsh_n.asm
--- a/mpn/x86_64/coreisbr/aorrlsh_n.asm	Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlsh_n.asm	Thu Mar 03 09:29:02 2011 +0100
@@ -33,8 +33,8 @@
 C Intel atom	 ?
 C VIA nano	 ?
 
-C The inner-loop probably runs close to optimally on Sandy Bridge using 4-way
-C unrolling.  The rest of the code is quite crude, and could perhaps be made
+C The inner-loop probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling).  The rest of the code is quite crude, and could perhaps be made
 C both smaller and faster.
 
 C INPUT PARAMETERS
@@ -58,7 +58,9 @@
 	define(func_n,	mpn_rsblsh_n)
 	define(func_nc,	mpn_rsblsh_nc)')
 
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n mpn_rsblsh_nc)
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
 
 ASM_START()
 	TEXT


More information about the gmp-commit mailing list