[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Mar 3 09:29:07 CET 2011
details: /var/hg/gmp/rev/524c209c9e6a
changeset: 13985:524c209c9e6a
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 03 09:07:07 2011 +0100
description:
Comment fixes.
details: /var/hg/gmp/rev/9fde52331a39
changeset: 13986:9fde52331a39
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 03 09:11:07 2011 +0100
description:
Disable mpn_rsblsh_nc due to carry-in issues.
details: /var/hg/gmp/rev/16a9ccb257a7
changeset: 13987:16a9ccb257a7
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 03 09:29:02 2011 +0100
description:
(mpn_addlsh_nc, mpn_rsblsh_nc): Declare.
diffstat:
ChangeLog | 13 ++++++++++---
gmp-impl.h | 8 ++++++--
mpn/asm-defs.m4 | 2 ++
mpn/x86_64/coreinhm/aorrlsh_n.asm | 8 +++++---
mpn/x86_64/coreisbr/aorrlsh2_n.asm | 4 +++-
mpn/x86_64/coreisbr/aorrlshC_n.asm | 8 +++-----
mpn/x86_64/coreisbr/aorrlsh_n.asm | 8 +++++---
7 files changed, 34 insertions(+), 17 deletions(-)
diffs (154 lines):
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 ChangeLog
--- a/ChangeLog Thu Mar 03 00:10:19 2011 +0100
+++ b/ChangeLog Thu Mar 03 09:29:02 2011 +0100
@@ -1,9 +1,16 @@
2011-03-03 Torbjorn Granlund <tege at gmplib.org>
+ * gmp-impl.h (mpn_addlsh_nc, mpn_rsblsh_nc): Declare.
+ * mpn/asm-defs.m4: Likewise.
+
+ * mpn/x86_64/coreisbr/aorrlsh_n.asm: Disable mpn_rsblsh_n due to
+ carry-in issues.
+ * mpn/x86_64/coreinhm/aorrlsh_n.asm: Likewise.
+ * mpn/x86_64/coreisbr/aorrlsh2_n.asm: Likewise.
+
+2011-03-02 Torbjorn Granlund <tege at gmplib.org>
+
* mpn/x86_64/coreinhm/aorrlsh_n.asm: New file.
-
-2011-03-02 Torbjorn Granlund <tege at gmplib.org>
-
* mpn/x86_64/coreisbr/aorrlsh_n.asm: New file.
2011-03-01 Niels Möller <nisse at lysator.liu.se>
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 gmp-impl.h
--- a/gmp-impl.h Thu Mar 03 00:10:19 2011 +0100
+++ b/gmp-impl.h Thu Mar 03 09:29:02 2011 +0100
@@ -813,7 +813,9 @@
/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
returns the carry out (0, ..., 2^k). */
#define mpn_addlsh_n __MPN(addlsh_n)
- __GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+#define mpn_addlsh_nc __MPN(addlsh_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
returns the borrow out (0, 1 or 2). */
@@ -839,7 +841,7 @@
/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
returns the carry out (0, ..., 2^k). */
#define mpn_sublsh_n __MPN(sublsh_n)
- __GMP_DECLSPEC mp_limb_t mpn_sublsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
returns the carry out (-1, ..., 3). */
@@ -852,6 +854,8 @@
returns the carry out (-1, 0, ..., 2^k-1). */
#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+#define mpn_rsblsh_nc __MPN(rsblsh_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
and returns the bit rshifted out (0 or 1). */
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/asm-defs.m4 Thu Mar 03 09:29:02 2011 +0100
@@ -1313,6 +1313,7 @@
define_mpn(addlsh2_n)
define_mpn(addlsh2_nc)
define_mpn(addlsh_n)
+define_mpn(addlsh_nc)
define_mpn(addmul_1)
define_mpn(addmul_1c)
define_mpn(addmul_2)
@@ -1398,6 +1399,7 @@
define_mpn(rsblsh2_n)
define_mpn(rsblsh2_nc)
define_mpn(rsblsh_n)
+define_mpn(rsblsh_nc)
define_mpn(rsh1add_n)
define_mpn(rsh1add_nc)
define_mpn(rsh1sub_n)
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreinhm/aorrlsh_n.asm
--- a/mpn/x86_64/coreinhm/aorrlsh_n.asm Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreinhm/aorrlsh_n.asm Thu Mar 03 09:29:02 2011 +0100
@@ -33,8 +33,8 @@
C Intel atom ?
C VIA nano ?
-C The inner-loop probably runs close to optimally on Nehalem using 4-way
-C unrolling. The rest of the code is quite crude, and could perhaps be made
+C The inner-loop probably runs close to optimally on Nehalem (using 4-way
+C unrolling). The rest of the code is quite crude, and could perhaps be made
C both smaller and faster.
C INPUT PARAMETERS
@@ -58,7 +58,9 @@
define(func_n, mpn_rsblsh_n)
define(func_nc, mpn_rsblsh_nc)')
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n mpn_rsblsh_nc)
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
ASM_START()
TEXT
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreisbr/aorrlsh2_n.asm
--- a/mpn/x86_64/coreisbr/aorrlsh2_n.asm Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlsh2_n.asm Thu Mar 03 09:29:02 2011 +0100
@@ -36,5 +36,7 @@
define(func_n, mpn_rsblsh2_n)
define(func_nc, mpn_rsblsh2_nc)')
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+C mpn_rsblsh2_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh2_nc
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n)
include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreisbr/aorrlshC_n.asm
--- a/mpn/x86_64/coreisbr/aorrlshC_n.asm Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlshC_n.asm Thu Mar 03 09:29:02 2011 +0100
@@ -29,11 +29,9 @@
C Intel atom ?
C VIA nano ?
-C This code probably runs close to optimally on Sandy Bridge, and reasonably
-C well on Core 2, but it runs poorly on all other processors, including Nehalem
-C (NHM).
-C
-C The carry handling is prepared for _nc variants. If we choose to
+C This code probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling). It also runs reasonably well on Core 2, but it runs poorly on
+C all other processors, including Nehalem.
C INPUT PARAMETERS
define(`rp', `%rdi')
diff -r 58bf25d0d0a1 -r 16a9ccb257a7 mpn/x86_64/coreisbr/aorrlsh_n.asm
--- a/mpn/x86_64/coreisbr/aorrlsh_n.asm Thu Mar 03 00:10:19 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlsh_n.asm Thu Mar 03 09:29:02 2011 +0100
@@ -33,8 +33,8 @@
C Intel atom ?
C VIA nano ?
-C The inner-loop probably runs close to optimally on Sandy Bridge using 4-way
-C unrolling. The rest of the code is quite crude, and could perhaps be made
+C The inner-loop probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling). The rest of the code is quite crude, and could perhaps be made
C both smaller and faster.
C INPUT PARAMETERS
@@ -58,7 +58,9 @@
define(func_n, mpn_rsblsh_n)
define(func_nc, mpn_rsblsh_nc)')
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n mpn_rsblsh_nc)
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
ASM_START()
TEXT
More information about the gmp-commit mailing list