[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Mar 11 22:33:41 CET 2011
details: /var/hg/gmp/rev/a7959325467f
changeset: 14037:a7959325467f
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 10 23:35:58 2011 +0100
description:
*** empty log message ***
details: /var/hg/gmp/rev/2dc198f6c8d1
changeset: 14038:2dc198f6c8d1
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Mar 11 22:33:39 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 24 ++++
configure.in | 6 +
gmp-impl.h | 32 ++++++-
mpn/asm-defs.m4 | 7 +
mpn/generic/toom_interpolate_5pts.c | 4 +-
mpn/generic/toom_interpolate_6pts.c | 8 +-
mpn/generic/toom_interpolate_8pts.c | 4 +-
mpn/x86/atom/aorslshC_n.asm | 8 +-
mpn/x86/atom/sublsh1_n.asm | 2 +-
mpn/x86/atom/sublsh2_n.asm | 10 +-
mpn/x86/k7/sublsh1_n.asm | 174 +++++++----------------------------
tests/devel/try.c | 80 ++++++++++++++++-
tests/refmpn.c | 60 ++++++++++++
tests/tests.h | 9 +
tune/common.c | 42 ++++++++
tune/speed.c | 18 +++
tune/speed.h | 6 +
17 files changed, 335 insertions(+), 159 deletions(-)
diffs (truncated from 840 to 300 lines):
diff -r 747f8faeb304 -r 2dc198f6c8d1 ChangeLog
--- a/ChangeLog Thu Mar 10 22:31:09 2011 +0100
+++ b/ChangeLog Fri Mar 11 22:33:39 2011 +0100
@@ -1,7 +1,31 @@
+2011-03-11 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * gmp-impl.h (mpn_sublsh1_n_ip1): Declare.
+ * mpn/generic/toom_interpolate_5pts.c: Use mpn_sublsh1_n_ip1.
+
+ * tests/devel/try.c: Tests for {add,sub}lsh*_n_ip[12].
+ * tests/refmpn.c: New reference for mpn_{add,sub}lsh*_n_ip[12].
+ * tests/tests.h: Declarations for reference functions above.
+
+ * tune/common.c: New speed_mpn_{add,sub}lsh*_n_ip[12] functions.
+ * tune/speed.h: Prototypes for functions above.
+ * tune/speed.c: Support for mpn_{add,sub}lsh*_n_ip[12].
+
+ * mpn/x86/k7/sublsh1_n.asm: Replaced generic sublsh1 code with faster _ip1.
+ * mpn/x86/atom/sublsh1_n.asm: Changed PROLOGUE accordingly.
+
2011-03-10 Marc Glisse <marc.glisse at inria.fr>
* tests/cxx/t-istream.cc: Explicit conversion to streampos.
+2011-03-10 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/atom/sse2/mul_basecase.asm: Suppress wind-down rp updates.
+
+ * Move new aorrlsh_n.asm to new k8 dir. Revert
+ mpn/x86_64/aorrlsh_n.asm.
+ * configure.in: Setup path for new k8 directory.
+
2011-03-10 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/pentium4/sse2/bdiv_dbm1c.asm: New file, was in atom.
diff -r 747f8faeb304 -r 2dc198f6c8d1 configure.in
--- a/configure.in Thu Mar 10 22:31:09 2011 +0100
+++ b/configure.in Fri Mar 11 22:33:39 2011 +0100
@@ -3078,6 +3078,12 @@
#undef HAVE_NATIVE_mpn_sublsh1_nc
#undef HAVE_NATIVE_mpn_sublsh2_nc
#undef HAVE_NATIVE_mpn_sublsh_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh_nc_ip1
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_udiv_qrnnd
#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
diff -r 747f8faeb304 -r 2dc198f6c8d1 gmp-impl.h
--- a/gmp-impl.h Thu Mar 10 22:31:09 2011 +0100
+++ b/gmp-impl.h Fri Mar 11 22:33:39 2011 +0100
@@ -818,11 +818,25 @@
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
- returns the borrow out (0, 1 or 2). */
+ returns the borrow out (0, 1 or 2). Use _ip1 when a=c. */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_sublsh1_nc __MPN(sublsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
+#define mpn_sublsh1_n_ip1(dst,src,n) mpn_sublsh1_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_sublsh1_n_ip1 1
+#else
+#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#define mpn_sublsh1_nc_ip1(dst,src,n,c) mpn_sublsh1_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_sublsh1_nc_ip1 1
+#else
+#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+#endif
/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
returns the carry out (-1, 0, 1). */
@@ -832,11 +846,25 @@
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
- returns the borrow out (0, ..., 4). */
+ returns the borrow out (0, ..., 4). Use _ip1 when a=c. */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define mpn_sublsh2_n_ip1(dst,src,n) mpn_sublsh2_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_sublsh2_n_ip1 1
+#else
+#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#define mpn_sublsh2_nc_ip1(dst,src,n,c) mpn_sublsh2_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_sublsh2_nc_ip1 1
+#else
+#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+#endif
/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
returns the carry out (0, ..., 2^k). */
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/asm-defs.m4 Fri Mar 11 22:33:39 2011 +0100
@@ -1415,9 +1415,16 @@
define_mpn(sub_n)
define_mpn(sublsh1_n)
define_mpn(sublsh1_nc)
+define_mpn(sublsh1_n_ip1)
+define_mpn(sublsh1_nc_ip1)
define_mpn(sublsh2_n)
define_mpn(sublsh2_nc)
+define_mpn(sublsh2_n_ip1)
+define_mpn(sublsh2_nc_ip1)
define_mpn(sublsh_n)
+define_mpn(sublsh_nc)
+define_mpn(sublsh_n_ip1)
+define_mpn(sublsh_nc_ip1)
define_mpn(sqrtrem)
define_mpn(sub)
define_mpn(sub_1)
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/generic/toom_interpolate_5pts.c
--- a/mpn/generic/toom_interpolate_5pts.c Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/generic/toom_interpolate_5pts.c Fri Mar 11 22:33:39 2011 +0100
@@ -126,8 +126,8 @@
result is v2 >= 0 */
saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */
vinf[0] = vinf0; /* Set the right value for vinf0 */
-#ifdef HAVE_NATIVE_mpn_sublsh1_n
- cy = mpn_sublsh1_n (v2, v2, vinf, twor);
+#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
+ cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
#else
/* Overwrite unused vm1 */
cy = mpn_lshift (vm1, vinf, twor, 1);
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/generic/toom_interpolate_6pts.c
--- a/mpn/generic/toom_interpolate_6pts.c Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/generic/toom_interpolate_6pts.c Fri Mar 11 22:33:39 2011 +0100
@@ -167,11 +167,11 @@
MPN_INCR_U (pp + 3 * n + 1, n, cy);
/* W2 -= W0<<2 */
-#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n
-#if HAVE_NATIVE_mpn_sublsh2_n
- cy = mpn_sublsh2_n(w2, w2, w0, w0n);
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+ cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
#else
- cy = mpn_sublsh_n(w2, w2, w0, w0n, 2);
+ cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
#endif
#else
/* {W4,2*n+1} is now free and can be overwritten. */
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/generic/toom_interpolate_8pts.c
--- a/mpn/generic/toom_interpolate_8pts.c Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/generic/toom_interpolate_8pts.c Fri Mar 11 22:33:39 2011 +0100
@@ -54,8 +54,8 @@
#endif
#endif
-#if HAVE_NATIVE_mpn_sublsh2_n
-#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n(dst,dst,src,n)
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
#else
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
#endif
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/x86/atom/aorslshC_n.asm
--- a/mpn/x86/atom/aorslshC_n.asm Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/x86/atom/aorslshC_n.asm Fri Mar 11 22:33:39 2011 +0100
@@ -21,11 +21,11 @@
include(`../config.m4')
-C mp_limb_t mpn_ip1_addlshC_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
-C mp_limb_t mpn_ip1_addlshC_nc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_limb_t carry);
-C mp_limb_t mpn_ip1_sublshC_n (mp_ptr dst, mp_srcptr src, mp_size_t size,);
-C mp_limb_t mpn_ip1_sublshC_nc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C mp_signed_limb_t borrow);
defframe(PARAM_CORB, 16)
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/x86/atom/sublsh1_n.asm
--- a/mpn/x86/atom/sublsh1_n.asm Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/x86/atom/sublsh1_n.asm Fri Mar 11 22:33:39 2011 +0100
@@ -19,5 +19,5 @@
include(`../config.m4')
-MULFUNC_PROLOGUE(mpn_sublsh1_n)
+MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1)
include_mpn(`x86/k7/sublsh1_n.asm')
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/x86/atom/sublsh2_n.asm
--- a/mpn/x86/atom/sublsh2_n.asm Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/x86/atom/sublsh2_n.asm Fri Mar 11 22:33:39 2011 +0100
@@ -29,18 +29,18 @@
define(M4_opp, subl)
define(M4_function, mpn_addlsh2_n)
define(M4_function_c, mpn_addlsh2_nc)
- define(M4_ip_function_c, mpn_ip1_addlsh2_nc)
- define(M4_ip_function, mpn_ip1_addlsh2_n)
+ define(M4_ip_function_c, mpn_addlsh2_nc_ip1)
+ define(M4_ip_function, mpn_addlsh2_n_ip1)
',`ifdef(`OPERATION_sublsh2_n', `
define(M4_inst, sbbl)
define(M4_opp, addl)
define(M4_function, mpn_sublsh2_n)
define(M4_function_c, mpn_sublsh2_nc)
- define(M4_ip_function_c, mpn_ip1_sublsh2_nc)
- define(M4_ip_function, mpn_ip1_sublsh2_n)
+ define(M4_ip_function_c, mpn_sublsh2_nc_ip1)
+ define(M4_ip_function, mpn_sublsh2_n_ip1)
',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n
')')')
-MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_ip1_sublsh2_n mpn_ip1_sublsh2_nc)
+MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1)
include_mpn(`x86/atom/aorslshC_n.asm')
diff -r 747f8faeb304 -r 2dc198f6c8d1 mpn/x86/k7/sublsh1_n.asm
--- a/mpn/x86/k7/sublsh1_n.asm Thu Mar 10 22:31:09 2011 +0100
+++ b/mpn/x86/k7/sublsh1_n.asm Fri Mar 11 22:33:39 2011 +0100
@@ -1,4 +1,4 @@
-dnl AMD K7 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+dnl AMD K7 mpn_sublsh1_n_ip1 -- rp[] = rp[] - (up[] << 1)
dnl Copyright 2011 Free Software Foundation, Inc.
@@ -42,211 +42,109 @@
C AMD K8
C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32
-C processors. It uses 2*3-way unrolling, for good reasons. Unfortunately,
-C that means we need an initial magic multiply.
+C processors. It uses 2*4-way unrolling, for good reasons.
C
C Breaking carry recurrency might be a good idea. We would then need separate
C registers for the shift carry and add/subtract carry, which in turn would
C force us to 2*2-way unrolling.
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_DBLD, 12)
+defframe(PARAM_SIZE, 12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)
dnl re-use parameter space
define(VAR_COUNT,`PARAM_SIZE')
-define(VAR_TMP,`PARAM_DBLD')
define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_VP,`PARAM_DST')
+define(SAVE_EBP,`PARAM_DST')
ASM_START()
TEXT
ALIGN(8)
-PROLOGUE(mpn_sublsh1_n)
+PROLOGUE(mpn_sublsh1_n_ip1)
deflit(`FRAME',0)
define(`rp', `%edi')
define(`up', `%esi')
-define(`vp', `%ebp')
+ movl PARAM_SIZE, %eax C size
push up FRAME_pushl()
+ push rp FRAME_pushl()
+ xorl %edx, %edx
movl PARAM_SRC, up
- push rp FRAME_pushl()
movl PARAM_DST, rp
movl %ebx, SAVE_EBX
- movl PARAM_SIZE, %ebx C size
- movl vp, SAVE_VP
- movl PARAM_DBLD, vp
- cmp up, rp
- je L(inplace)
+ movl %eax, %ebx
+ shr $3, %eax
- mov $0x2aaaaaab, %eax
- mull %ebx
-
- not %edx C count = -(size\6)-1
- mov %edx, VAR_COUNT
-
- leal 3(%edx,%edx,2), %ecx C count*3+3 = -(size\6)*3
- xorl %edx, %edx
More information about the gmp-commit
mailing list