[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Mar 12 01:44:23 CET 2011
details: /var/hg/gmp/rev/2b284d06d3c0
changeset: 14041:2b284d06d3c0
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Mar 11 22:46:48 2011 +0100
description:
Clean up udiv_qrnnd_preinv macros.
details: /var/hg/gmp/rev/8d1214489594
changeset: 14042:8d1214489594
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Mar 12 01:44:21 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 5 +-
configure.in | 12 +++
gmp-impl.h | 180 +++++++++++++++++++++++------------------------------
mpn/asm-defs.m4 | 12 +++
tests/devel/try.c | 84 ++++++++++++++++++++++---
tune/common.c | 21 ++++++
tune/speed.c | 9 ++
tune/speed.h | 3 +
8 files changed, 213 insertions(+), 113 deletions(-)
diffs (truncated from 492 to 300 lines):
diff -r 2dc198f6c8d1 -r 8d1214489594 ChangeLog
--- a/ChangeLog Fri Mar 11 22:33:39 2011 +0100
+++ b/ChangeLog Sat Mar 12 01:44:21 2011 +0100
@@ -1,6 +1,6 @@
2011-03-11 Marco Bodrato <bodrato at mail.dm.unipi.it>
- * gmp-impl.h (mpn_sublsh1_n_ip1): Declare.
+ * gmp-impl.h: Declare many mpn_{sub,add}lsh*_n_ip[12] functions/macros.
* mpn/generic/toom_interpolate_5pts.c: Use mpn_sublsh1_n_ip1.
* tests/devel/try.c: Tests for {add,sub}lsh*_n_ip[12].
@@ -14,6 +14,9 @@
* mpn/x86/k7/sublsh1_n.asm: Replaced generic sublsh1 code with faster _ip1.
* mpn/x86/atom/sublsh1_n.asm: Changed PROLOGUE accordingly.
+ * configure.in: Define HAVE_NATIVE_mpn_addlsh*_n*_ip[12].
+ * mpn/asm-defs.m4: Declare mpn_addlsh*_n*_ip[12].
+
2011-03-10 Marc Glisse <marc.glisse at inria.fr>
* tests/cxx/t-istream.cc: Explicit conversion to streampos.
diff -r 2dc198f6c8d1 -r 8d1214489594 configure.in
--- a/configure.in Fri Mar 11 22:33:39 2011 +0100
+++ b/configure.in Sat Mar 12 01:44:21 2011 +0100
@@ -3003,6 +3003,18 @@
#undef HAVE_NATIVE_mpn_addlsh1_nc
#undef HAVE_NATIVE_mpn_addlsh2_nc
#undef HAVE_NATIVE_mpn_addlsh_nc
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip2
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
diff -r 2dc198f6c8d1 -r 8d1214489594 gmp-impl.h
--- a/gmp-impl.h Fri Mar 11 22:33:39 2011 +0100
+++ b/gmp-impl.h Sat Mar 12 01:44:21 2011 +0100
@@ -797,25 +797,67 @@
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
- returns the carry out (0, 1 or 2). */
+ returns the carry out (0, 1 or 2). Use _ip1 when a=c. */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_addlsh1_nc __MPN(addlsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define mpn_addlsh1_n_ip1(dst,src,n) mpn_addlsh1_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_addlsh1_n_ip1 1
+#else
+#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#define mpn_addlsh1_nc_ip1(dst,src,n,c) mpn_addlsh1_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_addlsh1_nc_ip1 1
+#else
+#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+#endif
/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
- returns the carry out (0, ..., 4). */
+ returns the carry out (0, ..., 4). Use _ip1 when a=c. */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_addlsh2_nc __MPN(addlsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
+#define mpn_addlsh2_n_ip1(dst,src,n) mpn_addlsh2_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_addlsh2_n_ip1 1
+#else
+#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#define mpn_addlsh2_nc_ip1(dst,src,n,c) mpn_addlsh2_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_addlsh2_nc_ip1 1
+#else
+#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+#endif
/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
- returns the carry out (0, ..., 2^k). */
+ returns the carry out (0, ..., 2^k). Use _ip1 when a=c. */
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
#define mpn_addlsh_nc __MPN(addlsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
+#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh_n_ip1
+#define mpn_addlsh_n_ip1(dst,src,n,s) mpn_addlsh_n(dst,dst,src,n,s)
+#define HAVE_NATIVE_mpn_addlsh_n_ip1 1
+#else
+#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
+ __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+#endif
+#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh_nc_ip1
+#define mpn_addlsh_nc_ip1(dst,src,n,s,c) mpn_addlsh_nc(dst,dst,src,n,s,c)
+#define HAVE_NATIVE_mpn_addlsh_nc_ip1 1
+#else
+#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
+#endif
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
returns the borrow out (0, 1 or 2). Use _ip1 when a=c. */
@@ -867,9 +909,23 @@
#endif
/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
- returns the carry out (0, ..., 2^k). */
+ returns the carry out (0, ..., 2^k). Use _ip1 when a=c. */
#define mpn_sublsh_n __MPN(sublsh_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh_n_ip1
+#define mpn_sublsh_n_ip1(dst,src,n,s) mpn_sublsh_n(dst,dst,src,n,s)
+#define HAVE_NATIVE_mpn_sublsh_n_ip1 1
+#else
+#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+#endif
+#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh_nc_ip1
+#define mpn_sublsh_nc_ip1(dst,src,n,s,c) mpn_sublsh_nc(dst,dst,src,n,s,c)
+#define HAVE_NATIVE_mpn_sublsh_nc_ip1 1
+#else
+#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t));
+#endif
/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
returns the carry out (-1, ..., 3). */
@@ -2700,108 +2756,28 @@
} while (0)
-#ifndef udiv_qrnnd_preinv
-#define udiv_qrnnd_preinv udiv_qrnnd_preinv3
-#endif
-
-/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
- limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
- If this would yield overflow, DI should be the largest possible number
- (i.e., only ones). For correct operation, the most significant bit of D
- has to be set. Put the quotient in Q and the remainder in R. */
-#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \
- do { \
- mp_limb_t _q, _ql, _r; \
- mp_limb_t _xh, _xl; \
- ASSERT ((d) != 0); \
- umul_ppmm (_q, _ql, (nh), (di)); \
- _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \
- umul_ppmm (_xh, _xl, _q, (d)); \
- sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
- if (_xh != 0) \
- { \
- sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
- _q += 1; \
- if (_xh != 0) \
- { \
- _r -= (d); \
- _q += 1; \
- } \
- } \
- if (_r >= (d)) \
- { \
- _r -= (d); \
- _q += 1; \
- } \
- (r) = _r; \
- (q) = _q; \
- } while (0)
-
-/* Like udiv_qrnnd_preinv, but branch-free. */
-#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \
- do { \
- mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
- mp_limb_t _xh, _xl; \
- _n2 = (nh); \
- _n10 = (nl); \
- _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
- _nadj = _n10 + (_nmask & (d)); \
- umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
- add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
- _q1 = ~_xh; \
- umul_ppmm (_xh, _xl, _q1, d); \
- add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
- _xh -= (d); /* xh = 0 or -1 */ \
- (r) = _xl + ((d) & _xh); \
- (q) = _xh - _q1; \
- } while (0)
-
-/* Like udiv_qrnnd_preinv2, but for for any value D. DNORM is D shifted left
- so that its most significant bit is set. LGUP is ceil(log2(D)). */
-#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
- do { \
- mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
- mp_limb_t _xh, _xl; \
- _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> (l - 1)); \
- _n10 = (nl) << (GMP_LIMB_BITS - (lgup)); \
- _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
- _nadj = _n10 + (_nmask & (dnorm)); \
- umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
- add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
- _q1 = ~_xh; \
- umul_ppmm (_xh, _xl, _q1, d); \
- add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
- _xh -= (d); \
- (r) = _xl + ((d) & _xh); \
- (q) = _xh - _q1; \
- } while (0)
-
-/* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
-
+/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
We write things strangely below, to help gcc. A more straightforward
version:
-
- _r = (nl) - _qh * (d);
- _t = _r + (d);
- if (_r >= _ql)
- {
- _qh--;
- _r = _t;
- }
-
+ _r = (nl) - _qh * (d);
+ _t = _r + (d);
+ if (_r >= _ql)
+ {
+ _qh--;
+ _r = _t;
+ }
For one operation shorter critical path, one may want to use this form:
-
- _p = _qh * (d)
- _s = (nl) + (d);
- _r = (nl) - _p;
- _t = _s - _p;
- if (_r >= _ql)
- {
- _qh--;
- _r = _t;
- }
+ _p = _qh * (d)
+ _s = (nl) + (d);
+ _r = (nl) - _p;
+ _t = _s - _p;
+ if (_r >= _ql)
+ {
+ _qh--;
+ _r = _t;
+ }
*/
-#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
+#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
do { \
mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
diff -r 2dc198f6c8d1 -r 8d1214489594 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Fri Mar 11 22:33:39 2011 +0100
+++ b/mpn/asm-defs.m4 Sat Mar 12 01:44:21 2011 +0100
@@ -1314,6 +1314,18 @@
define_mpn(addlsh2_nc)
define_mpn(addlsh_n)
define_mpn(addlsh_nc)
+define_mpn(addlsh1_n_ip1)
+define_mpn(addlsh1_nc_ip1)
+define_mpn(addlsh2_n_ip1)
+define_mpn(addlsh2_nc_ip1)
+define_mpn(addlsh_n_ip1)
+define_mpn(addlsh_nc_ip1)
+define_mpn(addlsh1_n_ip2)
+define_mpn(addlsh1_nc_ip2)
+define_mpn(addlsh2_n_ip2)
+define_mpn(addlsh2_nc_ip2)
+define_mpn(addlsh_n_ip2)
+define_mpn(addlsh_nc_ip2)
define_mpn(addmul_1)
define_mpn(addmul_1c)
define_mpn(addmul_2)
diff -r 2dc198f6c8d1 -r 8d1214489594 tests/devel/try.c
--- a/tests/devel/try.c Fri Mar 11 22:33:39 2011 +0100
+++ b/tests/devel/try.c Sat Mar 12 01:44:21 2011 +0100
@@ -908,7 +908,7 @@
REFERENCE (refmpn_addlsh2_n_ip1);
p = &param[TYPE_ADDLSH_N_IP1];
- COPY (TYPE_ADDLSH_N_IP1);
+ COPY (TYPE_ADDLSH1_N_IP1);
More information about the gmp-commit
mailing list