[Gmp-commit] /var/hg/gmp: 3 new changesets

Mon Feb 28 15:11:25 CET 2011

details:   /var/hg/gmp/rev/1697a75ef805
changeset: 13927:1697a75ef805
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Feb 28 14:20:48 2011 +0100
description:
Added testcase to exercise the nl == constant 0 special case in udiv_qrnnd_preinv3.

details:   /var/hg/gmp/rev/6b3ceb8e783b
changeset: 13928:6b3ceb8e783b
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Feb 28 14:49:20 2011 +0100
description:
Optimized udiv_qrnnd_preinv3 and udiv_rnnd_preinv

details:   /var/hg/gmp/rev/00cf5f4b2e9f
changeset: 13929:00cf5f4b2e9f
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Feb 28 15:10:48 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog              |  17 +++++++++++++
 gmp-impl.h             |  64 ++++++++++++++++++++++++++++---------------------
 mpn/generic/rootrem.c  |  28 +++++++++++----------
 tests/mpn/t-divrem_1.c |   5 +++
 4 files changed, 73 insertions(+), 41 deletions(-)

diffs (196 lines):

diff -r 87c2e4357e0e -r 00cf5f4b2e9f ChangeLog

--- a/ChangeLog	Sun Feb 27 23:25:20 2011 +0100
+++ b/ChangeLog	Mon Feb 28 15:10:48 2011 +0100
@@ -1,3 +1,18 @@
+2011-02-28  Niels Möller  <nisse at lysator.liu.se>
+
+	* gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
+	using masking logic. Further optimization of the nl == constant 0
+	case, similar to udiv_rnd_preinv.
+	(udiv_rnnd_preinv): Likewise.
+
+	* tests/mpn/t-divrem_1.c (check_data): Added testcase to exercise
+	the nl == constant 0 special case in udiv_qrnnd_preinv3.
+
+2011-02-28  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
+	allocations until they are known to be needed.
+
 2011-02-27 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/x86/atom/sse2/mul_1.asm: New code.
@@ -252,6 +267,8 @@
 
 2011-01-31  Torbjorn Granlund  <tege at gmplib.org>
 
+	* config.guess: Recognise new Intel processors.
+
 	* config.guess: Support 'coreinhm' and 'coreisbr'.
 	* config.sub: Likewise.
 	* configure.in: Likewise.
diff -r 87c2e4357e0e -r 00cf5f4b2e9f gmp-impl.h
--- a/gmp-impl.h	Sun Feb 27 23:25:20 2011 +0100
+++ b/gmp-impl.h	Mon Feb 28 15:10:48 2011 +0100
@@ -2771,51 +2771,59 @@
 */
 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)				\
   do {									\
-    mp_limb_t _qh, _ql, _r;						\
+    mp_limb_t _qh, _ql, _r, _mask;					\
     umul_ppmm (_qh, _ql, (nh), (di));					\
     if (__builtin_constant_p (nl) && (nl) == 0)				\
-      _qh += (nh) + 1;							\
+      {									\
+	_qh += (nh) + 1;						\
+	_r = - _qh * (d);						\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_qh += _mask;							\
+	_r += _mask & (d);						\
+      }									\
     else								\
-      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));			\
-    _r = (nl) - _qh * (d);						\
-    if (_r > _ql)	/* both > and >= should be OK */		\
       {									\
-	_r += (d);							\
-	_qh--;								\
-      }									\
-    if (UNLIKELY (_r >= (d)))						\
-      {									\
-	_r -= (d);							\
-	_qh++;								\
+	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+	_r = (nl) - _qh * (d);						\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_qh += _mask;							\
+	_r += _mask & (d);						\
+	if (UNLIKELY (_r >= (d)))					\
+	  {								\
+	    _r -= (d);							\
+	    _qh++;							\
+	  }								\
       }									\
     (r) = _r;								\
     (q) = _qh;								\
   } while (0)
 
-/* Unlike udiv_qrnnd_preinv, works also for nh == d.
-
-   FIXME: The special case for nl = constant 0 could be simplified
-   further, like in udiv_rnd_preinv below. Note that with nl = 0, the
-   case _r >= d can't happen. Also applies to udiv_qrnnd_preinv
-   above.
-
-   FIXME: Use mask for adjustment? */
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+   udiv_qrnnd_preinv, works also for the case NH == D, where the
+   quotient doesn't quite fit in a single limb. */
 #define udiv_rnnd_preinv(r, nh, nl, d, di)				\
   do {									\
-    mp_limb_t _qh, _ql, _r;						\
+    mp_limb_t _qh, _ql, _r, _mask;					\
     umul_ppmm (_qh, _ql, (nh), (di));					\
     if (__builtin_constant_p (nl) && (nl) == 0)				\
-      _qh += (nh) + 1;							\
+      {									\
+	_r = ~(_qh + (nh)) * (d);					\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_r += _mask & (d);						\
+      }									\
     else								\
-      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));			\
-    _r = (nl) - _qh * (d);						\
-    if (_r > _ql)	/* both > and >= should be OK */		\
-      _r += (d);							\
-    if (UNLIKELY (_r >= (d)))						\
-      _r -= (d);							\
+      {									\
+	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+	_r = (nl) - _qh * (d);						\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_r += _mask & (d);						\
+	if (UNLIKELY (_r >= (d)))					\
+	  _r -= (d);							\
+      }									\
     (r) = _r;								\
   } while (0)
 
+/* FIXME: Obsolete? Use udiv_rnnd_preinv(r, nh, 0, d, di) instead. */
 /* Compute r = nh*B mod d, where di is the inverse of d.  */
 #define udiv_rnd_preinv(r, nh, d, di)					\
   do {									\
diff -r 87c2e4357e0e -r 00cf5f4b2e9f mpn/generic/rootrem.c
--- a/mpn/generic/rootrem.c	Sun Feb 27 23:25:20 2011 +0100
+++ b/mpn/generic/rootrem.c	Mon Feb 28 15:10:48 2011 +0100
@@ -8,7 +8,7 @@
    ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009, 2010, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -134,16 +134,6 @@
 
   TMP_MARK;
 
-  /* qp and wp need enough space to store S'^k where S' is an approximate
-     root. Since S' can be as large as S+2, the worst case is when S=2 and
-     S'=4. But then since we know the number of bits of S in advance, S'
-     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
-     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
-     fits in un limbs, the number of extra limbs needed is bounded by
-     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
-#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
-  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
-					of R/(k*S^(k-1)), and S^k */
   if (remp == NULL)
     {
       rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
@@ -155,8 +145,7 @@
       rp = remp;
     }
   sp = rootp;
-  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
-					and temporary for mpn_pow_1 */
+
   count_leading_zeros (cnt, up[un - 1]);
   unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
   /* unb is the number of bits of the input U */
@@ -217,6 +206,19 @@
      Newton iteration will first compute sizes[ni-1] extra bits,
      then sizes[ni-2], ..., then sizes[0] = b. */
 
+  /* qp and wp need enough space to store S'^k where S' is an approximate
+     root. Since S' can be as large as S+2, the worst case is when S=2 and
+     S'=4. But then since we know the number of bits of S in advance, S'
+     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+     fits in un limbs, the number of extra limbs needed is bounded by
+     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+					of R/(k*S^(k-1)), and S^k */
+  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+					and temporary for mpn_pow_1 */
+
   wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
   wn = 1;
   for (i = ni; i != 0; i--)
diff -r 87c2e4357e0e -r 00cf5f4b2e9f tests/mpn/t-divrem_1.c
--- a/tests/mpn/t-divrem_1.c	Sun Feb 27 23:25:20 2011 +0100
+++ b/tests/mpn/t-divrem_1.c	Mon Feb 28 15:10:48 2011 +0100
@@ -42,6 +42,11 @@
     { { 5 }, 1, 2, 0,
       { 2 }, 1},
 
+    /* Exercises the q update in the nl == constant 0 case of
+       udiv_qrnnd_preinv3. Test case copied from t-fat.c. */
+    { { 287 }, 1, 7, 1,
+      { 0, 41 }, 0 },
+
 #if GMP_NUMB_BITS == 32
     { { 0x3C }, 1, 0xF2, 1,
       { 0x3F789854, 0 }, 0x98 },