[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Feb 28 15:11:25 CET 2011
details: /var/hg/gmp/rev/1697a75ef805
changeset: 13927:1697a75ef805
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Feb 28 14:20:48 2011 +0100
description:
Added testcase to exercise the nl == constant 0 special case in udiv_qrnnd_preinv3.
details: /var/hg/gmp/rev/6b3ceb8e783b
changeset: 13928:6b3ceb8e783b
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Feb 28 14:49:20 2011 +0100
description:
Optimized udiv_qrnnd_preinv3 and udiv_rnnd_preinv
details: /var/hg/gmp/rev/00cf5f4b2e9f
changeset: 13929:00cf5f4b2e9f
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Feb 28 15:10:48 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 17 +++++++++++++
gmp-impl.h | 64 ++++++++++++++++++++++++++++---------------------
mpn/generic/rootrem.c | 28 +++++++++++----------
tests/mpn/t-divrem_1.c | 5 +++
4 files changed, 73 insertions(+), 41 deletions(-)
diffs (196 lines):
diff -r 87c2e4357e0e -r 00cf5f4b2e9f ChangeLog
--- a/ChangeLog Sun Feb 27 23:25:20 2011 +0100
+++ b/ChangeLog Mon Feb 28 15:10:48 2011 +0100
@@ -1,3 +1,18 @@
+2011-02-28 Niels Möller <nisse at lysator.liu.se>
+
+ * gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
+ using masking logic. Further optimization of the nl == constant 0
+ case, similar to udiv_rnd_preinv.
+ (udiv_rnnd_preinv): Likewise.
+
+ * tests/mpn/t-divrem_1.c (check_data): Added testcase to exercise
+ the nl == constant 0 special case in udiv_qrnnd_preinv3.
+
+2011-02-28 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
+ allocations until they are known to be needed.
+
2011-02-27 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/atom/sse2/mul_1.asm: New code.
@@ -252,6 +267,8 @@
2011-01-31 Torbjorn Granlund <tege at gmplib.org>
+ * config.guess: Recognise new Intel processors.
+
* config.guess: Support 'coreinhm' and 'coreisbr'.
* config.sub: Likewise.
* configure.in: Likewise.
diff -r 87c2e4357e0e -r 00cf5f4b2e9f gmp-impl.h
--- a/gmp-impl.h Sun Feb 27 23:25:20 2011 +0100
+++ b/gmp-impl.h Mon Feb 28 15:10:48 2011 +0100
@@ -2771,51 +2771,59 @@
*/
#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
do { \
- mp_limb_t _qh, _ql, _r; \
+ mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
if (__builtin_constant_p (nl) && (nl) == 0) \
- _qh += (nh) + 1; \
+ { \
+ _qh += (nh) + 1; \
+ _r = - _qh * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _qh += _mask; \
+ _r += _mask & (d); \
+ } \
else \
- add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - _qh * (d); \
- if (_r > _ql) /* both > and >= should be OK */ \
{ \
- _r += (d); \
- _qh--; \
- } \
- if (UNLIKELY (_r >= (d))) \
- { \
- _r -= (d); \
- _qh++; \
+ add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
+ _r = (nl) - _qh * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _qh += _mask; \
+ _r += _mask & (d); \
+ if (UNLIKELY (_r >= (d))) \
+ { \
+ _r -= (d); \
+ _qh++; \
+ } \
} \
(r) = _r; \
(q) = _qh; \
} while (0)
-/* Unlike udiv_qrnnd_preinv, works also for nh == d.
-
- FIXME: The special case for nl = constant 0 could be simplified
- further, like in udiv_rnd_preinv below. Note that with nl = 0, the
- case _r >= d can't happen. Also applies to udiv_qrnnd_preinv
- above.
-
- FIXME: Use mask for adjustment? */
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+ udiv_qrnnd_preinv, works also for the case NH == D, where the
+ quotient doesn't quite fit in a single limb. */
#define udiv_rnnd_preinv(r, nh, nl, d, di) \
do { \
- mp_limb_t _qh, _ql, _r; \
+ mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
if (__builtin_constant_p (nl) && (nl) == 0) \
- _qh += (nh) + 1; \
+ { \
+ _r = ~(_qh + (nh)) * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _r += _mask & (d); \
+ } \
else \
- add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - _qh * (d); \
- if (_r > _ql) /* both > and >= should be OK */ \
- _r += (d); \
- if (UNLIKELY (_r >= (d))) \
- _r -= (d); \
+ { \
+ add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
+ _r = (nl) - _qh * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _r += _mask & (d); \
+ if (UNLIKELY (_r >= (d))) \
+ _r -= (d); \
+ } \
(r) = _r; \
} while (0)
+/* FIXME: Obsolete? Use udiv_rnnd_preinv(r, nh, 0, d, di) instead. */
/* Compute r = nh*B mod d, where di is the inverse of d. */
#define udiv_rnd_preinv(r, nh, d, di) \
do { \
diff -r 87c2e4357e0e -r 00cf5f4b2e9f mpn/generic/rootrem.c
--- a/mpn/generic/rootrem.c Sun Feb 27 23:25:20 2011 +0100
+++ b/mpn/generic/rootrem.c Mon Feb 28 15:10:48 2011 +0100
@@ -8,7 +8,7 @@
ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -134,16 +134,6 @@
TMP_MARK;
- /* qp and wp need enough space to store S'^k where S' is an approximate
- root. Since S' can be as large as S+2, the worst case is when S=2 and
- S'=4. But then since we know the number of bits of S in advance, S'
- can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
- So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
- fits in un limbs, the number of extra limbs needed is bounded by
- ceil(k*log2(3/2)/GMP_NUMB_BITS). */
-#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
- qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
- of R/(k*S^(k-1)), and S^k */
if (remp == NULL)
{
rp = TMP_ALLOC_LIMBS (un + 1); /* will contain the remainder */
@@ -155,8 +145,7 @@
rp = remp;
}
sp = rootp;
- wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
- and temporary for mpn_pow_1 */
+
count_leading_zeros (cnt, up[un - 1]);
unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
/* unb is the number of bits of the input U */
@@ -217,6 +206,19 @@
Newton iteration will first compute sizes[ni-1] extra bits,
then sizes[ni-2], ..., then sizes[0] = b. */
+ /* qp and wp need enough space to store S'^k where S' is an approximate
+ root. Since S' can be as large as S+2, the worst case is when S=2 and
+ S'=4. But then since we know the number of bits of S in advance, S'
+ can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+ So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+ fits in un limbs, the number of extra limbs needed is bounded by
+ ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+ qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+ of R/(k*S^(k-1)), and S^k */
+ wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+ and temporary for mpn_pow_1 */
+
wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
wn = 1;
for (i = ni; i != 0; i--)
diff -r 87c2e4357e0e -r 00cf5f4b2e9f tests/mpn/t-divrem_1.c
--- a/tests/mpn/t-divrem_1.c Sun Feb 27 23:25:20 2011 +0100
+++ b/tests/mpn/t-divrem_1.c Mon Feb 28 15:10:48 2011 +0100
@@ -42,6 +42,11 @@
{ { 5 }, 1, 2, 0,
{ 2 }, 1},
+ /* Exercises the q update in the nl == constant 0 case of
+ udiv_qrnnd_preinv3. Test case copied from t-fat.c. */
+ { { 287 }, 1, 7, 1,
+ { 0, 41 }, 0 },
+
#if GMP_NUMB_BITS == 32
{ { 0x3C }, 1, 0xF2, 1,
{ 0x3F789854, 0 }, 0x98 },
More information about the gmp-commit
mailing list