[Gmp-commit] /var/hg/gmp: 2 new changesets

Sat Oct 27 10:50:44 CEST 2012

details:   /var/hg/gmp/rev/a04b1abcb99d
changeset: 15088:a04b1abcb99d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Oct 27 10:27:52 2012 +0200
description:
Get remainder allocation right.

details:   /var/hg/gmp/rev/755243e9118f
changeset: 15089:755243e9118f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Oct 27 10:50:42 2012 +0200
description:
Trivial merge.

diffstat:

 ChangeLog             |   4 ++++
 mpn/generic/perfpow.c |  45 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/generic/remove.c  |   2 +-
 3 files changed, 50 insertions(+), 1 deletions(-)

diffs (99 lines):

diff -r 7e1cea21638c -r 755243e9118f ChangeLog

--- a/ChangeLog	Thu Oct 25 21:49:32 2012 +0200
+++ b/ChangeLog	Sat Oct 27 10:50:42 2012 +0200
@@ -1,3 +1,7 @@
+2012-10-27  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/remove.c: Get remainder allocation right.
+
 2012-10-25  Torbjorn Granlund  <tege at gmplib.org>
 
 	* longlong.h: De-support old POWER asm syntax.
diff -r 7e1cea21638c -r 755243e9118f mpn/generic/perfpow.c
--- a/mpn/generic/perfpow.c	Thu Oct 25 21:49:32 2012 +0200
+++ b/mpn/generic/perfpow.c	Sat Oct 27 10:50:42 2012 +0200
@@ -32,6 +32,12 @@
        For s = 1, 2, 4, ..., s_max, compute the s least significant
        limbs of {xp,xn}^k. Stop if they don't match the s least
        significant limbs of {np,nn}.
+
+   FIXME: Low xn limbs can be expected to always match, if computed as
+   a mod B^{xn} root. So instead of using mpn_powlo, compute an
+   approximation of the most significant (normalized) limb of {xp,xn}
+   ^ k (and an error bound), and compare to {np, nn}. Or use an even
+   cruder approximation based on fix-point base 2 logarithm.
 */
 static int
 pow_equals (mp_srcptr np, mp_size_t nn,
@@ -105,6 +111,28 @@
    Algorithm:
        Apply Hensel lifting repeatedly, each time
        doubling (approx.) the number of known bits in rp.
+
+   Uses the iteration
+
+     r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
+
+   FIXME: I think we get *exactly* doubled precision in each
+   iteration. Could be simplified to do precision book-keeping in
+   limbs rather than bits.   
+   
+   FIXME: Rewrite iteration as
+
+     r' <-- r - k^{-1} r (r^k y - 1)
+
+   and take advantage of the zero low part of r^k y - 1.
+
+   If we prefer to get y^{1/k} rather than y^{-1/k}, we could also
+   switch to the iteration
+
+     r' <-- r - k^{-1} r (r^k y^{k-1} - 1)
+
+   which converges to y^{1/n - 1}, and we then get y^{1/n} by a single
+   mullo.
 */
 static void
 binv_root (mp_ptr rp, mp_srcptr yp,
@@ -146,6 +174,16 @@
 /*
    Computes rp such that rp^2 * yp = 1 (mod 2^{b+1}).
    Returns non-zero if such an integer rp exists.
+
+   Uses the iteration
+
+     r' <-- (3r - r^3 y) / 2
+
+   FIXME: Rewrite as
+
+     r' <-- r - r (r^2 y - 1) / 2
+
+   and take advantage of zero low part of r^2 y - 1.
 */
 static int
 binv_sqroot (mp_ptr rp, mp_srcptr yp,
@@ -274,6 +312,13 @@
   tp = TMP_ALLOC_LIMBS (5 * nn);	/* FIXME */
   MPN_ZERO (rp1, nn);
 
+  /* FIXME: It seems the inverse in yp is needed only to get
+     non-inverted roots. I.e., is_kth_power computes n^{1/2} as
+     (n^{-1})^{-1/2} and similarly for nth roots. It should be more
+     efficient to compute n^{1/2} as n * n^{-1/2}, with a mullo
+     instead of a binvert. And we can do something similar for kth
+     roots if we switch to an iteration converging to n^{1/k - 1}, and
+     we can then eliminate this binvert call. */
   mpn_binvert (yp, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
   if (b % GMP_LIMB_BITS)
     yp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
diff -r 7e1cea21638c -r 755243e9118f mpn/generic/remove.c
--- a/mpn/generic/remove.c	Thu Oct 25 21:49:32 2012 +0200
+++ b/mpn/generic/remove.c	Sat Oct 27 10:50:42 2012 +0200
@@ -86,7 +86,7 @@
 
   TMP_MARK;
 
-  tp = TMP_ALLOC_LIMBS ((un + vn) / 2); /* remainder */
+  tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
   qp = TMP_ALLOC_LIMBS (un + 1);	/* quotient, alternating */
   qp2 = TMP_ALLOC_LIMBS (un + 1);	/* quotient, alternating */
   np = TMP_ALLOC_LIMBS (un + LOG);	/* powers of V */