[Gmp-commit] /var/hg/gmp: mpn_brootinv: Interface change, limbs rather than b...
mercurial at gmplib.org
mercurial at gmplib.org
Thu Nov 1 22:08:11 CET 2012
details: /var/hg/gmp/rev/48f07964d11a
changeset: 15104:48f07964d11a
user: Niels M?ller <nisse at lysator.liu.se>
date: Thu Nov 01 22:01:00 2012 +0100
description:
mpn_brootinv: Interface change, limbs rather than bits.
diffstat:
ChangeLog | 8 +++++
gmp-impl.h | 2 +-
mpn/generic/brootinv.c | 73 ++++++++++++++++++++++++++++++++++++++-----------
mpn/generic/perfpow.c | 2 +-
tests/mpn/t-brootinv.c | 2 +-
tune/speed.h | 2 +-
6 files changed, 68 insertions(+), 21 deletions(-)
diffs (179 lines):
diff -r e93da8a39685 -r 48f07964d11a ChangeLog
--- a/ChangeLog Thu Nov 01 21:22:24 2012 +0100
+++ b/ChangeLog Thu Nov 01 22:01:00 2012 +0100
@@ -1,5 +1,13 @@
2012-11-01 Niels Möller <nisse at lysator.liu.se>
+ * mpn/generic/brootinv.c (mpn_brootinv): Input size in limbs
+ rather than bits. Use single-precision iterations for the first
+ limb.
+ * mpn/generic/perfpow.c (is_kth_power): Update mpn_brootinv call.
+ * tests/mpn/t-brootinv.c (main): Likewise.
+ * tune/speed.h (SPEED_ROUTINE_MPN_BROOTINV): Likewise.
+ * gmp-impl.h (mpn_brootinv): Updated prototype.
+
* mpn/generic/hgcd2.c (mpn_hgcd2): Removed redundant loop exit
tests in the single-precision loop.
diff -r e93da8a39685 -r 48f07964d11a gmp-impl.h
--- a/gmp-impl.h Thu Nov 01 21:22:24 2012 +0100
+++ b/gmp-impl.h Thu Nov 01 22:01:00 2012 +0100
@@ -1636,7 +1636,7 @@
__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_brootinv __MPN(brootinv)
-__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_limb_t, mp_ptr);
+__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
#define mpn_bsqrt __MPN(bsqrt)
__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
diff -r e93da8a39685 -r 48f07964d11a mpn/generic/brootinv.c
--- a/mpn/generic/brootinv.c Thu Nov 01 21:22:24 2012 +0100
+++ b/mpn/generic/brootinv.c Thu Nov 01 22:01:00 2012 +0100
@@ -22,7 +22,22 @@
#include "gmp.h"
#include "gmp-impl.h"
-/* Compute r such that r^k * y = 1 (mod 2^b).
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+ typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+ mp_limb_t r = 1;
+ mp_limb_t s = a;
+
+ for (r = 1, s = a; e > 0; e >>= 1, s *= s)
+ if (e & 1)
+ r *= s;
+
+ return r;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
Iterates
r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
@@ -31,7 +46,8 @@
Works just for odd k. Else the Hensel lifting degenerates.
FIXME:
- (1) Simplify to do precision book-keeping in limbs rather than bits.
+
+ (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
(2) Rewrite iteration as
r' <-- r - k^{-1} r (r^k y - 1)
@@ -41,37 +57,60 @@
(4) Use a small table to get starting value.
- If we prefer to get y^{1/k} rather than y^{-1/k}, we could also switch to
- the iteration
+ Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
- r' <-- r - k^{-1} r (r^k y^{k-1} - 1)
-
- which converges to y^{1/n - 1}, and we then get y^{1/n} by a single mullo.
-*/
void
-mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_limb_t k, mp_ptr tp)
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
{
mp_ptr tp2, tp3;
- mp_limb_t kinv, k2;
- mp_size_t bn, order[GMP_LIMB_BITS + 1];
+ mp_limb_t kinv, k2, r0, y0;
+ mp_size_t order[GMP_LIMB_BITS + 1];
int i, d;
- ASSERT (bnb > 0);
+ ASSERT (bn > 0);
ASSERT ((k & 1) != 0);
- bn = 1 + (bnb - 1) / GMP_LIMB_BITS;
-
tp2 = tp + bn;
tp3 = tp + 2 * bn;
k2 = k + 1;
binvert_limb (kinv, k);
+ /* 4-bit initial approximation:
+
+ y%16 | 1 3 5 7 9 11 13 15,
+ k%4 +-----------------------------
+ 1 | 1 11 13 7 9 3 5 15
+ 3 | 1 3 5 7 9 11 13 15
+
+ */
+ y0 = yp[0];
+
+ r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & ~(k << 2) & 8); /* 4 bits */
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f)); /* 8 bits */
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0xffff)); /* 16 bits */
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+ {
+ unsigned prec = 32;
+ do
+ {
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
+ prec *= 2;
+ }
+ while (prec < GMP_NUMB_BITS);
+ }
+#endif
+
+ rp[0] = r0;
+ if (bn == 1)
+ return;
+
d = 0;
- for (; bnb != 1; bnb = (bnb + 1) >> 1)
- order[d++] = 1 + (bnb - 1) / GMP_LIMB_BITS;
+ for (; bn > 1; bn = (bn + 1) >> 1)
+ order[d++] = bn;
- rp[0] = 1;
for (i = d - 1; i >= 0; i--)
{
bn = order[i];
diff -r e93da8a39685 -r 48f07964d11a mpn/generic/perfpow.c
--- a/mpn/generic/perfpow.c Thu Nov 01 21:22:24 2012 +0100
+++ b/mpn/generic/perfpow.c Thu Nov 01 22:01:00 2012 +0100
@@ -142,8 +142,8 @@
else
{
b = 1 + (f - 1) / k;
- mpn_brootinv (rp, ip, b, k, tp);
rn = 1 + (b - 1) / GMP_LIMB_BITS;
+ mpn_brootinv (rp, ip, rn, k, tp);
if ((b % GMP_LIMB_BITS) != 0)
rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
MPN_NORMALIZE (rp, rn);
diff -r e93da8a39685 -r 48f07964d11a tests/mpn/t-brootinv.c
--- a/tests/mpn/t-brootinv.c Thu Nov 01 21:22:24 2012 +0100
+++ b/tests/mpn/t-brootinv.c Thu Nov 01 22:01:00 2012 +0100
@@ -81,7 +81,7 @@
else
k |= 1;
}
- mpn_brootinv (rp, ap, n * GMP_NUMB_BITS, k, scratch);
+ mpn_brootinv (rp, ap, n, k, scratch);
mpn_powlo (pp, rp, &k, 1, n, scratch);
mpn_mullo_n (app, ap, pp, n);
diff -r e93da8a39685 -r 48f07964d11a tune/speed.h
--- a/tune/speed.h Thu Nov 01 21:22:24 2012 +0100
+++ b/tune/speed.h Thu Nov 01 22:01:00 2012 +0100
@@ -2107,7 +2107,7 @@
speed_starttime (); \
i = s->reps; \
do \
- (*function) (wp, s->xp, s->size * GMP_NUMB_BITS, s->r, tp); \
+ (*function) (wp, s->xp, s->size, s->r, tp); \
while (--i != 0); \
t = speed_endtime (); \
\
More information about the gmp-commit
mailing list