[Gmp-commit] /var/hg/gmp: 11 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Jun 9 23:08:11 UTC 2015
details: /var/hg/gmp/rev/504ee88ff200
changeset: 16687:504ee88ff200
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Jun 09 23:35:42 2015 +0200
description:
mpn/generic/rootrem.c: avoid division if result is 1.
details: /var/hg/gmp/rev/1c55c6695535
changeset: 16688:1c55c6695535
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Jun 09 23:50:59 2015 +0200
description:
mpn/generic/rootrem.c: Shorten last iteration.
details: /var/hg/gmp/rev/c1b981e3d6b7
changeset: 16689:c1b981e3d6b7
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Jun 09 23:57:29 2015 +0200
description:
mpn/generic/rootrem.c: Don't compute reminder if we know it's null.
details: /var/hg/gmp/rev/d1e6615b326a
changeset: 16690:d1e6615b326a
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 00:16:10 2015 +0200
description:
mpn/generic/rootrem.c: Hardwire first step, move it last.
details: /var/hg/gmp/rev/48cbff6d466d
changeset: 16691:48cbff6d466d
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 00:23:59 2015 +0200
description:
mpn/generic/rootrem.c: Hardwire second step, move it last.
details: /var/hg/gmp/rev/72f4970bc407
changeset: 16692:72f4970bc407
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 00:27:47 2015 +0200
description:
mpn/generic/rootrem.c: Hardcode third step, move it last.
details: /var/hg/gmp/rev/a97e01f7c3ce
changeset: 16693:a97e01f7c3ce
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 00:31:13 2015 +0200
description:
mpn/generic/rootrem.c: Hardcode sixth step, move it last.
details: /var/hg/gmp/rev/f4d21a1a55f6
changeset: 16694:f4d21a1a55f6
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 00:35:52 2015 +0200
description:
mpn/generic/rootrem.c: fuse step 4 and step 5.
details: /var/hg/gmp/rev/a79897f38d35
changeset: 16695:a79897f38d35
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 00:39:20 2015 +0200
description:
mpn/generic/rootrem.c: fuse step 4, 5, and 7.
details: /var/hg/gmp/rev/8c7fb6c38fd0
changeset: 16696:8c7fb6c38fd0
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 01:01:44 2015 +0200
description:
mpn/generic/rootrem.c: Don't divide if result will be discarded
details: /var/hg/gmp/rev/343444ab162a
changeset: 16697:343444ab162a
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Jun 10 01:03:22 2015 +0200
description:
mpn/generic/rootrem.c: Indent
diffstat:
mpn/generic/rootrem.c | 239 +++++++++++++++++++++++++++----------------------
1 files changed, 131 insertions(+), 108 deletions(-)
diffs (truncated from 321 to 300 lines):
diff -r a79ef80f3630 -r 343444ab162a mpn/generic/rootrem.c
--- a/mpn/generic/rootrem.c Tue Jun 09 17:16:08 2015 +0200
+++ b/mpn/generic/rootrem.c Wed Jun 10 01:03:22 2015 +0200
@@ -139,16 +139,14 @@
unsigned long b, kk;
unsigned long sizes[GMP_NUMB_BITS + 1];
int ni, i;
- int c;
+ int c, perf_pow;
int logk;
TMP_DECL;
MPN_SIZEINBASE_2EXP(unb, up, un, 1);
/* unb is the number of bits of the input U */
- xnb = (unb - 1) / k + 1; /* ceil (unb / k) */
- /* xnb is the number of bits of the root R */
- if (xnb == 1) /* root is 1 */
+ if (unb <= k) /* root is 1 */
{
if (remp == NULL)
un -= (*up == CNST_LIMB (1)); /* Non-zero iif {up,un} > 1 */
@@ -161,12 +159,17 @@
rootp[0] = 1;
return un;
}
+ /* if (unb - k <= k/2) // root is 2 */
+
+ xnb = (unb - 1) / k + 1; /* ceil (unb / k) */
+ /* xnb is the number of bits of the root R */
/* We initialize the algorithm with a 1-bit approximation to zero: since we
know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
kk = k * (xnb - 1); /* number of truncated bits in the input */
rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
+ --kk;
for (logk = 1; ((k - 1) >> logk) != 0; logk++)
;
@@ -190,7 +193,7 @@
b = sizes[ni] - 1; /* add just one bit at a time */
ni++;
} while (b != 0);
- sizes[ni] = 0;
+
ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
/* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
sizes[i] <= 2 * sizes[i+1].
@@ -219,15 +222,16 @@
sp = rootp;
MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
- mpn_sub_1 (rp, rp, rn, 1); /* subtract the initial approximation: since
+ mpn_sub_1 (rp, rp, rn, 2); /* subtract the initial approximation: since
the non-truncated part is less than 2^k, it
is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
- sp[0] = 1; /* initial approximation */
+ sp[0] = save = 2; /* initial approximation */
sn = 1; /* it has one limb */
- wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
+ wp[0] = k; /* k * {sp,sn}^(k-1) = 1 */
wn = 1;
i = ni;
+ b = bn = 1;
do
{
/* 1: loop invariant:
@@ -237,15 +241,103 @@
{wp, wn} = {sp, sn}^(k-1)
kk = number of truncated bits of the input
*/
- b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
- iteration */
+ /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
+
+ /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
+ if (UNLIKELY (rn < wn))
+ {
+ MPN_ZERO (sp, bn);
+ }
+ else
+ {
+ qn = rn - wn; /* expected quotient size */
+ if (qn <= bn) { /* Divide only if result is not too big. */
+ mpn_div_q (qp, rp, rn, wp, wn, scratch);
+ qn += qp[qn] != 0;
+ }
+
+ /* 5: current buffers: {sp,sn}, {qp,qn}.
+ Note: {rp,rn} is not needed any more since we'll compute it from
+ scratch at the end of the loop.
+ */
+
+ /* the quotient should be smaller than 2^b, since the previous
+ approximation was correctly rounded toward zero */
+ if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
+ qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
+ {
+ for (qn = 1; qn < bn; ++qn)
+ sp[qn - 1] = GMP_NUMB_MAX;
+ sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS-1 - ((b-1) % GMP_NUMB_BITS));
+ }
+ else
+ {
+ /* 7: current buffers: {sp,sn}, {qp,qn} */
+
+ /* Combine sB and q to form sB + q. */
+ MPN_COPY (sp, qp, qn);
+ MPN_ZERO (sp + qn, bn - qn);
+ }
+ }
+ sp[b / GMP_NUMB_BITS] |= save;
+
+ /* 8: current buffer: {sp,sn} */
+
+ if (--i == 0)
+ break;
+
+ /* Since each iteration treats b bits from the root and thus k*b bits
+ from the input, and we already considered b bits from the input,
+ we now have to take another (k-1)*b bits from the input. */
+ kk -= (k - 1) * b; /* remaining input bits */
+ /* {rp, rn} = floor({up, un} / 2^kk) */
+ MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
+ rn = un - kk / GMP_NUMB_BITS;
+ rn -= rp[rn - 1] == 0;
+
+ /* 9: current buffers: {sp,sn}, {rp,rn} */
+
+ for (c = 0;; c++)
+ {
+ /* Compute S^k in {qp,qn}. */
+ /* W <- S^(k-1) for the next iteration,
+ and S^k = W * S. */
+ wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
+ mpn_mul (qp, wp, wn, sp, sn);
+ qn = wn + sn;
+ qn -= qp[qn - 1] == 0;
+
+ perf_pow = 1;
+ /* if S^k > floor(U/2^kk), the root approximation was too large */
+ if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
+ MPN_DECR_U (sp, sn, 1);
+ else
+ break;
+ }
+
+ /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
+
+ ASSERT_ALWAYS (c <= 1);
+ ASSERT_ALWAYS (rn >= qn);
+
+ /* R = R - Q = floor(U/2^kk) - S^k */
+ if (perf_pow != 0)
+ {
+ mpn_sub (rp, rp, rn, qp, qn);
+ MPN_NORMALIZE (rp, rn);
+ }
+ else
+ {
+ rn = 1;
+ rp[0] = 0;
+ }
+ /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
/* Reinsert a low zero limb if we normalized away the entire remainder */
- if (rn == 0)
- {
- rp[0] = 0;
- rn = 1;
- }
+ rn += (rn == 0);
+
+ b = sizes[i - 1] - sizes[i]; /* number of bits to compute in the
+ next iteration */
/* first multiply the remainder by 2^b */
MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
@@ -292,41 +384,6 @@
wp[wn] = cy;
wn += cy != 0;
- /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
-
- /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
- if (rn < wn)
- {
- qn = 0;
- }
- else
- {
- qn = rn - wn; /* expected quotient size */
- mpn_div_q (qp, rp, rn, wp, wn, scratch);
- qn += qp[qn] != 0;
- }
-
- /* 5: current buffers: {sp,sn}, {qp,qn}.
- Note: {rp,rn} is not needed any more since we'll compute it from
- scratch at the end of the loop.
- */
-
- /* Number of limbs used by b bits, when least significant bit is
- aligned to least limb */
- bn = (b - 1) / GMP_NUMB_BITS + 1;
-
- /* the quotient should be smaller than 2^b, since the previous
- approximation was correctly rounded toward zero */
- if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
- qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
- {
- qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
- MPN_ZERO (qp, qn);
- qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
- MPN_DECR_U (qp, qn, 1);
- qn -= qp[qn - 1] == 0;
- }
-
/* 6: current buffers: {sp,sn}, {qp,qn} */
/* multiply the root approximation by 2^b */
@@ -338,74 +395,40 @@
sn++;
}
- /* 7: current buffers: {sp,sn}, {qp,qn} */
+ save = sp[b / GMP_NUMB_BITS];
- ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
- above, q is set to 2^b-1, which has
- exactly bn limbs */
+ /* Number of limbs used by b bits, when least significant bit is
+ aligned to least limb */
+ bn = (b - 1) / GMP_NUMB_BITS + 1;
- /* Combine sB and q to form sB + q. */
- save = sp[b / GMP_NUMB_BITS];
- MPN_COPY (sp, qp, qn);
- MPN_ZERO (sp + qn, bn - qn);
- sp[b / GMP_NUMB_BITS] |= save;
+ } while (1);
- /* 8: current buffer: {sp,sn} */
-
- /* Since each iteration treats b bits from the root and thus k*b bits
- from the input, and we already considered b bits from the input,
- we now have to take another (k-1)*b bits from the input. */
- kk -= (k - 1) * b; /* remaining input bits */
- /* {rp, rn} = floor({up, un} / 2^kk) */
- MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
- rn = un - kk / GMP_NUMB_BITS;
- rn -= rp[rn - 1] == 0;
-
- /* 9: current buffers: {sp,sn}, {rp,rn} */
-
- for (c = 0;; c++)
+ /* otherwise we have rn > 0, thus the return value is ok */
+ if (!approx || sp[0] <= CNST_LIMB (1))
+ {
+ for (c = 0;; c++)
{
/* Compute S^k in {qp,qn}. */
- if (i == 1)
- {
- /* Last iteration: we don't need W anymore. */
- /* mpn_pow_1 requires that both qp and wp have enough space to
- store the result {sp,sn}^k + 1 limb */
- approx = approx && (sp[0] > 1);
- qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
- }
- else
- {
- /* W <- S^(k-1) for the next iteration,
- and S^k = W * S. */
- wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
- mpn_mul (qp, wp, wn, sp, sn);
- qn = wn + sn;
- qn -= qp[qn - 1] == 0;
- }
+ /* Last iteration: we don't need W anymore. */
+ /* mpn_pow_1 requires that both qp and wp have enough
+ space to store the result {sp,sn}^k + 1 limb */
+ qn = mpn_pow_1 (qp, sp, sn, k, wp);
- /* if S^k > floor(U/2^kk), the root approximation was too large */
- if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
+ perf_pow = 1;
+ if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
MPN_DECR_U (sp, sn, 1);
else
break;
+ };
+
+ rn = perf_pow != 0;
+ if (rn != 0 && remp != NULL)
+ {
+ mpn_sub (remp, up, un, qp, qn);
+ rn = un;
+ MPN_NORMALIZE (remp, rn);
More information about the gmp-commit
mailing list