[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Aug 16 12:38:24 UTC 2019
details: /var/hg/gmp/rev/042ea5bf2ed1
changeset: 17818:042ea5bf2ed1
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Aug 16 14:28:42 2019 +0200
description:
mpn/generic/brootinv.c: Shorten computations, using even exponent.
details: /var/hg/gmp/rev/d863c8b31626
changeset: 17819:d863c8b31626
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Aug 16 14:29:24 2019 +0200
description:
mpn/generic/powlo.c: Avoid copies with a flipflop.
details: /var/hg/gmp/rev/a905169c7cdb
changeset: 17820:a905169c7cdb
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Aug 16 14:38:08 2019 +0200
description:
ChangeLog
diffstat:
ChangeLog | 5 +++
mpn/generic/brootinv.c | 76 ++++++++++++++++++++++++++++----------------
mpn/generic/powlo.c | 83 +++++++++++++++++++++++++++++--------------------
3 files changed, 102 insertions(+), 62 deletions(-)
diffs (truncated from 306 to 300 lines):
diff -r 31d5fb37995b -r a905169c7cdb ChangeLog
--- a/ChangeLog Fri Aug 16 08:21:42 2019 +0200
+++ b/ChangeLog Fri Aug 16 14:38:08 2019 +0200
@@ -1,3 +1,8 @@
+2019-08-16 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/generic/brootinv.c: Shorten computations, using even exponent.
+ * mpn/generic/powlo.c: Avoid copies with a flipflop.
+
2019-08-16 Niels Möller <nisse at lysator.liu.se>
Speed support for gcd_22. Calls mpn_gcd_22(al, al, bl, bl), so
diff -r 31d5fb37995b -r a905169c7cdb mpn/generic/brootinv.c
--- a/mpn/generic/brootinv.c Fri Aug 16 08:21:42 2019 +0200
+++ b/mpn/generic/brootinv.c Fri Aug 16 14:38:08 2019 +0200
@@ -2,7 +2,7 @@
Contributed to the GNU project by Martin Boij (as part of perfpow.c).
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -32,16 +32,21 @@
#include "gmp-impl.h"
-/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+/* Computes a^2e (mod B). Uses right-to-left binary algorithm, since
typical use will have e small. */
static mp_limb_t
-powlimb (mp_limb_t a, mp_limb_t e)
+powsquaredlimb (mp_limb_t a, mp_limb_t e)
{
mp_limb_t r;
- for (r = 1; e > 0; e >>= 1, a *= a)
+ r = 1;
+ /* if (LIKELY (e != 0)) */
+ do {
+ a *= a;
if (e & 1)
r *= a;
+ e >>= 1;
+ } while (e != 0);
return r;
}
@@ -66,7 +71,9 @@
(4) Use a small table to get starting value.
- Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
+ Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo
+ Currently mpn_powlo requires 3*bn
+ so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS).
*/
void
@@ -75,36 +82,36 @@
mp_ptr tp2, tp3;
mp_limb_t kinv, k2, r0, y0;
mp_size_t order[GMP_LIMB_BITS + 1];
- int i, d;
+ int d;
ASSERT (bn > 0);
ASSERT ((k & 1) != 0);
tp2 = tp + bn;
- tp3 = tp + 2 * bn;
- k2 = k + 1;
+ tp3 = tp + bn + ((bn + 3) >> 1);
+ k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */
binvert_limb (kinv, k);
/* 4-bit initial approximation:
y%16 | 1 3 5 7 9 11 13 15,
- k%4 +-------------------------+k2%4
- 1 | 1 11 13 7 9 3 5 15 | 2
+ k%4 +-------------------------+k2%2
+ 1 | 1 11 13 7 9 3 5 15 | 1
3 | 1 3 5 7 9 11 13 15 | 0
*/
y0 = yp[0];
- r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 2) & 8); /* 4 bits */
- r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f)); /* 8 bits */
- r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7fff)); /* 16 bits */
+ r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8); /* 4 bits */
+ r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f)); /* 8 bits */
+ r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff)); /* 16 bits */
#if GMP_NUMB_BITS > 16
{
unsigned prec = 16;
do
{
- r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
+ r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2));
prec *= 2;
}
while (prec < GMP_NUMB_BITS);
@@ -115,25 +122,38 @@
if (bn == 1)
return;
- /* This initialization doesn't matter for the result (any garbage is
- cancelled in the iteration), but proper initialization makes
- valgrind happier. */
- MPN_ZERO (rp+1, bn-1);
-
d = 0;
- for (; bn > 1; bn = (bn + 1) >> 1)
+ for (; bn != 2; bn = (bn + 1) >> 1)
order[d++] = bn;
- for (i = d - 1; i >= 0; i--)
+ order[d] = 2;
+ bn = 1;
+
+ do
{
- bn = order[i];
+ mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */
+ tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1);
- mpn_mul_1 (tp, rp, bn, k2);
+ bn = order[d];
+
+ mpn_powlo (rp, tp, &k2, 1, bn, tp3);
+ mpn_mullo_n (tp, yp, rp, bn);
- mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
- mpn_mullo_n (rp, yp, tp2, bn);
-
- mpn_sub_n (tp2, tp, rp, bn);
- mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0);
+ /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */
+ /* The function above is not handled, ((bn + 1) >> 1) + 1 <= bn*/
+ {
+ mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */
+ int borrow;
+ borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0;
+ if (bn > pbn) /* 3 < bn */
+ {
+ if (borrow)
+ mpn_com (tp + pbn, tp + pbn, bn - pbn);
+ else
+ mpn_neg (tp + pbn, tp + pbn, bn - pbn);
+ }
+ }
+ mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0);
}
+ while (--d >= 0);
}
diff -r 31d5fb37995b -r a905169c7cdb mpn/generic/powlo.c
--- a/mpn/generic/powlo.c Fri Aug 16 08:21:42 2019 +0200
+++ b/mpn/generic/powlo.c Fri Aug 16 14:38:08 2019 +0200
@@ -1,6 +1,6 @@
/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
-Copyright 2007-2009, 2012, 2015, 2016 Free Software Foundation, Inc.
+Copyright 2007-2009, 2012, 2015, 2016, 2018 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -66,9 +66,9 @@
unsigned k;
static mp_bitcnt_t x[] = {7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
ASSERT (eb > 1);
- for (k = 0; eb > x[k]; ++k)
+ for (k = 0; eb > x[k++];)
;
- return k + 1;
+ return k;
}
/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
@@ -85,8 +85,9 @@
mp_bitcnt_t ebi;
unsigned windowsize, this_windowsize;
mp_limb_t expbits;
- mp_limb_t *pp, *this_pp, *last_pp;
+ mp_limb_t *pp;
long i;
+ int flipflop;
TMP_DECL;
ASSERT (en > 1 || (en == 1 && ep[0] > 1));
@@ -96,41 +97,56 @@
MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
windowsize = win_size (ebi);
- ASSERT (windowsize < ebi);
+ if (windowsize > 1)
+ {
+ mp_limb_t *this_pp, *last_pp;
+ ASSERT (windowsize < ebi);
- pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)));
+ pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)));
- this_pp = pp;
+ this_pp = pp;
- MPN_COPY (this_pp, bp, n);
+ MPN_COPY (this_pp, bp, n);
+
+ /* Store b^2 in tp. */
+ mpn_sqrlo (tp, bp, n);
- /* Store b^2 in tp. */
- mpn_sqrlo (tp, bp, n);
+ /* Precompute odd powers of b and put them in the temporary area at pp. */
+ i = (1 << (windowsize - 1)) - 1;
+ do
+ {
+ last_pp = this_pp;
+ this_pp += n;
+ mpn_mullo_n (this_pp, last_pp, tp, n);
+ } while (--i != 0);
+
+ expbits = getbits (ep, ebi, windowsize);
- /* Precompute odd powers of b and put them in the temporary area at pp. */
- for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
+ /* THINK: Should we initialise the case expbits % 4 == 0 with a mullo? */
+ count_trailing_zeros (cnt, expbits);
+ ebi -= windowsize;
+ ebi += cnt;
+ expbits >>= cnt;
+
+ MPN_COPY (rp, pp + n * (expbits >> 1), n);
+ }
+ else
{
- last_pp = this_pp;
- this_pp += n;
- mpn_mullo_n (this_pp, last_pp, tp, n);
+ pp = tp + n;
+ MPN_COPY (pp, bp, n);
+ MPN_COPY (rp, bp, n);
+ --ebi;
}
- expbits = getbits (ep, ebi, windowsize);
-
- /* FIXME: for even expbits, we can init with a mullo. */
- count_trailing_zeros (cnt, expbits);
- ebi -= windowsize;
- ebi += cnt;
- expbits >>= cnt;
-
- MPN_COPY (rp, pp + n * (expbits >> 1), n);
+ flipflop = 0;
do
{
while (getbit (ep, ebi) == 0)
{
mpn_sqrlo (tp, rp, n);
- MPN_COPY (rp, tp, n);
+ MP_PTR_SWAP (rp, tp);
+ flipflop = ! flipflop;
if (--ebi == 0)
goto done;
}
@@ -139,14 +155,8 @@
bits <= windowsize, and such that the least significant bit is 1. */
expbits = getbits (ep, ebi, windowsize);
- this_windowsize = windowsize;
- if (ebi < windowsize)
- {
- this_windowsize -= windowsize - ebi;
- ebi = 0;
- }
- else
- ebi -= windowsize;
+ this_windowsize = MIN (windowsize, ebi);
+ ebi -= this_windowsize;
count_trailing_zeros (cnt, expbits);
this_windowsize -= cnt;
@@ -163,11 +173,16 @@
if (this_windowsize != 0)
mpn_sqrlo (tp, rp, n);
else
- MPN_COPY (tp, rp, n);
+ {
+ MP_PTR_SWAP (rp, tp);
+ flipflop = ! flipflop;
+ }
mpn_mullo_n (rp, tp, pp + n * (expbits >> 1), n);
} while (ebi != 0);
More information about the gmp-commit
mailing list