[Gmp-commit] /var/hg/gmp: mpn/generic/mul_fft.c: Remove an unused branch and ...
mercurial at gmplib.org
mercurial at gmplib.org
Sun Mar 6 15:40:58 CET 2022
details: /var/hg/gmp/rev/c4f5f7fe466b
changeset: 18320:c4f5f7fe466b
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Sun Mar 06 15:38:39 2022 +0100
description:
mpn/generic/mul_fft.c: Remove an unused branch and slightly improve carry propagation
diffstat:
mpn/generic/mul_fft.c | 58 +++++++++++++++++++++++++++++---------------------
1 files changed, 34 insertions(+), 24 deletions(-)
diffs (160 lines):
diff -r 6ff25532f2a4 -r c4f5f7fe466b mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c Fri Mar 04 10:03:38 2022 +0100
+++ b/mpn/generic/mul_fft.c Sun Mar 06 15:38:39 2022 +0100
@@ -237,14 +237,14 @@
r[n] = 0;
/* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
- cc++;
- mpn_incr_u (r, cc);
+ ++cc;
+ MPN_INCR_U (r, n + 1, cc);
- rd++;
+ ++rd;
/* rd might overflow when sh=GMP_NUMB_BITS-1 */
- cc = (rd == 0) ? 1 : rd;
+ cc = rd + (rd == 0);
r = r + m + (rd == 0);
- mpn_incr_u (r, cc);
+ MPN_INCR_U (r, n + 1 - m - (rd == 0), cc);
}
else
{
@@ -284,7 +284,10 @@
r[n] = -mpn_sub_1 (r + m, r + m, n - m, cc);
r[n] -= mpn_sub_1 (r + m, r + m, n - m, rd);
if (r[n] & GMP_LIMB_HIGHBIT)
- r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
+ {
+ r[n] = 0;
+ MPN_INCR_U (r, n + 1, CNST_LIMB(1));
+ }
}
}
@@ -560,7 +563,9 @@
*/
tp[0] += cc;
}
- a[n] = mpn_sub_n (a, tp, tpn, n) && mpn_add_1 (a, a, n, CNST_LIMB(1));
+ cc = mpn_sub_n (a, tp, tpn, n);
+ a[n] = 0;
+ MPN_INCR_U (a, n + 1, cc);
}
}
TMP_FREE;
@@ -657,8 +662,7 @@
}
/* remains to subtract {ap+n, l} from {rp, n+1} */
- cc = mpn_sub_n (rp, rp, ap + n, l);
- rpn -= mpn_sub_1 (rp + l, rp + l, n - l, cc);
+ rpn -= mpn_sub (rp, rp, n, ap + n, l);
if (rpn < 0) /* necessarily rpn = -1 */
rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
return rpn;
@@ -684,12 +688,18 @@
if (nl > Kl) /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+1 */
{
mp_size_t dif = nl - Kl;
- mp_limb_signed_t cy;
tmp = TMP_BALLOC_LIMBS(Kl + 1);
+ tmp[Kl] = 0;
- if (dif > Kl)
+#if ! WANT_OLD_FFT_FULL
+ ASSERT_ALWAYS (dif <= Kl);
+#else
+ /* The comment "We must have nl <= 2*K*l." says that
+ ((dif = nl - Kl) > Kl) should never happen. */
+ if (UNLIKELY (dif > Kl))
{
+ mp_limb_signed_t cy;
int subp = 0;
cy = mpn_sub_n (tmp, n, n + Kl, Kl);
@@ -713,16 +723,20 @@
else
cy -= mpn_add (tmp, tmp, Kl, n, dif);
if (cy >= 0)
- cy = mpn_add_1 (tmp, tmp, Kl, cy);
+ MPN_INCR_U (tmp, Kl + 1, cy);
else
- cy = mpn_sub_1 (tmp, tmp, Kl, -cy);
+ {
+ tmp[Kl] = 1;
+ MPN_DECR_U (tmp, Kl + 1, -cy - 1);
+ }
}
else /* dif <= Kl, i.e. nl <= 2 * Kl */
+#endif
{
+ mp_limb_t cy;
cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
- cy = mpn_add_1 (tmp, tmp, Kl, cy);
+ MPN_INCR_U (tmp, Kl + 1, cy);
}
- tmp[Kl] = cy;
nl = Kl + 1;
n = tmp;
}
@@ -755,7 +769,7 @@
static mp_limb_t
mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
- mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+ mp_ptr *Ap, mp_ptr *Bp, mp_ptr unusedA, mp_ptr B,
mp_size_t nprime, mp_size_t l, mp_size_t Mp,
int **fft_l, mp_ptr T, int sqr)
{
@@ -797,9 +811,7 @@
j = (K - i) & (K - 1);
- if (mpn_add_n (n, n, Bp[j], nprime + 1))
- cc += mpn_add_1 (n + nprime + 1, n + nprime + 1,
- pla - sh - nprime - 1, CNST_LIMB(1));
+ cc += mpn_add (n, n, pla - sh, Bp[j], nprime + 1);
T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
{ /* subtract 2^N'+1 */
@@ -825,8 +837,7 @@
}
else
{
- cc = mpn_sub_1 (p + pla - pl, p + pla - pl, pl, cc);
- ASSERT (cc == 0);
+ MPN_DECR_U (p + pla - pl, pl, cc);
}
}
else
@@ -918,18 +929,17 @@
A = TMP_BALLOC_LIMBS (K * (nprime + 1));
Ap = TMP_BALLOC_MP_PTRS (K);
+ Bp = TMP_BALLOC_MP_PTRS (K);
mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
if (sqr)
{
mp_size_t pla;
pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
B = TMP_BALLOC_LIMBS (pla);
- Bp = TMP_BALLOC_MP_PTRS (K);
}
else
{
B = TMP_BALLOC_LIMBS (K * (nprime + 1));
- Bp = TMP_BALLOC_MP_PTRS (K);
mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
}
h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
@@ -1039,6 +1049,6 @@
ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
__GMP_FREE_FUNC_LIMBS (pad_op, pl2);
/* since the final result has at most pl limbs, no carry out below */
- mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
+ MPN_INCR_U (op + pl2, pl - pl2, (mp_limb_t) c2);
}
#endif
More information about the gmp-commit
mailing list