[Gmp-commit] /var/hg/gmp: New implementation of mpn_gcd_22, with less branches.
mercurial at gmplib.org
mercurial at gmplib.org
Fri Aug 16 22:15:14 UTC 2019
details: /var/hg/gmp/rev/6938e1d9fdf1
changeset: 17821:6938e1d9fdf1
user: Niels M?ller <nisse at lysator.liu.se>
date: Sat Aug 17 00:14:58 2019 +0200
description:
New implementation of mpn_gcd_22, with less branches.
diffstat:
ChangeLog | 5 ++
mpn/generic/gcd_22.c | 108 +++++++++++++++++++++++++++++++++++++-------------
2 files changed, 84 insertions(+), 29 deletions(-)
diffs (138 lines):
diff -r a905169c7cdb -r 6938e1d9fdf1 ChangeLog
--- a/ChangeLog Fri Aug 16 14:38:08 2019 +0200
+++ b/ChangeLog Sat Aug 17 00:14:58 2019 +0200
@@ -1,3 +1,8 @@
+2019-08-17 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/gcd_22.c (mpn_gcd_22): New implementation with less
+ branches.
+
2019-08-16 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/brootinv.c: Shorten computations, using even exponent.
diff -r a905169c7cdb -r 6938e1d9fdf1 mpn/generic/gcd_22.c
--- a/mpn/generic/gcd_22.c Fri Aug 16 14:38:08 2019 +0200
+++ b/mpn/generic/gcd_22.c Sat Aug 17 00:14:58 2019 +0200
@@ -39,43 +39,93 @@
mpn_gcd_22 (mp_limb_t u1, mp_limb_t u0, mp_limb_t v1, mp_limb_t v0)
{
mp_double_limb_t g;
- ASSERT (u0 & 1);
- ASSERT (v0 & 1);
+ ASSERT (u0 & v0 & 1);
+
+ /* Implicit least significant bit */
+ u0 = (u0 >> 1) | (u1 << (GMP_LIMB_BITS - 1));
+ u1 >>= 1;
- /* Check for u0 != v0 needed to ensure that argument to
- * count_trailing_zeros is non-zero. */
- while (u1 != v1 && u0 != v0)
+ v0 = (v0 >> 1) | (v1 << (GMP_LIMB_BITS - 1));
+ v1 >>= 1;
+
+ while (u1 || v1) /* u1 == 0 can happen at most twice per call */
{
- unsigned long int r;
- if (u1 > v1)
+ mp_limb_t vgtu, t1, t0;
+ sub_ddmmss (t1, t0, u1, u0, v1, v0);
+ vgtu = LIMB_HIGHBIT_TO_MASK(t1);
+
+ if (UNLIKELY (t0 == 0))
{
- sub_ddmmss (u1, u0, u1, u0, v1, v0);
- count_trailing_zeros (r, u0);
- u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
- u1 >>= r;
+ if (t1 == 0)
+ {
+ g.d1 = (u1 << 1) | (u0 >> (GMP_LIMB_BITS - 1));
+ g.d0 = (u0 << 1) | 1;
+ return g;
+ }
+ int c;
+ count_trailing_zeros (c, t1);
+
+ /* v1 = min (u1, v1) */
+ v1 += (vgtu & t1);
+ /* u0 = |u1 - v1| */
+ u0 = (t1 ^ vgtu) - vgtu;
+ ASSERT (c < GMP_LIMB_BITS - 1);
+ u0 >>= c + 1;
+ u1 = 0;
}
- else /* u1 < v1. */
+ else
{
- sub_ddmmss (v1, v0, v1, v0, u1, u0);
- count_trailing_zeros (r, v0);
- v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
- v1 >>= r;
+ int c;
+ count_trailing_zeros (c, t0);
+ c++;
+ /* V <-- min (U, V).
+
+ Assembly version should use cmov. Another alternative,
+ avoiding carry propagation, would be
+
+ v0 += vgtu & t0; v1 += vtgu & (u1 - v1);
+ */
+ add_ssaaaa (v1, v0, v1, v0, vgtu & t1, vgtu & t0);
+ /* U <-- |U - V|
+ No carry handling needed in this conditional negation,
+ since t0 != 0. */
+ u0 = (t0 ^ vgtu) - vgtu;
+ u1 = t1 ^ vgtu;
+ if (UNLIKELY (c == GMP_LIMB_BITS))
+ {
+ u0 = u1;
+ u1 = 0;
+ }
+ else
+ {
+ u0 = (u0 >> c) | (u1 << (GMP_LIMB_BITS - c));
+ u1 >>= c;
+ }
}
}
+ while ((v0 | u0) & GMP_LIMB_HIGHBIT)
+ { /* At most two iterations */
+ mp_limb_t vgtu, t0;
+ int c;
+ sub_ddmmss (vgtu, t0, 0, u0, 0, v0);
+ if (UNLIKELY (t0 == 0))
+ {
+ g.d1 = u0 >> (GMP_LIMB_BITS - 1);
+ g.d0 = (u0 << 1) | 1;
+ return g;
+ }
- /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
- if (u1 == v1 && u0 == v0)
- {
- g.d0 = u0; g.d1 = u1;
- return g;
+ /* v <-- min (u, v) */
+ v0 += (vgtu & t0);
+
+ /* u <-- |u - v| */
+ u0 = (t0 ^ vgtu) - vgtu;
+
+ count_trailing_zeros (c, t0);
+ u0 = (u0 >> 1) >> c;
}
- else {
- mp_limb_t t[2];
- t[0] = u0; t[1] = u1;
- v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
- g.d0 = mpn_gcd_1 (t, 1 + (u1 != 0), v0);
- g.d1 = 0;
- return g;
- }
+ g.d0 = mpn_gcd_11 ((u0 << 1) + 1, (v0 << 1) + 1);
+ g.d1 = 0;
+ return g;
}
More information about the gmp-commit
mailing list