[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
Mon Oct 10 22:14:03 CEST 2011
details: /var/hg/gmp/rev/89a8e9d82995
changeset: 14300:89a8e9d82995
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Oct 10 21:26:58 2011 +0200
description:
Implemented subquadratic mpn_hgcd_appr.
details: /var/hg/gmp/rev/ea369b9e08c7
changeset: 14301:ea369b9e08c7
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Oct 10 21:32:28 2011 +0200
description:
Deleted debugging code.
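For context: mpn_hgcd_appr is the "approximate" variant of the half-GCD step at the heart of GMP's subquadratic GCD. Roughly, given n-limb inputs a and b, HGCD computes a 2x2 matrix M with non-negative entries and determinant 1 such that

\[ \begin{pmatrix} a \\ b \end{pmatrix} = M \begin{pmatrix} a' \\ b' \end{pmatrix}, \]

where the reduced pair a', b' has about n/2 limbs; the appr variant needs only M, not the exactly reduced pair, which is what licenses the limb-truncation tricks in the diff below. This summary is a sketch of the intent, not the precise contract; see mpn/generic/hgcd.c for the exact reduction conditions.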
diffstat:
ChangeLog | 14 +
mpn/generic/hgcd_appr.c | 476 +++++++++++++++++++++++++++++++++++------------
tests/mpn/t-hgcd_appr.c | 26 ++-
3 files changed, 389 insertions(+), 127 deletions(-)
diffs (truncated from 611 to 300 lines):
diff -r 124ff06551b5 -r ea369b9e08c7 ChangeLog
--- a/ChangeLog Mon Oct 10 21:18:05 2011 +0200
+++ b/ChangeLog Mon Oct 10 21:32:28 2011 +0200
@@ -1,3 +1,17 @@
+2011-10-10 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/hgcd_appr.c: Deleted debugging code.
+
+ * tests/mpn/t-hgcd_appr.c (main): Added -v flag.
+ (hgcd_appr_valid_p): Increased margin of non-minimality for
+ divide-and-conquer algorithm. Display bit counts only if
+ -v is used.
+
+ * mpn/generic/hgcd_appr.c (submul): New (static) function.
+ (hgcd_matrix_apply): New function.
+ (mpn_hgcd_appr_itch): Account for divide-and-conquer algorithm.
+ (mpn_hgcd_appr): Implemented divide-and-conquer.
+
2011-10-10 Torbjorn Granlund <tege at gmplib.org>
From Marco Trudel:
diff -r 124ff06551b5 -r ea369b9e08c7 mpn/generic/hgcd_appr.c
--- a/mpn/generic/hgcd_appr.c Mon Oct 10 21:18:05 2011 +0200
+++ b/mpn/generic/hgcd_appr.c Mon Oct 10 21:32:28 2011 +0200
@@ -21,20 +21,196 @@
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#include <stdio.h>
-
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#define TRACE 0
+/* Computes R -= A * B. Result must be non-negative. Normalized down
+ to size an, and resulting size is returned. */
+static mp_size_t
+submul (mp_ptr rp, mp_size_t rn,
+ mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+ mp_ptr tp;
+ TMP_DECL;
-/* Scratch need: Current Lehmer algorithm needs n limbs for
- hgcd_appr_step. */
+ ASSERT (bn > 0);
+ ASSERT (an >= bn);
+ ASSERT (rn >= an);
+ ASSERT (an + bn <= rn + 1);
+
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (an + bn);
+
+ mpn_mul (tp, ap, an, bp, bn);
+ if (an + bn > rn)
+ {
+ ASSERT (tp[rn] == 0);
+ bn--;
+ }
+ ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
+ TMP_FREE;
+
+ while (rn > an && (rp[rn-1] == 0))
+ rn--;
+
+ return rn;
+}
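The submul helper above relies on GMP internals (TMP allocation, ASSERT). As a reading aid, here is a minimal standalone sketch of the same contract using only public mpn entry points and malloc; the function name and the relaxed error handling are illustrative, not part of the commit:

  #include <stdlib.h>
  #include <gmp.h>

  /* Sketch: {rp,rn} -= {ap,an} * {bp,bn}, where the caller guarantees
     bn >= 1, an >= bn, rn >= an, an + bn <= rn + 1, and a non-negative
     result.  Returns the size normalized down to at least an limbs. */
  static mp_size_t
  submul_sketch (mp_limb_t *rp, mp_size_t rn,
                 const mp_limb_t *ap, mp_size_t an,
                 const mp_limb_t *bp, mp_size_t bn)
  {
    mp_size_t pn = an + bn;
    mp_limb_t *tp = malloc (pn * sizeof (mp_limb_t));

    mpn_mul (tp, ap, an, bp, bn);   /* tp = a * b, in an + bn limbs */
    if (pn > rn)                    /* then an + bn == rn + 1 and the  */
      pn = rn;                      /* top limb of the product is zero */
    mpn_sub (rp, rp, rn, tp, pn);   /* no borrow, result non-negative */
    free (tp);

    while (rn > an && rp[rn - 1] == 0)
      rn--;                         /* normalize, but never below an limbs */
    return rn;
  }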
+
+/* Computes (a, b) <-- M^{-1} (a; b) */
+/* FIXME:
+ x Take scratch parameter, and figure out scratch need.
+
+ x Use some fallback for small M->n?
+*/
+static mp_size_t
+hgcd_matrix_apply (const struct hgcd_matrix *M,
+ mp_ptr ap, mp_ptr bp,
+ mp_size_t n)
+{
+ mp_size_t an, bn, un, vn, nn;
+ mp_size_t mn[2][2];
+ mp_size_t modn;
+ mp_ptr tp, sp, scratch;
+ mp_limb_t cy;
+ unsigned i, j;
+
+ TMP_DECL;
+
+ ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+ an = n;
+ MPN_NORMALIZE (ap, an);
+ bn = n;
+ MPN_NORMALIZE (bp, bn);
+
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ {
+ mp_size_t k;
+ k = M->n;
+ MPN_NORMALIZE (M->p[i][j], k);
+ mn[i][j] = k;
+ }
+
+ ASSERT (mn[0][0] > 0);
+ ASSERT (mn[1][1] > 0);
+ ASSERT ( (mn[0][1] | mn[1][0]) > 0);
+
+ TMP_MARK;
+
+ if (mn[0][1] == 0)
+ {
+ mp_size_t qn;
+
+ /* A unchanged, M = (1, 0; q, 1) */
+ ASSERT (mn[0][0] == 1);
+ ASSERT (M->p[0][0][0] == 1);
+ ASSERT (mn[1][1] == 1);
+ ASSERT (M->p[1][1][0] == 1);
+
+ /* Put B <-- B - q A */
+ nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
+ }
+ else if (mn[1][0] == 0)
+ {
+ /* B unchanged, M = (1, q; 0, 1) */
+ ASSERT (mn[0][0] == 1);
+ ASSERT (M->p[0][0][0] == 1);
+ ASSERT (mn[1][1] == 1);
+ ASSERT (M->p[1][1][0] == 1);
+
+ /* Put A <-- A - q * B */
+ nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
+ }
+ else
+ {
+ /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01.
+ B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */
+ un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
+ vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
+
+ nn = MAX (un, vn);
+ /* In the range of interest, mulmod_bnm1 should always beat mullo. */
+ modn = mpn_mulmod_bnm1_next_size (nn + 1);
+
+ scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
+ tp = TMP_ALLOC_LIMBS (modn);
+ sp = TMP_ALLOC_LIMBS (modn);
+
+ ASSERT (n <= 2*modn);
+
+ if (n > modn)
+ {
+ cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
+ MPN_INCR_U (ap, modn, cy);
+
+ cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
+ MPN_INCR_U (bp, modn, cy);
+
+ n = modn;
+ }
+
+ mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
+ mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
+
+ /* FIXME: Handle the small n case in some better way. */
+ if (n + mn[1][1] < modn)
+ MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
+ if (n + mn[0][1] < modn)
+ MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
+
+ cy = mpn_sub_n (tp, tp, sp, modn);
+ MPN_DECR_U (tp, modn, cy);
+
+ ASSERT (mpn_zero_p (tp + nn, modn - nn));
+
+ mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
+ MPN_COPY (ap, tp, nn);
+ mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
+
+ if (n + mn[1][0] < modn)
+ MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
+ if (n + mn[0][0] < modn)
+ MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
+
+ cy = mpn_sub_n (tp, tp, sp, modn);
+ MPN_DECR_U (tp, modn, cy);
+
+ ASSERT (mpn_zero_p (tp + nn, modn - nn));
+ MPN_COPY (bp, tp, nn);
+
+ while ( (ap[nn-1] | bp[nn-1]) == 0)
+ {
+ nn--;
+ ASSERT (nn > 0);
+ }
+ }
+ TMP_FREE;
+
+ return nn;
+}
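A note on why hgcd_matrix_apply needs only multiplications and subtractions: an hgcd matrix has determinant m00*m11 - m01*m10 = 1, so inverting it requires no division:

\[ M^{-1} = \begin{pmatrix} m_{00} & m_{01} \\ m_{10} & m_{11} \end{pmatrix}^{-1}
         = \begin{pmatrix} m_{11} & -m_{01} \\ -m_{10} & m_{00} \end{pmatrix},
   \qquad a' = m_{11} a - m_{01} b, \quad b' = m_{00} b - m_{10} a . \]

Both results are non-negative and fit in nn limbs (the un/vn bounds computed above), and modn > nn, so the four products can be formed modulo B^modn - 1 (B = 2^GMP_NUMB_BITS) with mpn_mulmod_bnm1 and the exact values recovered from their residues. This assumes the standard determinant-1 invariant of hgcd matrices, which the commit itself does not restate.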
+
+/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
+ HGCD_THRESHOLD at the end? */
mp_size_t
mpn_hgcd_appr_itch (mp_size_t n)
{
- return n;
+ if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+ return n;
+ else
+ {
+ unsigned k;
+ int count;
+ mp_size_t nscaled;
+
+ /* Get the recursion depth. */
+ nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
+ count_leading_zeros (count, nscaled);
+ k = GMP_LIMB_BITS - count;
+
+ return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+ }
}
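The k computed above is the bit length of nscaled, i.e. the depth of the divide-and-conquer recursion: each level roughly halves n until it falls below HGCD_APPR_THRESHOLD. A worked instance, assuming for illustration HGCD_APPR_THRESHOLD = 50 (the real value is tuned per CPU):

  /* n = 1000:  nscaled = (1000 - 1) / (50 - 1) = 20 = 10100 in binary,
     so k = GMP_LIMB_BITS - count_leading_zeros(20) = 5 levels, and the
     bound returned is 20 * ((1000 + 3) / 4) + 22 * 5 + HGCD_THRESHOLD
     = 5110 + HGCD_THRESHOLD limbs of scratch. */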
/* Destroys inputs. */
@@ -42,10 +218,8 @@
mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
struct hgcd_matrix *M, mp_ptr tp)
{
- unsigned extra_bits;
mp_size_t s;
mp_limb_t mask;
- int count;
int success = 0;
ASSERT (n > 0);
@@ -57,104 +231,18 @@
/* Implies s = n. A fairly uninteresting case but exercised by the
random inputs of the testsuite. */
return 0;
-
+
+ ASSERT ((n+1)/2 - 1 < M->alloc);
+
/* We aim for reduction to GMP_NUMB_BITS * s bits. But each time
we discard some of the least significant limbs, we must keep one
additional bit to account for the truncation error. We maintain
GMP_NUMB_BITS * s - extra_bits as the current target size. */
s = n/2 + 1;
- extra_bits = 0;
-
-#if TRACE
- fprintf (stderr, "hgcd_appr: In: n = %u, s = %u\n",
- (unsigned) n, (unsigned) s);
-#endif
- while (n > 2)
+ if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
{
- mp_size_t nn;
-
-#if TRACE
- fprintf (stderr, "loop: n = %u\n", (unsigned) n);
-#endif
- ASSERT (n > s);
- ASSERT (n <= 2*s);
-
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
- if (!nn)
- break;
-
- n = nn;
- success = 1;
-
- /* We can truncate and discard the lower p bits whenever nbits <=
- 2*sbits - p. To account for the truncation error, we must
- adjust
-
- sbits <-- sbits + 1 - p,
-
- rather than just sbits <-- sbits - p. This adjustment makes
- the produced matrix slightly smaller than it could be. */
-
- if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
- {
- mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
-
-#if TRACE
- fprintf (stderr, "before: n = %u, s = %u, e = %u, p = %u\n",
- (unsigned) n, (unsigned) s, (unsigned) extra_bits,
- (unsigned) p);
-#endif
- if (extra_bits == 0)
- {
- /* We cross a limb boundary and bump s. We can't do that
- if the result is that it makes min(U, V)
- smaller than 2^{GMP_NUMB_BITS} s. */
- if (s + 1 == n
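The diff is cut off at this point (truncated from 611 to 300 lines, per the note above). For the visible part of the deleted Lehmer-style loop: p low limbs are discarded when

\[ \mathrm{GMP\_NUMB\_BITS}\,(n + 1) + 2\,\mathrm{extra\_bits} \le 2\,\mathrm{GMP\_NUMB\_BITS}\,s,
   \qquad p = \left\lfloor \frac{\mathrm{GMP\_NUMB\_BITS}\,(2 s - n) - 2\,\mathrm{extra\_bits}}{\mathrm{GMP\_NUMB\_BITS}} \right\rfloor, \]

with one extra bit kept per truncation to absorb the truncation error, as the deleted comment explains. The new divide-and-conquer path that replaces this loop lies almost entirely in the truncated remainder of the diff.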