[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Oct 7 13:40:40 CEST 2011
details: /var/hg/gmp/rev/5fe92e6f2726
changeset: 14274:5fe92e6f2726
user: Niels M?ller <nisse at lysator.liu.se>
date: Fri Oct 07 13:29:33 2011 +0200
description:
New mpn_hgcd_appr code.
details: /var/hg/gmp/rev/e06307af03ac
changeset: 14275:e06307af03ac
user: Niels M?ller <nisse at lysator.liu.se>
date: Fri Oct 07 13:36:07 2011 +0200
description:
mpn_hgcd_appr testing
details: /var/hg/gmp/rev/2352c23fca8a
changeset: 14276:2352c23fca8a
user: Niels M?ller <nisse at lysator.liu.se>
date: Fri Oct 07 13:39:40 2011 +0200
description:
speed support for mpn_hgcd_appr
diffstat:
ChangeLog | 24 +
configure.in | 5 +-
gmp-impl.h | 9 +
mpn/generic/hgcd.c | 99 +-------
mpn/generic/hgcd_appr.c | 174 ++++++++++++++
mpn/generic/hgcd_step.c | 119 +++++++++
tests/mpn/Makefile.am | 2 +-
tests/mpn/t-hgcd_appr.c | 578 ++++++++++++++++++++++++++++++++++++++++++++++++
tune/common.c | 6 +
tune/speed.c | 1 +
tune/speed.h | 44 +++
11 files changed, 964 insertions(+), 97 deletions(-)
diffs (truncated from 1183 to 300 lines):
diff -r 7f4d6a9de980 -r 2352c23fca8a ChangeLog
--- a/ChangeLog Fri Oct 07 10:21:08 2011 +0200
+++ b/ChangeLog Fri Oct 07 13:39:40 2011 +0200
@@ -1,3 +1,27 @@
+2011-10-07 Niels Möller <nisse at lysator.liu.se>
+
+ * tune/speed.h (speed_mpn_hgcd_appr): New prototype.
+ (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): New macro.
+ * tune/common.c (speed_mpn_hgcd_appr): New function.
+ * tune/speed.c (routine): Added mpn_hgcd_appr.
+
+ * tests/mpn/t-hgcd_appr.c: New file.
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-hgcd_appr.
+
+ * configure.in (gmp_mpn_functions): Added hgcd_step and hgcd_appr.
+
+ * gmp-impl.h: Added prototypes for mpn_hgcd_step,
+ mpn_hgcd_appr_itch and mpn_hgcd_appr.
+
+ * mpn/generic/hgcd_appr.c: New file.
+
+ * mpn/generic/hgcd_step.c: New file, extracted from hgcd.c.
+ (mpn_hgcd_step): Renamed, from...
+ * mpn/generic/hgcd.c (hgcd_step): ...old name. Renamed and moved
+ to hgcd_step.c.
+ (hgcd_hook): Also moved to hgcd_step.c.
+ (mpn_hgcd): Updated for hgcd_step renaming.
+
2011-10-04 Torbjorn Granlund <tege at gmplib.org>
* mpn/s390_64/submul_1.asm: New file.
diff -r 7f4d6a9de980 -r 2352c23fca8a configure.in
--- a/configure.in Fri Oct 07 10:21:08 2011 +0200
+++ b/configure.in Fri Oct 07 13:39:40 2011 +0200
@@ -2550,9 +2550,10 @@
perfsqr perfpow \
gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step \
gcdext_lehmer \
- div_q tdiv_qr jacbase jacobi_2 jacobi get_d \
+ div_q tdiv_qr jacbase jacobi_2 jacobi get_d \
matrix22_mul matrix22_mul1_inverse_vector \
- hgcd_matrix hgcd2 hgcd hgcd2_jacobi hgcd_jacobi \
+ hgcd_matrix hgcd2 hgcd_step hgcd hgcd_appr \
+ hgcd2_jacobi hgcd_jacobi \
mullo_n mullo_basecase \
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
toom33_mul toom43_mul toom53_mul toom63_mul \
diff -r 7f4d6a9de980 -r 2352c23fca8a gmp-impl.h
--- a/gmp-impl.h Fri Oct 07 10:21:08 2011 +0200
+++ b/gmp-impl.h Fri Oct 07 13:39:40 2011 +0200
@@ -4003,12 +4003,21 @@
#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+#define mpn_hgcd_step __MPN(hgcd_step)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_step __GMP_PROTO ((mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+
#define mpn_hgcd_itch __MPN (hgcd_itch)
__GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
#define mpn_hgcd __MPN (hgcd)
__GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch __GMP_PROTO ((mp_size_t));
+
+#define mpn_hgcd_appr __MPN (hgcd_appr)
+__GMP_DECLSPEC int mpn_hgcd_appr __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+
#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr));
diff -r 7f4d6a9de980 -r 2352c23fca8a mpn/generic/hgcd.c
--- a/mpn/generic/hgcd.c Fri Oct 07 10:21:08 2011 +0200
+++ b/mpn/generic/hgcd.c Fri Oct 07 13:39:40 2011 +0200
@@ -26,98 +26,6 @@
#include "longlong.h"
-static void
-hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
- mp_srcptr qp, mp_size_t qn, int d)
-{
- ASSERT (!gp);
- ASSERT (d >= 0);
- ASSERT (d <= 1);
-
- MPN_NORMALIZE (qp, qn);
- if (qn > 0)
- {
- struct hgcd_matrix *M = (struct hgcd_matrix *) p;
- /* NOTES: This is a bit ugly. A tp area is passed to
- gcd_subdiv_step, which stores q at the start of that area. We
- now use the rest. */
- mp_ptr tp = (mp_ptr) qp + qn;
- mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
- }
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
- division. Reduces the size by almost one limb or more, but never
- below the given size s. Return new size for a and b, or 0 if no
- more steps are possible.
-
- If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
- limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
- fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
- hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
- resulting size of M.
-
- If N is the input size to the calling hgcd, then s = floor(N/2) +
- 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
- < N, so N is sufficient.
-*/
-
-static mp_size_t
-hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
- struct hgcd_matrix *M, mp_ptr tp)
-{
- struct hgcd_matrix1 M1;
- mp_limb_t mask;
- mp_limb_t ah, al, bh, bl;
- mp_size_t an, bn, qn;
- int col;
-
- ASSERT (n > s);
-
- mask = ap[n-1] | bp[n-1];
- ASSERT (mask > 0);
-
- if (n == s + 1)
- {
- if (mask < 4)
- goto subtract;
-
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else if (mask & GMP_NUMB_HIGHBIT)
- {
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else
- {
- int shift;
-
- count_leading_zeros (shift, mask);
- ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
- al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
- bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
- bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
- }
-
- /* Try an mpn_hgcd2 step */
- if (mpn_hgcd2 (ah, al, bh, bl, &M1))
- {
- /* Multiply M <- M * M1 */
- mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-
- /* Can't swap inputs, so we need to copy. */
- MPN_COPY (tp, ap, n);
- /* Multiply M1^{-1} (a;b) */
- return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
- }
-
- subtract:
-
- return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
-}
-
/* Size analysis for hgcd:
For the recursive calls, we have n1 <= ceil(n / 2). Then the
@@ -191,12 +99,15 @@
n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
success = 1;
}
+
+ /* NOTE: It apppears this loop never runs more than once. */
while (n > n2)
{
/* Needs n + 1 storage */
- nn = hgcd_step (n, ap, bp, s, M, tp);
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
if (!nn)
return success ? n : 0;
+
n = nn;
success = 1;
}
@@ -248,7 +159,7 @@
for (;;)
{
/* Needs s+3 < n */
- nn = hgcd_step (n, ap, bp, s, M, tp);
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
if (!nn)
return success ? n : 0;
diff -r 7f4d6a9de980 -r 2352c23fca8a mpn/generic/hgcd_appr.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/hgcd_appr.c Fri Oct 07 13:39:40 2011 +0200
@@ -0,0 +1,174 @@
+/* hgcd_appr.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define TRACE 0
+
+#define MPN_BITCOUNT(bits, n, high) do { \
+ int __cnt; \
+ ASSERT ((n) > 0); \
+ count_leading_zeros (__cnt, (high)); \
+ (bits) = (n) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
+ } while (0)
+
+/* Scratch need: Copies of inputs, and n limbs for hgcd_appr_step,
+ total 3*n limbs. */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+ return 3*n;
+}
+
+int
+mpn_hgcd_appr (mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ mp_bitcnt_t nbits;
+ unsigned extra_bits;
+ mp_size_t s;
+ mp_limb_t mask;
+ int count;
+ mp_ptr up, vp;
+ int success = 0;
+
+ ASSERT (n > 0);
+ mask = ap[n-1] | bp[n-1];
+
+ ASSERT (mask > 0);
+
+ MPN_BITCOUNT (nbits, n, mask);
+
+ /* We aim for reduction of to size sbits, where sbits = nbits/2 + 1
+ = GMP_NUMB_BITS * s - extra_bits. We keep track of extra_bits
+ (and implicitly sbits) to decide when to discard the least
+ significant limbs, but we can't actually reduce to size less then
+ s limbs, since that may give matrix entries exceeding n-s limb
+ and overflow the current allocation for the matrix entries. */
+
+ s = n/2 + 1;
+ extra_bits = GMP_NUMB_BITS * s - (nbits/2 + 1);
+
+ ASSERT (extra_bits >= GMP_NUMB_BITS/2 - 1);
+ ASSERT (extra_bits < 3*GMP_NUMB_BITS/2);
+
+ /* FIXME: Could avoid copying now, and do it when applying the first
+ hgcd2 matrix. */
+ up = tp; vp = tp + n; tp = tp + 2*n;
+ MPN_COPY (up, ap, n);
+ MPN_COPY (vp, bp, n);
+
+#if TRACE
+ fprintf (stderr, "hgcd_appr: In: n = %u, s = %u, nbits = %u, extra = %u\n",
+ (unsigned) n, (unsigned) s, (unsigned) nbits, (unsigned) extra_bits);
+#endif
+ while (n > 2)
+ {
+ mp_size_t nn;
+
+#if TRACE
+ fprintf (stderr, "loop: n = %u\n", (unsigned) n);
+#endif
+ ASSERT (n > s);
More information about the gmp-commit
mailing list