[Gmp-commit] /var/hg/gmp: Let mpn_hgcd_appr destroy its inputs.
mercurial at gmplib.org
mercurial at gmplib.org
Mon Oct 10 12:07:08 CEST 2011
details: /var/hg/gmp/rev/27913f466a23
changeset: 14296:27913f466a23
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Oct 10 12:06:39 2011 +0200
description:
Let mpn_hgcd_appr destroy its inputs.
diffstat:
ChangeLog | 11 +++++++++++
gmp-impl.h | 2 +-
mpn/generic/hgcd_appr.c | 40 +++++++++++++++++-----------------------
tests/mpn/t-hgcd_appr.c | 14 ++++++++++----
tune/common.c | 2 +-
tune/speed.h | 43 -------------------------------------------
6 files changed, 40 insertions(+), 72 deletions(-)
diffs (252 lines):
diff -r f3cf63f51722 -r 27913f466a23 ChangeLog
--- a/ChangeLog Sun Oct 09 22:49:28 2011 +0200
+++ b/ChangeLog Mon Oct 10 12:06:39 2011 +0200
@@ -1,3 +1,14 @@
+2011-10-10 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Interface change.
+ Destroy inputs, let caller make working copies if needed.
+ (mpn_hgcd_appr_itch): Reduced scratch need.
+ * gmp-impl.h: Updated mpn_hgcd_appr prototype.
+ * tests/mpn/t-hgcd_appr.c (one_test): Make working copies for
+ hgcd_appr.
+ * tune/common.c (speed_mpn_hgcd_appr): Use SPEED_ROUTINE_MPN_HGCD_CALL.
+ * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): Deleted.
+
2011-10-09 Torbjorn Granlund <tege at gmplib.org>
* mpn/s390_64/copyi.asm: New file.
diff -r f3cf63f51722 -r 27913f466a23 gmp-impl.h
--- a/gmp-impl.h Sun Oct 09 22:49:28 2011 +0200
+++ b/gmp-impl.h Mon Oct 10 12:06:39 2011 +0200
@@ -4016,7 +4016,7 @@
__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch __GMP_PROTO ((mp_size_t));
#define mpn_hgcd_appr __MPN (hgcd_appr)
-__GMP_DECLSPEC int mpn_hgcd_appr __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+__GMP_DECLSPEC int mpn_hgcd_appr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr));
diff -r f3cf63f51722 -r 27913f466a23 mpn/generic/hgcd_appr.c
--- a/mpn/generic/hgcd_appr.c Sun Oct 09 22:49:28 2011 +0200
+++ b/mpn/generic/hgcd_appr.c Mon Oct 10 12:06:39 2011 +0200
@@ -29,23 +29,23 @@
#define TRACE 0
-/* Scratch need: Copies of inputs, and n limbs for hgcd_appr_step,
- total 3*n limbs. */
+/* Scratch need: Current Lehmer algorithm needs n limbs for
+ hgcd_appr_step. */
mp_size_t
mpn_hgcd_appr_itch (mp_size_t n)
{
- return 3*n;
+ return n;
}
+/* Destroys inputs. */
int
-mpn_hgcd_appr (mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
struct hgcd_matrix *M, mp_ptr tp)
{
unsigned extra_bits;
mp_size_t s;
mp_limb_t mask;
int count;
- mp_ptr up, vp;
int success = 0;
ASSERT (n > 0);
@@ -66,12 +66,6 @@
s = n/2 + 1;
extra_bits = 0;
- /* FIXME: Could avoid copying now, and do it when applying the first
- hgcd2 matrix. */
- up = tp; vp = tp + n; tp = tp + 2*n;
- MPN_COPY (up, ap, n);
- MPN_COPY (vp, bp, n);
-
#if TRACE
fprintf (stderr, "hgcd_appr: In: n = %u, s = %u\n",
(unsigned) n, (unsigned) s);
@@ -86,7 +80,7 @@
ASSERT (n > s);
ASSERT (n <= 2*s);
- nn = mpn_hgcd_step (n, up, vp, s, M, tp);
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
if (!nn)
break;
@@ -117,8 +111,8 @@
if the result is that it makes makes min(U, V)
smaller than 2^{GMP_NUMB_BITS} s. */
if (s + 1 == n
- || mpn_zero_p (up + s + 1, n - s - 1)
- || mpn_zero_p (vp + s + 1, n - s - 1))
+ || mpn_zero_p (ap + s + 1, n - s - 1)
+ || mpn_zero_p (bp + s + 1, n - s - 1))
continue;
extra_bits = GMP_NUMB_BITS - 1;
@@ -130,7 +124,7 @@
}
/* Drop the p least significant limbs */
- up += p; vp += p; n -= p; s -= p;
+ ap += p; bp += p; n -= p; s -= p;
#if TRACE
fprintf (stderr, "after: n = %u, s = %u, e = %u\n",
@@ -148,17 +142,17 @@
if (extra_bits > 0)
{
/* We can get here only of we have dropped at least one of the
- least significant bits, so we can decrement up and vp. We can
+ least significant bits, so we can decrement ap and bp. We can
then shift left extra bits using mpn_shiftr. */
/* NOTE: In the unlikely case that n is large, it would be
preferable to do an initial subdiv step to reduce the size
- before shifting, but that would mean duplicating
+ before shifting, but that would mean daplicating
mpn_gcd_subdiv_step with a bit count rather than a limb
count. */
- up--; vp--;
- up[0] = mpn_rshift (up+1, up+1, n, GMP_NUMB_BITS - extra_bits);
- vp[0] = mpn_rshift (vp+1, vp+1, n, GMP_NUMB_BITS - extra_bits);
- n += (up[n] | vp[n]) > 0;
+ ap--; bp--;
+ ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+ bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+ n += (ap[n] | bp[n]) > 0;
ASSERT (success);
@@ -172,7 +166,7 @@
#if TRACE
fprintf (stderr, "extra loop: n = %u\n", (unsigned) n);
#endif
- nn = mpn_hgcd_step (n, up, vp, s, M, tp);
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
#if TRACE
fprintf (stderr, "extra bit reduction: %s\n",
nn ? "success" : "fail");
@@ -190,7 +184,7 @@
struct hgcd_matrix1 M1;
ASSERT (s == 1);
- if (mpn_hgcd2 (up[1], up[0], vp[1], vp[0], &M1))
+ if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
{
#if TRACE
fprintf (stderr, "final hgcd2 succeeded\n");
diff -r f3cf63f51722 -r 27913f466a23 tests/mpn/t-hgcd_appr.c
--- a/tests/mpn/t-hgcd_appr.c Sun Oct 09 22:49:28 2011 +0200
+++ b/tests/mpn/t-hgcd_appr.c Mon Oct 10 12:06:39 2011 +0200
@@ -157,6 +157,8 @@
mpz_t ref_r0;
mpz_t ref_r1;
+ mpz_t hgcd_r0;
+ mpz_t hgcd_r1;
int res[2];
mp_size_t asize;
@@ -205,13 +207,15 @@
mpz_init_set (ref_r1, b);
res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
+ mpz_init_set (hgcd_r0, a);
+ mpz_init_set (hgcd_r1, b);
if (bsize < asize)
{
- _mpz_realloc (b, asize);
- MPN_ZERO (b->_mp_d + bsize, asize - bsize);
+ _mpz_realloc (hgcd_r1, asize);
+ MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
}
- res[1] = mpn_hgcd_appr (a->_mp_d,
- b->_mp_d,
+ res[1] = mpn_hgcd_appr (hgcd_r0->_mp_d,
+ hgcd_r1->_mp_d,
asize,
&hgcd, hgcd_tp);
@@ -264,6 +268,8 @@
hgcd_ref_clear (&ref);
mpz_clear (ref_r0);
mpz_clear (ref_r1);
+ mpz_clear (hgcd_r0);
+ mpz_clear (hgcd_r1);
return res[0];
}
diff -r f3cf63f51722 -r 27913f466a23 tune/common.c
--- a/tune/common.c Sun Oct 09 22:49:28 2011 +0200
+++ b/tune/common.c Mon Oct 10 12:06:39 2011 +0200
@@ -1516,7 +1516,7 @@
double
speed_mpn_hgcd_appr (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD_APPR_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
}
double
diff -r f3cf63f51722 -r 27913f466a23 tune/speed.h
--- a/tune/speed.h Sun Oct 09 22:49:28 2011 +0200
+++ b/tune/speed.h Mon Oct 10 12:06:39 2011 +0200
@@ -2680,49 +2680,6 @@
return t; \
}
-#define SPEED_ROUTINE_MPN_HGCD_APPR_CALL(func, itchfunc) \
- { \
- mp_size_t hgcd_init_itch, hgcd_itch; \
- mp_ptr wp, tmp1; \
- struct hgcd_matrix hgcd; \
- int res; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- if (s->size < 2) \
- return -1; \
- \
- TMP_MARK; \
- \
- s->xp[s->size - 1] |= 1; \
- s->yp[s->size - 1] |= 1; \
- \
- hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
- hgcd_itch = itchfunc (s->size); \
- \
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, wp, hgcd_itch); \
- speed_operand_dst (s, tmp1, hgcd_init_itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- { \
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
- res = func (s->xp, s->yp, s->size, &hgcd, wp); \
- } \
- while (--i != 0); \
- t = speed_endtime (); \
- TMP_FREE; \
- return t; \
- }
-
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s->size**2, since GCD is a quadratic algorithm.
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
More information about the gmp-commit
mailing list