[Gmp-commit] /var/hg/gmp: Let mpn_hgcd_appr destroy its inputs.

Mon Oct 10 12:07:08 CEST 2011

details:   /var/hg/gmp/rev/27913f466a23
changeset: 14296:27913f466a23
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Oct 10 12:06:39 2011 +0200
description:
Let mpn_hgcd_appr destroy its inputs.

diffstat:

 ChangeLog               |  11 +++++++++++
 gmp-impl.h              |   2 +-
 mpn/generic/hgcd_appr.c |  40 +++++++++++++++++-----------------------
 tests/mpn/t-hgcd_appr.c |  14 ++++++++++----
 tune/common.c           |   2 +-
 tune/speed.h            |  43 -------------------------------------------
 6 files changed, 40 insertions(+), 72 deletions(-)

diffs (252 lines):

diff -r f3cf63f51722 -r 27913f466a23 ChangeLog

--- a/ChangeLog	Sun Oct 09 22:49:28 2011 +0200
+++ b/ChangeLog	Mon Oct 10 12:06:39 2011 +0200
@@ -1,3 +1,14 @@
+2011-10-10  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Interface change.
+	Destroy inputs, let caller make working copies if needed.
+	(mpn_hgcd_appr_itch): Reduced scratch need.
+	* gmp-impl.h: Updated mpn_hgcd_appr prototype.
+	* tests/mpn/t-hgcd_appr.c (one_test): Make working copies for
+	hgcd_appr.
+	* tune/common.c (speed_mpn_hgcd_appr): Use SPEED_ROUTINE_MPN_HGCD_CALL.
+	* tune/speed.h (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): Deleted.
+
 2011-10-09  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/s390_64/copyi.asm: New file.
diff -r f3cf63f51722 -r 27913f466a23 gmp-impl.h
--- a/gmp-impl.h	Sun Oct 09 22:49:28 2011 +0200
+++ b/gmp-impl.h	Mon Oct 10 12:06:39 2011 +0200
@@ -4016,7 +4016,7 @@
 __GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch __GMP_PROTO ((mp_size_t));
 
 #define mpn_hgcd_appr __MPN (hgcd_appr)
-__GMP_DECLSPEC int mpn_hgcd_appr __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+__GMP_DECLSPEC int mpn_hgcd_appr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
 
 #define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
 __GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr));
diff -r f3cf63f51722 -r 27913f466a23 mpn/generic/hgcd_appr.c
--- a/mpn/generic/hgcd_appr.c	Sun Oct 09 22:49:28 2011 +0200
+++ b/mpn/generic/hgcd_appr.c	Mon Oct 10 12:06:39 2011 +0200
@@ -29,23 +29,23 @@
 
 #define TRACE 0
 
-/* Scratch need: Copies of inputs, and n limbs for hgcd_appr_step,
-   total 3*n limbs. */
+/* Scratch need: Current Lehmer algorithm needs n limbs for
+   hgcd_appr_step. */
 mp_size_t
 mpn_hgcd_appr_itch (mp_size_t n)
 {
-  return 3*n;
+  return n;
 }
 
+/* Destroys inputs. */
 int
-mpn_hgcd_appr (mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
 	       struct hgcd_matrix *M, mp_ptr tp)
 {
   unsigned extra_bits;
   mp_size_t s;
   mp_limb_t mask;
   int count;
-  mp_ptr up, vp;
   int success = 0;
 
   ASSERT (n > 0);
@@ -66,12 +66,6 @@
   s = n/2 + 1;
   extra_bits = 0;
 
-  /* FIXME: Could avoid copying now, and do it when applying the first
-     hgcd2 matrix. */
-  up = tp; vp = tp + n; tp = tp + 2*n;
-  MPN_COPY (up, ap, n);
-  MPN_COPY (vp, bp, n);
-
 #if TRACE
   fprintf (stderr, "hgcd_appr: In: n = %u, s = %u\n",
 	   (unsigned) n, (unsigned) s);
@@ -86,7 +80,7 @@
       ASSERT (n > s);
       ASSERT (n <= 2*s);
 
-      nn = mpn_hgcd_step (n, up, vp, s, M, tp);
+      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
       if (!nn)
 	break;
 
@@ -117,8 +111,8 @@
 		 if the result is that it makes makes min(U, V)
 		 smaller than 2^{GMP_NUMB_BITS} s. */
 	      if (s + 1 == n
-		  || mpn_zero_p (up + s + 1, n - s - 1)
-		  || mpn_zero_p (vp + s + 1, n - s - 1))
+		  || mpn_zero_p (ap + s + 1, n - s - 1)
+		  || mpn_zero_p (bp + s + 1, n - s - 1))
 		continue;
 
 	      extra_bits = GMP_NUMB_BITS - 1;
@@ -130,7 +124,7 @@
 	    }
 
 	  /* Drop the p least significant limbs */
-	  up += p; vp += p; n -= p; s -= p;
+	  ap += p; bp += p; n -= p; s -= p;
 
 #if TRACE
 	  fprintf (stderr, "after: n = %u, s = %u, e = %u\n",
@@ -148,17 +142,17 @@
   if (extra_bits > 0)
     {
       /* We can get here only of we have dropped at least one of the
-	 least significant bits, so we can decrement up and vp. We can
+	 least significant bits, so we can decrement ap and bp. We can
 	 then shift left extra bits using mpn_shiftr. */
       /* NOTE: In the unlikely case that n is large, it would be
 	 preferable to do an initial subdiv step to reduce the size
-	 before shifting, but that would mean duplicating
+	 before shifting, but that would mean daplicating
 	 mpn_gcd_subdiv_step with a bit count rather than a limb
 	 count. */
-      up--; vp--;
-      up[0] = mpn_rshift (up+1, up+1, n, GMP_NUMB_BITS - extra_bits);
-      vp[0] = mpn_rshift (vp+1, vp+1, n, GMP_NUMB_BITS - extra_bits);
-      n += (up[n] | vp[n]) > 0;
+      ap--; bp--;
+      ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+      bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+      n += (ap[n] | bp[n]) > 0;
 
       ASSERT (success);
 
@@ -172,7 +166,7 @@
 #if TRACE
       fprintf (stderr, "extra loop: n = %u\n", (unsigned) n);
 #endif
-	  nn = mpn_hgcd_step (n, up, vp, s, M, tp);
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
 #if TRACE
 	  fprintf (stderr, "extra bit reduction: %s\n",
 		   nn ? "success" : "fail");
@@ -190,7 +184,7 @@
       struct hgcd_matrix1 M1;
       ASSERT (s == 1);
 
-      if (mpn_hgcd2 (up[1], up[0], vp[1], vp[0], &M1))
+      if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
 	{
 #if TRACE
 	  fprintf (stderr, "final hgcd2 succeeded\n");
diff -r f3cf63f51722 -r 27913f466a23 tests/mpn/t-hgcd_appr.c
--- a/tests/mpn/t-hgcd_appr.c	Sun Oct 09 22:49:28 2011 +0200
+++ b/tests/mpn/t-hgcd_appr.c	Mon Oct 10 12:06:39 2011 +0200
@@ -157,6 +157,8 @@
 
   mpz_t ref_r0;
   mpz_t ref_r1;
+  mpz_t hgcd_r0;
+  mpz_t hgcd_r1;
 
   int res[2];
   mp_size_t asize;
@@ -205,13 +207,15 @@
   mpz_init_set (ref_r1, b);
   res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
 
+  mpz_init_set (hgcd_r0, a);
+  mpz_init_set (hgcd_r1, b);
   if (bsize < asize)
     {
-      _mpz_realloc (b, asize);
-      MPN_ZERO (b->_mp_d + bsize, asize - bsize);
+      _mpz_realloc (hgcd_r1, asize);
+      MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
     }
-  res[1] = mpn_hgcd_appr (a->_mp_d,
-			  b->_mp_d,
+  res[1] = mpn_hgcd_appr (hgcd_r0->_mp_d,
+			  hgcd_r1->_mp_d,
 			  asize,
 			  &hgcd, hgcd_tp);
 
@@ -264,6 +268,8 @@
   hgcd_ref_clear (&ref);
   mpz_clear (ref_r0);
   mpz_clear (ref_r1);
+  mpz_clear (hgcd_r0);
+  mpz_clear (hgcd_r1);
 
   return res[0];
 }
diff -r f3cf63f51722 -r 27913f466a23 tune/common.c
--- a/tune/common.c	Sun Oct 09 22:49:28 2011 +0200
+++ b/tune/common.c	Mon Oct 10 12:06:39 2011 +0200
@@ -1516,7 +1516,7 @@
 double
 speed_mpn_hgcd_appr (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD_APPR_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
 }
 
 double
diff -r f3cf63f51722 -r 27913f466a23 tune/speed.h
--- a/tune/speed.h	Sun Oct 09 22:49:28 2011 +0200
+++ b/tune/speed.h	Mon Oct 10 12:06:39 2011 +0200
@@ -2680,49 +2680,6 @@
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_HGCD_APPR_CALL(func, itchfunc)		\
-  {									\
-    mp_size_t hgcd_init_itch, hgcd_itch;				\
-    mp_ptr wp, tmp1;							\
-    struct hgcd_matrix hgcd;						\
-    int res;								\
-    unsigned i;								\
-    double t;								\
-    TMP_DECL;								\
-    									\
-    if (s->size < 2)							\
-      return -1;							\
-    									\
-    TMP_MARK;								\
-    									\
-    s->xp[s->size - 1] |= 1;						\
-    s->yp[s->size - 1] |= 1;						\
-    									\
-    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
-    hgcd_itch = itchfunc (s->size);					\
-    									\
-    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp);			\
-    									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, hgcd_itch);				\
-    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
-    speed_cache_fill (s);						\
-    									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      {									\
-	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
-	res = func (s->xp, s->yp, s->size, &hgcd, wp);			\
-      }									\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-    TMP_FREE;								\
-    return t;								\
-  }
-
 /* Run some GCDs of s->size limbs each.  The number of different data values
    is decreased as s->size**2, since GCD is a quadratic algorithm.
    SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT