[Gmp-commit] /var/hg/gmp: 3 new changesets

Fri Oct 7 13:40:40 CEST 2011

details:   /var/hg/gmp/rev/5fe92e6f2726
changeset: 14274:5fe92e6f2726
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Fri Oct 07 13:29:33 2011 +0200
description:
New mpn_hgcd_appr code.

details:   /var/hg/gmp/rev/e06307af03ac
changeset: 14275:e06307af03ac
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Fri Oct 07 13:36:07 2011 +0200
description:
mpn_hgcd_appr testing

details:   /var/hg/gmp/rev/2352c23fca8a
changeset: 14276:2352c23fca8a
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Fri Oct 07 13:39:40 2011 +0200
description:
speed support for mpn_hgcd_appr

diffstat:

 ChangeLog               |   24 +
 configure.in            |    5 +-
 gmp-impl.h              |    9 +
 mpn/generic/hgcd.c      |   99 +-------
 mpn/generic/hgcd_appr.c |  174 ++++++++++++++
 mpn/generic/hgcd_step.c |  119 +++++++++
 tests/mpn/Makefile.am   |    2 +-
 tests/mpn/t-hgcd_appr.c |  578 ++++++++++++++++++++++++++++++++++++++++++++++++
 tune/common.c           |    6 +
 tune/speed.c            |    1 +
 tune/speed.h            |   44 +++
 11 files changed, 964 insertions(+), 97 deletions(-)

diffs (truncated from 1183 to 300 lines):

diff -r 7f4d6a9de980 -r 2352c23fca8a ChangeLog

--- a/ChangeLog	Fri Oct 07 10:21:08 2011 +0200
+++ b/ChangeLog	Fri Oct 07 13:39:40 2011 +0200
@@ -1,3 +1,27 @@
+2011-10-07  Niels Möller  <nisse at lysator.liu.se>
+
+	* tune/speed.h (speed_mpn_hgcd_appr): New prototype.
+	(SPEED_ROUTINE_MPN_HGCD_APPR_CALL): New macro.
+	* tune/common.c (speed_mpn_hgcd_appr): New function.
+	* tune/speed.c (routine): Added mpn_hgcd_appr.
+
+	* tests/mpn/t-hgcd_appr.c: New file.
+	* tests/mpn/Makefile.am (check_PROGRAMS): Added t-hgcd_appr.
+
+	* configure.in (gmp_mpn_functions): Added hgcd_step and hgcd_appr.
+
+	* gmp-impl.h: Added prototypes for mpn_hgcd_step,
+	mpn_hgcd_appr_itch and mpn_hgcd_appr.
+
+	* mpn/generic/hgcd_appr.c: New file.
+
+	* mpn/generic/hgcd_step.c: New file, extracted from hgcd.c.
+	(mpn_hgcd_step): Renamed, from...
+	* mpn/generic/hgcd.c (hgcd_step): ...old name. Renamed and moved
+	to hgcd_step.c.
+	(hgcd_hook): Also moved to hgcd_step.c.
+	(mpn_hgcd): Updated for hgcd_step renaming.
+
 2011-10-04  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/s390_64/submul_1.asm: New file.
diff -r 7f4d6a9de980 -r 2352c23fca8a configure.in
--- a/configure.in	Fri Oct 07 10:21:08 2011 +0200
+++ b/configure.in	Fri Oct 07 13:39:40 2011 +0200
@@ -2550,9 +2550,10 @@
   perfsqr perfpow							   \
   gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step				   \
   gcdext_lehmer								   \
-  div_q tdiv_qr jacbase jacobi_2 jacobi get_d		   \
+  div_q tdiv_qr jacbase jacobi_2 jacobi get_d				   \
   matrix22_mul matrix22_mul1_inverse_vector				   \
-  hgcd_matrix hgcd2 hgcd hgcd2_jacobi hgcd_jacobi			   \
+  hgcd_matrix hgcd2 hgcd_step hgcd hgcd_appr				   \
+  hgcd2_jacobi hgcd_jacobi						   \
   mullo_n mullo_basecase						   \
   toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul		   \
   toom33_mul toom43_mul toom53_mul toom63_mul				   \
diff -r 7f4d6a9de980 -r 2352c23fca8a gmp-impl.h
--- a/gmp-impl.h	Fri Oct 07 10:21:08 2011 +0200
+++ b/gmp-impl.h	Fri Oct 07 13:39:40 2011 +0200
@@ -4003,12 +4003,21 @@
 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
 
+#define mpn_hgcd_step __MPN(hgcd_step)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_step __GMP_PROTO ((mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+
 #define mpn_hgcd_itch __MPN (hgcd_itch)
 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
 
 #define mpn_hgcd __MPN (hgcd)
 __GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
 
+#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch __GMP_PROTO ((mp_size_t));
+
+#define mpn_hgcd_appr __MPN (hgcd_appr)
+__GMP_DECLSPEC int mpn_hgcd_appr __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+
 #define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
 __GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr));
 
diff -r 7f4d6a9de980 -r 2352c23fca8a mpn/generic/hgcd.c
--- a/mpn/generic/hgcd.c	Fri Oct 07 10:21:08 2011 +0200
+++ b/mpn/generic/hgcd.c	Fri Oct 07 13:39:40 2011 +0200
@@ -26,98 +26,6 @@
 #include "longlong.h"
 
 
-static void
-hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
-	   mp_srcptr qp, mp_size_t qn, int d)
-{
-  ASSERT (!gp);
-  ASSERT (d >= 0);
-  ASSERT (d <= 1);
-
-  MPN_NORMALIZE (qp, qn);
-  if (qn > 0)
-    {
-      struct hgcd_matrix *M = (struct hgcd_matrix *) p;
-      /* NOTES: This is a bit ugly. A tp area is passed to
-	 gcd_subdiv_step, which stores q at the start of that area. We
-	 now use the rest. */
-      mp_ptr tp = (mp_ptr) qp + qn;
-      mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
-    }
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
-   division. Reduces the size by almost one limb or more, but never
-   below the given size s. Return new size for a and b, or 0 if no
-   more steps are possible.
-
-   If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
-   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
-   fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
-   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
-   resulting size of M.
-
-   If N is the input size to the calling hgcd, then s = floor(N/2) +
-   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
-   < N, so N is sufficient.
-*/
-
-static mp_size_t
-hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
-	   struct hgcd_matrix *M, mp_ptr tp)
-{
-  struct hgcd_matrix1 M1;
-  mp_limb_t mask;
-  mp_limb_t ah, al, bh, bl;
-  mp_size_t an, bn, qn;
-  int col;
-
-  ASSERT (n > s);
-
-  mask = ap[n-1] | bp[n-1];
-  ASSERT (mask > 0);
-
-  if (n == s + 1)
-    {
-      if (mask < 4)
-	goto subtract;
-
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else if (mask & GMP_NUMB_HIGHBIT)
-    {
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else
-    {
-      int shift;
-
-      count_leading_zeros (shift, mask);
-      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
-      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
-      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
-      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
-    }
-
-  /* Try an mpn_hgcd2 step */
-  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
-    {
-      /* Multiply M <- M * M1 */
-      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-
-      /* Can't swap inputs, so we need to copy. */
-      MPN_COPY (tp, ap, n);
-      /* Multiply M1^{-1} (a;b) */
-      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
-    }
-
- subtract:
-
-  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
-}
-
 /* Size analysis for hgcd:
 
    For the recursive calls, we have n1 <= ceil(n / 2). Then the
@@ -191,12 +99,15 @@
 	  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
 	  success = 1;
 	}
+
+      /* NOTE: It apppears this loop never runs more than once. */
       while (n > n2)
 	{
 	  /* Needs n + 1 storage */
-	  nn = hgcd_step (n, ap, bp, s, M, tp);
+	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
 	  if (!nn)
 	    return success ? n : 0;
+
 	  n = nn;
 	  success = 1;
 	}
@@ -248,7 +159,7 @@
   for (;;)
     {
       /* Needs s+3 < n */
-      nn = hgcd_step (n, ap, bp, s, M, tp);
+      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
       if (!nn)
 	return success ? n : 0;
 
diff -r 7f4d6a9de980 -r 2352c23fca8a mpn/generic/hgcd_appr.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/hgcd_appr.c	Fri Oct 07 13:39:40 2011 +0200
@@ -0,0 +1,174 @@
+/* hgcd_appr.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define TRACE 0
+
+#define MPN_BITCOUNT(bits, n, high) do {			\
+    int __cnt;							\
+    ASSERT ((n) > 0);						\
+    count_leading_zeros (__cnt, (high));			\
+    (bits) = (n) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);	\
+  } while (0)
+
+/* Scratch need: Copies of inputs, and n limbs for hgcd_appr_step,
+   total 3*n limbs. */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+  return 3*n;
+}
+
+int
+mpn_hgcd_appr (mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+	       struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_bitcnt_t nbits;
+  unsigned extra_bits;
+  mp_size_t s;
+  mp_limb_t mask;
+  int count;
+  mp_ptr up, vp;
+  int success = 0;
+
+  ASSERT (n > 0);
+  mask = ap[n-1] | bp[n-1];
+
+  ASSERT (mask > 0);
+
+  MPN_BITCOUNT (nbits, n, mask);
+
+  /* We aim for reduction of to size sbits, where sbits = nbits/2 + 1
+     = GMP_NUMB_BITS * s - extra_bits. We keep track of extra_bits
+     (and implicitly sbits) to decide when to discard the least
+     significant limbs, but we can't actually reduce to size less then
+     s limbs, since that may give matrix entries exceeding n-s limb
+     and overflow the current allocation for the matrix entries. */
+     
+  s = n/2 + 1;
+  extra_bits = GMP_NUMB_BITS * s - (nbits/2 + 1);
+
+  ASSERT (extra_bits >= GMP_NUMB_BITS/2 - 1);
+  ASSERT (extra_bits < 3*GMP_NUMB_BITS/2);
+
+  /* FIXME: Could avoid copying now, and do it when applying the first
+     hgcd2 matrix. */
+  up = tp; vp = tp + n; tp = tp + 2*n;
+  MPN_COPY (up, ap, n);
+  MPN_COPY (vp, bp, n);
+
+#if TRACE
+  fprintf (stderr, "hgcd_appr: In: n = %u, s = %u, nbits = %u, extra = %u\n",
+	   (unsigned) n, (unsigned) s, (unsigned) nbits, (unsigned) extra_bits);
+#endif
+  while (n > 2)
+    {
+      mp_size_t nn;
+
+#if TRACE
+      fprintf (stderr, "loop: n = %u\n", (unsigned) n);
+#endif
+      ASSERT (n > s);