divappr or approximate fraction (a diversion from sqrt...)

Fri Oct 16 21:08:32 UTC 2015

When I looked into sqrt some months ago, I found that it would be nice
with a divappr function which didn't require the low mostly
uninteresting dividend limbs as input, a bit similar to bdiv_q.

Maybe one shouldn't think about that as an approximate division, but
rather as an approximate fraction. I've implemented one variant (see
below), which takes as input two n limb numbers U and D, with D
normalized. 

It computes an approximative fraction, the n-1 fraction limbs of B^{n-1}
U / D (and the single non-fraction bit of the quotient, one iff U >= D)

I think this fits the needs of my sqrt code pretty well, in particular
the version enforcing normalization, but I haven't yet tried to put them
together.

Each iteration computes one quotient limb, reduces U by one limb at the
high end, and drops one limb of precision at the low end of D. I think
that's simple and beautiful.

Let Q be the returned value, an approximation of B^{n-1} U / D, and let

  R = B^{n-1} U - Q D

Then we get R in the range

  - epsilon D < R < D

for some pretty small epsilon. In my testing, with 64-bit limbs, it
seems epsilon is on the order of 5e-19, but I haven't done any rigorous
error analysis.

So compared to the exact quotient, floor (B^{n-1} U / D), Q is never too
small, and at most one too large. And in case it *is* too large, the
corresponding candidate remainder is negative, but with R/D close to
zero. Which also makes error quite unlikely.

The code below depends on submul_1c, since the lowest limb to subtract
from the partial remainder is the high half of (q * dp[0]), while low
half is not going to be significant. (And it's nice that the c input to
submul_1c satisfies the requirement c < q, which we'd need in submul_1
implementations using the complement trick. If we ever make
mpn_submul_1c (rp, up, n, v, cin) public, I think we should require of
callers that cin <= v).

I imagine there are similarities to some other division routines in gmp.
If desired it's pretty easy to extend to U larger than D, then the
first un - dn quotient limbs can be computed (in the schoolbook range)
using mpn_sbpi1_div_qr.

When thinking about the precision, maybe one could to a more efficient
schoolbook division if one replaced our friend udiv_qr_3by2 by code to
compute (B^2 u1 + B u0) / (B d1 + d0), i.e., fix the lowest input limb
of udiv_qr_3by2 to zero, simplifying the code to compute the candidate
quotient, and then handle any resulting error in the the unlikely
adjustment step in the loop.

Regards,
/Niels

-----8<-----------

#include <stdio.h>
#include <stdlib.h>

#define WANT_ASSERT 1

#include <gmp.h>
#include <gmp-impl.h>
#include <longlong.h>

static int verbose = 0;

/* Compute an approximation to B^{dn-1} U / D */
static mp_limb_t
divappr_n (mp_ptr qp, mp_ptr up, mp_srcptr dp, mp_size_t n)
{
  mp_limb_t hi;
  gmp_pi1_t dinv;
  mp_limb_t d1, d0;
  mp_limb_t t1, t0;
  mp_limb_t u1, u0;

  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);

  /*
     +------------+----------+
     | U (n)      | 0 (n-1)  |
     +----------+-+----------+
                | D (n)      |
                +------------+

     Quotient size n-1 (+ high bit, returned)
   */

  if (n == 1)
    return up[0] >= dp[0];;

  d1 = dp[n-1];
  d0 = dp[n-2];
  invert_pi1 (dinv, d1, d0);

  hi = mpn_cmp (up, dp, n) >= 0;
  if (hi)
    ASSERT_NOCARRY (mpn_sub_n (up, up, dp, n));

  while (n > 2)
    {
      mp_limb_t u1 = up[n-1];
      mp_limb_t u0 = up[n-2];
      mp_limb_t q;

      if (UNLIKELY (u1 == d1) && (u0 == d0))
	{
	  mp_limb_t cy;
	  cy = d1;
	  cy += mpn_add_n (up, up, dp + 1, n-2);
	  cy -= mpn_sub_n (up, up, dp, n-2);
	  up[n-2] = cy;
	  q = GMP_NUMB_MASK;
	}
      else
	{
	  mp_limb_t cy;
	  /* FIXME: Duplicate work for top limbs. See mpn_sbpi1_div_qr. */
	  udiv_qr_3by2 (q, t1, t0, u1, u0, up[n-3], d1, d0, dinv.inv32);
	  umul_ppmm (t1, t0, q, dp[0]);
	  cy = mpn_submul_1c (up, dp+1, n-1, q, t1);
	  ASSERT (cy >= u1);
	  if (cy > u1)
	    {
	      ASSERT (cy == u1 + 1);
	      ASSERT_CARRY (mpn_add_n (up, up, dp+1, n-1));
	      q--;
	    }
	}
      n--; dp++;
      qp[n-1] = q;
    }

  u1 = up[1];
  u0 = up[0];

  if (UNLIKELY (u1 == d1) && (u0 == d0))
    qp[0] = GMP_NUMB_MASK;
  else
    udiv_qr_3by2 (qp[0], t1, t0, up[1], up[0], 0, d1, d0, dinv.inv32);

  return hi;
}

#define MAX_SIZE 20

int
main (int argc, char **argv)
{
  gmp_randstate_t rands;

  mp_limb_t q[MAX_SIZE];
  mp_limb_t tu[MAX_SIZE];
  mpz_t u;
  mpz_t d;
  mpz_t ref_q;
  mpz_t ref_r;
  mpz_t r;

  unsigned c;

  if (argc > 1)
    verbose = 1;

  mpz_init (u);
  mpz_init (d);
  mpz_init (ref_q);
  mpz_init (ref_r);
  mpz_init (r);

  gmp_randinit_default (rands);
  for (c = 0; c < 100000; c++)
    {
      unsigned bits = 1 + gmp_urandomm_ui (rands, GMP_NUMB_BITS * MAX_SIZE);
      mpz_t t1;
      mp_size_t n;

      mpz_rrandomb (u, rands, bits);
      n = mpz_size (u);
      mpz_rrandomb (d, rands, n*GMP_NUMB_BITS);
      if (verbose)
	gmp_fprintf (stderr, "%u, size %u, U = %Zx, D = %Zx\n",
		     c, (unsigned) n, u, d);

      mpn_copyi (tu, mpz_limbs_read (u), n);
      q[n-1] = divappr_n (q, tu, mpz_limbs_read (d), n);

      mpz_mul_2exp (r, u, (n-1) * GMP_NUMB_BITS);
      mpz_tdiv_qr (ref_q, ref_r, r, d);
      mpz_submul (r, d, mpz_roinit_n (t1, q, n));

      if (mpz_cmpabs (r, d) >= 0)
	{
	  gmp_fprintf (stderr, "Test %u failed, size %u:\n"
		       "  u: %Zx\n"
		       "  d: %Zx\n"
		       "  q: %Nx (bad)\n"
		       "  q: %Zx (expected)\n"
		       "  r: %Zx (bad)\n"
		       "  r: %Zx (expected)\n",
		       c, (unsigned) n, u, d, q, n, ref_q, r, ref_r);
	  abort ();
	}
      else if (verbose && mpz_sgn (r) < 0)
	fprintf (stderr, "r/d: %g\n",
		 - (double) mpz_getlimbn (r, n-1)
		 / (double) mpz_getlimbn (d, n-1));
    }
  mpz_clear (u);
  mpz_clear (d);
  mpz_clear (ref_q);
  mpz_clear (ref_r);
  mpz_clear (r);

  return EXIT_SUCCESS;
}

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.