# bdiv vs redc

Niels Möller nisse at lysator.liu.se
Wed Jul 4 23:00:07 CEST 2012

```Torbjorn Granlund <tg at gmplib.org> writes:

> Have you checked that e.g. sbpi1_bdiv_qr gets N = QD+R?

It's N + QD = R now. The patched t-bdiv checks that. So I think the
sb_bdiv code is mostly correct.

I've now made a first attempt at mu_bdiv_qr. Non-working patch below.

Here it gets a bit messier when we cancel limbs by adding multiples of
d, since we want to avoid actually adding the lower limbs, but we need to
know if we get a carry. And we almost always get one, except when the
corresponding input/partial remainder limbs were already zero. This
affects both the main additions and the wraparound logic. Hence special
cases for trailing zero limbs.

I'll also write down my notes to get my head around the wraparound. We
want to compute Q D. We compute it mod B^{tn}-1, lets call this result
W. We also know apriori that Q D = - U mod B^{qn}, and since wn (the
number of wrapround limbs) is <= qn, we also have Q D = -U mod B^{wn}.
So apply the CRT to

Q D = W (mod (B^{tn} -1))
Q D = -U (mod B^{wn})

The solution boils down to

Q D = W + B(^{tn} - 1) T

with T = (U + W) mod B^{wn}.

As a diagram in usual gmp style, this is

+---+--------+
| T |   W    |
+---+-----+--+
|-T|
+--+

So to get the interesting part of Q D, which is the high half, all we
need from the low subtraction is the borrow. And we get a borrow if U !=
0 (mod B^{wm}) *and* we did not get a carry from the addition when we
computed T <-- U + W (mod B^{wn}).

(And then the borrow will most likely be absorbed by uninteresting limbs
of Q D; it affects the high half only if W has all zero limbs from
position wn to position qn).

Maybe it's easier to keep the body of the old mu_bdiv, working with
subtractions, and just negate the quotient, and adjust the remainder, at
the end? That's O(n) additional work, and with fairly small constant.

Regards,
/Niels

*** /tmp/extdiff.LyHtEv/gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c	2012-07-04 22:32:05.000000000 +0200
--- /home/nisse/hack/gmp-bdiv/mpn/generic/mu_bdiv_qr.c	2012-07-04 22:17:28.000000000 +0200
***************
*** 65,69 ****
mp_size_t in;
mp_limb_t cy, c0;
!   mp_size_t tn, wn;

qn = nn - dn;
--- 65,69 ----
mp_size_t in;
mp_limb_t cy, c0;
!   mp_size_t tn, wn, zn;

qn = nn - dn;
***************
*** 87,90 ****
--- 87,92 ----
in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */

+       ASSERT (in <= dn);
+
/* Some notes on allocation:

***************
*** 96,99 ****
--- 98,102 ----

mpn_binvert (ip, dp, in, tp);
+       mpn_neg (ip, ip, in);

MPN_COPY (rp, np, dn);
***************
*** 103,106 ****
--- 106,122 ----
while (qn > in)
{
+ 	  for (zn = 0; zn < in && rp[zn] == 0; zn++)
+ 	    ;
+
+ 	  if (zn == in)
+ 	    {
+ 	      mpn_zero (qp, in);
+ 	      if (dn != in)
+ 		MPN_COPY_INCR (rp, rp + in, dn - in);
+
+ 	      cy = mpn_add_1 (rp + dn - in, np, in, cy);
+ 	    }
+ 	  else
+ 	    {
mpn_mullo_n (qp, rp, ip, in);

***************
*** 114,129 ****
if (wn > 0)
{
! 		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
! 		  mpn_decr_u (tp + wn, c0);
! 		}
}

- 	  qp += in;
- 	  qn -= in;
-
if (dn != in)
{
! 	      /* Subtract tp[dn-1...in] from partial remainder.  */
! 	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
if (cy == 2)
{
--- 130,146 ----
if (wn > 0)
{
! 		      c0 = mpn_add_n (tp + tn, tp, rp, wn);
! 		      c0 = c0 || zn >= wn;
! 		      mpn_decr_u (tp + wn, 1-c0);
! 		    }
! 		  /* Carry from lower limbs, since we checked earlier
! 		     that {rp, in} > 0. */
! 		  mpn_incr_u (tp + in, 1);
}

if (dn != in)
{
! 		  /* Add tp[dn-1...in] to partial remainder. */
! 		  cy += mpn_add_n (rp, rp + in, tp + in, dn - in);
if (cy == 2)
{
***************
*** 132,141 ****
}
}
! 	  /* Subtract tp[dn+in-1...dn] from dividend.  */
! 	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
np += in;
}

/* Generate last qn limbs.  */
mpn_mullo_n (qp, rp, ip, qn);

--- 149,173 ----
}
}
! 	      /* Add tp[dn+in-1...dn] to dividend.  */
! 	      cy = mpn_add_nc (rp + dn - in, np, tp + dn, in, cy);
! 	    }
np += in;
+ 	  qp += in;
+ 	  qn -= in;
}

/* Generate last qn limbs.  */
+       for (zn = 0; zn < qn && rp[zn] == 0; zn++)
+ 	;
+
+       if (zn == qn)
+ 	{
+ 	  mpn_zero (qp, qn);
+ 	  if (dn != qn)
+ 	    MPN_COPY_INCR (rp, rp + in, qn - in);
+
+ 	  return mpn_add_1 (rp + dn - qn, np, qn, cy);
+ 	}
+
mpn_mullo_n (qp, rp, ip, qn);

***************
*** 149,160 ****
if (wn > 0)
{
! 	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
! 	      mpn_decr_u (tp + wn, c0);
}
}

if (dn != qn)
{
! 	  cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
if (cy == 2)
{
--- 181,194 ----
if (wn > 0)
{
! 	      c0 = mpn_add_n (tp + tn, tp, rp, wn);
! 	      c0 = c0 || zn >= wn;
! 	      mpn_decr_u (tp + wn, 1-c0);
}
+ 	  mpn_incr_u (tp + qn, 1);
}

if (dn != qn)
{
! 	  cy += mpn_add_n (rp, rp + qn, tp + qn, dn - qn);
if (cy == 2)
{
***************
*** 163,167 ****
}
}
!       return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);

#undef ip
--- 197,201 ----
}
}
!       return mpn_add_nc (rp + dn - qn, np, tp + dn, qn, cy);

#undef ip
***************
*** 183,186 ****
--- 217,229 ----
mpn_binvert (ip, dp, in, tp);

+       for (zn = 0; zn < in && np[zn] == 0; zn++)
+ 	;
+       if (zn == in)
+ 	{
+ 	  mpn_zero (qp, in);
+ 	  MPN_COPY (rp, np + in, dn);
+ 	  cy = 0;
+ 	}
+       else {
mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */

***************
*** 194,206 ****
if (wn > 0)
{
! 	      c0 = mpn_sub_n (tp + tn, tp, np, wn);
! 	      mpn_decr_u (tp + wn, c0);
}
}

qp += in;
qn -= in;

!       cy = mpn_sub_n (rp, np + in, tp + in, dn);
mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */

--- 237,261 ----
if (wn > 0)
{
! 		c0 = mpn_add_n (tp + tn, tp, np, wn);
! 		c0 = c0 || zn >= wn;
! 		mpn_incr_u (tp + wn, 1-c0);
}
+ 	    mpn_incr_u (tp + in, 1);
}

+ 	cy = mpn_sub_n (rp, np + in, tp + in, dn);
+       }
qp += in;
qn -= in;

!       for (zn = 0; zn < qn && np[zn] == 0; zn++)
! 	;
!       if (zn == qn)
! 	{
! 	  mpn_zero (qp, qn);
! 	  return mpn_add_1 (rp, np + qn, dn, cy);
! 	}
!       else
! 	{
mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */

***************
*** 214,223 ****
if (wn > 0)
{
! 	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
! 	      mpn_decr_u (tp + wn, c0);
}
}

!       cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
if (cy == 2)
{
--- 269,280 ----
if (wn > 0)
{
! 		  c0 = mpn_add_n (tp + tn, tp, rp, wn);
! 		  cy |= zn >= wn;
! 		  mpn_incr_u (tp + wn, 1-c0);
}
+ 	      mpn_incr_u (tp + qn, 1);
}

! 	  cy += mpn_add_n (rp, rp + qn, tp + qn, dn - qn);
if (cy == 2)
{
***************
*** 225,230 ****
cy = 1;
}
!       return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
!
#undef ip
#undef tp
--- 282,287 ----
cy = 1;
}
! 	  return mpn_add_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
! 	}
#undef ip
#undef tp

--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
```