bdiv vs redc

Niels Möller nisse at lysator.liu.se
Tue Jul 17 23:59:04 CEST 2012


Torbjorn Granlund <tg at gmplib.org> writes:

>   where the R == 0 case can happen only if U == 0. So if it is known a
>   priori that U != 0, then the test can be simplified to just
>   
>     R == D.
>   
> Ah.  This makes some sense.

And after a second look, I noticed that mpn_divisible_p handles zero
specially, so the simple test R == D should suffice there.

I've checked in my changes in a new repository,

  ssh://shell.gmplib.org//var/hg/gmp-proj/gmp-bdiv

Patch also appended below. The code passes make check now.

Regards,
/Niels

diff -Nrc2 gmp-bdiv.b5ca16212198/ChangeLog gmp-bdiv.aa820fde2c64/ChangeLog
*** gmp-bdiv.b5ca16212198/ChangeLog	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/ChangeLog	2012-07-17 23:52:21.000000000 +0200
***************
*** 1,2 ****
--- 1,27 ----
+ 2012-07-17  Niels Möller  <nisse at lysator.liu.se>
+ 
+ 	* mpn/generic/divis.c (mpn_divisible_p): Updated the divisibility
+ 	test; bdiv now returns R = D rather than R = 0 when D divides a
+ 	non-zero U.
+ 
+ 	* mpn/generic/binvert.c (mpn_binvert): Negate bdiv quotient.
+ 	* mpn/generic/divexact.c (mpn_divexact): Likewise.
+ 	* mpn/generic/remove.c (mpn_remove): Likewise.
+ 	* mpz/bin_uiui.c (mpz_bdiv_bin_uiui): Likewise.
+ 
+ 	* tests/mpn/t-bdiv.c (check_one): Updated to new convention,
+ 	B^{qn} R = U + QD.
+ 
+ 	* mpn/generic/sbpi1_bdiv_qr.c (mpn_sbpi1_bdiv_qr): Reimplemented,
+ 	for new bdiv convention.
+ 	* mpn/generic/sbpi1_bdiv_q.c (mpn_sbpi1_bdiv_q): Likewise.
+ 	* mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q_n)
+ 	(mpn_dcpi1_bdiv_q): Adapted to new bdiv convention.
+ 	* mpn/generic/dcpi1_bdiv_qr.c (mpn_dcpi1_bdiv_qr_n)
+ 	(mpn_dcpi1_bdiv_qr): Likewise.
+ 	* mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr): Adapted to new bdiv
+ 	convention, using a wrapper calling the old function.
+ 	* mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q): Likewise.
+ 
  2012-06-26  Torbjorn Granlund  <tege at gmplib.org>
  
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/binvert.c gmp-bdiv.aa820fde2c64/mpn/generic/binvert.c
*** gmp-bdiv.b5ca16212198/mpn/generic/binvert.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/binvert.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 75,78 ****
--- 75,80 ----
      mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
  
+   mpn_neg (rp, rp, rn);
+ 
    /* Use Newton iterations to get the desired precision.  */
    for (; rn < n; rn = newrn)
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_q.c
*** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 36,40 ****
  }
  
! /* Computes Q = N / D mod B^n, destroys N.
  
     N = {np,n}
--- 36,40 ----
  }
  
! /* Computes Q = - N / D mod B^n, destroys N.
  
     N = {np,n}
***************
*** 58,67 ****
  
        mpn_mullo_n (tp, qp, dp + hi, lo);
!       mpn_sub_n (np + hi, np + hi, tp, lo);
  
        if (lo < hi)
  	{
! 	  cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
! 	  np[n - 1] -= cy;
  	}
        qp += lo;
--- 58,67 ----
  
        mpn_mullo_n (tp, qp, dp + hi, lo);
!       mpn_add_n (np + hi, np + hi, tp, lo);
  
        if (lo < hi)
  	{
! 	  cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]);
! 	  np[n - 1] += cy;
  	}
        qp += lo;
***************
*** 72,76 ****
  }
  
! /* Computes Q = N / D mod B^nn, destroys N.
  
     N = {np,nn}
--- 72,76 ----
  }
  
! /* Computes Q = - N / D mod B^nn, destroys N.
  
     N = {np,nn}
***************
*** 120,124 ****
  	  mpn_incr_u (tp + qn, cy);
  
! 	  mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
  	  cy = 0;
  	}
--- 120,124 ----
  	  mpn_incr_u (tp + qn, cy);
  
! 	  mpn_add (np + qn, np + qn, nn - qn, tp, dn);
  	  cy = 0;
  	}
***************
*** 130,134 ****
        while (qn > dn)
  	{
! 	  mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
  	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
  	  qp += dn;
--- 130,134 ----
        while (qn > dn)
  	{
! 	  mpn_add_1 (np + dn, np + dn, qn - dn, cy);
  	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
  	  qp += dn;
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_qr.c
*** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 33,42 ****
     Output:
  
!       q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
  
!       r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
  
     Stores q at qp. Stores the n least significant limbs of r at the high half
!    of np, and returns the borrow from the subtraction n - q*d.
  
     d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
--- 33,42 ----
     Output:
  
!       q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
  
!       r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS}
  
     Stores q at qp. Stores the n least significant limbs of r at the high half
!    of np, and returns the carry from the addition n + q*d.
  
     d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
***************
*** 67,71 ****
  
    mpn_incr_u (tp + lo, cy);
!   rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
  
    if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
--- 67,71 ----
  
    mpn_incr_u (tp + lo, cy);
!   rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
  
    if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
***************
*** 77,81 ****
  
    mpn_incr_u (tp + hi, cy);
!   rh += mpn_sub_n (np + n, np + n, tp, n);
  
    return rh;
--- 77,81 ----
  
    mpn_incr_u (tp + hi, cy);
!   rh += mpn_add_n (np + n, np + n, tp, n);
  
    return rh;
***************
*** 123,127 ****
  	  mpn_incr_u (tp + qn, cy);
  
! 	  rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
  	  cy = 0;
  	}
--- 123,127 ----
  	  mpn_incr_u (tp + qn, cy);
  
! 	  rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
  	  cy = 0;
  	}
***************
*** 133,137 ****
        do
  	{
! 	  rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
  	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
  	  qp += dn;
--- 133,137 ----
        do
  	{
! 	  rr += mpn_add_1 (np + dn, np + dn, qn, cy);
  	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
  	  qp += dn;
***************
*** 158,162 ****
        mpn_incr_u (tp + qn, cy);
  
!       rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
        cy = 0;
      }
--- 158,162 ----
        mpn_incr_u (tp + qn, cy);
  
!       rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
        cy = 0;
      }
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/divexact.c gmp-bdiv.aa820fde2c64/mpn/generic/divexact.c
*** gmp-bdiv.b5ca16212198/mpn/generic/divexact.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/divexact.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 89,92 ****
--- 89,95 ----
    mpn_bdiv_q (qp, np, qn, dp, dn, tp);
    TMP_FREE;
+ 
+   /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */
+   mpn_neg (qp, qp, qn);
  }
  
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/divis.c gmp-bdiv.aa820fde2c64/mpn/generic/divis.c
*** gmp-bdiv.b5ca16212198/mpn/generic/divis.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/divis.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 55,58 ****
--- 55,59 ----
    mp_limb_t di;
    unsigned  twos;
+   int c;
    TMP_DECL;
  
***************
*** 173,190 ****
        mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
      }
  
!   /* test for {rp,dn} zero or non-zero */
!   i = 0;
!   do
!     {
!       if (rp[i] != 0)
! 	{
! 	  TMP_FREE;
! 	  return 0;
! 	}
!     }
!   while (++i < dn);
  
    TMP_FREE;
!   return 1;
  }
--- 174,186 ----
        mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
      }
+   
+   /* In general, bdiv may return either R = 0 or R = D when D divides
+      A. But R = 0 can happen only when A = 0, which we already have
+      excluded. Furthermore, R == D (mod B^{dn}) implies no carry, so
+      we don't need to check the carry returned from bdiv. */
  
!   MPN_CMP (c, rp, dp, dn);
  
    TMP_FREE;
!   return c == 0;
  }
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_q.c
*** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 56,61 ****
     FIXME: Trim final quotient calculation to qn limbs of precision.
  */
! void
! mpn_mu_bdiv_q (mp_ptr qp,
  	       mp_srcptr np, mp_size_t nn,
  	       mp_srcptr dp, mp_size_t dn,
--- 56,61 ----
     FIXME: Trim final quotient calculation to qn limbs of precision.
  */
! static void
! mpn_mu_bdiv_q_old (mp_ptr qp,
  	       mp_srcptr np, mp_size_t nn,
  	       mp_srcptr dp, mp_size_t dn,
***************
*** 215,218 ****
--- 215,228 ----
  }
  
+ void
+ mpn_mu_bdiv_q (mp_ptr qp,
+ 	       mp_srcptr np, mp_size_t nn,
+ 	       mp_srcptr dp, mp_size_t dn,
+ 	       mp_ptr scratch)
+ {
+   mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
+   mpn_neg (qp, qp, nn);
+ }
+ 
  mp_size_t
  mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_qr.c
*** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 55,64 ****
  	  particular, when dn==in, tp and rp could use the same space.
  */
! mp_limb_t
! mpn_mu_bdiv_qr (mp_ptr qp,
! 		mp_ptr rp,
! 		mp_srcptr np, mp_size_t nn,
! 		mp_srcptr dp, mp_size_t dn,
! 		mp_ptr scratch)
  {
    mp_size_t qn;
--- 55,64 ----
  	  particular, when dn==in, tp and rp could use the same space.
  */
! static mp_limb_t
! mpn_mu_bdiv_qr_old (mp_ptr qp,
! 		    mp_ptr rp,
! 		    mp_srcptr np, mp_size_t nn,
! 		    mp_srcptr dp, mp_size_t dn,
! 		    mp_ptr scratch)
  {
    mp_size_t qn;
***************
*** 233,236 ****
--- 233,269 ----
  }
  
+ mp_limb_t
+ mpn_mu_bdiv_qr (mp_ptr qp,
+ 		mp_ptr rp,
+ 		mp_srcptr np, mp_size_t nn,
+ 		mp_srcptr dp, mp_size_t dn,
+ 		mp_ptr scratch)
+ {
+   mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);
+ 
+   /* R' B^{qn} = U - Q' D
+    *
+    * Q = B^{qn} - Q' (assuming Q' != 0)
+    *
+    * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
+    *          = B^{qn} D + R'
+    */
+ 
+   if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
+     {
+       /* Zero quotient. */
+       ASSERT (cy == 0);
+       return 0;
+     }
+   else
+     {
+       mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
+       ASSERT (cy2 >= cy);
+       
+       return cy2 - cy;
+     }
+ }
+ 
+ 
  mp_size_t
  mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/remove.c gmp-bdiv.aa820fde2c64/mpn/generic/remove.c
*** gmp-bdiv.b5ca16212198/mpn/generic/remove.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/remove.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 93,96 ****
--- 93,98 ----
        MP_PTR_SWAP (qp, qp2);
        qn = qn - pn;
+       mpn_neg (qp, qp, qn+1);
+ 
        qn += qp[qn] != 0;
  
***************
*** 132,135 ****
--- 134,139 ----
        MP_PTR_SWAP (qp, qp2);
        qn = qn - pn;
+       mpn_neg (qp, qp, qn+1);
+ 
        qn += qp[qn] != 0;
  
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_q.c
*** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 28,89 ****
  #include "gmp-impl.h"
  
! 
! /* Computes Q = N / D mod B^nn, destroys N.
  
     D must be odd. dinv is (-D)^-1 mod B.
  
! 
!    The straightforward way to compute Q is to cancel one limb at a time, using
! 
!      qp[i] = D^{-1} * np[i] (mod B)
!      N -= B^i * qp[i] * D
! 
!    But we prefer addition to subtraction, since mpn_addmul_1 is often faster
!    than mpn_submul_1.  Q = - N / D can be computed by iterating
! 
!      qp[i] = (-D)^{-1} * np[i] (mod B)
!      N += B^i * qp[i] * D
! 
!    And then we flip the sign, -Q = (not Q) + 1. */
  
  void
  mpn_sbpi1_bdiv_q (mp_ptr qp,
! 		  mp_ptr np, mp_size_t nn,
  		  mp_srcptr dp, mp_size_t dn,
  		  mp_limb_t dinv)
  {
    mp_size_t i;
!   mp_limb_t cy, q;
  
    ASSERT (dn > 0);
!   ASSERT (nn >= dn);
    ASSERT ((dp[0] & 1) != 0);
!   /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
!      but some over N/Q overlaps will not work.  */
  
!   for (i = nn - dn; i > 0; i--)
      {
!       q = dinv * np[0];
!       cy = mpn_addmul_1 (np, dp, dn, q);
!       mpn_add_1 (np + dn, np + dn, i, cy);
!       ASSERT (np[0] == 0);
!       qp[0] = ~q;
!       qp++;
!       np++;
      }
- 
    for (i = dn; i > 1; i--)
      {
!       q = dinv * np[0];
!       mpn_addmul_1 (np, dp, i, q);
!       ASSERT (np[0] == 0);
!       qp[0] = ~q;
!       qp++;
!       np++;
      }
  
    /* Final limb */
!   q = dinv * np[0];
!   qp[0] = ~q;
!   mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
  }
--- 28,85 ----
  #include "gmp-impl.h"
  
! /* Computes Q = - U / D mod B^un, destroys U.
  
     D must be odd. dinv is (-D)^-1 mod B.
  
! */
  
  void
  mpn_sbpi1_bdiv_q (mp_ptr qp,
! 		  mp_ptr up, mp_size_t un,
  		  mp_srcptr dp, mp_size_t dn,
  		  mp_limb_t dinv)
  {
    mp_size_t i;
!   mp_limb_t q;
  
    ASSERT (dn > 0);
!   ASSERT (un >= dn);
    ASSERT ((dp[0] & 1) != 0);
!   ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
  
!   if (un > dn)
      {
!       mp_limb_t cy, hi;
!       for (i = un - dn - 1, cy = 0; i > 0; i--)
! 	{
! 	  q = dinv * up[0];
! 	  hi = mpn_addmul_1 (up, dp, dn, q);
! 
! 	  ASSERT (up[0] == 0);
! 	  *qp++ = q;
! 	  hi += cy;
! 	  cy = hi < cy;
! 	  hi += up[dn];
! 	  cy += hi < up[dn];
! 	  up[dn] = hi;
! 	  up++;
! 	}
!       q = dinv * up[0];
!       hi = cy + mpn_addmul_1 (up, dp, dn, q);
!       ASSERT (up[0] == 0);
!       *qp++ = q;
!       up[dn] += hi;
!       up++;
      }
    for (i = dn; i > 1; i--)
      {
!       mp_limb_t q = dinv * up[0];
!       mpn_addmul_1 (up, dp, i, q);
!       ASSERT (up[0] == 0);
!       *qp++ = q;
!       up++;
      }
  
    /* Final limb */
!   *qp = dinv * up[0];
  }
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_qr.c
*** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 29,41 ****
  
  
! /* Computes a binary quotient of size qn = nn - dn.
     Output:
  
!       Q = N * D^{-1} mod B^qn,
  
!       R = (N - Q * D) * B^(-qn)
  
!    Stores the dn least significant limbs of R at {np + nn - dn, dn},
!    and returns the borrow from the subtraction N - Q*D.
  
     D must be odd. dinv is (-D)^-1 mod B. */
--- 29,41 ----
  
  
! /* Computes a binary quotient of size qn = un - dn.
     Output:
  
!       Q = -U * D^{-1} mod B^qn,
  
!       R = (U + Q * D) * B^(-qn)
  
!    Stores the dn least significant limbs of R at {up + un - dn, dn},
!    and returns the carry from the addition N + Q*D.
  
     D must be odd. dinv is (-D)^-1 mod B. */
***************
*** 43,108 ****
  mp_limb_t
  mpn_sbpi1_bdiv_qr (mp_ptr qp,
! 		   mp_ptr np, mp_size_t nn,
  		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
  {
-   mp_size_t qn;
    mp_size_t i;
!   mp_limb_t rh;
!   mp_limb_t ql;
  
    ASSERT (dn > 0);
!   ASSERT (nn > dn);
    ASSERT ((dp[0] & 1) != 0);
!   /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
!      but some over N/Q overlaps will not work.  */
  
!   qn = nn - dn;
! 
!   rh = 0;
! 
!   /* To complete the negation, this value is added to q. */
!   ql = 1;
!   while (qn > dn)
      {
!       for (i = 0; i < dn; i++)
! 	{
! 	  mp_limb_t q;
! 
! 	  q = dinv * np[i];
! 	  np[i] = mpn_addmul_1 (np + i, dp, dn, q);
! 	  qp[i] = ~q;
! 	}
!       rh += mpn_add (np + dn, np + dn, qn, np, dn);
!       ql = mpn_add_1 (qp, qp, dn, ql);
! 
!       qp += dn; qn -= dn;
!       np += dn; nn -= dn;
!     }
! 
!   for (i = 0; i < qn; i++)
!     {
!       mp_limb_t q;
! 
!       q = dinv * np[i];
!       np[i] = mpn_addmul_1 (np + i, dp, dn, q);
!       qp[i] = ~q;
      }
  
!   rh += mpn_add_n (np + dn, np + dn, np, qn);
!   ql = mpn_add_1 (qp, qp, qn, ql);
! 
!   if (UNLIKELY (ql > 0))
!     {
!       /* q == 0 */
!       ASSERT (rh == 0);
!       return 0;
!     }
!   else
!     {
!       mp_limb_t cy;
! 
!       cy = mpn_sub_n (np + qn, np + qn, dp, dn);
!       ASSERT (cy >= rh);
!       return cy - rh;
!     }
  }
--- 43,71 ----
  mp_limb_t
  mpn_sbpi1_bdiv_qr (mp_ptr qp,
! 		   mp_ptr up, mp_size_t un,
  		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
  {
    mp_size_t i;
!   mp_limb_t cy;
  
    ASSERT (dn > 0);
!   ASSERT (un > dn);
    ASSERT ((dp[0] & 1) != 0);
!   ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
  
!   for (i = un - dn, cy = 0; i != 0; i--)
      {
!       mp_limb_t q = dinv * up[0];
!       mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
!       *qp++ = q;
!       
!       hi += cy;
!       cy = hi < cy;
!       hi += up[dn];
!       cy += hi < up[dn];
!       up[dn] = hi;
!       up++;
      }
  
!   return cy;
  }
diff -Nrc2 gmp-bdiv.b5ca16212198/mpz/bin_uiui.c gmp-bdiv.aa820fde2c64/mpz/bin_uiui.c
*** gmp-bdiv.b5ca16212198/mpz/bin_uiui.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/mpz/bin_uiui.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 271,274 ****
--- 271,275 ----
        nn -= kn;
        mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
+       mpn_neg (np, np, nn);
  
        if (kmax == 0)
diff -Nrc2 gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c gmp-bdiv.aa820fde2c64/tests/mpn/t-bdiv.c
*** gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c	2012-07-17 23:52:21.000000000 +0200
--- gmp-bdiv.aa820fde2c64/tests/mpn/t-bdiv.c	2012-07-17 23:52:21.000000000 +0200
***************
*** 59,63 ****
  {
    mp_size_t qn;
-   int cmp;
    mp_ptr tp;
    mp_limb_t cy = 4711;		/* silence warnings */
--- 59,62 ----
***************
*** 78,90 ****
      mpn_mul (tp, qp, qn, dp, dn);
  
!   if (rp != NULL)
!     {
!       cy = mpn_add_n (tp + qn, tp + qn, rp, dn);
!       cmp = cy != rh || mpn_cmp (tp, np, nn) != 0;
!     }
!   else
!     cmp = mpn_cmp (tp, np, nn - dn) != 0;
  
!   if (cmp != 0)
      {
        printf ("\r*******************************************************************************\n");
--- 77,84 ----
      mpn_mul (tp, qp, qn, dp, dn);
  
!   cy = mpn_add_n (tp, tp, np, nn);
  
!   if (! mpn_zero_p (tp, qn)
!       || (rp != NULL && (cy != rh || mpn_cmp (tp + qn, rp, dn) != 0)))
      {
        printf ("\r*******************************************************************************\n");

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list