bdiv vs redc

Niels Möller nisse at lysator.liu.se
Fri Apr 11 12:30:35 UTC 2014


nisse at lysator.liu.se (Niels Möller) writes:

> I've checked in my changes in a new repository,
>
>   ssh://shell.gmplib.org//var/hg/gmp-proj/gmp-bdiv
>
> Patch also appended below. The code passes make check now.

I just remember this old sub-project: Changing bdiv convention from

  U = Q D + R B^n  (Q = U D^-1 mod B^n)

to

  U + Q D = R B^n  (Q = - U D^-1 mod B^n)

to make it more consistent with redc. It would be nice to get this in.
Do we have any 6.0.x repository yet? Changes like this shouldn't be
included in any coming bug-fix release.

Regards,
/Niels

> diff -Nrc2 gmp-bdiv.b5ca16212198/ChangeLog gmp-bdiv.aa820fde2c64/ChangeLog
> *** gmp-bdiv.b5ca16212198/ChangeLog	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/ChangeLog	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 1,2 ****
> --- 1,27 ----
> + 2012-07-17  Niels Möller  <nisse at lysator.liu.se>
> + 
> + 	* mpn/generic/divis.c (mpn_divisible_p): Updated the divisibility
> + 	test; bdiv now returns R = D rather than R = 0 when D divides a
> + 	non-zero U.
> + 
> + 	* mpn/generic/binvert.c (mpn_binvert): Negate bdiv quotient.
> + 	* mpn/generic/divexact.c (mpn_divexact): Likewise.
> + 	* mpn/generic/remove.c (mpn_remove): Likewise.
> + 	* mpz/bin_uiui.c (mpz_bdiv_bin_uiui): Likewise.
> + 
> + 	* tests/mpn/t-bdiv.c (check_one): Updated to new convention,
> + 	B^{qn} R = U + QD.
> + 
> + 	* mpn/generic/sbpi1_bdiv_qr.c (mpn_sbpi1_bdiv_qr): Reimplemented,
> + 	for new bdiv convention.
> + 	* mpn/generic/sbpi1_bdiv_q.c (mpn_sbpi1_bdiv_q): Likewise.
> + 	* mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q_n)
> + 	(mpn_dcpi1_bdiv_q): Adapted to new bdiv convention.
> + 	* mpn/generic/dcpi1_bdiv_qr.c (mpn_dcpi1_bdiv_qr_n)
> + 	(mpn_dcpi1_bdiv_qr): Likewise.
> + 	* mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr): Adapted to new bdiv
> + 	convention, using a wrapper calling the old function.
> + 	* mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q): Likewise.
> + 
>   2012-06-26  Torbjorn Granlund  <tege at gmplib.org>
>   
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/binvert.c gmp-bdiv.aa820fde2c64/mpn/generic/binvert.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/binvert.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/binvert.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 75,78 ****
> --- 75,80 ----
>       mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
>   
> +   mpn_neg (rp, rp, rn);
> + 
>     /* Use Newton iterations to get the desired precision.  */
>     for (; rn < n; rn = newrn)
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_q.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 36,40 ****
>   }
>   
> ! /* Computes Q = N / D mod B^n, destroys N.
>   
>      N = {np,n}
> --- 36,40 ----
>   }
>   
> ! /* Computes Q = - N / D mod B^n, destroys N.
>   
>      N = {np,n}
> ***************
> *** 58,67 ****
>   
>         mpn_mullo_n (tp, qp, dp + hi, lo);
> !       mpn_sub_n (np + hi, np + hi, tp, lo);
>   
>         if (lo < hi)
>   	{
> ! 	  cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
> ! 	  np[n - 1] -= cy;
>   	}
>         qp += lo;
> --- 58,67 ----
>   
>         mpn_mullo_n (tp, qp, dp + hi, lo);
> !       mpn_add_n (np + hi, np + hi, tp, lo);
>   
>         if (lo < hi)
>   	{
> ! 	  cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]);
> ! 	  np[n - 1] += cy;
>   	}
>         qp += lo;
> ***************
> *** 72,76 ****
>   }
>   
> ! /* Computes Q = N / D mod B^nn, destroys N.
>   
>      N = {np,nn}
> --- 72,76 ----
>   }
>   
> ! /* Computes Q = - N / D mod B^nn, destroys N.
>   
>      N = {np,nn}
> ***************
> *** 120,124 ****
>   	  mpn_incr_u (tp + qn, cy);
>   
> ! 	  mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
>   	  cy = 0;
>   	}
> --- 120,124 ----
>   	  mpn_incr_u (tp + qn, cy);
>   
> ! 	  mpn_add (np + qn, np + qn, nn - qn, tp, dn);
>   	  cy = 0;
>   	}
> ***************
> *** 130,134 ****
>         while (qn > dn)
>   	{
> ! 	  mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
>   	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
>   	  qp += dn;
> --- 130,134 ----
>         while (qn > dn)
>   	{
> ! 	  mpn_add_1 (np + dn, np + dn, qn - dn, cy);
>   	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
>   	  qp += dn;
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_qr.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/dcpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 33,42 ****
>      Output:
>   
> !       q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
>   
> !       r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
>   
>      Stores q at qp. Stores the n least significant limbs of r at the high half
> !    of np, and returns the borrow from the subtraction n - q*d.
>   
>      d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
> --- 33,42 ----
>      Output:
>   
> !       q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
>   
> !       r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS}
>   
>      Stores q at qp. Stores the n least significant limbs of r at the high half
> !    of np, and returns the carry from the addition n + q*d.
>   
>      d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
> ***************
> *** 67,71 ****
>   
>     mpn_incr_u (tp + lo, cy);
> !   rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
>   
>     if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
> --- 67,71 ----
>   
>     mpn_incr_u (tp + lo, cy);
> !   rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
>   
>     if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
> ***************
> *** 77,81 ****
>   
>     mpn_incr_u (tp + hi, cy);
> !   rh += mpn_sub_n (np + n, np + n, tp, n);
>   
>     return rh;
> --- 77,81 ----
>   
>     mpn_incr_u (tp + hi, cy);
> !   rh += mpn_add_n (np + n, np + n, tp, n);
>   
>     return rh;
> ***************
> *** 123,127 ****
>   	  mpn_incr_u (tp + qn, cy);
>   
> ! 	  rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
>   	  cy = 0;
>   	}
> --- 123,127 ----
>   	  mpn_incr_u (tp + qn, cy);
>   
> ! 	  rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
>   	  cy = 0;
>   	}
> ***************
> *** 133,137 ****
>         do
>   	{
> ! 	  rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
>   	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
>   	  qp += dn;
> --- 133,137 ----
>         do
>   	{
> ! 	  rr += mpn_add_1 (np + dn, np + dn, qn, cy);
>   	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
>   	  qp += dn;
> ***************
> *** 158,162 ****
>         mpn_incr_u (tp + qn, cy);
>   
> !       rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
>         cy = 0;
>       }
> --- 158,162 ----
>         mpn_incr_u (tp + qn, cy);
>   
> !       rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
>         cy = 0;
>       }
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/divexact.c gmp-bdiv.aa820fde2c64/mpn/generic/divexact.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/divexact.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/divexact.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 89,92 ****
> --- 89,95 ----
>     mpn_bdiv_q (qp, np, qn, dp, dn, tp);
>     TMP_FREE;
> + 
> +   /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */
> +   mpn_neg (qp, qp, qn);
>   }
>   
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/divis.c gmp-bdiv.aa820fde2c64/mpn/generic/divis.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/divis.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/divis.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 55,58 ****
> --- 55,59 ----
>     mp_limb_t di;
>     unsigned  twos;
> +   int c;
>     TMP_DECL;
>   
> ***************
> *** 173,190 ****
>         mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
>       }
>   
> !   /* test for {rp,dn} zero or non-zero */
> !   i = 0;
> !   do
> !     {
> !       if (rp[i] != 0)
> ! 	{
> ! 	  TMP_FREE;
> ! 	  return 0;
> ! 	}
> !     }
> !   while (++i < dn);
>   
>     TMP_FREE;
> !   return 1;
>   }
> --- 174,186 ----
>         mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
>       }
> +   
> +   /* In general, bdiv may return either R = 0 or R = D when D divides
> +      A. But R = 0 can happen only when A = 0, which we already have
> +      excluded. Furthermore, R == D (mod B^{dn}) implies no carry, so
> +      we don't need to check the carry returned from bdiv. */
>   
> !   MPN_CMP (c, rp, dp, dn);
>   
>     TMP_FREE;
> !   return c == 0;
>   }
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_q.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 56,61 ****
>      FIXME: Trim final quotient calculation to qn limbs of precision.
>   */
> ! void
> ! mpn_mu_bdiv_q (mp_ptr qp,
>   	       mp_srcptr np, mp_size_t nn,
>   	       mp_srcptr dp, mp_size_t dn,
> --- 56,61 ----
>      FIXME: Trim final quotient calculation to qn limbs of precision.
>   */
> ! static void
> ! mpn_mu_bdiv_q_old (mp_ptr qp,
>   	       mp_srcptr np, mp_size_t nn,
>   	       mp_srcptr dp, mp_size_t dn,
> ***************
> *** 215,218 ****
> --- 215,228 ----
>   }
>   
> + void
> + mpn_mu_bdiv_q (mp_ptr qp,
> + 	       mp_srcptr np, mp_size_t nn,
> + 	       mp_srcptr dp, mp_size_t dn,
> + 	       mp_ptr scratch)
> + {
> +   mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
> +   mpn_neg (qp, qp, nn);
> + }
> + 
>   mp_size_t
>   mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_qr.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/mu_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 55,64 ****
>   	  particular, when dn==in, tp and rp could use the same space.
>   */
> ! mp_limb_t
> ! mpn_mu_bdiv_qr (mp_ptr qp,
> ! 		mp_ptr rp,
> ! 		mp_srcptr np, mp_size_t nn,
> ! 		mp_srcptr dp, mp_size_t dn,
> ! 		mp_ptr scratch)
>   {
>     mp_size_t qn;
> --- 55,64 ----
>   	  particular, when dn==in, tp and rp could use the same space.
>   */
> ! static mp_limb_t
> ! mpn_mu_bdiv_qr_old (mp_ptr qp,
> ! 		    mp_ptr rp,
> ! 		    mp_srcptr np, mp_size_t nn,
> ! 		    mp_srcptr dp, mp_size_t dn,
> ! 		    mp_ptr scratch)
>   {
>     mp_size_t qn;
> ***************
> *** 233,236 ****
> --- 233,269 ----
>   }
>   
> + mp_limb_t
> + mpn_mu_bdiv_qr (mp_ptr qp,
> + 		mp_ptr rp,
> + 		mp_srcptr np, mp_size_t nn,
> + 		mp_srcptr dp, mp_size_t dn,
> + 		mp_ptr scratch)
> + {
> +   mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);
> + 
> +   /* R' B^{qn} = U - Q' D
> +    *
> +    * Q = B^{qn} - Q' (assuming Q' != 0)
> +    *
> +    * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
> +    *          = B^{qn} D + R'
> +    */
> + 
> +   if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
> +     {
> +       /* Zero quotient. */
> +       ASSERT (cy == 0);
> +       return 0;
> +     }
> +   else
> +     {
> +       mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
> +       ASSERT (cy2 >= cy);
> +       
> +       return cy2 - cy;
> +     }
> + }
> + 
> + 
>   mp_size_t
>   mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/remove.c gmp-bdiv.aa820fde2c64/mpn/generic/remove.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/remove.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/remove.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 93,96 ****
> --- 93,98 ----
>         MP_PTR_SWAP (qp, qp2);
>         qn = qn - pn;
> +       mpn_neg (qp, qp, qn+1);
> + 
>         qn += qp[qn] != 0;
>   
> ***************
> *** 132,135 ****
> --- 134,139 ----
>         MP_PTR_SWAP (qp, qp2);
>         qn = qn - pn;
> +       mpn_neg (qp, qp, qn+1);
> + 
>         qn += qp[qn] != 0;
>   
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_q.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_q.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 28,89 ****
>   #include "gmp-impl.h"
>   
> ! 
> ! /* Computes Q = N / D mod B^nn, destroys N.
>   
>      D must be odd. dinv is (-D)^-1 mod B.
>   
> ! 
> !    The straightforward way to compute Q is to cancel one limb at a time, using
> ! 
> !      qp[i] = D^{-1} * np[i] (mod B)
> !      N -= B^i * qp[i] * D
> ! 
> !    But we prefer addition to subtraction, since mpn_addmul_1 is often faster
> !    than mpn_submul_1.  Q = - N / D can be computed by iterating
> ! 
> !      qp[i] = (-D)^{-1} * np[i] (mod B)
> !      N += B^i * qp[i] * D
> ! 
> !    And then we flip the sign, -Q = (not Q) + 1. */
>   
>   void
>   mpn_sbpi1_bdiv_q (mp_ptr qp,
> ! 		  mp_ptr np, mp_size_t nn,
>   		  mp_srcptr dp, mp_size_t dn,
>   		  mp_limb_t dinv)
>   {
>     mp_size_t i;
> !   mp_limb_t cy, q;
>   
>     ASSERT (dn > 0);
> !   ASSERT (nn >= dn);
>     ASSERT ((dp[0] & 1) != 0);
> !   /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
> !      but some over N/Q overlaps will not work.  */
>   
> !   for (i = nn - dn; i > 0; i--)
>       {
> !       q = dinv * np[0];
> !       cy = mpn_addmul_1 (np, dp, dn, q);
> !       mpn_add_1 (np + dn, np + dn, i, cy);
> !       ASSERT (np[0] == 0);
> !       qp[0] = ~q;
> !       qp++;
> !       np++;
>       }
> - 
>     for (i = dn; i > 1; i--)
>       {
> !       q = dinv * np[0];
> !       mpn_addmul_1 (np, dp, i, q);
> !       ASSERT (np[0] == 0);
> !       qp[0] = ~q;
> !       qp++;
> !       np++;
>       }
>   
>     /* Final limb */
> !   q = dinv * np[0];
> !   qp[0] = ~q;
> !   mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
>   }
> --- 28,85 ----
>   #include "gmp-impl.h"
>   
> ! /* Computes Q = - U / D mod B^un, destroys U.
>   
>      D must be odd. dinv is (-D)^-1 mod B.
>   
> ! */
>   
>   void
>   mpn_sbpi1_bdiv_q (mp_ptr qp,
> ! 		  mp_ptr up, mp_size_t un,
>   		  mp_srcptr dp, mp_size_t dn,
>   		  mp_limb_t dinv)
>   {
>     mp_size_t i;
> !   mp_limb_t q;
>   
>     ASSERT (dn > 0);
> !   ASSERT (un >= dn);
>     ASSERT ((dp[0] & 1) != 0);
> !   ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
>   
> !   if (un > dn)
>       {
> !       mp_limb_t cy, hi;
> !       for (i = un - dn - 1, cy = 0; i > 0; i--)
> ! 	{
> ! 	  q = dinv * up[0];
> ! 	  hi = mpn_addmul_1 (up, dp, dn, q);
> ! 
> ! 	  ASSERT (up[0] == 0);
> ! 	  *qp++ = q;
> ! 	  hi += cy;
> ! 	  cy = hi < cy;
> ! 	  hi += up[dn];
> ! 	  cy += hi < up[dn];
> ! 	  up[dn] = hi;
> ! 	  up++;
> ! 	}
> !       q = dinv * up[0];
> !       hi = cy + mpn_addmul_1 (up, dp, dn, q);
> !       ASSERT (up[0] == 0);
> !       *qp++ = q;
> !       up[dn] += hi;
> !       up++;
>       }
>     for (i = dn; i > 1; i--)
>       {
> !       mp_limb_t q = dinv * up[0];
> !       mpn_addmul_1 (up, dp, i, q);
> !       ASSERT (up[0] == 0);
> !       *qp++ = q;
> !       up++;
>       }
>   
>     /* Final limb */
> !   *qp = dinv * up[0];
>   }
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_qr.c
> *** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpn/generic/sbpi1_bdiv_qr.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 29,41 ****
>   
>   
> ! /* Computes a binary quotient of size qn = nn - dn.
>      Output:
>   
> !       Q = N * D^{-1} mod B^qn,
>   
> !       R = (N - Q * D) * B^(-qn)
>   
> !    Stores the dn least significant limbs of R at {np + nn - dn, dn},
> !    and returns the borrow from the subtraction N - Q*D.
>   
>      D must be odd. dinv is (-D)^-1 mod B. */
> --- 29,41 ----
>   
>   
> ! /* Computes a binary quotient of size qn = un - dn.
>      Output:
>   
> !       Q = -U * D^{-1} mod B^qn,
>   
> !       R = (U + Q * D) * B^(-qn)
>   
> !    Stores the dn least significant limbs of R at {up + un - dn, dn},
> !    and returns the carry from the addition N + Q*D.
>   
>      D must be odd. dinv is (-D)^-1 mod B. */
> ***************
> *** 43,108 ****
>   mp_limb_t
>   mpn_sbpi1_bdiv_qr (mp_ptr qp,
> ! 		   mp_ptr np, mp_size_t nn,
>   		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
>   {
> -   mp_size_t qn;
>     mp_size_t i;
> !   mp_limb_t rh;
> !   mp_limb_t ql;
>   
>     ASSERT (dn > 0);
> !   ASSERT (nn > dn);
>     ASSERT ((dp[0] & 1) != 0);
> !   /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
> !      but some over N/Q overlaps will not work.  */
>   
> !   qn = nn - dn;
> ! 
> !   rh = 0;
> ! 
> !   /* To complete the negation, this value is added to q. */
> !   ql = 1;
> !   while (qn > dn)
>       {
> !       for (i = 0; i < dn; i++)
> ! 	{
> ! 	  mp_limb_t q;
> ! 
> ! 	  q = dinv * np[i];
> ! 	  np[i] = mpn_addmul_1 (np + i, dp, dn, q);
> ! 	  qp[i] = ~q;
> ! 	}
> !       rh += mpn_add (np + dn, np + dn, qn, np, dn);
> !       ql = mpn_add_1 (qp, qp, dn, ql);
> ! 
> !       qp += dn; qn -= dn;
> !       np += dn; nn -= dn;
> !     }
> ! 
> !   for (i = 0; i < qn; i++)
> !     {
> !       mp_limb_t q;
> ! 
> !       q = dinv * np[i];
> !       np[i] = mpn_addmul_1 (np + i, dp, dn, q);
> !       qp[i] = ~q;
>       }
>   
> !   rh += mpn_add_n (np + dn, np + dn, np, qn);
> !   ql = mpn_add_1 (qp, qp, qn, ql);
> ! 
> !   if (UNLIKELY (ql > 0))
> !     {
> !       /* q == 0 */
> !       ASSERT (rh == 0);
> !       return 0;
> !     }
> !   else
> !     {
> !       mp_limb_t cy;
> ! 
> !       cy = mpn_sub_n (np + qn, np + qn, dp, dn);
> !       ASSERT (cy >= rh);
> !       return cy - rh;
> !     }
>   }
> --- 43,71 ----
>   mp_limb_t
>   mpn_sbpi1_bdiv_qr (mp_ptr qp,
> ! 		   mp_ptr up, mp_size_t un,
>   		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
>   {
>     mp_size_t i;
> !   mp_limb_t cy;
>   
>     ASSERT (dn > 0);
> !   ASSERT (un > dn);
>     ASSERT ((dp[0] & 1) != 0);
> !   ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
>   
> !   for (i = un - dn, cy = 0; i != 0; i--)
>       {
> !       mp_limb_t q = dinv * up[0];
> !       mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
> !       *qp++ = q;
> !       
> !       hi += cy;
> !       cy = hi < cy;
> !       hi += up[dn];
> !       cy += hi < up[dn];
> !       up[dn] = hi;
> !       up++;
>       }
>   
> !   return cy;
>   }
> diff -Nrc2 gmp-bdiv.b5ca16212198/mpz/bin_uiui.c gmp-bdiv.aa820fde2c64/mpz/bin_uiui.c
> *** gmp-bdiv.b5ca16212198/mpz/bin_uiui.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/mpz/bin_uiui.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 271,274 ****
> --- 271,275 ----
>         nn -= kn;
>         mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
> +       mpn_neg (np, np, nn);
>   
>         if (kmax == 0)
> diff -Nrc2 gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c gmp-bdiv.aa820fde2c64/tests/mpn/t-bdiv.c
> *** gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c	2012-07-17 23:52:21.000000000 +0200
> --- gmp-bdiv.aa820fde2c64/tests/mpn/t-bdiv.c	2012-07-17 23:52:21.000000000 +0200
> ***************
> *** 59,63 ****
>   {
>     mp_size_t qn;
> -   int cmp;
>     mp_ptr tp;
>     mp_limb_t cy = 4711;		/* silence warnings */
> --- 59,62 ----
> ***************
> *** 78,90 ****
>       mpn_mul (tp, qp, qn, dp, dn);
>   
> !   if (rp != NULL)
> !     {
> !       cy = mpn_add_n (tp + qn, tp + qn, rp, dn);
> !       cmp = cy != rh || mpn_cmp (tp, np, nn) != 0;
> !     }
> !   else
> !     cmp = mpn_cmp (tp, np, nn - dn) != 0;
>   
> !   if (cmp != 0)
>       {
>         printf ("\r*******************************************************************************\n");
> --- 77,84 ----
>       mpn_mul (tp, qp, qn, dp, dn);
>   
> !   cy = mpn_add_n (tp, tp, np, nn);
>   
> !   if (! mpn_zero_p (tp, qn)
> !       || (rp != NULL && (cy != rh || mpn_cmp (tp + qn, rp, dn) != 0)))
>       {
>         printf ("\r*******************************************************************************\n");

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list