bdiv vs redc
Niels Möller
nisse at lysator.liu.se
Sun Jul 15 00:26:44 CEST 2012
nisse at lysator.liu.se (Niels Möller) writes:
> I don't think I have a real plan. But the most practical way forward I
> see is to
>
> 1. Adapt bdiv_mu to the new convention by writing a simple wrapper around
> the current code.
>
> 2. Iron out remaining problems in bdiv callers as well as any
> remaining bugs in dc and sb.
>
> 3. Possibly change the implementation of mu_bdiv.
I'm now halfway through step (2), also fixing a few bugs in the sb
code.. The updated patch below seems to work fine according to (also
patched) tests/mpn/t-bdiv. I'll leave a
while GMP_CHECK_RANDOMIZE=0 tests/mpn/t-bdiv ; do : ; done
running overnight.
Some other test cases fail, though: t-root, t-perfpow, t-divis, and
t-cong. I'm not familiar with this code, I had a quick look at the root
code but didn't see any obvious dependency on bdiv.
Regards,
/Niels
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/binvert.c gmp-bdiv/mpn/generic/binvert.c
*** gmp-bdiv.b5ca16212198/mpn/generic/binvert.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/binvert.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 75,78 ****
--- 75,80 ----
mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+ mpn_neg (rp, rp, rn);
+
/* Use Newton iterations to get the desired precision. */
for (; rn < n; rn = newrn)
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c gmp-bdiv/mpn/generic/dcpi1_bdiv_q.c
*** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/dcpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 36,40 ****
}
! /* Computes Q = N / D mod B^n, destroys N.
N = {np,n}
--- 36,40 ----
}
! /* Computes Q = - N / D mod B^n, destroys N.
N = {np,n}
***************
*** 58,67 ****
mpn_mullo_n (tp, qp, dp + hi, lo);
! mpn_sub_n (np + hi, np + hi, tp, lo);
if (lo < hi)
{
! cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
! np[n - 1] -= cy;
}
qp += lo;
--- 58,67 ----
mpn_mullo_n (tp, qp, dp + hi, lo);
! mpn_add_n (np + hi, np + hi, tp, lo);
if (lo < hi)
{
! cy += mpn_addmul_1 (np + lo, qp, lo, dp[lo]);
! np[n - 1] += cy;
}
qp += lo;
***************
*** 72,76 ****
}
! /* Computes Q = N / D mod B^nn, destroys N.
N = {np,nn}
--- 72,76 ----
}
! /* Computes Q = - N / D mod B^nn, destroys N.
N = {np,nn}
***************
*** 120,124 ****
mpn_incr_u (tp + qn, cy);
! mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
cy = 0;
}
--- 120,124 ----
mpn_incr_u (tp + qn, cy);
! mpn_add (np + qn, np + qn, nn - qn, tp, dn);
cy = 0;
}
***************
*** 130,134 ****
while (qn > dn)
{
! mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
qp += dn;
--- 130,134 ----
while (qn > dn)
{
! mpn_add_1 (np + dn, np + dn, qn - dn, cy);
cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
qp += dn;
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c gmp-bdiv/mpn/generic/dcpi1_bdiv_qr.c
*** gmp-bdiv.b5ca16212198/mpn/generic/dcpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/dcpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 33,42 ****
Output:
! q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
! r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
Stores q at qp. Stores the n least significant limbs of r at the high half
! of np, and returns the borrow from the subtraction n - q*d.
d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
--- 33,42 ----
Output:
! q = -n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
! r = (n + q * d) * 2^{-qn * GMP_NUMB_BITS}
Stores q at qp. Stores the n least significant limbs of r at the high half
! of np, and returns the carry from the addition n + q*d.
d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
***************
*** 67,71 ****
mpn_incr_u (tp + lo, cy);
! rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
--- 67,71 ----
mpn_incr_u (tp + lo, cy);
! rh = mpn_add (np + lo, np + lo, n + hi, tp, n);
if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
***************
*** 77,81 ****
mpn_incr_u (tp + hi, cy);
! rh += mpn_sub_n (np + n, np + n, tp, n);
return rh;
--- 77,81 ----
mpn_incr_u (tp + hi, cy);
! rh += mpn_add_n (np + n, np + n, tp, n);
return rh;
***************
*** 123,127 ****
mpn_incr_u (tp + qn, cy);
! rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
cy = 0;
}
--- 123,127 ----
mpn_incr_u (tp + qn, cy);
! rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
cy = 0;
}
***************
*** 133,137 ****
do
{
! rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
qp += dn;
--- 133,137 ----
do
{
! rr += mpn_add_1 (np + dn, np + dn, qn, cy);
cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
qp += dn;
***************
*** 158,162 ****
mpn_incr_u (tp + qn, cy);
! rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
cy = 0;
}
--- 158,162 ----
mpn_incr_u (tp + qn, cy);
! rr = mpn_add (np + qn, np + qn, nn - qn, tp, dn);
cy = 0;
}
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/divexact.c gmp-bdiv/mpn/generic/divexact.c
*** gmp-bdiv.b5ca16212198/mpn/generic/divexact.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/divexact.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 89,92 ****
--- 89,95 ----
mpn_bdiv_q (qp, np, qn, dp, dn, tp);
TMP_FREE;
+
+ /* Since bdiv_q computes -N/D (mod B^{qn}), we must negate now. */
+ mpn_neg (qp, qp, qn);
}
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c gmp-bdiv/mpn/generic/mu_bdiv_q.c
*** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/mu_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 56,61 ****
FIXME: Trim final quotient calculation to qn limbs of precision.
*/
! void
! mpn_mu_bdiv_q (mp_ptr qp,
mp_srcptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn,
--- 56,61 ----
FIXME: Trim final quotient calculation to qn limbs of precision.
*/
! static void
! mpn_mu_bdiv_q_old (mp_ptr qp,
mp_srcptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn,
***************
*** 215,218 ****
--- 215,228 ----
}
+ void
+ mpn_mu_bdiv_q (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+ {
+ mpn_mu_bdiv_q_old (qp, np, nn, dp, dn, scratch);
+ mpn_neg (qp, qp, nn);
+ }
+
mp_size_t
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c gmp-bdiv/mpn/generic/mu_bdiv_qr.c
*** gmp-bdiv.b5ca16212198/mpn/generic/mu_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/mu_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 55,64 ****
particular, when dn==in, tp and rp could use the same space.
*/
! mp_limb_t
! mpn_mu_bdiv_qr (mp_ptr qp,
! mp_ptr rp,
! mp_srcptr np, mp_size_t nn,
! mp_srcptr dp, mp_size_t dn,
! mp_ptr scratch)
{
mp_size_t qn;
--- 55,64 ----
particular, when dn==in, tp and rp could use the same space.
*/
! static mp_limb_t
! mpn_mu_bdiv_qr_old (mp_ptr qp,
! mp_ptr rp,
! mp_srcptr np, mp_size_t nn,
! mp_srcptr dp, mp_size_t dn,
! mp_ptr scratch)
{
mp_size_t qn;
***************
*** 233,236 ****
--- 233,269 ----
}
+ mp_limb_t
+ mpn_mu_bdiv_qr (mp_ptr qp,
+ mp_ptr rp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr scratch)
+ {
+ mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);
+
+ /* R' B^{qn} = U - Q' D
+ *
+ * Q = B^{qn} - Q' (assuming Q' != 0)
+ *
+ * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
+ * = B^{qn} D + R'
+ */
+
+ if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
+ {
+ /* Zero quotient. */
+ ASSERT (cy == 0);
+ return 0;
+ }
+ else
+ {
+ mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
+ ASSERT (cy2 >= cy);
+
+ return cy2 - cy;
+ }
+ }
+
+
mp_size_t
mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/remove.c gmp-bdiv/mpn/generic/remove.c
*** gmp-bdiv.b5ca16212198/mpn/generic/remove.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/remove.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 93,96 ****
--- 93,98 ----
MP_PTR_SWAP (qp, qp2);
qn = qn - pn;
+ mpn_neg (qp, qp, qn+1);
+
qn += qp[qn] != 0;
***************
*** 132,135 ****
--- 134,139 ----
MP_PTR_SWAP (qp, qp2);
qn = qn - pn;
+ mpn_neg (qp, qp, qn+1);
+
qn += qp[qn] != 0;
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c gmp-bdiv/mpn/generic/sbpi1_bdiv_q.c
*** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/sbpi1_bdiv_q.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 27,89 ****
#include "gmp.h"
#include "gmp-impl.h"
!
! /* Computes Q = N / D mod B^nn, destroys N.
D must be odd. dinv is (-D)^-1 mod B.
!
! The straightforward way to compute Q is to cancel one limb at a time, using
!
! qp[i] = D^{-1} * np[i] (mod B)
! N -= B^i * qp[i] * D
!
! But we prefer addition to subtraction, since mpn_addmul_1 is often faster
! than mpn_submul_1. Q = - N / D can be computed by iterating
!
! qp[i] = (-D)^{-1} * np[i] (mod B)
! N += B^i * qp[i] * D
!
! And then we flip the sign, -Q = (not Q) + 1. */
void
mpn_sbpi1_bdiv_q (mp_ptr qp,
! mp_ptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn,
mp_limb_t dinv)
{
mp_size_t i;
! mp_limb_t cy, q;
ASSERT (dn > 0);
! ASSERT (nn >= dn);
ASSERT ((dp[0] & 1) != 0);
! /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
! but some over N/Q overlaps will not work. */
! for (i = nn - dn; i > 0; i--)
{
! q = dinv * np[0];
! cy = mpn_addmul_1 (np, dp, dn, q);
! mpn_add_1 (np + dn, np + dn, i, cy);
! ASSERT (np[0] == 0);
! qp[0] = ~q;
! qp++;
! np++;
}
-
for (i = dn; i > 1; i--)
{
! q = dinv * np[0];
! mpn_addmul_1 (np, dp, i, q);
! ASSERT (np[0] == 0);
! qp[0] = ~q;
! qp++;
! np++;
}
/* Final limb */
! q = dinv * np[0];
! qp[0] = ~q;
! mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
}
--- 27,107 ----
#include "gmp.h"
#include "gmp-impl.h"
+ #include "longlong.h"
! /* Computes Q = - U / D mod B^un, destroys U.
D must be odd. dinv is (-D)^-1 mod B.
! */
void
mpn_sbpi1_bdiv_q (mp_ptr qp,
! mp_ptr up, mp_size_t un,
mp_srcptr dp, mp_size_t dn,
mp_limb_t dinv)
{
mp_size_t i;
! mp_limb_t q;
ASSERT (dn > 0);
! ASSERT (un >= dn);
ASSERT ((dp[0] & 1) != 0);
! ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
! if (dn == 1)
{
! mp_limb_t dl = dp[0];
! mp_limb_t ul = *up++;
! mp_limb_t cy = 0;
! for (i = un - 1; i > 0; i--)
! {
! mp_limb_t p1, p0;
! q = dinv * ul;
! *qp++ = q;
! umul_ppmm (p1, p0, q, dl);
! ASSERT (p0 + ul == 0);
! cy += (ul > 0);
! p1 += cy;
! cy = p1 < cy;
! ul = p1 + *up++;
! cy += ul < p1;
! }
! *qp = dinv * ul;
! return;
! }
! if (un > dn)
! {
! mp_limb_t cy, hi;
! for (i = un - dn - 1, cy = 0; i > 0; i--)
! {
! q = dinv * up[0];
! hi = mpn_addmul_1 (up, dp, dn, q);
!
! ASSERT (up[0] == 0);
! *qp++ = q;
! hi += cy;
! cy = hi < cy;
! hi += up[dn];
! cy += hi < up[dn];
! up[dn] = hi;
! up++;
! }
! q = dinv * up[0];
! hi = cy + mpn_addmul_1 (up, dp, dn, q);
! ASSERT (up[0] == 0);
! *qp++ = q;
! up[dn] += hi;
! up++;
}
for (i = dn; i > 1; i--)
{
! mp_limb_t q = dinv * up[0];
! mpn_addmul_1 (up, dp, i, q);
! ASSERT (up[0] == 0);
! *qp++ = q;
! up++;
}
/* Final limb */
! *qp = dinv * up[0];
}
diff -Nrc2 gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c gmp-bdiv/mpn/generic/sbpi1_bdiv_qr.c
*** gmp-bdiv.b5ca16212198/mpn/generic/sbpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpn/generic/sbpi1_bdiv_qr.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 29,41 ****
! /* Computes a binary quotient of size qn = nn - dn.
Output:
! Q = N * D^{-1} mod B^qn,
! R = (N - Q * D) * B^(-qn)
! Stores the dn least significant limbs of R at {np + nn - dn, dn},
! and returns the borrow from the subtraction N - Q*D.
D must be odd. dinv is (-D)^-1 mod B. */
--- 29,41 ----
! /* Computes a binary quotient of size qn = un - dn.
Output:
! Q = -U * D^{-1} mod B^qn,
! R = (U + Q * D) * B^(-qn)
! Stores the dn least significant limbs of R at {up + un - dn, dn},
! and returns the carry from the addition N + Q*D.
D must be odd. dinv is (-D)^-1 mod B. */
***************
*** 43,108 ****
mp_limb_t
mpn_sbpi1_bdiv_qr (mp_ptr qp,
! mp_ptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
- mp_size_t qn;
mp_size_t i;
! mp_limb_t rh;
! mp_limb_t ql;
ASSERT (dn > 0);
! ASSERT (nn > dn);
ASSERT ((dp[0] & 1) != 0);
! /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
! but some over N/Q overlaps will not work. */
! qn = nn - dn;
!
! rh = 0;
!
! /* To complete the negation, this value is added to q. */
! ql = 1;
! while (qn > dn)
{
! for (i = 0; i < dn; i++)
! {
! mp_limb_t q;
!
! q = dinv * np[i];
! np[i] = mpn_addmul_1 (np + i, dp, dn, q);
! qp[i] = ~q;
! }
! rh += mpn_add (np + dn, np + dn, qn, np, dn);
! ql = mpn_add_1 (qp, qp, dn, ql);
!
! qp += dn; qn -= dn;
! np += dn; nn -= dn;
! }
!
! for (i = 0; i < qn; i++)
! {
! mp_limb_t q;
!
! q = dinv * np[i];
! np[i] = mpn_addmul_1 (np + i, dp, dn, q);
! qp[i] = ~q;
}
! rh += mpn_add_n (np + dn, np + dn, np, qn);
! ql = mpn_add_1 (qp, qp, qn, ql);
!
! if (UNLIKELY (ql > 0))
! {
! /* q == 0 */
! ASSERT (rh == 0);
! return 0;
! }
! else
! {
! mp_limb_t cy;
!
! cy = mpn_sub_n (np + qn, np + qn, dp, dn);
! ASSERT (cy >= rh);
! return cy - rh;
! }
}
--- 43,71 ----
mp_limb_t
mpn_sbpi1_bdiv_qr (mp_ptr qp,
! mp_ptr up, mp_size_t un,
mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
mp_size_t i;
! mp_limb_t cy;
ASSERT (dn > 0);
! ASSERT (un > dn);
ASSERT ((dp[0] & 1) != 0);
! ASSERT (up == qp || !MPN_OVERLAP_P (up, un, qp, un - dn));
! for (i = un - dn, cy = 0; i != 0; i--)
{
! mp_limb_t q = dinv * up[0];
! mp_limb_t hi = mpn_addmul_1 (up, dp, dn, q);
! *qp++ = q;
!
! hi += cy;
! cy = hi < cy;
! hi += up[dn];
! cy += hi < up[dn];
! up[dn] = hi;
! up++;
}
! return cy;
}
diff -Nrc2 gmp-bdiv.b5ca16212198/mpz/bin_uiui.c gmp-bdiv/mpz/bin_uiui.c
*** gmp-bdiv.b5ca16212198/mpz/bin_uiui.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/mpz/bin_uiui.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 271,274 ****
--- 271,275 ----
nn -= kn;
mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
+ mpn_neg (np, np, nn);
if (kmax == 0)
diff -Nrc2 gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c gmp-bdiv/tests/mpn/t-bdiv.c
*** gmp-bdiv.b5ca16212198/tests/mpn/t-bdiv.c 2012-07-15 00:16:56.000000000 +0200
--- gmp-bdiv/tests/mpn/t-bdiv.c 2012-07-15 00:16:56.000000000 +0200
***************
*** 59,63 ****
{
mp_size_t qn;
- int cmp;
mp_ptr tp;
mp_limb_t cy = 4711; /* silence warnings */
--- 59,62 ----
***************
*** 78,90 ****
mpn_mul (tp, qp, qn, dp, dn);
! if (rp != NULL)
! {
! cy = mpn_add_n (tp + qn, tp + qn, rp, dn);
! cmp = cy != rh || mpn_cmp (tp, np, nn) != 0;
! }
! else
! cmp = mpn_cmp (tp, np, nn - dn) != 0;
! if (cmp != 0)
{
printf ("\r*******************************************************************************\n");
--- 77,84 ----
mpn_mul (tp, qp, qn, dp, dn);
! cy = mpn_add_n (tp, tp, np, nn);
! if (! mpn_zero_p (tp, qn)
! || (rp != NULL && (cy != rh || mpn_cmp (tp + qn, rp, dn) != 0)))
{
printf ("\r*******************************************************************************\n");
--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list