udiv_qr_3by2 vs divappr
Niels Möller
nisse at lysator.liu.se
Sun Apr 29 06:21:20 UTC 2018
nisse at lysator.liu.se (Niels Möller) writes:
> If we can get away with that, it gets a lot
> simpler,
>
> if (r >= q_0) /* Needs to be branch free */
> {
> q--;
> r += d_1;
> }
> return q + (r >= d_1);
I've given it a try. Below patch almost works (as in "passes most but
not all tests")... I don't yet know if the failure is due to lack of
accuracy, or some less subtle type of bug.
Regards,
/Niels
*** /tmp/extdiff.ynEWOO/gmp.27296256af57/mpn/generic/sbpi1_div_qr.c 2018-04-29 08:10:46.612753563 +0200
--- /home/nisse/hack/gmp/mpn/generic/sbpi1_div_qr.c 2018-04-29 08:10:15.232010203 +0200
***************
*** 40,43 ****
--- 40,68 ----
#include "longlong.h"
+ static inline mp_limb_t
+ divappr_2 (mp_limb_t n2, mp_limb_t n1,
+ mp_limb_t d1, mp_limb_t d0, mp_limb_t dinv)
+ {
+ mp_limb_t q1, q0, r1, t1, t0, mask;
+
+ umul_ppmm (q1, q0, n2, dinv);
+ add_ssaaaa (q1, q0, q1, q0, n2, n1);
+
+ umul_ppmm (t1, t0, q1, d0);
+ q1++; /* May overflow */
+ r1 = n1 - d1 * q1 - t1;
+
+ /* FIXME: See if we can get sufficient accuracy without this? */
+ t0 += d0;
+ r1 -= (t0 < d0);
+
+ mask = - (r1 >= q0);
+ q1 += mask;
+ r1 += mask & d1;
+ q1 += (r1 >= d1);
+
+ return q1;
+ }
+
mp_limb_t
mpn_sbpi1_div_qr (mp_ptr qp,
***************
*** 48,52 ****
mp_limb_t qh;
mp_size_t i;
! mp_limb_t n1, n0;
mp_limb_t d1, d0;
mp_limb_t cy, cy1;
--- 73,77 ----
mp_limb_t qh;
mp_size_t i;
! mp_limb_t n1;
mp_limb_t d1, d0;
mp_limb_t cy, cy1;
***************
*** 72,79 ****
np -= 2;
- n1 = np[1];
-
for (i = nn - (dn + 2); i > 0; i--)
{
np--;
if (UNLIKELY (n1 == d1) && np[1] == d0)
--- 97,104 ----
np -= 2;
for (i = nn - (dn + 2); i > 0; i--)
{
+ n1 = np[1];
+
np--;
if (UNLIKELY (n1 == d1) && np[1] == d0)
***************
*** 85,101 ****
else
{
! udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
!
! cy = mpn_submul_1 (np - dn, dp, dn, q);
!
! cy1 = n0 < cy;
! n0 = (n0 - cy) & GMP_NUMB_MASK;
! cy = n1 < cy1;
! n1 = (n1 - cy1) & GMP_NUMB_MASK;
! np[0] = n0;
! if (UNLIKELY (cy != 0))
{
! n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
q--;
}
--- 110,123 ----
else
{
! q = divappr_2 (n1, np[1], d1, d0, dinv);
! cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
! /* Fails for
! GMP_CHECK_RANDOMIZE=264733444330758 ./tests/mpn/t-div
! */
! ASSERT_ALWAYS (cy >= n1);
! if (UNLIKELY (cy != n1))
{
! ASSERT_CARRY(mpn_add_n (np - dn, np - dn, dp, dn + 2));
q--;
}
***************
*** 104,108 ****
*--qp = q;
}
- np[1] = n1;
return qh;
--- 126,129 ----
--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list