udiv_qr_3by2 vs divappr

Niels Möller nisse at lysator.liu.se
Sun Apr 29 06:21:20 UTC 2018


nisse at lysator.liu.se (Niels Möller) writes:

>  If we can get away with that, it gets a lot
> simpler,
>
> if (r >= q_0)   /* Needs to be branch free */
>   {
>     q--;
>     r += d_1;
>   } 
> return q + (r >= d_1);

I've given it a try. Below patch almost works (as in "passes most but
not all tests")... I don't yet know if the failure is due to lack of
accuracy, or some less subtle type of bug.

Regards,
/Niels

*** /tmp/extdiff.ynEWOO/gmp.27296256af57/mpn/generic/sbpi1_div_qr.c	2018-04-29 08:10:46.612753563 +0200
--- /home/nisse/hack/gmp/mpn/generic/sbpi1_div_qr.c	2018-04-29 08:10:15.232010203 +0200
***************
*** 40,43 ****
--- 40,68 ----
  #include "longlong.h"
  
+ static inline mp_limb_t
+ divappr_2 (mp_limb_t n2, mp_limb_t n1,
+ 	   mp_limb_t d1, mp_limb_t d0, mp_limb_t dinv)
+ {
+   mp_limb_t q1, q0, r1, t1, t0, mask;
+ 
+   umul_ppmm (q1, q0, n2, dinv);
+   add_ssaaaa (q1, q0, q1, q0, n2, n1);
+ 
+   umul_ppmm (t1, t0, q1, d0);
+   q1++; /* May overflow */
+   r1 = n1 - d1 * q1 - t1;
+ 
+   /* FIXME: See if we can get sufficient accuracy without this? */
+   t0 += d0;
+   r1 -= (t0 < d0);
+ 
+   mask = - (r1 >= q0);
+   q1 += mask;
+   r1 += mask & d1;
+   q1 += (r1 >= d1);
+ 
+   return q1;
+ }
+ 
  mp_limb_t
  mpn_sbpi1_div_qr (mp_ptr qp,
***************
*** 48,52 ****
    mp_limb_t qh;
    mp_size_t i;
!   mp_limb_t n1, n0;
    mp_limb_t d1, d0;
    mp_limb_t cy, cy1;
--- 73,77 ----
    mp_limb_t qh;
    mp_size_t i;
!   mp_limb_t n1;
    mp_limb_t d1, d0;
    mp_limb_t cy, cy1;
***************
*** 72,79 ****
    np -= 2;
  
-   n1 = np[1];
- 
    for (i = nn - (dn + 2); i > 0; i--)
      {
        np--;
        if (UNLIKELY (n1 == d1) && np[1] == d0)
--- 97,104 ----
    np -= 2;
  
    for (i = nn - (dn + 2); i > 0; i--)
      {
+       n1 = np[1];
+ 
        np--;
        if (UNLIKELY (n1 == d1) && np[1] == d0)
***************
*** 85,101 ****
        else
  	{
! 	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
! 
! 	  cy = mpn_submul_1 (np - dn, dp, dn, q);
! 
! 	  cy1 = n0 < cy;
! 	  n0 = (n0 - cy) & GMP_NUMB_MASK;
! 	  cy = n1 < cy1;
! 	  n1 = (n1 - cy1) & GMP_NUMB_MASK;
! 	  np[0] = n0;
  
! 	  if (UNLIKELY (cy != 0))
  	    {
! 	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
  	      q--;
  	    }
--- 110,123 ----
        else
  	{
! 	  q = divappr_2 (n1, np[1], d1, d0, dinv);
  
! 	  cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
! 	  /* Fails for
! 	       GMP_CHECK_RANDOMIZE=264733444330758 ./tests/mpn/t-div
! 	   */
! 	  ASSERT_ALWAYS (cy >= n1);
! 	  if (UNLIKELY (cy != n1))
  	    {
! 	      ASSERT_CARRY(mpn_add_n (np - dn, np - dn, dp, dn + 2));
  	      q--;
  	    }
***************
*** 104,108 ****
        *--qp = q;
      }
-   np[1] = n1;
  
    return qh;
--- 126,129 ----


-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list