hgcd1/2
Niels Möller
nisse at lysator.liu.se
Sun Sep 1 08:39:43 UTC 2019
I've been plaing a bit with table based hgcd2, but to keep things
simple, I started with hgcd1 (reducing full limbs to half limbs, with an
associated matrix of half limb elements). Complete code below, the main
loop is
for (;;)
{
mp_limb_t a_hi;
mp_limb_t b_hi;
mp_limb_t q;
int c;
if (a < b)
{
MP_LIMB_T_SWAP (a,b);
MP_LIMB_T_SWAP (u00, u01);
MP_LIMB_T_SWAP (u10, u11);
swapped ^= 1;
}
count_leading_zeros (c, a);
assert (c + TAB_BITS <= GMP_LIMB_BITS);
a_hi = a >> (GMP_LIMB_BITS - TAB_BITS - c);
b_hi = (b >> (GMP_LIMB_BITS - TAB_BITS - c));
if (UNLIKELY(b_hi == 0))
{
q = a / b;
}
else
q = q_tab[b_hi - 1][a_hi - (1U << (TAB_BITS - 1))];
assert (q > 0);
assert (a >= q*b);
a -= q*b;
u01 += q*u00;
u11 += q*u10;
if (a < REDUCED_LIMIT)
{
u01 -= u00;
u11 -= u10;
break;
}
}
I think it's promising, and also a bit simpler than the current div1
code. Some concerns:
1. I think the approximate q means that we will often reduce from a > b
to a close to but larger than b, so we need two iterations to get to
a < b. Might improve progress to add some kind of adjustment step
after a - q*b, checking either if result >= b or < 0.
2. There are diferent options for the UNLIKELY branch. One could count
leading zeros of b, shift, and look up a quotient, and apply
a -= q * b << k;
for suitable q and k. Likely cheaper than a division, but we'll then
do multiple iterations if size difference is large (which I think is
ok).
3. We get poor accuracy for q when b_hi == 1. One could make that less
likely by using a somewhat asymmetric table, say looking up 3 bits of
a but 6 bits of b.
4. One could also go in the other direction and do something closer to
left-to-right binary, and in each step always use q = 2^k for
suitable k, based on count_leading_zeros on both a and b.
Regards,
/Niels
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#define REDUCED_BITS (GMP_NUMB_BITS / 2 + 1)
#define REDUCED_LIMIT ((mp_limb_t) 1 << (GMP_NUMB_BITS / 2 + 1))
#define MATRIX_LIMIT ((mp_limb_t) 1 << (GMP_NUMB_BITS / 2 - 1))
#define TAB_BITS 4
#define ABS_DIFF(a,b) ((a < b) ? b - a : a - b)
static int
hgcd1_ref(struct hgcd_matrix1 *m, mp_limb_t a, mp_limb_t b)
{
mp_limb_t u00, u01, u10, u11;
if (a < REDUCED_LIMIT || b < REDUCED_LIMIT || ABS_DIFF(a,b) < REDUCED_LIMIT)
return 0;
u00 = u11 = 1;
u01 = u10 = 0;
if (a < b)
goto a_less_b;
for (;;)
{
mp_limb_t q;
assert(a > b);
q = a / b;
a -= q*b;
u01 += q*u00;
u11 += q*u10;
if (a < REDUCED_LIMIT)
{
u01 -= u00;
u11 -= u10;
break;
}
a_less_b:
assert (a < b);
q = b / a;
b -= q * a;
u00 += q * u01;
u10 += q * u11;
if (b < REDUCED_LIMIT)
{
u00 -= u01;
u10 -= u11;
break;
}
}
m->u[0][0] = u00;
m->u[0][1] = u01;
m->u[1][0] = u10;
m->u[1][1] = u11;
return 1;
}
#if TAB_BITS == 3
/* 1,a1,a0.xxx / b2,b1,b0.xxx > 1,a1,a0 / b2,b1,b0 + 1
* Need to round b up, except when a_hi == b_hi, since we always have a > b.
*/
unsigned char q_tab[7][4] = {
/* a = 4, 5, 6, 7
/ b */
{ /* 1 */ 2, 2, 3, 3 },
{ /* 2 */ 1, 1, 2, 2 },
{ /* 3 */ 1, 1, 1, 1 },
{ /* 4 */ 1, 1, 1, 1 },
{ /* 5 */ 0, 1, 1, 1 },
{ /* 6 */ 0, 0, 1, 1 },
{ /* 7 */ 0, 0, 0, 1 },
};
#elif TAB_BITS == 4
unsigned char q_tab[15][8] = {
/* a = 8, 9,10,11,12,13,14,15
/ b */
{ /* 1 */ 4, 4, 5, 5, 6, 6, 7, 7 },
{ /* 2 */ 2, 3, 3, 3, 4, 4, 4, 5 },
{ /* 3 */ 2, 2, 2, 2, 3, 3, 3, 3 },
{ /* 4 */ 1, 1, 2, 2, 2, 2, 2, 3 },
{ /* 5 */ 1, 1, 1, 1, 2, 2, 2, 2 },
{ /* 6 */ 1, 1, 1, 1, 1, 1, 2, 2 },
{ /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1 },
{ /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1 },
{ /* 9 */ 0, 1, 1, 1, 1, 1, 1, 1 },
{ /*10 */ 0, 0, 1, 1, 1, 1, 1, 1 },
{ /*11 */ 0, 0, 0, 1, 1, 1, 1, 1 },
{ /*12 */ 0, 0, 0, 0, 1, 1, 1, 1 },
{ /*13 */ 0, 0, 0, 0, 0, 1, 1, 1 },
{ /*14 */ 0, 0, 0, 0, 0, 0, 1, 1 },
{ /*15 */ 0, 0, 0, 0, 0, 0, 0, 1 },
};
#else
#error unsupported table size
#endif
static int
hgcd1_tab(struct hgcd_matrix1 *m, mp_limb_t a, mp_limb_t b)
{
mp_limb_t u00, u01, u10, u11;
int swapped;
if (a < REDUCED_LIMIT || b < REDUCED_LIMIT || ABS_DIFF(a,b) < REDUCED_LIMIT)
return 0;
u00 = u11 = 1;
u01 = u10 = 0;
swapped = 0;
for (;;)
{
mp_limb_t a_hi;
mp_limb_t b_hi;
mp_limb_t q;
int c;
if (a < b)
{
MP_LIMB_T_SWAP (a,b);
MP_LIMB_T_SWAP (u00, u01);
MP_LIMB_T_SWAP (u10, u11);
swapped ^= 1;
}
count_leading_zeros (c, a);
assert (c + TAB_BITS <= GMP_LIMB_BITS);
a_hi = a >> (GMP_LIMB_BITS - TAB_BITS - c);
b_hi = (b >> (GMP_LIMB_BITS - TAB_BITS - c));
if (UNLIKELY(b_hi == 0))
{
q = a / b;
}
else
q = q_tab[b_hi - 1][a_hi - (1U << (TAB_BITS - 1))];
assert (q > 0);
assert (a >= q*b);
a -= q*b;
u01 += q*u00;
u11 += q*u10;
if (a < REDUCED_LIMIT)
{
u01 -= u00;
u11 -= u10;
break;
}
}
if (swapped)
{
MP_LIMB_T_SWAP (u00, u01);
MP_LIMB_T_SWAP (u10, u11);
}
m->u[0][0] = u00;
m->u[0][1] = u01;
m->u[1][0] = u10;
m->u[1][1] = u11;
return 1;
}
static int
hgcd1_valid (const struct hgcd_matrix1 *m, mp_limb_t a, mp_limb_t b)
{
mp_limb_t u00, u01, u10, u11;
mp_limb_t s0, s1, t0, t1;
mp_limb_t alpha, beta;
u00 = m->u[0][0];
u01 = m->u[0][1];
u10 = m->u[1][0];
u11 = m->u[1][1];
if (u00 >= MATRIX_LIMIT
|| u01 >= MATRIX_LIMIT
|| u10 >= MATRIX_LIMIT
|| u11 >= MATRIX_LIMIT)
return 0;
/* M^-1 = (u11, -u01; -u10,u00) */
umul_ppmm (s1, s0, u11, a);
umul_ppmm (t1, t0, u01, b);
if (t1 > s1 || (t1 == s1 && t0 > s0))
return 0;
sub_ddmmss (s1, alpha, s1, s0, t1, t0);
if (s1 != 0)
return 0;
if (alpha < REDUCED_LIMIT)
return 0;
umul_ppmm (s1, s0, u00, b);
umul_ppmm (t1, t0, u10, a);
if (t1 > s1 || (t1 == s1 && t0 > s0))
return 0;
sub_ddmmss (s1, beta, s1, s0, t1, t0);
if (s1 != 0)
return 0;
if (beta < REDUCED_LIMIT)
return 0;
return ABS_DIFF(alpha, beta) < REDUCED_LIMIT;
}
int main (int argc, char **argv)
{
gmp_randstate_t rands;
unsigned i;
gmp_randinit_default (rands);
gmp_randseed_ui (rands, 17);
for (i = 0; i < 10000; i++)
{
mp_limb_t a, b;
struct hgcd_matrix1 m;
a = gmp_urandomb_ui (rands, GMP_NUMB_BITS);
b = gmp_urandomb_ui (rands, GMP_NUMB_BITS);
if (hgcd1_ref (&m, a, b) && !hgcd1_valid (&m, a, b))
{
gmp_printf("hgcd1 failed: i = %u, a = %Mx b = %Mx\n"
" M = (%Mx, %Mx;%Mx, %Mx)\n",
i, a, b, m.u[0][0], m.u[0][1], m.u[1][0], m.u[1][1]);
abort();
}
if (hgcd1_tab (&m, a, b) && !hgcd1_valid (&m, a, b))
{
gmp_printf("hgcd1 failed: i = %u, a = %Mx b = %Mx\n"
" M = (%Mx, %Mx;%Mx, %Mx)\n",
i, a, b, m.u[0][0], m.u[0][1], m.u[1][0], m.u[1][1]);
abort();
}
}
return 0;
}
--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list