Multi-limb inverse for mpn_divrem_1
Torbjorn Granlund
tege at swox.com
Mon Sep 15 01:41:07 CEST 2003
We are planning to radically speed mpn_divrem_1, mpn_mod_1, and
friends by allowing for a multi-limb divisor reciprocal.
The problem with division by a single-limb divisor is that the
limb multiply instructions lie on the recurrency path. Pipelined
multiply units are largely wasted.
By using a n-limb inverse, we can develop n quotient limbs per
iteration, at the cost of O(n^2) multiplies, but without
significantly increased recurrency depth between iterations. It
will be practical to use n of up to about 4, bigger n result in
too many limb multiplies.
Here is a proof-of-concept implementation for n = 2.
(Karl Hasselström and I wrote this.)
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* # of limbs in inverse # of full limb mults # of limb half mults
1 1+1 1+2
2 4+2 7+3
3 8+3 14+5
4 13+4 23+7
*/
mp_limb_t
xmpn_divrem_1 (mp_ptr qp, mp_ptr up, mp_size_t un, mp_limb_t vl)
{
mp_limb_t vip[2];
mp_limb_t tp[4];
mp_limb_t rp[4];
mp_limb_t pp[2];
int i;
mp_limb_t r;
#if GMP_NAIL_BITS != 0
#error not yet for anils
#endif
/* Compute 2-limb inverse. */
if ((vl << 1) == 0)
{
vip[0] = ~ (mp_limb_t) 0;
vip[1] = ~ (mp_limb_t) 0;
}
else
{
/* For now use mpn_divrem_1. */
mp_limb_t pow2[3];
pow2[0] = 0; pow2[1] = 0; pow2[2] = -vl;
mpn_divrem_1 (tp, 0, pow2, 3, vl);
vip[0] = tp[0];
vip[1] = tp[1];
}
qp += un - 1;
up += un - 1;
if (up[0] >= vl)
{
qp[0] = 1;
rp[0] = up[0] - vl;
}
else
{
qp[0] = 0;
rp[0] = up[0];
}
qp -= 2;
up -= 2;
for (i = un - 3; i >= 0; i -= 2)
{
rp[1] = rp[0];
rp[0] = up[1];
mpn_mul_basecase (tp, rp, 2, vip, 2);
add_ssaaaa (qp[1], qp[0], tp[3], tp[2], rp[1], rp[0]);
mpn_mul_1 (pp, qp, 2, vl);
rp[1] = rp[0];
rp[0] = up[0];
sub_ddmmss (rp[1], rp[0], rp[1], rp[0], pp[1], pp[0]);
if (rp[1] != 0)
{
mpn_incr_u (qp, 1);
mpn_decr_u (rp, vl);
}
if (rp[1] != 0)
{
mpn_incr_u (qp, 1);
mpn_decr_u (rp, vl);
}
if (rp[0] >= vl)
{
mpn_incr_u (qp, 1);
mpn_decr_u (rp, vl);
}
ASSERT (rp[1] == 0);
qp -= 2;
up -= 2;
}
if ((un & 1) == 0)
{
udiv_qrnnd_preinv (qp[1], r, rp[0], up[1], vl, vip[1]);
}
else
{
r = rp[0];
}
return r;
}
More information about the gmp-devel
mailing list