Better tabselect
Torbjorn Granlund
tg at gmplib.org
Fri Apr 12 17:14:41 CEST 2013
I'd suggest using the loop below for sparc64. It limits `which' to be <
2^32 by creating the mask based on a 32-bit comparison. It would be
possible to replace "subcc o1,1,o1; subc ..." by "addcc o1,-1,o1; addxc
..." for newer chips, but I don't think that would be of any use.
I sincerely apologise for the odd number of insns in the loop. :-)
(Note that we support >= 2^32 operand sizes in mpn for 64-bit chips, but
that doesn't mean that the number of vectors in tabselect needs to be
that large.)
C Fragment: the 4-word-wide main loop of the proposed sparc64 tabselect.
C
C For each group of four 64-bit words, the inner loop visits all nents
C table entries, ANDs the four loaded words with a mask and ORs them into
C four accumulators, which are then stored through rp.  The mask is all
C ones on exactly one pass (the one where the countdown copy of which
C reaches zero), so only that entry contributes to the result.  No branch
C and no load address in this fragment depends on the value of which.
C
C The operand names (rp, tp, n, nents, which, stride, tporig, i, j, mask,
C t0-t3, data0-data3) are symbolic; their register bindings are not part
C of this fragment.
C NOTE(review): %o1 is used directly as the countdown register; confirm it
C does not collide with any of the symbolic names above.
C NOTE(review): the trailing n mod 4 words are not handled here; presumably
C code following L(outer_end) takes care of them.

C stride = 8*n: the byte distance added to tp after each pass of L(top)
sllx n, 3, stride
C tporig keeps the table address for the current group of four words
mov tp, tporig
C j = n - 4; a negative j means fewer than four words, so skip the loop
C (the nop fills the branch delay slot)
sub n, 4, j
brlz j, L(outer_end)
nop
L(outer_loop):
C clear the four accumulators for this group of words
clr data0
clr data1
clr data2
clr data3
C restart from the first entry with a fresh entry count and selector copy
mov tporig, tp
mov nents, i
mov which, %o1
C Inner loop: one table entry per pass.  subc consumes the 32-bit (icc)
C carry, so only the low 32 bits of %o1 take part in the comparison.
C NOTE(review): i is decremented before it is tested, so this assumes
C nents >= 1; confirm with callers.
L(top): subcc %o1, 1, %o1 C set carry iff o1 = 0
ldx [tp + 0], t0
C mask = 0 - 0 - carry: all ones if the carry was set, otherwise zero
subc %g0, %g0, mask
ldx [tp + 8], t1
sub i, 1, i
ldx [tp + 16], t2
ldx [tp + 24], t3
C step tp by stride bytes (presumably to the same words of the next entry)
add tp, stride, tp
C keep the loaded words only when mask is all ones, then accumulate them;
C the and/or pairs are interleaved rather than grouped
and t0, mask, t0
and t1, mask, t1
or t0, data0, data0
and t2, mask, t2
or t1, data1, data1
and t3, mask, t3
or t2, data2, data2
C loop until i reaches zero; the final or sits in the branch delay slot and
C is executed on every pass, whether or not the branch is taken
brnz i, L(top)
or t3, data3, data3
C store the four accumulated words through rp
stx data0, [rp + 0]
C j -= 4; the brgez below tests the register value of j
subcc j, 4, j
stx data1, [rp + 8]
stx data2, [rp + 16]
stx data3, [rp + 24]
C move the table base and (in the delay slot) rp on by four words
add tporig, (4 * 8), tporig
brgez j, L(outer_loop)
add rp, (4 * 8), rp
L(outer_end):
--
Torbjörn
More information about the gmp-devel
mailing list