get_d_2exp test failures: clang -emit-llvm/-flto/-O4; icc -ipo/-fast
Niels Möller
nisse at lysator.liu.se
Wed Aug 21 15:00:38 CEST 2013
nisse at lysator.liu.se (Niels Möller) writes:
> I haven't tried to debug it, but I guess the floating point is using
> round-to-nearest or something like that. I haven't debugged it, but it
> looks like the critical oepration is the addition
>
> d += up[i] * weight;
In the common case of 64-bit limbs, I think the problematic operation is
conversion of a 64-bit integer to floating point. This may round
upwards, depending on the rounding mode.
I tried to write a function to portably convert an uint64_t to a double,
always truncating towards zero, regardless of the current rounding mode.
It's annoyingly messy. I include a complete test program below, the
interesting function is limb2d. One would also need something similar
for addition.
A reliable configure test to find the precision (number of mantissa
bits) would definitely make for simpler code.
When cross compiling, it should be possible to do a binary search using
testprograms of the form
char x[1-2*(1.0 == 1.0 + 1.0 / (1UL<<N))];
for varying N, which will fail to compile iff we have less than N
mantissa bits (and one should use several factors if needed, to avoid
using shift counts which might exceeed the word size). Assuming the
compiler gets compile time floating point operations right.
Autoconf uses something similar for the standard AC_CHECK_SIZEOF test
when cross compiling.
I'd wish there were a simpler *and* portable way.
Regards,
/Niels
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <fenv.h>
#include <float.h>
#include <assert.h>
struct round_mode
{
const char *name;
int value;
};
const struct round_mode mode[] = {
#ifdef FE_TONEAREST
{ "nearest", FE_TONEAREST },
#endif
#ifdef FE_UPWARD
{ "upward", FE_UPWARD },
#endif
#ifdef FE_DOWNWARD
{ "downward", FE_DOWNWARD },
#endif
#ifdef FE_TOWARDZERO
{ "zero", FE_TOWARDZERO },
#endif
{ NULL, 0 }
};
static unsigned
clz (uint64_t x)
{
unsigned shift, count;
assert (x > 0);
for (shift = 32, count = 0; shift > 0; shift >>= 1)
{
uint64_t h = x >> shift;
if (h == 0)
count += shift;
else
x = h;
}
return count;
}
static unsigned
ctz (uint64_t x)
{
assert (x > 0);
return 63 - clz(x & -x);
}
#define BASE_AS_DOUBLE (4.0 * ((uint64_t) 1 << 62))
static double
limb2d (uint64_t limb)
{
double d = limb;
uint64_t x;
/* Special case; can't be converted back to a uint64_t without overflow */
if (d >= BASE_AS_DOUBLE)
{
unsigned min = 64 - clz (-limb);
unsigned bit;
/* FIXME: Could use binary search */
for (bit = min; bit <= 63; bit++)
{
d = - ((uint64_t) 1 << bit);
if (d < BASE_AS_DOUBLE)
return d;
}
abort ();
}
x = d;
if (x <= limb)
return d;
else
{
unsigned min, max, bit;
/* Rounded upwards, figure out index of least significant bit. */
min = 64 - clz (x - limb);
max = ctz (x);
/* FIXME: Could use binary search */
for (bit = min; bit <= max; bit++)
{
d = x - ((uint64_t) 1 << bit);
if ((uint64_t) d <= limb)
return d;
}
abort ();
}
}
int
main (int argc, char **argv)
{
unsigned i;
printf ("FLT_ROUNDS: %d\n", FLT_ROUNDS);
printf ("fegetround: %d\n", fegetround ());
for (i = 0; mode[i].name; i++)
{
unsigned bits;
uint64_t x;
double d;
printf ("%d %s\n", mode[i].value, mode[i].name);
fesetround (mode[i].value);
for (bits = 52; bits <= 64; bits++)
{
x = (~(uint64_t) 0) >> (64 - bits);
d = x;
printf ("%d %jx: %jx, %jx\n",
bits, x, (uint64_t) d, (uint64_t) limb2d (x));
}
}
return 0;
}
--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
More information about the gmp-bugs
mailing list