binvert_limb speedup on 64 bit machines with UHWtype
John Gatrell
gatrelljm at gmail.com
Fri Feb 25 17:06:31 CET 2022
Hi everyone. I'm new here, so I don't know how to submit a new gmp-impl.h.
I tested using UHWtype in the macro for binvert_limb. On a 64-bit machine,
one of my programs gained a 3% speedup. On a 32-bit machine, there was no
difference.
I noticed that replacing '(n/2)&0x7F' with '(unsigned char)n/2' may give a
hint to assembler implementers that the 7F mask is unnecessary.
For your consideration
#if (GMP_NUMB_BITS < 64)
/* binvert_limb (inv, n) -- set inv to the multiplicative inverse of the
   limb n modulo 2^GMP_NUMB_BITS, so that (inv * n) & GMP_NUMB_MASK == 1.

   n must be odd; this is checked with ASSERT.  n is evaluated exactly
   once, inv is assigned exactly once at the very end.

   The starting value is read from binvert_limb_table, indexed by bits
   1..7 of n, and is annotated in the original as good to 8 bits.  Each
   Newton step x <- 2*x - x*x*n doubles the number of correct low bits,
   so at most three steps are needed for limbs narrower than 64 bits.
   The step count is selected by compile-time constant conditions.  */
#define binvert_limb(inv,n) \
  do { \
    mp_limb_t __binv_n = (n); \
    mp_limb_t __binv_x; \
    \
    ASSERT ((__binv_n & 1) == 1); \
    \
    /* 8 correct bits from the lookup table */ \
    __binv_x = binvert_limb_table[(__binv_n >> 1) & 0x7F]; \
    \
    if (GMP_NUMB_BITS > 8) /* 8 -> 16 bits */ \
      { \
        __binv_x = 2 * __binv_x - __binv_x * __binv_x * __binv_n; \
      } \
    if (GMP_NUMB_BITS > 16) /* 16 -> 32 bits */ \
      { \
        __binv_x = 2 * __binv_x - __binv_x * __binv_x * __binv_n; \
      } \
    if (GMP_NUMB_BITS > 32) /* 32 -> 64 bits */ \
      { \
        __binv_x = 2 * __binv_x - __binv_x * __binv_x * __binv_n; \
      } \
    \
    ASSERT ((__binv_x * __binv_n & GMP_NUMB_MASK) == 1); \
    (inv) = __binv_x & GMP_NUMB_MASK; \
  } while (0)
#endif
#if (GMP_NUMB_BITS >= 64)
/* binvert_limb (inv, n) -- set inv to the multiplicative inverse of the
   limb n modulo 2^GMP_NUMB_BITS, so that (inv * n) & GMP_NUMB_MASK == 1.

   n must be odd; this is checked with ASSERT.  n is evaluated exactly
   once, inv is assigned exactly once at the very end.

   Variant for limbs of 64 bits or more.  The table seed and the first
   Newton step are kept in a UHWtype temporary, so the compiler is free
   to do the early, low-precision work in half-limb registers.  Each
   step x <- 2*x - x*x*n doubles the number of correct low bits; junk
   above the correct bits is harmless because the final 64-bit step and
   the closing mask only depend on the low bits being right.

   NOTE(review): the table index relies on (unsigned char) keeping
   exactly the low 8 bits of n (CHAR_BIT == 8), giving an index 0..127
   without an explicit & 0x7F.
   NOTE(review): assumes UHWtype holds at least 16 bits, since the
   16-bit iterate is stored in it -- confirm against longlong.h.  */
#define binvert_limb(inv,n) \
  do { \
    mp_limb_t __n = (n); \
    mp_limb_t __inv; \
    UHWtype __hinv = binvert_limb_table[((unsigned char) __n) / 2]; /* 8 */ \
    \
    ASSERT ((__n & 1) == 1); \
    \
    __hinv = 2 * __hinv - __hinv * __hinv * __n; /* 16 */ \
    __inv = 2 * __hinv - __hinv * __hinv * __n; /* 32 */ \
    __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
    \
    if (GMP_NUMB_BITS > 64) \
      { \
        /* Keep doubling the precision until the whole limb is covered. */ \
        int __invbits = 64; \
        do { \
          __inv = 2 * __inv - __inv * __inv * __n; \
          __invbits *= 2; \
        } while (__invbits < GMP_NUMB_BITS); \
      } \
    \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
    (inv) = __inv & GMP_NUMB_MASK; \
  } while (0)
#endif
More information about the gmp-devel
mailing list