mpn_sqrtrem1
Marco Bodrato
bodrato at mail.dm.unipi.it
Tue Dec 20 21:57:36 UTC 2016
Ciao,
Il Mar, 20 Dicembre 2016 7:29 pm, Adrien Prost-Boucle ha scritto:
>> Reducing the number of multiplications is possible... but I bet a
>> Karatsuba umul_ppmm() is not faster than the plain version (at least not
>> on current 64-bit CPUs ;-)
>
> Thank you for the analysis, I was curious :-)
Me too.
I decided to try an as-branch-free-as-I-can Karatsuba implementation for
umul_ppmm in the simpler environment: mini-gmp.
Apply the following patch, define UMUL_PPMM_USE_KARATSUBA, and experience
a slowed-down, larger-code version of mini-gmp ;-D
diff -r 1cdb2f2d864b mini-gmp/mini-gmp.c
--- a/mini-gmp/mini-gmp.c Sun Dec 18 09:19:44 2016 +0100
+++ b/mini-gmp/mini-gmp.c Tue Dec 20 23:40:55 2016 +0100
@@ -111,6 +111,50 @@
(sl) = __x; \
} while (0)
+#ifdef UMUL_PPMM_USE_KARATSUBA
+#define gmp_umul_ppmm(w1, w0, u, v) \
+ do { \
+ mp_limb_t __x1, __x2, __x3; \
+ unsigned __x0, __ul, __vl, __uh, __vh; \
+ mp_limb_t __u = (u), __v = (v); \
+ \
+ __ul = __u & GMP_LLIMB_MASK; \
+ __uh = __u >> (GMP_LIMB_BITS / 2); \
+ __vl = __v & GMP_LLIMB_MASK; \
+ __vh = __v >> (GMP_LIMB_BITS / 2); \
+ \
+ __x2 = (mp_limb_t) __ul * __vl; \
+ __x3 = (mp_limb_t) __uh * __vh; \
+ __u = __uh; \
+ __u -= __ul; \
+ __uh = __u >> (GMP_LIMB_BITS / 2); \
+ __ul = (((unsigned)__u ^ __uh) - __uh) & GMP_LLIMB_MASK; \
+ __v = __vh; \
+ __v -= __vl; \
+ __vh = __v >> (GMP_LIMB_BITS / 2); \
+ __vl = (((unsigned)__v ^ __vh) - __vh) & GMP_LLIMB_MASK; \
+ __vh ^= __uh; \
+ \
+ __x1 = (mp_limb_t) __ul * __vl; \
+ __x0 = __x2 & GMP_LLIMB_MASK; \
+ __x2 = __x2 + (__x2 >> (GMP_LIMB_BITS / 2)) + __x3; \
+ __ul = __x2 < __x3; \
+ if (__vh) \
+ { \
+ __x2 += __x1; \
+ __ul += __x2 < __x1; \
+ } \
+ else \
+ { \
+ __ul -= __x1 > __x2; \
+ __x2 -= __x1; \
+ } \
+ \
+ (w0) = (__x2 << (GMP_LIMB_BITS / 2)) + __x0; \
+ (w1) = __x3 + (__x2 >> (GMP_LIMB_BITS / 2)) + \
+ ((mp_limb_t)__ul << (GMP_LIMB_BITS / 2)); \
+ } while (0)
+#else
#define gmp_umul_ppmm(w1, w0, u, v) \
do { \
mp_limb_t __x0, __x1, __x2, __x3; \
@@ -135,6 +179,7 @@
(w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \
(w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \
} while (0)
+#endif
#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
do { \
Regards,
m
--
http://bodrato.it/toom/
More information about the gmp-devel
mailing list