Please update addaddmul_1msb0.asm to support ABI in mingw64
Niels Möller
nisse at lysator.liu.se
Thu Oct 7 06:13:48 UTC 2021
Marco Bodrato <bodrato at mail.dm.unipi.it> writes:
> Well, the question is also if the current code beats mul_1 + addmul_1
> :-)
See the patch below, which adds speed support (does it look right? I get a lot of
"speed_measure() could not get 4 results within 1.0%" messages with the default
precision).
Gave it a run on my closest x86_64 (Intel Broadwell, no mulx), and the
numbers for mpn_addaddmul_1msb0 are not impressive. Also, it appears that
mpn_addmul_2 is significantly slower than two addmul_1.
$ ~/build/gmp/tune/speed -p 10000000 -C mpn_mul_1.0x123456789abcdef0 mpn_addmul_1.0x123456789abcdef0 mpn_addaddmul_1msb0 mpn_addmul_2 -s 1-100 -f 1.1
overhead 5.00 cycles, precision 10000000 units of 1.25e-09 secs, CPU freq 798.59 MHz
mpn_mul_1.0x123456789abcdef0 mpn_addmul_1.0x123456789abcdef0 mpn_addaddmul_1msb0 mpn_addmul_2
1 #8.0042 9.0095 10.0050 n/a
2 #4.5050 5.0527 6.0068 7.5036
3 #3.6689 4.3355 5.0216 6.3365
4 #2.7513 3.2515 4.7523 5.6555
5 #2.4016 3.0013 4.4757 5.5103
6 #2.3616 2.6682 4.3433 5.4663
7 #2.1452 2.5732 4.2189 5.0895
8 #2.0055 2.3760 4.3061 5.3977
9 #1.7788 2.2374 4.1520 5.1461
10 #1.8663 2.2163 4.1433 5.1280
11 #1.8191 2.2130 4.1799 4.9452
12 #1.7510 2.1759 4.1168 5.1707
13 #1.6931 2.2332 4.1418 5.0084
14 #1.6452 2.2315 4.1852 4.9592
15 #1.8189 2.1533 4.1467 4.8717
16 #1.7478 2.1749 4.1757 5.0281
17 #1.7248 2.0152 4.1896 4.9345
18 #1.8785 2.0064 4.1549 4.9122
19 #1.8755 2.0187 4.1825 4.8213
20 #1.8122 1.9955 4.1990 4.9587
22 #1.7585 2.0415 4.2003 4.8623
24 #1.6991 2.0152 4.1947 4.8767
26 #1.6834 1.9341 4.2309 4.8167
28 #1.6288 1.8899 4.2342 4.8718
30 #1.6088 1.9420 4.2265 4.8062
33 #1.6326 1.8945 4.2366 4.7899
36 #1.6814 1.8719 4.2558 4.8328
39 #1.6245 1.9316 4.2572 4.7370
42 #1.6134 1.8208 4.2685 4.7616
46 #1.5828 1.8925 4.2967 4.7479
50 #1.6267 1.8098 4.2987 4.7306
55 #1.5909 1.8558 4.3017 4.7041
60 #1.5614 1.7818 4.3019 4.7552
66 #1.5966 1.7615 4.3055 4.7255
72 #1.5596 1.7880 4.3126 4.7393
79 #1.5617 1.8006 4.3277 4.6949
86 #1.5702 1.7883 4.3290 4.7031
94 #1.5441 1.7743 4.3321 4.7018
So there's definitely some room for improvement.
Regards,
/Niels
-------------8<---------------
diff -r 6a2372c2f54f tune/common.c
--- a/tune/common.c Thu Oct 07 01:23:31 2021 +0200
+++ b/tune/common.c Thu Oct 07 07:56:07 2021 +0200
@@ -546,6 +546,13 @@
}
#endif
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+double
+speed_mpn_addaddmul_1msb0 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0 (mpn_addaddmul_1msb0);
+}
+#endif
double
speed_mpn_mul_1 (struct speed_params *s)
{
diff -r 6a2372c2f54f tune/speed.c
--- a/tune/speed.c Thu Oct 07 01:23:31 2021 +0200
+++ b/tune/speed.c Thu Oct 07 07:56:07 2021 +0200
@@ -191,6 +191,9 @@
#if HAVE_NATIVE_mpn_addmul_8
{ "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL },
#endif
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+ { "mpn_addaddmul_1msb0", speed_mpn_addaddmul_1msb0, FLAG_R_OPTIONAL },
+#endif
{ "mpn_mul_1", speed_mpn_mul_1, FLAG_R },
{ "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
#if HAVE_NATIVE_mpn_mul_2
diff -r 6a2372c2f54f tune/speed.h
--- a/tune/speed.h Thu Oct 07 01:23:31 2021 +0200
+++ b/tune/speed.h Thu Oct 07 07:56:07 2021 +0200
@@ -181,6 +181,7 @@
double speed_mpn_addmul_6 (struct speed_params *);
double speed_mpn_addmul_7 (struct speed_params *);
double speed_mpn_addmul_8 (struct speed_params *);
+double speed_mpn_addaddmul_1msb0 (struct speed_params *);
double speed_mpn_cnd_add_n (struct speed_params *);
double speed_mpn_cnd_sub_n (struct speed_params *);
double speed_mpn_com (struct speed_params *);
@@ -1076,6 +1077,36 @@
#define SPEED_ROUTINE_MPN_UNARY_8(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 8)
+#define SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0(function) \
+ { \
+ mp_ptr wp; \
+ unsigned i; \
+ double t; \
+ mp_limb_t r; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ r = s->r != 0 ? s->r : MP_BASES_BIG_BASE_10; \
+ r &= ~GMP_NUMB_HIGHBIT; \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, s->xp, s->yp, s->size, r, r); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
/* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MUL(function) \
--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list