Please update addaddmul_1msb0.asm to support ABI in mingw64

Niels Möller nisse at lysator.liu.se
Thu Oct 7 06:13:48 UTC 2021


Marco Bodrato <bodrato at mail.dm.unipi.it> writes:

> Well, the question is also if the current code beats mul_1 + addmul_1
> :-)

See the patch below, which adds speed support (does it look right? I get a lot of
"speed_measure() could not get 4 results within 1.0%" messages with the default
precision).

I gave it a run on my closest x86_64 machine (Intel Broadwell, no mulx), and
the numbers for mpn_addaddmul_1msb0 are not impressive. Also, it appears
mpn_addmul_2 is significantly slower than two addmul_1 calls.

$ ~/build/gmp/tune/speed -p 10000000 -C mpn_mul_1.0x123456789abcdef0 mpn_addmul_1.0x123456789abcdef0 mpn_addaddmul_1msb0 mpn_addmul_2 -s 1-100 -f 1.1
overhead 5.00 cycles, precision 10000000 units of 1.25e-09 secs, CPU freq 798.59 MHz
        mpn_mul_1.0x123456789abcdef0 mpn_addmul_1.0x123456789abcdef0 mpn_addaddmul_1msb0  mpn_addmul_2
1             #8.0042        9.0095       10.0050           n/a
2             #4.5050        5.0527        6.0068        7.5036
3             #3.6689        4.3355        5.0216        6.3365
4             #2.7513        3.2515        4.7523        5.6555
5             #2.4016        3.0013        4.4757        5.5103
6             #2.3616        2.6682        4.3433        5.4663
7             #2.1452        2.5732        4.2189        5.0895
8             #2.0055        2.3760        4.3061        5.3977
9             #1.7788        2.2374        4.1520        5.1461
10            #1.8663        2.2163        4.1433        5.1280
11            #1.8191        2.2130        4.1799        4.9452
12            #1.7510        2.1759        4.1168        5.1707
13            #1.6931        2.2332        4.1418        5.0084
14            #1.6452        2.2315        4.1852        4.9592
15            #1.8189        2.1533        4.1467        4.8717
16            #1.7478        2.1749        4.1757        5.0281
17            #1.7248        2.0152        4.1896        4.9345
18            #1.8785        2.0064        4.1549        4.9122
19            #1.8755        2.0187        4.1825        4.8213
20            #1.8122        1.9955        4.1990        4.9587
22            #1.7585        2.0415        4.2003        4.8623
24            #1.6991        2.0152        4.1947        4.8767
26            #1.6834        1.9341        4.2309        4.8167
28            #1.6288        1.8899        4.2342        4.8718
30            #1.6088        1.9420        4.2265        4.8062
33            #1.6326        1.8945        4.2366        4.7899
36            #1.6814        1.8719        4.2558        4.8328
39            #1.6245        1.9316        4.2572        4.7370
42            #1.6134        1.8208        4.2685        4.7616
46            #1.5828        1.8925        4.2967        4.7479
50            #1.6267        1.8098        4.2987        4.7306
55            #1.5909        1.8558        4.3017        4.7041
60            #1.5614        1.7818        4.3019        4.7552
66            #1.5966        1.7615        4.3055        4.7255
72            #1.5596        1.7880        4.3126        4.7393
79            #1.5617        1.8006        4.3277        4.6949
86            #1.5702        1.7883        4.3290        4.7031
94            #1.5441        1.7743        4.3321        4.7018

So there's definitely some room for improvement.

Regards,
/Niels

-------------8<---------------

diff -r 6a2372c2f54f tune/common.c
--- a/tune/common.c	Thu Oct 07 01:23:31 2021 +0200
+++ b/tune/common.c	Thu Oct 07 07:56:07 2021 +0200
@@ -546,6 +546,13 @@
 }
 #endif
 
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+double
+speed_mpn_addaddmul_1msb0 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0 (mpn_addaddmul_1msb0);
+}
+#endif
 double
 speed_mpn_mul_1 (struct speed_params *s)
 {
diff -r 6a2372c2f54f tune/speed.c
--- a/tune/speed.c	Thu Oct 07 01:23:31 2021 +0200
+++ b/tune/speed.c	Thu Oct 07 07:56:07 2021 +0200
@@ -191,6 +191,9 @@
 #if HAVE_NATIVE_mpn_addmul_8
   { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
 #endif
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+  { "mpn_addaddmul_1msb0",      speed_mpn_addaddmul_1msb0,  FLAG_R_OPTIONAL },
+#endif
   { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
   { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
 #if HAVE_NATIVE_mpn_mul_2
diff -r 6a2372c2f54f tune/speed.h
--- a/tune/speed.h	Thu Oct 07 01:23:31 2021 +0200
+++ b/tune/speed.h	Thu Oct 07 07:56:07 2021 +0200
@@ -181,6 +181,7 @@
 double speed_mpn_addmul_6 (struct speed_params *);
 double speed_mpn_addmul_7 (struct speed_params *);
 double speed_mpn_addmul_8 (struct speed_params *);
+double speed_mpn_addaddmul_1msb0 (struct speed_params *);
 double speed_mpn_cnd_add_n (struct speed_params *);
 double speed_mpn_cnd_sub_n (struct speed_params *);
 double speed_mpn_com (struct speed_params *);
@@ -1076,6 +1077,36 @@
 #define SPEED_ROUTINE_MPN_UNARY_8(function)				\
   SPEED_ROUTINE_MPN_UNARY_N (function, 8)
 
+#define SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0(function)			\
+  {									\
+    mp_ptr     wp;							\
+    unsigned   i;							\
+    double     t;							\
+    mp_limb_t  r;							\
+    TMP_DECL;								\
+									\
+    SPEED_RESTRICT_COND (s->size >= 1);					\
+									\
+    TMP_MARK;								\
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
+    speed_operand_src (s, s->xp, s->size);				\
+    speed_operand_src (s, s->yp, s->size);				\
+    speed_operand_dst (s, wp, s->size);					\
+    speed_cache_fill (s);						\
+									\
+    r = s->r != 0 ? s->r : MP_BASES_BIG_BASE_10;			\
+    r &= ~GMP_NUMB_HIGHBIT;						\
+									\
+    speed_starttime ();							\
+    i = s->reps;							\
+    do									\
+      function (wp, s->xp, s->yp, s->size, r, r);			\
+    while (--i != 0);							\
+    t = speed_endtime ();						\
+									\
+    TMP_FREE;								\
+    return t;								\
+  }
 
 /* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
 #define SPEED_ROUTINE_MPN_MUL(function)					\

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list