What's a reasonable size ratio for toom32?
Niels Möller
nisse at lysator.liu.se
Mon Sep 18 18:23:13 CEST 2023
Paul Zimmermann <Paul.Zimmermann at inria.fr> writes:
> Dear Niels,
>
> ./speed -p 1000000 -c -s 10-200 -f1.1 mpn_mul.0.6 would be more readable,
> although the change in speed.h would be larger.
See the patch below, which adds support for mpn_mul/0.6. Example output:
$ ./speed -c -r -s 10-50 -t 5 -p 1000000 mpn_mul mpn_mul/0.6
overhead 5.00 cycles, precision 1000000 units of 1.25e-09 secs, CPU freq 798.28 MHz
mpn_mul mpn_mul/0.6
10 215.48 #0.6295
15 443.16 #0.6066
20 784.50 #0.6138
25 1209.57 #0.5957
30 1490.59 #0.6934
35 1986.51 #0.6918
40 2547.99 #0.6981
45 3189.27 #0.6633
50 3827.87 #0.6734
What do you think? (I also deleted FLAG_RSIZE, which appeared to be unused.)
(One potential bug is the missing initialization of the new .size_ratio
field in struct choice_t to 0.0, but I don't see any initialization of .r
either, so I'm probably missing something.)
> Or maybe ./speed -p 1000000 -c -s 10-200 -f1.1 -r 0.6 mpn_mul ?
I think it's nicer to be able to specify it separately for each function
under test.
Regards,
/Niels
diff -r 510152c4ca97 tune/speed.c
--- a/tune/speed.c Tue Aug 22 10:20:40 2023 +0200
+++ b/tune/speed.c Mon Sep 18 18:10:04 2023 +0200
@@ -130,7 +130,7 @@
#define FLAG_R (1<<0) /* require ".r" */
#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */
-#define FLAG_RSIZE (1<<2)
+#define FLAG_SR_OPTIONAL (1<<2) /* optional ".r" or "/r" */
#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */
const struct routine_t {
@@ -328,8 +328,8 @@
{ "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 },
{ "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 },
- { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL },
- { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
+ { "mpn_mul", speed_mpn_mul, FLAG_SR_OPTIONAL },
+ { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_SR_OPTIONAL },
{ "mpn_sqr_basecase", speed_mpn_sqr_basecase },
#if HAVE_NATIVE_mpn_sqr_diagonal
{ "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
@@ -346,22 +346,22 @@
{ "mpn_toom4_sqr", speed_mpn_toom4_sqr },
{ "mpn_toom6_sqr", speed_mpn_toom6_sqr },
{ "mpn_toom8_sqr", speed_mpn_toom8_sqr },
- { "mpn_toom22_mul", speed_mpn_toom22_mul },
- { "mpn_toom33_mul", speed_mpn_toom33_mul },
- { "mpn_toom44_mul", speed_mpn_toom44_mul },
- { "mpn_toom6h_mul", speed_mpn_toom6h_mul },
- { "mpn_toom8h_mul", speed_mpn_toom8h_mul },
- { "mpn_toom32_mul", speed_mpn_toom32_mul },
- { "mpn_toom42_mul", speed_mpn_toom42_mul },
- { "mpn_toom43_mul", speed_mpn_toom43_mul },
- { "mpn_toom63_mul", speed_mpn_toom63_mul },
- { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul },
+ { "mpn_toom22_mul", speed_mpn_toom22_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom33_mul", speed_mpn_toom33_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom44_mul", speed_mpn_toom44_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom6h_mul", speed_mpn_toom6h_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom8h_mul", speed_mpn_toom8h_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom32_mul", speed_mpn_toom32_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom42_mul", speed_mpn_toom42_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom43_mul", speed_mpn_toom43_mul, FLAG_SR_OPTIONAL },
+ { "mpn_toom63_mul", speed_mpn_toom63_mul, FLAG_SR_OPTIONAL },
+ { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul, FLAG_SR_OPTIONAL},
{ "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
- { "mpn_mul_fft_full", speed_mpn_mul_fft_full },
+ { "mpn_mul_fft_full", speed_mpn_mul_fft_full, FLAG_SR_OPTIONAL},
{ "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
#endif
- { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
+ { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_SR_OPTIONAL },
{ "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
{ "mpn_sqrlo", speed_mpn_sqrlo },
@@ -576,6 +576,7 @@
struct choice_t {
const struct routine_t *p;
mp_limb_t r;
+ double size_ratio;
double scale;
double time;
int no_time;
@@ -670,6 +671,7 @@
for (i = 0; i < num_choices; i++)
{
s->r = choice[i].r;
+ s->size_ratio = choice[i].size_ratio;
choice[i].time = speed_measure (choice[i].p->fun, s);
choice[i].no_time = (choice[i].time == -1.0);
if (! choice[i].no_time)
@@ -1011,6 +1013,17 @@
return n;
}
+double slash_r_string (const char *s)
+{
+ char *end;
+ double r = strtod(s, &end);
+ if (s[0] == '\0' || end[0] != '\0' || r > 1.0 || r < 0.0)
+ {
+ fprintf (stderr, "invalid /r parameter: %s\n", s);
+ exit (1);
+ }
+ return r;
+}
void
routine_find (struct choice_t *c, const char *s_orig)
@@ -1054,6 +1067,22 @@
c->r = r_string (s + nlen + 1);
return;
}
+ if (s[nlen] == '/')
+ {
+ /* match, with a /r parameter */
+
+ if (! (routine[i].flag & (FLAG_SR_OPTIONAL)))
+ {
+ fprintf (stderr,
+ "Choice %s bad: doesn't take a \"/<r>\" parameter\n",
+ s_orig);
+ exit (1);
+ }
+
+ c->p = &routine[i];
+ c->size_ratio = slash_r_string (s + nlen + 1);
+ return;
+ }
if (s[nlen] == '\0')
{
@@ -1125,6 +1154,8 @@
printf ("\t%s.r\n", routine[i].name);
else if (routine[i].flag & FLAG_R_OPTIONAL)
printf ("\t%s (optional .r)\n", routine[i].name);
+ else if (routine[i].flag & FLAG_SR_OPTIONAL)
+ printf ("\t%s (optional .r or /r)\n", routine[i].name);
else
printf ("\t%s\n", routine[i].name);
}
diff -r 510152c4ca97 tune/speed.h
--- a/tune/speed.h Tue Aug 22 10:20:40 2023 +0200
+++ b/tune/speed.h Mon Sep 18 18:10:04 2023 +0200
@@ -113,6 +113,7 @@
mp_ptr yp; /* second argument */
mp_size_t size; /* size of both arguments */
mp_limb_t r; /* user supplied parameter */
+ double size_ratio; /* ratio for smaller to larger size, e.g., for mpn_mul */
mp_size_t align_xp; /* alignment of xp */
mp_size_t align_yp; /* alignment of yp */
mp_size_t align_wp; /* intended alignment of wp */
@@ -1122,9 +1123,13 @@
double t; \
TMP_DECL; \
\
- size1 = (s->r == 0 ? s->size : s->r); \
- if (size1 < 0) size1 = -size1 - s->size; \
- \
+ if (s->size_ratio > 0.0) \
+ size1 = s->size_ratio * s->size; \
+ else \
+ { \
+ size1 = (s->r == 0 ? s->size : s->r); \
+ if (size1 < 0) size1 = -size1 - s->size; \
+ } \
SPEED_RESTRICT_COND (size1 >= 1); \
SPEED_RESTRICT_COND (s->size >= size1); \
\
--
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel mailing list