[Gmp-commit] /var/hg/gmp: Revert recent speed changes, since they break tuneup.
mercurial at gmplib.org
mercurial at gmplib.org
Mon Oct 16 08:16:30 CEST 2023
details: /var/hg/gmp/rev/f6073853d16a
changeset: 18458:f6073853d16a
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Oct 16 08:16:06 2023 +0200
description:
Revert recent speed changes, since they break tuneup.
diffstat:
ChangeLog | 32 --------------
tune/common.c | 10 ++--
tune/speed.c | 65 +++++++---------------------
tune/speed.h | 132 +++++++++++++++++++++------------------------------------
4 files changed, 69 insertions(+), 170 deletions(-)
diffs (truncated from 388 to 300 lines):
diff -r c3333f0d502a -r f6073853d16a ChangeLog
--- a/ChangeLog Mon Oct 02 15:32:45 2023 +0200
+++ b/ChangeLog Mon Oct 16 08:16:06 2023 +0200
@@ -1,35 +1,3 @@
-2023-09-27 Niels Möller <nisse at lysator.liu.se>
-
- * tune/speed.h (SPEED_ROUTINE_MPN_MUL_TSPACE): New macro,
- supporting .r and /r for benchmarking unbalanced operation.
- (SPEED_ROUTINE_MPN_TOOM22_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE,
- and renamed from...
- (SPEED_ROUTINE_MPN_TOOM22_MUL_N): ... old name.
- (SPEED_ROUTINE_MPN_TOOM33_MUL, SPEED_ROUTINE_MPN_TOOM44_MUL)
- (SPEED_ROUTINE_MPN_TOOM6H_MUL, SPEED_ROUTINE_MPN_TOOM8H_MUL):
- Analogous change and rename.
- (SPEED_ROUTINE_MPN_TOOM32_MUL, SPEED_ROUTINE_MPN_TOOM42_MUL)
- (SPEED_ROUTINE_MPN_TOOM43_MUL)
- (SPEED_ROUTINE_MPN_TOOM63_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE.
-
- * tune/common.c (speed_mpn_toom22_mul, speed_mpn_toom33_mul)
- (speed_mpn_toom44_mul, speed_mpn_toom6h_mul)
- (speed_mpn_toom8h_mul): Updated for rename of corresponding macros.
-
-2023-09-24 Niels Möller <nisse at lysator.liu.se>
-
- * tune/speed.h (struct speed_params): Add size_ratio field.
- (SPEED_ROUTINE_MPN_MUL): If size_ratio > 0.0, use it to set size1.
-
- * tune/speed.c (FLAG_SR_OPTIONAL): New flag.
- (FLAG_RSIZE): Deleted unused flag.
- (routine): Add FLAG_SR_OPTIONAL to all unbalanced multiplication
- functions.
- (slash_r_string): New function, parse float ratio.
- (run_one): Copy size_ratio field.
- (routine_find): Handle optional /r.
- (usage): Deisplay help on /r option.
-
2023-09-05 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/toom{54,63,6h,8h}_mul.c, mpn/generic/toom_couple_handling.c,
diff -r c3333f0d502a -r f6073853d16a tune/common.c
--- a/tune/common.c Mon Oct 02 15:32:45 2023 +0200
+++ b/tune/common.c Mon Oct 16 08:16:06 2023 +0200
@@ -1347,27 +1347,27 @@
double
speed_mpn_toom22_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM22_MUL (mpn_toom22_mul);
+ SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
}
double
speed_mpn_toom33_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM33_MUL (mpn_toom33_mul);
+ SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
}
double
speed_mpn_toom44_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM44_MUL (mpn_toom44_mul);
+ SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
}
double
speed_mpn_toom6h_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM6H_MUL (mpn_toom6h_mul);
+ SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
}
double
speed_mpn_toom8h_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM8H_MUL (mpn_toom8h_mul);
+ SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
}
double
diff -r c3333f0d502a -r f6073853d16a tune/speed.c
--- a/tune/speed.c Mon Oct 02 15:32:45 2023 +0200
+++ b/tune/speed.c Mon Oct 16 08:16:06 2023 +0200
@@ -130,7 +130,7 @@
#define FLAG_R (1<<0) /* require ".r" */
#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */
-#define FLAG_SR_OPTIONAL (1<<2) /* optional ".r" or "/r" */
+#define FLAG_RSIZE (1<<2)
#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */
const struct routine_t {
@@ -328,8 +328,8 @@
{ "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 },
{ "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 },
- { "mpn_mul", speed_mpn_mul, FLAG_SR_OPTIONAL },
- { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_SR_OPTIONAL },
+ { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL },
+ { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
{ "mpn_sqr_basecase", speed_mpn_sqr_basecase },
#if HAVE_NATIVE_mpn_sqr_diagonal
{ "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
@@ -346,22 +346,22 @@
{ "mpn_toom4_sqr", speed_mpn_toom4_sqr },
{ "mpn_toom6_sqr", speed_mpn_toom6_sqr },
{ "mpn_toom8_sqr", speed_mpn_toom8_sqr },
- { "mpn_toom22_mul", speed_mpn_toom22_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom33_mul", speed_mpn_toom33_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom44_mul", speed_mpn_toom44_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom6h_mul", speed_mpn_toom6h_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom8h_mul", speed_mpn_toom8h_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom32_mul", speed_mpn_toom32_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom42_mul", speed_mpn_toom42_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom43_mul", speed_mpn_toom43_mul, FLAG_SR_OPTIONAL },
- { "mpn_toom63_mul", speed_mpn_toom63_mul, FLAG_SR_OPTIONAL },
- { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul, FLAG_SR_OPTIONAL},
+ { "mpn_toom22_mul", speed_mpn_toom22_mul },
+ { "mpn_toom33_mul", speed_mpn_toom33_mul },
+ { "mpn_toom44_mul", speed_mpn_toom44_mul },
+ { "mpn_toom6h_mul", speed_mpn_toom6h_mul },
+ { "mpn_toom8h_mul", speed_mpn_toom8h_mul },
+ { "mpn_toom32_mul", speed_mpn_toom32_mul },
+ { "mpn_toom42_mul", speed_mpn_toom42_mul },
+ { "mpn_toom43_mul", speed_mpn_toom43_mul },
+ { "mpn_toom63_mul", speed_mpn_toom63_mul },
+ { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul },
{ "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
- { "mpn_mul_fft_full", speed_mpn_mul_fft_full, FLAG_SR_OPTIONAL},
+ { "mpn_mul_fft_full", speed_mpn_mul_fft_full },
{ "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
#endif
- { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_SR_OPTIONAL },
+ { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
{ "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
{ "mpn_sqrlo", speed_mpn_sqrlo },
@@ -576,7 +576,6 @@
struct choice_t {
const struct routine_t *p;
mp_limb_t r;
- double size_ratio;
double scale;
double time;
int no_time;
@@ -671,7 +670,6 @@
for (i = 0; i < num_choices; i++)
{
s->r = choice[i].r;
- s->size_ratio = choice[i].size_ratio;
choice[i].time = speed_measure (choice[i].p->fun, s);
choice[i].no_time = (choice[i].time == -1.0);
if (! choice[i].no_time)
@@ -1013,17 +1011,6 @@
return n;
}
-double slash_r_string (const char *s)
-{
- char *end;
- double r = strtod(s, &end);
- if (s[0] == '\0' || end[0] != '\0' || r > 1.0 || r < 0.0)
- {
- fprintf (stderr, "invalid /r parameter: %s\n", s);
- exit (1);
- }
- return r;
-}
void
routine_find (struct choice_t *c, const char *s_orig)
@@ -1055,7 +1042,7 @@
{
/* match, with a .r parameter */
- if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL|FLAG_SR_OPTIONAL)))
+ if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
{
fprintf (stderr,
"Choice %s bad: doesn't take a \".<r>\" parameter\n",
@@ -1067,22 +1054,6 @@
c->r = r_string (s + nlen + 1);
return;
}
- if (s[nlen] == '/')
- {
- /* match, with a /r parameter */
-
- if (! (routine[i].flag & (FLAG_SR_OPTIONAL)))
- {
- fprintf (stderr,
- "Choice %s bad: doesn't take a \"/<r>\" parameter\n",
- s_orig);
- exit (1);
- }
-
- c->p = &routine[i];
- c->size_ratio = slash_r_string (s + nlen + 1);
- return;
- }
if (s[nlen] == '\0')
{
@@ -1154,8 +1125,6 @@
printf ("\t%s.r\n", routine[i].name);
else if (routine[i].flag & FLAG_R_OPTIONAL)
printf ("\t%s (optional .r)\n", routine[i].name);
- else if (routine[i].flag & FLAG_SR_OPTIONAL)
- printf ("\t%s (optional .r or /r)\n", routine[i].name);
else
printf ("\t%s\n", routine[i].name);
}
@@ -1166,8 +1135,6 @@
printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
printf ("\n");
- printf ("Routines with an optional \"/r\" take a decimal ratio, for example mpn_mul/0.7.\n");
- printf ("\n");
printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
printf ("The fastest routine at each size is marked with a # (free form output only).\n");
printf ("\n");
diff -r c3333f0d502a -r f6073853d16a tune/speed.h
--- a/tune/speed.h Mon Oct 02 15:32:45 2023 +0200
+++ b/tune/speed.h Mon Oct 16 08:16:06 2023 +0200
@@ -113,7 +113,6 @@
mp_ptr yp; /* second argument */
mp_size_t size; /* size of both arguments */
mp_limb_t r; /* user supplied parameter */
- double size_ratio; /* ratio for smaller to larger size, e.g., for mpn_mul */
mp_size_t align_xp; /* alignment of xp */
mp_size_t align_yp; /* alignment of yp */
mp_size_t align_wp; /* intended alignment of wp */
@@ -1123,12 +1122,9 @@
double t; \
TMP_DECL; \
\
- size1 = s->size_ratio * s->size; \
- if (size1 == 0) \
- { \
- size1 = (s->r == 0 ? s->size : s->r); \
- if (size1 < 0) size1 = -size1 - s->size; \
- } \
+ size1 = (s->r == 0 ? s->size : s->r); \
+ if (size1 < 0) size1 = -size1 - s->size; \
+ \
SPEED_RESTRICT_COND (size1 >= 1); \
SPEED_RESTRICT_COND (s->size >= size1); \
\
@@ -1474,47 +1470,6 @@
return t; \
}
-#define SPEED_ROUTINE_MPN_MUL_TSPACE(function, itch, default_bn, valid) \
- { \
- mp_ptr wp, tspace; \
- mp_size_t an, bn, tn; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- an = s->size; \
- bn = s->size_ratio * s->size; \
- if (bn == 0) \
- { \
- bn = (s->r == 0 ? default_bn : s->r); \
- if (bn < 0) bn = -bn - an; \
- } \
- SPEED_RESTRICT_COND (bn >= 1); \
- SPEED_RESTRICT_COND (an >= bn); \
- SPEED_RESTRICT_COND (valid); \
- tn = itch(an, bn); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, an + bn, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tspace, tn, s->align_wp2); \
- \
- speed_operand_src (s, s->xp, an); \
- speed_operand_src (s, s->yp, bn); \
- speed_operand_dst (s, wp, an + bn); \
- speed_operand_dst (s, tspace, tn); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function(wp, s->xp, an, s->yp, bn, tspace); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
{ \
mp_ptr wp, tspace; \
@@ -1545,50 +1500,59 @@
return t; \
}
-#define SPEED_ROUTINE_MPN_TOOM22_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_TSPACE \
- (function, mpn_toom22_mul_itch, \
- an, 5*bn > 4*an)
More information about the gmp-commit
mailing list