[Gmp-commit] /var/hg/gmp: Revert recent speed changes, since they break tuneup.

mercurial at gmplib.org mercurial at gmplib.org
Mon Oct 16 08:16:30 CEST 2023


details:   /var/hg/gmp/rev/f6073853d16a
changeset: 18458:f6073853d16a
user:      Niels Möller <nisse at lysator.liu.se>
date:      Mon Oct 16 08:16:06 2023 +0200
description:
Revert recent speed changes, since they break tuneup.

diffstat:

 ChangeLog     |   32 --------------
 tune/common.c |   10 ++--
 tune/speed.c  |   65 +++++++---------------------
 tune/speed.h  |  132 +++++++++++++++++++++------------------------------------
 4 files changed, 69 insertions(+), 170 deletions(-)

diffs (truncated from 388 to 300 lines):

diff -r c3333f0d502a -r f6073853d16a ChangeLog
--- a/ChangeLog	Mon Oct 02 15:32:45 2023 +0200
+++ b/ChangeLog	Mon Oct 16 08:16:06 2023 +0200
@@ -1,35 +1,3 @@
-2023-09-27  Niels Möller  <nisse at lysator.liu.se>
-
-	* tune/speed.h (SPEED_ROUTINE_MPN_MUL_TSPACE): New macro,
-	supporting .r and /r for benchmarking unbalanced operation.
-	(SPEED_ROUTINE_MPN_TOOM22_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE,
-	and renamed from...
-	(SPEED_ROUTINE_MPN_TOOM22_MUL_N): ... old name.
-	(SPEED_ROUTINE_MPN_TOOM33_MUL, SPEED_ROUTINE_MPN_TOOM44_MUL)
-	(SPEED_ROUTINE_MPN_TOOM6H_MUL, SPEED_ROUTINE_MPN_TOOM8H_MUL):
-	Analogous change and rename.
-	(SPEED_ROUTINE_MPN_TOOM32_MUL, SPEED_ROUTINE_MPN_TOOM42_MUL)
-	(SPEED_ROUTINE_MPN_TOOM43_MUL)
-	(SPEED_ROUTINE_MPN_TOOM63_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE.
-
-	* tune/common.c (speed_mpn_toom22_mul, speed_mpn_toom33_mul)
-	(speed_mpn_toom44_mul, speed_mpn_toom6h_mul)
-	(speed_mpn_toom8h_mul): Updated for rename of corresponding macros.
-
-2023-09-24  Niels Möller  <nisse at lysator.liu.se>
-
-	* tune/speed.h (struct speed_params): Add size_ratio field.
-	(SPEED_ROUTINE_MPN_MUL): If size_ratio > 0.0, use it to set size1.
-
-	* tune/speed.c (FLAG_SR_OPTIONAL): New flag.
-	(FLAG_RSIZE): Deleted unused flag.
-	(routine): Add FLAG_SR_OPTIONAL to all unbalanced multiplication
-	functions.
-	(slash_r_string): New function, parse float ratio.
-	(run_one): Copy size_ratio field.
-	(routine_find): Handle optional /r.
-	(usage): Deisplay help on /r option.
-
 2023-09-05 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/toom{54,63,6h,8h}_mul.c, mpn/generic/toom_couple_handling.c,
diff -r c3333f0d502a -r f6073853d16a tune/common.c
--- a/tune/common.c	Mon Oct 02 15:32:45 2023 +0200
+++ b/tune/common.c	Mon Oct 16 08:16:06 2023 +0200
@@ -1347,27 +1347,27 @@
 double
 speed_mpn_toom22_mul (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM22_MUL (mpn_toom22_mul);
+  SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
 }
 double
 speed_mpn_toom33_mul (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM33_MUL (mpn_toom33_mul);
+  SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
 }
 double
 speed_mpn_toom44_mul (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM44_MUL (mpn_toom44_mul);
+  SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
 }
 double
 speed_mpn_toom6h_mul (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM6H_MUL (mpn_toom6h_mul);
+  SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
 }
 double
 speed_mpn_toom8h_mul (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM8H_MUL (mpn_toom8h_mul);
+  SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
 }
 
 double
diff -r c3333f0d502a -r f6073853d16a tune/speed.c
--- a/tune/speed.c	Mon Oct 02 15:32:45 2023 +0200
+++ b/tune/speed.c	Mon Oct 16 08:16:06 2023 +0200
@@ -130,7 +130,7 @@
 
 #define FLAG_R            (1<<0)  /* require ".r" */
 #define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
-#define FLAG_SR_OPTIONAL  (1<<2)  /* optional ".r" or "/r" */
+#define FLAG_RSIZE        (1<<2)
 #define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
 
 const struct routine_t {
@@ -328,8 +328,8 @@
   { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
   { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },
 
-  { "mpn_mul",           speed_mpn_mul,         FLAG_SR_OPTIONAL },
-  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_SR_OPTIONAL },
+  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
+  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
   { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
 #if HAVE_NATIVE_mpn_sqr_diagonal
   { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
@@ -346,22 +346,22 @@
   { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
   { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
   { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
-  { "mpn_toom22_mul",    speed_mpn_toom22_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom33_mul",    speed_mpn_toom33_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom44_mul",    speed_mpn_toom44_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom32_mul",    speed_mpn_toom32_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom42_mul",    speed_mpn_toom42_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom43_mul",    speed_mpn_toom43_mul, FLAG_SR_OPTIONAL },
-  { "mpn_toom63_mul",    speed_mpn_toom63_mul, FLAG_SR_OPTIONAL },
-  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul, FLAG_SR_OPTIONAL},
+  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
+  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
+  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
+  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
+  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
+  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
+  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
+  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
+  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
+  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
   { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
 #if WANT_OLD_FFT_FULL
-  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full, FLAG_SR_OPTIONAL},
+  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
   { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
 #endif
-  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_SR_OPTIONAL },
+  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
 
   { "mpn_sqrlo",          speed_mpn_sqrlo           },
@@ -576,7 +576,6 @@
 struct choice_t {
   const struct routine_t  *p;
   mp_limb_t               r;
-  double                  size_ratio;
   double                  scale;
   double                  time;
   int                     no_time;
@@ -671,7 +670,6 @@
   for (i = 0; i < num_choices; i++)
     {
       s->r = choice[i].r;
-      s->size_ratio = choice[i].size_ratio;
       choice[i].time = speed_measure (choice[i].p->fun, s);
       choice[i].no_time = (choice[i].time == -1.0);
       if (! choice[i].no_time)
@@ -1013,17 +1011,6 @@
   return n;
 }
 
-double slash_r_string (const char *s)
-{
-  char *end;
-  double r = strtod(s, &end);
-  if (s[0] == '\0' || end[0] != '\0' || r > 1.0 || r < 0.0)
-    {
-      fprintf (stderr, "invalid /r parameter: %s\n", s);
-      exit (1);
-    }
-  return r;
-}
 
 void
 routine_find (struct choice_t *c, const char *s_orig)
@@ -1055,7 +1042,7 @@
         {
           /* match, with a .r parameter */
 
-          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL|FLAG_SR_OPTIONAL)))
+          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
             {
               fprintf (stderr,
                        "Choice %s bad: doesn't take a \".<r>\" parameter\n",
@@ -1067,22 +1054,6 @@
           c->r = r_string (s + nlen + 1);
           return;
         }
-      if (s[nlen] == '/')
-        {
-          /* match, with a /r parameter */
-
-          if (! (routine[i].flag & (FLAG_SR_OPTIONAL)))
-            {
-              fprintf (stderr,
-                       "Choice %s bad: doesn't take a \"/<r>\" parameter\n",
-                       s_orig);
-              exit (1);
-            }
-
-          c->p = &routine[i];
-          c->size_ratio = slash_r_string (s + nlen + 1);
-          return;
-        }
 
       if (s[nlen] == '\0')
         {
@@ -1154,8 +1125,6 @@
         printf ("\t%s.r\n", routine[i].name);
       else if (routine[i].flag & FLAG_R_OPTIONAL)
         printf ("\t%s (optional .r)\n", routine[i].name);
-      else if (routine[i].flag & FLAG_SR_OPTIONAL)
-        printf ("\t%s (optional .r or /r)\n", routine[i].name);
       else
         printf ("\t%s\n", routine[i].name);
     }
@@ -1166,8 +1135,6 @@
   printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
   printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
   printf ("\n");
-  printf ("Routines with an optional \"/r\" take a decimal ratio, for example mpn_mul/0.7.\n");
-  printf ("\n");
   printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
   printf ("The fastest routine at each size is marked with a # (free form output only).\n");
   printf ("\n");
diff -r c3333f0d502a -r f6073853d16a tune/speed.h
--- a/tune/speed.h	Mon Oct 02 15:32:45 2023 +0200
+++ b/tune/speed.h	Mon Oct 16 08:16:06 2023 +0200
@@ -113,7 +113,6 @@
   mp_ptr     yp;	/* second argument */
   mp_size_t  size;	/* size of both arguments */
   mp_limb_t  r;		/* user supplied parameter */
-  double     size_ratio; /* ratio for smaller to larger size, e.g., for mpn_mul */
   mp_size_t  align_xp;	/* alignment of xp */
   mp_size_t  align_yp;	/* alignment of yp */
   mp_size_t  align_wp;	/* intended alignment of wp */
@@ -1123,12 +1122,9 @@
     double    t;							\
     TMP_DECL;								\
 									\
-    size1 = s->size_ratio * s->size;					\
-    if (size1 == 0)							\
-      {									\
-	size1 = (s->r == 0 ? s->size : s->r);				\
-	if (size1 < 0) size1 = -size1 - s->size;			\
-      }									\
+    size1 = (s->r == 0 ? s->size : s->r);				\
+    if (size1 < 0) size1 = -size1 - s->size;				\
+									\
     SPEED_RESTRICT_COND (size1 >= 1);					\
     SPEED_RESTRICT_COND (s->size >= size1);				\
 									\
@@ -1474,47 +1470,6 @@
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_MUL_TSPACE(function, itch, default_bn, valid)	\
-  {									\
-    mp_ptr    wp, tspace;						\
-    mp_size_t an, bn, tn;						\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    an = s->size;							\
-    bn = s->size_ratio * s->size;					\
-    if (bn == 0)							\
-      {									\
-	bn = (s->r == 0 ? default_bn : s->r);				\
-	if (bn < 0) bn = -bn - an;					\
-      }									\
-    SPEED_RESTRICT_COND (bn >= 1);					\
-    SPEED_RESTRICT_COND (an >= bn);					\
-    SPEED_RESTRICT_COND (valid);					\
-    tn = itch(an, bn);							\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, an + bn, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tspace, tn, s->align_wp2);			\
-									\
-    speed_operand_src (s, s->xp, an);					\
-    speed_operand_src (s, s->yp, bn);					\
-    speed_operand_dst (s, wp, an + bn);					\
-    speed_operand_dst (s, tspace, tn);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function(wp, s->xp, an, s->yp, bn, tspace);			\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
 #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize)		\
   {									\
     mp_ptr    wp, tspace;						\
@@ -1545,50 +1500,59 @@
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_TOOM22_MUL(function)				\
-  SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function, mpn_toom22_mul_itch,					\
-     an, 5*bn > 4*an)


More information about the gmp-commit mailing list