[Gmp-commit] /var/hg/gmp: Update benchmark and tuning of toom*_mul to use size_ratio.

mercurial at gmplib.org mercurial at gmplib.org
Wed Dec 27 19:35:45 CET 2023


details:   /var/hg/gmp/rev/1c566525476a
changeset: 18464:1c566525476a
user:      Niels Möller <nisse at lysator.liu.se>
date:      Wed Dec 27 19:35:36 2023 +0100
description:
Update benchmark and tuning of toom*_mul to use size_ratio.

diffstat:

 ChangeLog     |   30 ++++++++++++
 tune/common.c |   50 ++++----------------
 tune/speed.c  |   20 ++++---
 tune/speed.h  |  141 ++++++++++++++++++++-------------------------------------
 tune/tuneup.c |   26 +++++++---
 5 files changed, 120 insertions(+), 147 deletions(-)

diffs (truncated from 400 to 300 lines):

diff -r 693b9f23a22b -r 1c566525476a ChangeLog
--- a/ChangeLog	Sat Dec 23 17:06:07 2023 +0100
+++ b/ChangeLog	Wed Dec 27 19:35:36 2023 +0100
@@ -1,3 +1,33 @@
+2023-12-27  Niels Möller  <nisse at lysator.liu.se>
+
+	* tune/tuneup.c (tune_mul): Updated to measure all
+	MUL_TOOM*_TO_TOOM*_THRESHOLD using size_ratio.
+
+	* tune/speed.h: Update declarations.
+	(SPEED_ROUTINE_MPN_MUL_TSPACE): Add support for size_ratio. New
+	arguments default_bn and valid.
+	(SPEED_ROUTINE_MPN_TOOM22_MUL, SPEED_ROUTINE_MPN_TOOM33_MUL)
+	(SPEED_ROUTINE_MPN_TOOM44_MUL, SPEED_ROUTINE_MPN_TOOM6H_MUL)
+	(SPEED_ROUTINE_MPN_TOOM8H_MUL, SPEED_ROUTINE_MPN_TOOM32_MUL)
+	(SPEED_ROUTINE_MPN_TOOM42_MUL, SPEED_ROUTINE_MPN_TOOM43_MUL)
+	(SPEED_ROUTINE_MPN_TOOM63_MUL): Updated for
+	SPEED_ROUTINE_MPN_MUL_TSPACE changes.
+	(SPEED_ROUTINE_MPN_TOOM53_MUL, SPEED_ROUTINE_MPN_TOOM54_MUL): New.
+	(SPEED_ROUTINE_MPN_TOOM*_FOR_TOOM*_MUL): All deleted.
+
+	* tune/speed.c (routine): Add FLAG_SR_OPTIONAL to mpn_toom*_mul
+	functions. Add mpn_toom53_mul and mpn_toom54_mul.
+
+	* tune/common.c (speed_mpn_toom53_mul, speed_mpn_toom54_mul): New functions.
+	(speed_mpn_toom32_for_toom43_mul)
+	(speed_mpn_toom43_for_toom32_mul)
+	(speed_mpn_toom32_for_toom53_mul)
+	(speed_mpn_toom53_for_toom32_mul)
+	(speed_mpn_toom42_for_toom53_mul)
+	(speed_mpn_toom53_for_toom42_mul)
+	(speed_mpn_toom43_for_toom54_mul)
+	(speed_mpn_toom54_for_toom43_mul): Deleted functions.
+
 2023-11-22  Niels Möller  <nisse at lysator.liu.se>
 
 	* tune/speed.h (SPEED_ROUTINE_MPN_MUL_TSPACE): Rename macro, was
diff -r 693b9f23a22b -r 1c566525476a tune/common.c
--- a/tune/common.c	Sat Dec 23 17:06:07 2023 +0100
+++ b/tune/common.c	Wed Dec 27 19:35:36 2023 +0100
@@ -1386,50 +1386,20 @@
   SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
 }
 double
+speed_mpn_toom53_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM53_MUL (mpn_toom53_mul);
+}
+double
+speed_mpn_toom54_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM54_MUL (mpn_toom54_mul);
+}
+double
 speed_mpn_toom63_mul (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
 }
-double
-speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
-}
-double
-speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
-}
-double
-speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
-}
-double
-speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
-}
 
 double
 speed_mpn_nussbaumer_mul (struct speed_params *s)
diff -r 693b9f23a22b -r 1c566525476a tune/speed.c
--- a/tune/speed.c	Sat Dec 23 17:06:07 2023 +0100
+++ b/tune/speed.c	Wed Dec 27 19:35:36 2023 +0100
@@ -346,15 +346,17 @@
   { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
   { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
   { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
-  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
-  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
-  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
-  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
-  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
-  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
-  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
-  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
-  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
+  { "mpn_toom22_mul",    speed_mpn_toom22_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom33_mul",    speed_mpn_toom33_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom44_mul",    speed_mpn_toom44_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom32_mul",    speed_mpn_toom32_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom42_mul",    speed_mpn_toom42_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom43_mul",    speed_mpn_toom43_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom53_mul",    speed_mpn_toom53_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom54_mul",    speed_mpn_toom54_mul, FLAG_SR_OPTIONAL },
+  { "mpn_toom63_mul",    speed_mpn_toom63_mul, FLAG_SR_OPTIONAL },
   { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
   { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
 #if WANT_OLD_FFT_FULL
diff -r 693b9f23a22b -r 1c566525476a tune/speed.h
--- a/tune/speed.h	Sat Dec 23 17:06:07 2023 +0100
+++ b/tune/speed.h	Wed Dec 27 19:35:36 2023 +0100
@@ -375,15 +375,9 @@
 double speed_mpn_toom32_mul (struct speed_params *);
 double speed_mpn_toom42_mul (struct speed_params *);
 double speed_mpn_toom43_mul (struct speed_params *);
+double speed_mpn_toom53_mul (struct speed_params *);
+double speed_mpn_toom54_mul (struct speed_params *);
 double speed_mpn_toom63_mul (struct speed_params *);
-double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
-double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
-double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
-double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
-double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
-double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
-double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
-double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
 double speed_mpn_toom42_mulmid (struct speed_params *);
 double speed_mpn_mulmod_bnm1 (struct speed_params *);
 double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
@@ -1474,29 +1468,40 @@
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_MUL_TSPACE(call, tsize, minsize)		\
+#define SPEED_ROUTINE_MPN_MUL_TSPACE(function, itch, default_bn, valid)	\
   {									\
     mp_ptr    wp, tspace;						\
+    mp_size_t an, bn, tn;						\
     unsigned  i;							\
     double    t;							\
     TMP_DECL;								\
 									\
-    SPEED_RESTRICT_COND (s->size >= minsize);				\
+    an = s->size;							\
+    bn = s->size_ratio * s->size;					\
+    if (bn == 0)							\
+      {									\
+	bn = (s->r == 0 ? default_bn : s->r);				\
+	if (bn < 0) bn = -bn - an;					\
+      }									\
+    SPEED_RESTRICT_COND (bn >= 1);					\
+    SPEED_RESTRICT_COND (an >= bn);					\
+    SPEED_RESTRICT_COND (valid);					\
+    tn = itch(an, bn);							\
 									\
     TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2);		\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, 2*s->size);				\
-    speed_operand_dst (s, tspace, tsize);				\
+    SPEED_TMP_ALLOC_LIMBS (wp, an + bn, s->align_wp);			\
+    SPEED_TMP_ALLOC_LIMBS (tspace, tn, s->align_wp2);			\
+									\
+    speed_operand_src (s, s->xp, an);					\
+    speed_operand_src (s, s->yp, bn);					\
+    speed_operand_dst (s, wp, an + bn);					\
+    speed_operand_dst (s, tspace, tn);					\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
     i = s->reps;							\
     do									\
-      call;								\
+      function (wp, s->xp, an, s->yp, bn, tspace);			\
     while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -1506,102 +1511,58 @@
 
 #define SPEED_ROUTINE_MPN_TOOM22_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom22_mul_itch (s->size, s->size),				\
-     MPN_TOOM22_MUL_MINSIZE)
+    (function, mpn_toom22_mul_itch,					\
+     an, 5*bn > 4*an)
 
 #define SPEED_ROUTINE_MPN_TOOM33_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom33_mul_itch (s->size, s->size),				\
-     MPN_TOOM33_MUL_MINSIZE)
+    (function, mpn_toom33_mul_itch,					\
+     an, bn > 2 * ((an+2) / 3))
 
 #define SPEED_ROUTINE_MPN_TOOM44_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom44_mul_itch (s->size, s->size),				\
-     MPN_TOOM44_MUL_MINSIZE)
+    (function, mpn_toom44_mul_itch,					\
+     an, bn > 3*((an + 3) >> 2))
 
 #define SPEED_ROUTINE_MPN_TOOM6H_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom6h_mul_itch (s->size, s->size),				\
-     MPN_TOOM6H_MUL_MINSIZE)
+  (function, mpn_toom6h_mul_itch,					\
+   an, bn >= 42 && ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17)))
 
 #define SPEED_ROUTINE_MPN_TOOM8H_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom8h_mul_itch (s->size, s->size),				\
-     MPN_TOOM8H_MUL_MINSIZE)
+    (function, mpn_toom8h_mul_itch,					\
+     an, (bn >= 86) && an*4 <= bn*11)
 
 #define SPEED_ROUTINE_MPN_TOOM32_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace),		\
-     mpn_toom32_mul_itch (s->size, 2*s->size/3),			\
-     MPN_TOOM32_MUL_MINSIZE)
+    (function, mpn_toom32_mul_itch,					\
+     2*an / 3, bn + 2 <= an && an + 6 <= 3*bn)
 
 #define SPEED_ROUTINE_MPN_TOOM42_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),		\
-     mpn_toom42_mul_itch (s->size, s->size/2),				\
-     MPN_TOOM42_MUL_MINSIZE)
+  (function, mpn_toom42_mul_itch,					\
+   an / 2, an >= 7 && bn >= 2 && an > 3*((bn+1)/2) && bn > ((an+3)/4))
 
 #define SPEED_ROUTINE_MPN_TOOM43_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace),		\
-     mpn_toom43_mul_itch (s->size, s->size*3/4),			\
-     MPN_TOOM43_MUL_MINSIZE)
+    (function, mpn_toom43_mul_itch,					\
+     an*3/4, an >= 7 && bn >= 5 && an > 3 * ((bn+2)/3) && bn > 2 * ((an+3)/4))
+
+#define SPEED_ROUTINE_MPN_TOOM53_MUL(function)				\
+  SPEED_ROUTINE_MPN_MUL_TSPACE						\
+    (function, mpn_toom53_mul_itch,					\
+     an*3/5, an >= 17 && bn >= 5 && an > 4 * ((bn+2)/3) && bn > 2 * ((an+4)/5))
+
+#define SPEED_ROUTINE_MPN_TOOM54_MUL(function)				\
+  SPEED_ROUTINE_MPN_MUL_TSPACE						\
+    (function, mpn_toom54_mul_itch,					\
+     an*4/5, an >= 17 && bn >= 10 && an > 4 * ((bn+3)/4) && bn > 3 * ((an+4)/5))
 
 #define SPEED_ROUTINE_MPN_TOOM63_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),		\
-     mpn_toom63_mul_itch (s->size, s->size/2),				\
-     MPN_TOOM63_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),	\
-     mpn_toom32_mul_itch (s->size, 17*s->size/24),			\
-     MPN_TOOM32_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),	\
-     mpn_toom43_mul_itch (s->size, 17*s->size/24),			\
-     MPN_TOOM43_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_TSPACE						\
-    (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),	\
-     mpn_toom32_mul_itch (s->size, 19*s->size/30),			\


More information about the gmp-commit mailing list