[Gmp-commit] /var/hg/gmp: Proper support for .r and /r for benchmarks of mpn_toom*_mul.
mercurial at gmplib.org
mercurial at gmplib.org
Wed Sep 27 20:59:00 CEST 2023
details: /var/hg/gmp/rev/031654cf3098
changeset: 18456:031654cf3098
user: Niels Möller <nisse at lysator.liu.se>
date: Wed Sep 27 20:58:47 2023 +0200
description:
Proper support for .r and /r for benchmarks of mpn_toom*_mul.
diffstat:
ChangeLog | 18 ++++++++
tune/common.c | 10 ++--
tune/speed.h | 122 ++++++++++++++++++++++++++++++++++++---------------------
3 files changed, 100 insertions(+), 50 deletions(-)
diffs (208 lines):
diff -r 2d0b13e266c7 -r 031654cf3098 ChangeLog
--- a/ChangeLog Wed Sep 27 08:55:14 2023 +0200
+++ b/ChangeLog Wed Sep 27 20:58:47 2023 +0200
@@ -1,3 +1,21 @@
+2023-09-27 Niels Möller <nisse at lysator.liu.se>
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_MUL_TSPACE): New macro,
+ supporting .r and /r for benchmarking unbalanced operation.
+ (SPEED_ROUTINE_MPN_TOOM22_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE,
+ and renamed from...
+ (SPEED_ROUTINE_MPN_TOOM22_MUL_N): ... old name.
+ (SPEED_ROUTINE_MPN_TOOM33_MUL, SPEED_ROUTINE_MPN_TOOM44_MUL)
+ (SPEED_ROUTINE_MPN_TOOM6H_MUL, SPEED_ROUTINE_MPN_TOOM8H_MUL):
+ Analogous change and rename.
+ (SPEED_ROUTINE_MPN_TOOM32_MUL, SPEED_ROUTINE_MPN_TOOM42_MUL)
+ (SPEED_ROUTINE_MPN_TOOM43_MUL)
+ (SPEED_ROUTINE_MPN_TOOM63_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE.
+
+ * tune/common.c (speed_mpn_toom22_mul, speed_mpn_toom33_mul)
+ (speed_mpn_toom44_mul, speed_mpn_toom6h_mul)
+ (speed_mpn_toom8h_mul): Updated for rename of corresponding macros.
+
2023-09-24 Niels Möller <nisse at lysator.liu.se>
* tune/speed.h (struct speed_params): Add size_ratio field.
diff -r 2d0b13e266c7 -r 031654cf3098 tune/common.c
--- a/tune/common.c Wed Sep 27 08:55:14 2023 +0200
+++ b/tune/common.c Wed Sep 27 20:58:47 2023 +0200
@@ -1347,27 +1347,27 @@
double
speed_mpn_toom22_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
+ SPEED_ROUTINE_MPN_TOOM22_MUL (mpn_toom22_mul);
}
double
speed_mpn_toom33_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
+ SPEED_ROUTINE_MPN_TOOM33_MUL (mpn_toom33_mul);
}
double
speed_mpn_toom44_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
+ SPEED_ROUTINE_MPN_TOOM44_MUL (mpn_toom44_mul);
}
double
speed_mpn_toom6h_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
+ SPEED_ROUTINE_MPN_TOOM6H_MUL (mpn_toom6h_mul);
}
double
speed_mpn_toom8h_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
+ SPEED_ROUTINE_MPN_TOOM8H_MUL (mpn_toom8h_mul);
}
double
diff -r 2d0b13e266c7 -r 031654cf3098 tune/speed.h
--- a/tune/speed.h Wed Sep 27 08:55:14 2023 +0200
+++ b/tune/speed.h Wed Sep 27 20:58:47 2023 +0200
@@ -1474,6 +1474,47 @@
return t; \
}
+#define SPEED_ROUTINE_MPN_MUL_TSPACE(function, itch, default_bn, valid) \
+ { \
+ mp_ptr wp, tspace; \
+ mp_size_t an, bn, tn; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ an = s->size; \
+ bn = s->size_ratio * s->size; \
+ if (bn == 0) \
+ { \
+ bn = (s->r == 0 ? default_bn : s->r); \
+ if (bn < 0) bn = -bn - an; \
+ } \
+ SPEED_RESTRICT_COND (bn >= 1); \
+ SPEED_RESTRICT_COND (an >= bn); \
+ SPEED_RESTRICT_COND (valid); \
+ tn = itch(an, bn); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, an + bn, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tspace, tn, s->align_wp2); \
+ \
+ speed_operand_src (s, s->xp, an); \
+ speed_operand_src (s, s->yp, bn); \
+ speed_operand_dst (s, wp, an + bn); \
+ speed_operand_dst (s, tspace, tn); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function(wp, s->xp, an, s->yp, bn, tspace); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
{ \
mp_ptr wp, tspace; \
@@ -1504,59 +1545,50 @@
return t; \
}
-#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom22_mul_itch (s->size, s->size), \
- MPN_TOOM22_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom33_mul_itch (s->size, s->size), \
- MPN_TOOM33_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom44_mul_itch (s->size, s->size), \
- MPN_TOOM44_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom6h_mul_itch (s->size, s->size), \
- MPN_TOOM6H_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom8h_mul_itch (s->size, s->size), \
- MPN_TOOM8H_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM22_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom22_mul_itch, \
+ an, 5*bn > 4*an)
+
+#define SPEED_ROUTINE_MPN_TOOM33_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom33_mul_itch, \
+ an, bn > 2 * ((an+2) / 3))
+
+#define SPEED_ROUTINE_MPN_TOOM44_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom44_mul_itch, \
+ an, bn > 3*((an + 3) >> 2))
+
+#define SPEED_ROUTINE_MPN_TOOM6H_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom6h_mul_itch, \
+ an, bn >= 42 && ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17)))
+
+#define SPEED_ROUTINE_MPN_TOOM8H_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom8h_mul_itch, \
+ an, (bn >= 86) && an*4 <= bn*11)
#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \
- mpn_toom32_mul_itch (s->size, 2*s->size/3), \
- MPN_TOOM32_MUL_MINSIZE)
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom32_mul_itch, \
+ 2*an / 3, bn + 2 <= an && an + 6 <= 3*bn)
#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
- mpn_toom42_mul_itch (s->size, s->size/2), \
- MPN_TOOM42_MUL_MINSIZE)
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom42_mul_itch, \
+ an / 2, an >= 7 && bn >= 2 && an > 3*((bn+1)/2) && bn > ((an+3)/4))
#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \
- mpn_toom43_mul_itch (s->size, s->size*3/4), \
- MPN_TOOM43_MUL_MINSIZE)
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom43_mul_itch, \
+ an*3/4, an >= 7 && bn >= 5 && an > 3 * ((bn+2)/3) && bn > 2 * ((an+3)/4))
#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
- mpn_toom63_mul_itch (s->size, s->size/2), \
- MPN_TOOM63_MUL_MINSIZE)
+ SPEED_ROUTINE_MPN_MUL_TSPACE \
+ (function, mpn_toom63_mul_itch, \
+ an/2, an >= 26 && bn >= 5 && an > 5*((bn+2)/3) && bn > 2*((an+5)/6))
#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
More information about the gmp-commit mailing list