From mercurial at gmplib.org Tue Sep 5 18:33:02 2023 From: mercurial at gmplib.org (mercurial at gmplib.org) Date: Tue, 05 Sep 2023 18:33:02 +0200 Subject: [Gmp-commit] /var/hg/gmp: 2 new changesets Message-ID: details: /var/hg/gmp/rev/ef441e461f42 changeset: 18452:ef441e461f42 user: Marco Bodrato date: Tue Sep 05 18:16:53 2023 +0200 description: mpn/generic/toom*: unsigned to handle sign for evaluated couples details: /var/hg/gmp/rev/8225bdfc499f changeset: 18453:8225bdfc499f user: Marco Bodrato date: Tue Sep 05 18:32:26 2023 +0200 description: ChangeLog diffstat: ChangeLog | 7 +++++++ gmp-impl.h | 14 +++++++------- mpn/generic/toom54_mul.c | 2 +- mpn/generic/toom63_mul.c | 14 ++++++++------ mpn/generic/toom6h_mul.c | 2 +- mpn/generic/toom8h_mul.c | 2 +- mpn/generic/toom_couple_handling.c | 2 +- mpn/generic/toom_eval_dgr3_pm1.c | 8 +++++--- mpn/generic/toom_eval_dgr3_pm2.c | 8 +++++--- mpn/generic/toom_eval_pm1.c | 7 ++++--- mpn/generic/toom_eval_pm2.c | 11 ++++++----- mpn/generic/toom_eval_pm2exp.c | 7 ++++--- mpn/generic/toom_eval_pm2rexp.c | 7 ++++--- 13 files changed, 54 insertions(+), 37 deletions(-) diffs (truncated from 328 to 300 lines): diff -r e3cc6f9e9753 -r 8225bdfc499f ChangeLog --- a/ChangeLog Sun Aug 27 20:47:01 2023 +0200 +++ b/ChangeLog Tue Sep 05 18:32:26 2023 +0200 @@ -1,3 +1,10 @@ +2023-09-05 Marco Bodrato + + * mpn/generic/toom{54,63,6h,8h}_mul.c, mpn/generic/toom_couple_handling.c, + mpn/generic/toom_eval_{dgr3_pm1,dgr3_pm2,pm1,pm2,pm2exp,pm2rexp}.c: + Use unsigned to handle sign for evaluated couples of value in Toom code. + * gmp-impl.h: Update prototypes accordingly. 
+ 2023-08-21 FX Coudert Marc Glisse diff -r e3cc6f9e9753 -r 8225bdfc499f gmp-impl.h --- a/gmp-impl.h Sun Aug 27 20:47:01 2023 +0200 +++ b/gmp-impl.h Tue Sep 05 18:32:26 2023 +0200 @@ -1481,25 +1481,25 @@ __GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr); #define mpn_toom_couple_handling __MPN(toom_couple_handling) -__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int); +__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, unsigned, mp_size_t, int, int); #define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1) -__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); +__GMP_DECLSPEC unsigned mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); #define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2) -__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); +__GMP_DECLSPEC unsigned mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); #define mpn_toom_eval_pm1 __MPN(toom_eval_pm1) -__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); +__GMP_DECLSPEC unsigned mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); #define mpn_toom_eval_pm2 __MPN(toom_eval_pm2) -__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); +__GMP_DECLSPEC unsigned mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); #define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp) -__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr); +__GMP_DECLSPEC unsigned mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr); #define mpn_toom_eval_pm2rexp 
__MPN(toom_eval_pm2rexp) -__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr); +__GMP_DECLSPEC unsigned mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr); #define mpn_toom22_mul __MPN(toom22_mul) __GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom54_mul.c --- a/mpn/generic/toom54_mul.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom54_mul.c Tue Sep 05 18:32:26 2023 +0200 @@ -61,7 +61,7 @@ mp_srcptr bp, mp_size_t bn, mp_ptr scratch) { mp_size_t n, s, t; - int sign; + unsigned sign; /***************************** decomposition *******************************/ #define a4 (ap + 4 * n) diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom63_mul.c --- a/mpn/generic/toom63_mul.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom63_mul.c Tue Sep 05 18:32:26 2023 +0200 @@ -37,8 +37,9 @@ #include "gmp-impl.h" -/* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */ -static int +/* Stores |{ap,n}-{bp,n}| in {rp,n}. */ +/* It returns 0 or ~0, depending on the sign of the result. */ +static unsigned abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) { mp_limb_t x, y; @@ -57,7 +58,7 @@ else { mpn_sub_n (rp, bp, ap, n); - return ~0; + return ~ (unsigned) 0; } } rp[n] = 0; @@ -65,9 +66,10 @@ return 0; } -static int +/* It returns 0 or ~0, depending on the sign of the result rm. 
*/ +static unsigned abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) { - int result; + unsigned result; result = abs_sub_n (rm, rp, rs, n); ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n)); return result; @@ -99,7 +101,7 @@ { mp_size_t n, s, t; mp_limb_t cy; - int sign; + unsigned sign; /***************************** decomposition *******************************/ #define a5 (ap + 5 * n) diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom6h_mul.c --- a/mpn/generic/toom6h_mul.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom6h_mul.c Tue Sep 05 18:32:26 2023 +0200 @@ -109,7 +109,7 @@ { mp_size_t n, s, t; int p, q, half; - int sign; + unsigned sign; /***************************** decomposition *******************************/ diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom8h_mul.c --- a/mpn/generic/toom8h_mul.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom8h_mul.c Tue Sep 05 18:32:26 2023 +0200 @@ -119,7 +119,7 @@ { mp_size_t n, s, t; int p, q, half; - int sign; + unsigned sign; /***************************** decomposition *******************************/ diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_couple_handling.c --- a/mpn/generic/toom_couple_handling.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom_couple_handling.c Tue Sep 05 18:32:26 2023 +0200 @@ -45,7 +45,7 @@ */ void mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np, - int nsign, mp_size_t off, int ps, int ns) + unsigned nsign, mp_size_t off, int ps, int ns) { if (nsign) { #ifdef HAVE_NATIVE_mpn_rsh1sub_n diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_eval_dgr3_pm1.c --- a/mpn/generic/toom_eval_dgr3_pm1.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom_eval_dgr3_pm1.c Tue Sep 05 18:32:26 2023 +0200 @@ -37,11 +37,13 @@ #include "gmp-impl.h" -int +/* Evaluates a polynomial of degree 3, in the points +1 and -1. */ +/* It returns 0 or ~0, depending on the sign of the result xm1. 
*/ +unsigned mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp) { - int neg; + unsigned neg; ASSERT (x3n > 0); ASSERT (x3n <= n); @@ -49,7 +51,7 @@ xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n); tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n); - neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0; + neg = - (unsigned) (mpn_cmp (xp1, tp, n + 1) < 0); #if HAVE_NATIVE_mpn_add_n_sub_n if (neg) diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_eval_dgr3_pm2.c --- a/mpn/generic/toom_eval_dgr3_pm2.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom_eval_dgr3_pm2.c Tue Sep 05 18:32:26 2023 +0200 @@ -37,13 +37,15 @@ #include "gmp-impl.h" +/* Evaluates a polynomial of degree 3, in the points +2 and -2. */ /* Needs n+1 limbs of temporary storage. */ -int +/* It returns 0 or ~0, depending on the sign of the result xm2. */ +unsigned mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp) { mp_limb_t cy; - int neg; + unsigned neg; ASSERT (x3n > 0); ASSERT (x3n <= n); @@ -74,7 +76,7 @@ #endif mpn_lshift (tp, tp, n+1, 1); - neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0; + neg = - (unsigned) (mpn_cmp (xp2, tp, n + 1) < 0); #if HAVE_NATIVE_mpn_add_n_sub_n if (neg) diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_eval_pm1.c --- a/mpn/generic/toom_eval_pm1.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom_eval_pm1.c Tue Sep 05 18:32:26 2023 +0200 @@ -38,12 +38,13 @@ #include "gmp-impl.h" /* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */ -int +/* It returns 0 or ~0, depending on the sign of the result xm1. */ +unsigned mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp) { unsigned i; - int neg; + unsigned neg; ASSERT (k >= 4); @@ -66,7 +67,7 @@ else ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn)); - neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? 
~0 : 0; + neg = - (unsigned) (mpn_cmp (xp1, tp, n + 1) < 0); #if HAVE_NATIVE_mpn_add_n_sub_n if (neg) diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_eval_pm2.c --- a/mpn/generic/toom_eval_pm2.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom_eval_pm2.c Tue Sep 05 18:32:26 2023 +0200 @@ -65,12 +65,13 @@ /* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the points +2 and -2. */ -int +/* It returns 0 or ~0, depending on the sign of the result xm2. */ +unsigned mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp) { int i; - int neg; + unsigned neg; mp_limb_t cy; ASSERT (k >= 3); @@ -103,7 +104,7 @@ else ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1)); - neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0; + neg = (mpn_cmp (xp2, tp, n + 1) < 0); #if HAVE_NATIVE_mpn_add_n_sub_n if (neg) @@ -122,9 +123,9 @@ ASSERT (xp2[n] < (1<<(k+2))-1); ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3); - neg ^= ((k & 1) - 1); + neg ^= 1 ^ k & 1; - return neg; + return - neg; } #undef DO_addlsh2 diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_eval_pm2exp.c --- a/mpn/generic/toom_eval_pm2exp.c Sun Aug 27 20:47:01 2023 +0200 +++ b/mpn/generic/toom_eval_pm2exp.c Tue Sep 05 18:32:26 2023 +0200 @@ -38,13 +38,14 @@ #include "gmp-impl.h" /* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */ -int +/* It returns 0 or ~0, depending on the sign of the result xm2. */ +unsigned mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift, mp_ptr tp) { unsigned i; - int neg; + unsigned neg; #if HAVE_NATIVE_mpn_addlsh_n mp_limb_t cy; #endif @@ -101,7 +102,7 @@ mpn_add (xp2, xp2, n+1, xm2, hn+1); #endif /* !HAVE_NATIVE_mpn_addlsh_n */ - neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? 
~0 : 0; + neg = - (unsigned) (mpn_cmp (xp2, tp, n + 1) < 0); #if HAVE_NATIVE_mpn_add_n_sub_n if (neg) diff -r e3cc6f9e9753 -r 8225bdfc499f mpn/generic/toom_eval_pm2rexp.c From mercurial at gmplib.org Sun Sep 24 17:22:05 2023 From: mercurial at gmplib.org (mercurial at gmplib.org) Date: Sun, 24 Sep 2023 17:22:05 +0200 Subject: [Gmp-commit] /var/hg/gmp: Add optional /r (for size ratio) to speed program. Message-ID: details: /var/hg/gmp/rev/a12813260de7 changeset: 18454:a12813260de7 user: Niels Möller date: Sun Sep 24 17:21:44 2023 +0200 description: Add optional /r (for size ratio) to speed program. diffstat: ChangeLog | 14 +++++++++++++ tune/speed.c | 63 +++++++++++++++++++++++++++++++++++++++++++++-------------- tune/speed.h | 11 +++++++-- 3 files changed, 70 insertions(+), 18 deletions(-) diffs (182 lines): diff -r 8225bdfc499f -r a12813260de7 ChangeLog --- a/ChangeLog Tue Sep 05 18:32:26 2023 +0200 +++ b/ChangeLog Sun Sep 24 17:21:44 2023 +0200 @@ -1,3 +1,17 @@ +2023-09-24 Niels Möller + + * tune/speed.h (struct speed_params): Add size_ratio field. + (SPEED_ROUTINE_MPN_MUL): If size_ratio > 0.0, use it to set size1. + + * tune/speed.c (FLAG_SR_OPTIONAL): New flag. + (FLAG_RSIZE): Deleted unused flag. + (routine): Add FLAG_SR_OPTIONAL to all unbalanced multiplication + functions. + (slash_r_string): New function, parse float ratio. + (run_one): Copy size_ratio field. + (routine_find): Handle optional /r. + (usage): Display help on /r option. 
+ 2023-09-05 Marco Bodrato * mpn/generic/toom{54,63,6h,8h}_mul.c, mpn/generic/toom_couple_handling.c, diff -r 8225bdfc499f -r a12813260de7 tune/speed.c --- a/tune/speed.c Tue Sep 05 18:32:26 2023 +0200 +++ b/tune/speed.c Sun Sep 24 17:21:44 2023 +0200 @@ -130,7 +130,7 @@ #define FLAG_R (1<<0) /* require ".r" */ #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ -#define FLAG_RSIZE (1<<2) +#define FLAG_SR_OPTIONAL (1<<2) /* optional ".r" or "/r" */ #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ const struct routine_t { @@ -328,8 +328,8 @@ { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, - { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, - { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, + { "mpn_mul", speed_mpn_mul, FLAG_SR_OPTIONAL }, + { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_SR_OPTIONAL }, { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, #if HAVE_NATIVE_mpn_sqr_diagonal { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, @@ -346,22 +346,22 @@ { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, - { "mpn_toom22_mul", speed_mpn_toom22_mul }, - { "mpn_toom33_mul", speed_mpn_toom33_mul }, - { "mpn_toom44_mul", speed_mpn_toom44_mul }, - { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, - { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, - { "mpn_toom32_mul", speed_mpn_toom32_mul }, - { "mpn_toom42_mul", speed_mpn_toom42_mul }, - { "mpn_toom43_mul", speed_mpn_toom43_mul }, - { "mpn_toom63_mul", speed_mpn_toom63_mul }, - { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, + { "mpn_toom22_mul", speed_mpn_toom22_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom33_mul", speed_mpn_toom33_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom44_mul", speed_mpn_toom44_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom6h_mul", speed_mpn_toom6h_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom8h_mul", speed_mpn_toom8h_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom32_mul", speed_mpn_toom32_mul, 
FLAG_SR_OPTIONAL }, + { "mpn_toom42_mul", speed_mpn_toom42_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom43_mul", speed_mpn_toom43_mul, FLAG_SR_OPTIONAL }, + { "mpn_toom63_mul", speed_mpn_toom63_mul, FLAG_SR_OPTIONAL }, + { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul, FLAG_SR_OPTIONAL}, { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, #if WANT_OLD_FFT_FULL - { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, + { "mpn_mul_fft_full", speed_mpn_mul_fft_full, FLAG_SR_OPTIONAL}, { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, #endif - { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, + { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_SR_OPTIONAL }, { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, { "mpn_sqrlo", speed_mpn_sqrlo }, @@ -576,6 +576,7 @@ struct choice_t { const struct routine_t *p; mp_limb_t r; + double size_ratio; double scale; double time; int no_time; @@ -670,6 +671,7 @@ for (i = 0; i < num_choices; i++) { s->r = choice[i].r; + s->size_ratio = choice[i].size_ratio; choice[i].time = speed_measure (choice[i].p->fun, s); choice[i].no_time = (choice[i].time == -1.0); if (! choice[i].no_time) @@ -1011,6 +1013,17 @@ return n; } +double slash_r_string (const char *s) +{ + char *end; + double r = strtod(s, &end); + if (s[0] == '\0' || end[0] != '\0' || r > 1.0 || r < 0.0) + { + fprintf (stderr, "invalid /r parameter: %s\n", s); + exit (1); + } + return r; +} void routine_find (struct choice_t *c, const char *s_orig) @@ -1054,6 +1067,22 @@ c->r = r_string (s + nlen + 1); return; } + if (s[nlen] == '/') + { + /* match, with a /r parameter */ + + if (! 
(routine[i].flag & (FLAG_SR_OPTIONAL))) + { + fprintf (stderr, + "Choice %s bad: doesn't take a \"/\" parameter\n", + s_orig); + exit (1); + } + + c->p = &routine[i]; + c->size_ratio = slash_r_string (s + nlen + 1); + return; + } if (s[nlen] == '\0') { @@ -1125,6 +1154,8 @@ printf ("\t%s.r\n", routine[i].name); else if (routine[i].flag & FLAG_R_OPTIONAL) printf ("\t%s (optional .r)\n", routine[i].name); + else if (routine[i].flag & FLAG_SR_OPTIONAL) + printf ("\t%s (optional .r or /r)\n", routine[i].name); else printf ("\t%s\n", routine[i].name); } @@ -1135,6 +1166,8 @@ printf ("Special forms for r are \"bits\" for a random N bit number, \"ones\" for\n"); printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); printf ("\n"); + printf ("Routines with an optional \"/r\" take a decimal ratio, for example mpn_mul/0.7.\n"); + printf ("\n"); printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); printf ("The fastest routine at each size is marked with a # (free form output only).\n"); printf ("\n"); diff -r 8225bdfc499f -r a12813260de7 tune/speed.h --- a/tune/speed.h Tue Sep 05 18:32:26 2023 +0200 +++ b/tune/speed.h Sun Sep 24 17:21:44 2023 +0200 @@ -113,6 +113,7 @@ mp_ptr yp; /* second argument */ mp_size_t size; /* size of both arguments */ mp_limb_t r; /* user supplied parameter */ + double size_ratio; /* ratio for smaller to larger size, e.g., for mpn_mul */ mp_size_t align_xp; /* alignment of xp */ mp_size_t align_yp; /* alignment of yp */ mp_size_t align_wp; /* intended alignment of wp */ @@ -1122,9 +1123,13 @@ double t; \ TMP_DECL; \ \ - size1 = (s->r == 0 ? s->size : s->r); \ - if (size1 < 0) size1 = -size1 - s->size; \ - \ + if (s->size_ratio > 0.0) \ + size1 = s->size_ratio * s->size; \ + else \ + { \ + size1 = (s->r == 0 ? 
s->size : s->r); \ + if (size1 < 0) size1 = -size1 - s->size; \ + } \ SPEED_RESTRICT_COND (size1 >= 1); \ SPEED_RESTRICT_COND (s->size >= size1); \ \ From mercurial at gmplib.org Wed Sep 27 08:55:20 2023 From: mercurial at gmplib.org (mercurial at gmplib.org) Date: Wed, 27 Sep 2023 08:55:20 +0200 Subject: [Gmp-commit] /var/hg/gmp: Fix bugs in previous change (speed /r option). Message-ID: details: /var/hg/gmp/rev/2d0b13e266c7 changeset: 18455:2d0b13e266c7 user: Niels Möller date: Wed Sep 27 08:55:14 2023 +0200 description: Fix bugs in previous change (speed /r option). diffstat: tune/speed.c | 2 +- tune/speed.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diffs (27 lines): diff -r a12813260de7 -r 2d0b13e266c7 tune/speed.c --- a/tune/speed.c Sun Sep 24 17:21:44 2023 +0200 +++ b/tune/speed.c Wed Sep 27 08:55:14 2023 +0200 @@ -1055,7 +1055,7 @@ { /* match, with a .r parameter */ - if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) + if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL|FLAG_SR_OPTIONAL))) { fprintf (stderr, "Choice %s bad: doesn't take a \".\" parameter\n", diff -r a12813260de7 -r 2d0b13e266c7 tune/speed.h --- a/tune/speed.h Sun Sep 24 17:21:44 2023 +0200 +++ b/tune/speed.h Wed Sep 27 08:55:14 2023 +0200 @@ -1123,9 +1123,8 @@ double t; \ TMP_DECL; \ \ - if (s->size_ratio > 0.0) \ - size1 = s->size_ratio * s->size; \ - else \ + size1 = s->size_ratio * s->size; \ + if (size1 == 0) \ { \ size1 = (s->r == 0 ? s->size : s->r); \ if (size1 < 0) size1 = -size1 - s->size; \ From mercurial at gmplib.org Wed Sep 27 20:59:00 2023 From: mercurial at gmplib.org (mercurial at gmplib.org) Date: Wed, 27 Sep 2023 20:59:00 +0200 Subject: [Gmp-commit] /var/hg/gmp: Proper support for .r and /r for benchmarks of mpn_... Message-ID: details: /var/hg/gmp/rev/031654cf3098 changeset: 18456:031654cf3098 user: Niels Möller date: Wed Sep 27 20:58:47 2023 +0200 description: Proper support for .r and /r for benchmarks of mpn_toom*_mul. 
diffstat: ChangeLog | 18 ++++++++ tune/common.c | 10 ++-- tune/speed.h | 122 ++++++++++++++++++++++++++++++++++++--------------------- 3 files changed, 100 insertions(+), 50 deletions(-) diffs (208 lines): diff -r 2d0b13e266c7 -r 031654cf3098 ChangeLog --- a/ChangeLog Wed Sep 27 08:55:14 2023 +0200 +++ b/ChangeLog Wed Sep 27 20:58:47 2023 +0200 @@ -1,3 +1,21 @@ +2023-09-27 Niels Möller + + * tune/speed.h (SPEED_ROUTINE_MPN_MUL_TSPACE): New macro, + supporting .r and /r for benchmarking unbalanced operation. + (SPEED_ROUTINE_MPN_TOOM22_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE, + and renamed from... + (SPEED_ROUTINE_MPN_TOOM22_MUL_N): ... old name. + (SPEED_ROUTINE_MPN_TOOM33_MUL, SPEED_ROUTINE_MPN_TOOM44_MUL) + (SPEED_ROUTINE_MPN_TOOM6H_MUL, SPEED_ROUTINE_MPN_TOOM8H_MUL): + Analogous change and rename. + (SPEED_ROUTINE_MPN_TOOM32_MUL, SPEED_ROUTINE_MPN_TOOM42_MUL) + (SPEED_ROUTINE_MPN_TOOM43_MUL) + (SPEED_ROUTINE_MPN_TOOM63_MUL): Use SPEED_ROUTINE_MPN_MUL_TSPACE. + + * tune/common.c (speed_mpn_toom22_mul, speed_mpn_toom33_mul) + (speed_mpn_toom44_mul, speed_mpn_toom6h_mul) + (speed_mpn_toom8h_mul): Updated for rename of corresponding macros. + 2023-09-24 Niels Möller * tune/speed.h (struct speed_params): Add size_ratio field. 
diff -r 2d0b13e266c7 -r 031654cf3098 tune/common.c --- a/tune/common.c Wed Sep 27 08:55:14 2023 +0200 +++ b/tune/common.c Wed Sep 27 20:58:47 2023 +0200 @@ -1347,27 +1347,27 @@ double speed_mpn_toom22_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul); + SPEED_ROUTINE_MPN_TOOM22_MUL (mpn_toom22_mul); } double speed_mpn_toom33_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul); + SPEED_ROUTINE_MPN_TOOM33_MUL (mpn_toom33_mul); } double speed_mpn_toom44_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul); + SPEED_ROUTINE_MPN_TOOM44_MUL (mpn_toom44_mul); } double speed_mpn_toom6h_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul); + SPEED_ROUTINE_MPN_TOOM6H_MUL (mpn_toom6h_mul); } double speed_mpn_toom8h_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul); + SPEED_ROUTINE_MPN_TOOM8H_MUL (mpn_toom8h_mul); } double diff -r 2d0b13e266c7 -r 031654cf3098 tune/speed.h --- a/tune/speed.h Wed Sep 27 08:55:14 2023 +0200 +++ b/tune/speed.h Wed Sep 27 20:58:47 2023 +0200 @@ -1474,6 +1474,47 @@ return t; \ } +#define SPEED_ROUTINE_MPN_MUL_TSPACE(function, itch, default_bn, valid) \ + { \ + mp_ptr wp, tspace; \ + mp_size_t an, bn, tn; \ + unsigned i; \ + double t; \ + TMP_DECL; \ + \ + an = s->size; \ + bn = s->size_ratio * s->size; \ + if (bn == 0) \ + { \ + bn = (s->r == 0 ? 
default_bn : s->r); \ + if (bn < 0) bn = -bn - an; \ + } \ + SPEED_RESTRICT_COND (bn >= 1); \ + SPEED_RESTRICT_COND (an >= bn); \ + SPEED_RESTRICT_COND (valid); \ + tn = itch(an, bn); \ + \ + TMP_MARK; \ + SPEED_TMP_ALLOC_LIMBS (wp, an + bn, s->align_wp); \ + SPEED_TMP_ALLOC_LIMBS (tspace, tn, s->align_wp2); \ + \ + speed_operand_src (s, s->xp, an); \ + speed_operand_src (s, s->yp, bn); \ + speed_operand_dst (s, wp, an + bn); \ + speed_operand_dst (s, tspace, tn); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + do \ + function(wp, s->xp, an, s->yp, bn, tspace); \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + TMP_FREE; \ + return t; \ + } + #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \ { \ mp_ptr wp, tspace; \ @@ -1504,59 +1545,50 @@ return t; \ } -#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom22_mul_itch (s->size, s->size), \ - MPN_TOOM22_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom33_mul_itch (s->size, s->size), \ - MPN_TOOM33_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom44_mul_itch (s->size, s->size), \ - MPN_TOOM44_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom6h_mul_itch (s->size, s->size), \ - MPN_TOOM6H_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom8h_mul_itch (s->size, s->size), \ - MPN_TOOM8H_MUL_MINSIZE) +#define SPEED_ROUTINE_MPN_TOOM22_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, 
mpn_toom22_mul_itch, \ + an, 5*bn > 4*an) + +#define SPEED_ROUTINE_MPN_TOOM33_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom33_mul_itch, \ + an, bn > 2 * ((an+2) / 3)) + +#define SPEED_ROUTINE_MPN_TOOM44_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom44_mul_itch, \ + an, bn > 3*((an + 3) >> 2)) + +#define SPEED_ROUTINE_MPN_TOOM6H_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom6h_mul_itch, \ + an, bn >= 42 && ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17))) + +#define SPEED_ROUTINE_MPN_TOOM8H_MUL(function) \ + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom8h_mul_itch, \ + an, (bn >= 86) && an*4 <= bn*11) #define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \ - mpn_toom32_mul_itch (s->size, 2*s->size/3), \ - MPN_TOOM32_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom32_mul_itch, \ + 2*an / 3, bn + 2 <= an && an + 6 <= 3*bn) #define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \ - mpn_toom42_mul_itch (s->size, s->size/2), \ - MPN_TOOM42_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom42_mul_itch, \ + an / 2, an >= 7 && bn >= 2 && an > 3*((bn+1)/2) && bn > ((an+3)/4)) #define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \ - mpn_toom43_mul_itch (s->size, s->size*3/4), \ - MPN_TOOM43_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE \ + (function, mpn_toom43_mul_itch, \ + an*3/4, an >= 7 && bn >= 5 && an > 3 * ((bn+2)/3) && bn > 2 * ((an+3)/4)) #define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \ - mpn_toom63_mul_itch (s->size, s->size/2), \ - MPN_TOOM63_MUL_MINSIZE) + SPEED_ROUTINE_MPN_MUL_TSPACE 
\ + (function, mpn_toom63_mul_itch, \ + an/2, an >= 26 && bn >= 5 && an > 5*((bn+2)/3) && bn > 2*((an+5)/6)) #define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \ SPEED_ROUTINE_MPN_MUL_N_TSPACE \