[Gmp-commit] /var/hg/gmp: For hgcd2, add a div1 function handling q <= 7 spec...
mercurial at gmplib.org
mercurial at gmplib.org
Thu Sep 5 19:10:29 UTC 2019
details: /var/hg/gmp/rev/beaa1373af16
changeset: 17865:beaa1373af16
user: Niels M?ller <nisse at lysator.liu.se>
date: Thu Sep 05 21:10:05 2019 +0200
description:
For hgcd2, add a div1 function handling q <= 7 specially.
* mpn/generic/hgcd2.c (div1): Return both r and q as a
mp_double_limb_t, replacing the DIV1 macro.
(div1) [HGCD2_METHOD == 3]: New implementation handling q <= 7
specially and without branches. Based on Torbj?rn's mail to the
gmp-devel list.
* tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add
corresponding speed support.
* tune/hgcd2-3.c: New file.
* tune/tuneup.c (print_define_with_speedup): New function, to
output a comment with speedup compared to next-best method.
(tune_hgcd2): Update tuning.
diffstat:
ChangeLog | 14 ++++++++
mpn/generic/hgcd2.c | 91 ++++++++++++++++++++++++++++++++++++++++------------
tune/Makefile.am | 2 +-
tune/common.c | 5 ++
tune/hgcd2-3.c | 39 ++++++++++++++++++++++
tune/speed.c | 1 +
tune/speed.h | 3 +
tune/tuneup.c | 50 +++++++++++++++++++++++-----
8 files changed, 172 insertions(+), 33 deletions(-)
diffs (truncated from 337 to 300 lines):
diff -r e1d151705ddf -r beaa1373af16 ChangeLog
--- a/ChangeLog Wed Sep 04 21:46:41 2019 +0200
+++ b/ChangeLog Thu Sep 05 21:10:05 2019 +0200
@@ -1,3 +1,17 @@
+2019-09-05 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/hgcd2.c (div1): Return both r and q as a
+ mp_double_limb_t, replacing the DIV1 macro.
+ (div1) [HGCD2_METHOD == 3]: New implementation handling q <= 7
+ specially and without branches. Based on Torbjörn's mail to the
+ gmp-devel list.
+ * tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add
+ corresponding speed support.
+ * tune/hgcd2-3.c: New file.
+ * tune/tuneup.c (print_define_with_speedup): New function, to
+ output a comment with speedup compared to next-best method.
+ (tune_hgcd2): Update tuning.
+
2019-09-04 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/hgcd2.c (HGCD2_METHOD): New parameter.
diff -r e1d151705ddf -r beaa1373af16 mpn/generic/hgcd2.c
--- a/mpn/generic/hgcd2.c Wed Sep 04 21:46:41 2019 +0200
+++ b/mpn/generic/hgcd2.c Thu Sep 05 21:10:05 2019 +0200
@@ -36,32 +36,40 @@
#include "longlong.h"
#ifndef HGCD2_METHOD
-#define HGCD2_METHOD 2
+#define HGCD2_METHOD 3
#endif
#if GMP_NAIL_BITS != 0
#error Nails not implemented
#endif
+/* Single-limb division optimized for small quotients. Returned value
+ holds d0 = r, d1 = q */
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0);
+
#if HGCD2_METHOD == 1
-#define DIV1(q, r, a, b) do { \
- (q) = (a)/(b); \
- (r) = (a) - (q)*(b); \
- } while (0)
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ mp_double_limb_t res;
+ res.d1 = n0 / d0;
+ res.d0 = n0 - q * d0;
+
+ return res;
+}
#elif HGCD2_METHOD == 2
/* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
the remainder. */
-/* Single-limb division optimized for small quotients. */
-static inline mp_limb_t
-div1 (mp_ptr rp,
- mp_limb_t n0,
- mp_limb_t d0)
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
{
- mp_limb_t q = 0;
+ mp_double_limb_t res;
+ mp_limb_t q;
if ((mp_limb_signed_t) n0 < 0)
{
@@ -105,14 +113,49 @@
cnt--;
}
}
- *rp = n0;
- return q;
+ res.d0 = n0;
+ res.d1 = q;
+ return res;
}
-#define DIV1(q, r, a, b) do { \
- mp_limb_t __div1_r; \
- (q) = div1 (&__div1_r, a, b); \
- (r) = __div1_r; \
- } while (0)
+
+#elif HGCD2_METHOD == 3
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+ mp_double_limb_t res;
+ mp_limb_t q;
+ if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
+ || UNLIKELY (n0 >= (d0 << 3)))
+ {
+ res.d1 = n0 / d0;
+ res.d0 = n0 - res.d1 * d0;
+ }
+ else
+ {
+ mp_limb_t q, mask;
+
+ d0 <<= 2;
+
+ mask = -(mp_limb_t) (n0 >= d0);
+ q = 4 & mask;
+ n0 -= d0 & mask;
+
+ d0 >>= 1;
+ mask = -(mp_limb_t) (n0 >= d0);
+ q += 2 & mask;
+ n0 -= d0 & mask;
+
+ d0 >>= 1;
+ mask = -(mp_limb_t) (n0 >= d0);
+ q += 1 & mask;
+ n0 -= d0 & mask;
+
+ res.d0 = n0;
+ res.d1 = q;
+ }
+ return res;
+}
#else
#error Unknown HGCD2_METHOD
#endif
@@ -374,8 +417,10 @@
}
else
{
- mp_limb_t q;
- DIV1 (q, ah, ah, bh);
+ mp_double_limb_t rq = div1 (ah, bh);
+ mp_limb_t q = rq.d1;
+ ah = rq.d0;
+
if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
{
/* A is too small, but q is correct. */
@@ -402,8 +447,10 @@
}
else
{
- mp_limb_t q;
- DIV1 (q, bh, bh, ah);
+ mp_double_limb_t rq = div1 (bh, ah);
+ mp_limb_t q = rq.d1;
+ bh = rq.d0;
+
if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
{
/* B is too small, but q is correct. */
diff -r e1d151705ddf -r beaa1373af16 tune/Makefile.am
--- a/tune/Makefile.am Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/Makefile.am Thu Sep 05 21:10:05 2019 +0200
@@ -58,7 +58,7 @@
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
- hgcd2-1.c hgcd2-2.c \
+ hgcd2-1.c hgcd2-2.c hgcd2-3.c \
mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
diff -r e1d151705ddf -r beaa1373af16 tune/common.c
--- a/tune/common.c Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/common.c Thu Sep 05 21:10:05 2019 +0200
@@ -1648,6 +1648,11 @@
{
SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2);
}
+double
+speed_mpn_hgcd2_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3);
+}
double
speed_mpn_hgcd (struct speed_params *s)
diff -r e1d151705ddf -r beaa1373af16 tune/hgcd2-3.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd2-3.c Thu Sep 05 21:10:05 2019 +0200
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 3.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_METHOD
+#define HGCD2_METHOD 3
+#define __gmpn_hgcd2 mpn_hgcd2_3
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3
+
+#include "mpn/generic/hgcd2.c"
diff -r e1d151705ddf -r beaa1373af16 tune/speed.c
--- a/tune/speed.c Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/speed.c Thu Sep 05 21:10:05 2019 +0200
@@ -288,6 +288,7 @@
{ "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA },
{ "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA },
{ "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA },
+ { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA },
{ "mpn_hgcd", speed_mpn_hgcd },
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
{ "mpn_hgcd_appr", speed_mpn_hgcd_appr },
diff -r e1d151705ddf -r beaa1373af16 tune/speed.h
--- a/tune/speed.h Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/speed.h Thu Sep 05 21:10:05 2019 +0200
@@ -217,6 +217,7 @@
double speed_mpn_hgcd2 (struct speed_params *);
double speed_mpn_hgcd2_1 (struct speed_params *);
double speed_mpn_hgcd2_2 (struct speed_params *);
+double speed_mpn_hgcd2_3 (struct speed_params *);
double speed_mpn_hgcd (struct speed_params *);
double speed_mpn_hgcd_lehmer (struct speed_params *);
double speed_mpn_hgcd_appr (struct speed_params *);
@@ -487,6 +488,8 @@
struct hgcd_matrix1 *M);
int mpn_hgcd2_2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
struct hgcd_matrix1 *M);
+int mpn_hgcd2_3 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+ struct hgcd_matrix1 *M);
mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
diff -r e1d151705ddf -r beaa1373af16 tune/tuneup.c
--- a/tune/tuneup.c Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/tuneup.c Thu Sep 05 21:10:05 2019 +0200
@@ -518,6 +518,15 @@
print_define_end_remark (name, value, remark);
}
+void
+print_define_with_margin (const char *name, mp_size_t value,
+ mp_size_t runner_up, double speedup)
+{
+ char buf[100];
+ snprintf (buf, sizeof(buf), "%.2f%% faster than %ld",
+ 100.0 * (speedup - 1), runner_up);
+ print_define_remark (name, value, buf);
+}
void
one (mp_size_t *threshold, struct param_t *param)
@@ -1902,26 +1911,47 @@
tune_hgcd2 (void)
{
static struct param_t param;
- double t1, t2;
+ double t[3+1];
int method;
+ int runner_up_method;
+ double runner_up_ratio;
s.size = 1;
- t1 = tuneup_measure (speed_mpn_hgcd2_1, ¶m, &s);
+ t[1] = tuneup_measure (speed_mpn_hgcd2_1, ¶m, &s);
+ if (option_trace >= 1)
+ printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t[1]);
+
+ t[2] = tuneup_measure (speed_mpn_hgcd2_2, ¶m, &s);
if (option_trace >= 1)
- printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t1);
-
- t2 = tuneup_measure (speed_mpn_hgcd2_2, ¶m, &s);
More information about the gmp-commit
mailing list