[Gmp-commit] /var/hg/gmp: Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.
mercurial at gmplib.org
mercurial at gmplib.org
Fri Nov 11 17:23:14 CET 2011
details: /var/hg/gmp/rev/33eecaf4ebca
changeset: 14434:33eecaf4ebca
user: Niels M?ller <nisse at lysator.liu.se>
date: Fri Nov 11 14:59:14 2011 +0100
description:
Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.
diffstat:
ChangeLog | 24 +++++++++++++++++++
tune/Makefile.am | 7 ++++-
tune/common.c | 16 +++++++++++++
tune/hgcd_reduce_1.c | 30 ++++++++++++++++++++++++
tune/hgcd_reduce_2.c | 29 +++++++++++++++++++++++
tune/speed.c | 4 +++
tune/speed.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
tune/tuneup.c | 25 ++++++++++++++++++++
8 files changed, 197 insertions(+), 2 deletions(-)
diffs (truncated from 301 to 300 lines):
diff -r 486e72d44afe -r 33eecaf4ebca ChangeLog
--- a/ChangeLog Fri Nov 11 14:13:49 2011 +0100
+++ b/ChangeLog Fri Nov 11 14:59:14 2011 +0100
@@ -1,5 +1,29 @@
2011-11-11 Niels Möller <nisse at lysator.liu.se>
+ * tune/hgcd_reduce_2.c: New file.
+ * tune/hgcd_reduce_1.c: New file.
+
+ * tune/tuneup.c (hgcd_appr_threshold): New threshold variable.
+ (hgcd_reduce_threshold): Likewise.
+ (tune_hgcd_appr): New function.
+ (tune_hgcd_reduce): New function.
+ (all): Call tune_hgcd_appr and tune_hgcd_reduce.
+
+ * tune/speed.h (speed_mpn_hgcd_reduce): Declaration.
+ (speed_mpn_hgcd_reduce_[12]): Likewise.
+ (mpn_hgcd_reduce_[12]): Likewise.
+ (SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL): New macro.
+
+ * tune/speed.c (routine): Added mpn_hgcd_reduce,
+ mpn_hgcd_reduce_1, and mpn_hgcd_reduce_2.
+
+ * tune/common.c (speed_mpn_hgcd_reduce): New function.
+ (speed_mpn_hgcd_reduce_[12]): Likewise.
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added hgcd_reduce_1.c
+ hgcd_reduce_2.c.
+ (TUNE_MPN_SRCS_BASIC): Added hgcd_appr.c and hgcd_reduce.c.
+
* mpn/generic/hgcd_appr.c (submul, hgcd_matrix_apply): Deleted
functions, earlier copied to hgcd_reduce.c.
(mpn_hgcd_appr): Use hgcd_reduce.
diff -r 486e72d44afe -r 33eecaf4ebca tune/Makefile.am
--- a/tune/Makefile.am Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/Makefile.am Fri Nov 11 14:59:14 2011 +0100
@@ -43,7 +43,8 @@
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
@@ -129,7 +130,9 @@
TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
- get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c \
mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
mulmid.c mulmid_n.c toom42_mulmid.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
diff -r 486e72d44afe -r 33eecaf4ebca tune/common.c
--- a/tune/common.c Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/common.c Fri Nov 11 14:59:14 2011 +0100
@@ -1539,6 +1539,22 @@
}
double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+}
+
+double
speed_mpn_gcd (struct speed_params *s)
{
SPEED_ROUTINE_MPN_GCD (mpn_gcd);
diff -r 486e72d44afe -r 33eecaf4ebca tune/hgcd_reduce_1.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd_reduce_1.c Fri Nov 11 14:59:14 2011 +0100
@@ -0,0 +1,30 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
diff -r 486e72d44afe -r 33eecaf4ebca tune/hgcd_reduce_2.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd_reduce_2.c Fri Nov 11 14:59:14 2011 +0100
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 0
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
diff -r 486e72d44afe -r 33eecaf4ebca tune/speed.c
--- a/tune/speed.c Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/speed.c Fri Nov 11 14:59:14 2011 +0100
@@ -279,6 +279,10 @@
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
{ "mpn_hgcd_appr", speed_mpn_hgcd_appr },
+ { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
+ { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
+ { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 },
+
{ "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
{ "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
diff -r 486e72d44afe -r 33eecaf4ebca tune/speed.h
--- a/tune/speed.h Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/speed.h Fri Nov 11 14:59:14 2011 +0100
@@ -198,6 +198,9 @@
double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
@@ -488,6 +491,16 @@
__GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
#define MPN_HGCD_LEHMER_ITCH(n) (n)
+mp_size_t mpn_hgcd_reduce_1
+ __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_1_itch
+ __GMP_PROTO ((mp_size_t, mp_size_t));
+
+mp_size_t mpn_hgcd_reduce_2
+ __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_2_itch
+ __GMP_PROTO ((mp_size_t, mp_size_t));
+
mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
@@ -2706,6 +2719,57 @@
return t; \
}
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \
+ { \
+ mp_size_t hgcd_init_itch, hgcd_step_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ mp_size_t p = s->size/2; \
+ int res; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ \
+ s->xp[s->size - 1] |= 1; \
+ s->yp[s->size - 1] |= 1; \
+ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_step_itch = itchfunc (s->size, p); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_step_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (&hgcd, ap, bp, s->size, p, wp); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s->size**2, since GCD is a quadratic algorithm.
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
diff -r 486e72d44afe -r 33eecaf4ebca tune/tuneup.c
--- a/tune/tuneup.c Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/tuneup.c Fri Nov 11 14:59:14 2011 +0100
@@ -195,6 +195,8 @@
mp_size_t powm_threshold = MP_SIZE_T_MAX;
mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX;
mp_size_t hgcd_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX;
mp_size_t gcd_accel_threshold = MP_SIZE_T_MAX;
mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX;
mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX;
@@ -1755,6 +1757,27 @@
}
void
+tune_hgcd_appr (void)
+{
+ static struct param_t param;
+ param.name = "HGCD_APPR_THRESHOLD";
+ param.function = speed_mpn_hgcd_appr;
+ /* We seem to get strange results for small sizes */
+ param.min_size = 30;
+ one (&hgcd_appr_threshold, ¶m);
+}
+
+void
+tune_hgcd_reduce (void)
+{
+ static struct param_t param;
+ param.name = "HGCD_REDUCE_THRESHOLD";
+ param.function = speed_mpn_hgcd_reduce;
+ param.min_size = 30;
+ one (&hgcd_reduce_threshold, ¶m);
+}
+
+void
tune_gcd_dc (void)
{
static struct param_t param;
@@ -2579,6 +2602,8 @@
tune_matrix22_mul ();
tune_hgcd ();
+ tune_hgcd_appr ();
+ tune_hgcd_reduce();
tune_gcd_dc ();
tune_gcdext_dc ();
More information about the gmp-commit
mailing list