[Gmp-commit] /var/hg/gmp: Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.

mercurial at gmplib.org mercurial at gmplib.org
Fri Nov 11 17:23:14 CET 2011


details:   /var/hg/gmp/rev/33eecaf4ebca
changeset: 14434:33eecaf4ebca
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Fri Nov 11 14:59:14 2011 +0100
description:
Tuning of mpn_hgcd_appr and mpn_hgcd_reduce.

diffstat:

 ChangeLog            |  24 +++++++++++++++++++
 tune/Makefile.am     |   7 ++++-
 tune/common.c        |  16 +++++++++++++
 tune/hgcd_reduce_1.c |  30 ++++++++++++++++++++++++
 tune/hgcd_reduce_2.c |  29 +++++++++++++++++++++++
 tune/speed.c         |   4 +++
 tune/speed.h         |  64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 tune/tuneup.c        |  25 ++++++++++++++++++++
 8 files changed, 197 insertions(+), 2 deletions(-)

diffs (truncated from 301 to 300 lines):

diff -r 486e72d44afe -r 33eecaf4ebca ChangeLog
--- a/ChangeLog	Fri Nov 11 14:13:49 2011 +0100
+++ b/ChangeLog	Fri Nov 11 14:59:14 2011 +0100
@@ -1,5 +1,29 @@
 2011-11-11  Niels Möller  <nisse at lysator.liu.se>
 
+	* tune/hgcd_reduce_2.c: New file.
+	* tune/hgcd_reduce_1.c: New file.
+
+	* tune/tuneup.c (hgcd_appr_threshold): New threshold variable.
+	(hgcd_reduce_threshold): Likewise.
+	(tune_hgcd_appr): New function.
+	(tune_hgcd_reduce): New function.
+	(all): Call tune_hgcd_appr and tune_hgcd_reduce.
+
+	* tune/speed.h (speed_mpn_hgcd_reduce): Declaration.
+	(speed_mpn_hgcd_reduce_[12]): Likewise.
+	(mpn_hgcd_reduce_[12]): Likewise.
+	(SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL): New macro.
+
+	* tune/speed.c (routine): Added mpn_hgcd_reduce,
+	mpn_hgcd_reduce_1, and mpn_hgcd_reduce_2.
+
+	* tune/common.c (speed_mpn_hgcd_reduce): New function.
+	(speed_mpn_hgcd_reduce_[12]): Likewise.
+
+	* tune/Makefile.am (libspeed_la_SOURCES): Added hgcd_reduce_1.c
+	hgcd_reduce_2.c.
+	(TUNE_MPN_SRCS_BASIC): Added hgcd_appr.c and hgcd_reduce.c.
+
 	* mpn/generic/hgcd_appr.c (submul, hgcd_matrix_apply): Deleted
 	functions, earlier copied to hgcd_reduce.c.
 	(mpn_hgcd_appr): Use hgcd_reduce.
diff -r 486e72d44afe -r 33eecaf4ebca tune/Makefile.am
--- a/tune/Makefile.am	Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/Makefile.am	Fri Nov 11 14:59:14 2011 +0100
@@ -43,7 +43,8 @@
   common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c		\
   freq.c								\
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
-  hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c		\
+  hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c				\
+  jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
   mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
   set_strb.c set_strs.c set_strp.c time.c
@@ -129,7 +130,9 @@
 TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c			\
   dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
   invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c		\
-  get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c		\
+  get_str.c set_str.c matrix22_mul.c					\
+  hgcd.c hgcd_appr.c hgcd_reduce.c					\
+  mul_n.c sqr.c								\
   mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c	\
   mulmid.c mulmid_n.c toom42_mulmid.c					\
   nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c	\
diff -r 486e72d44afe -r 33eecaf4ebca tune/common.c
--- a/tune/common.c	Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/common.c	Fri Nov 11 14:59:14 2011 +0100
@@ -1539,6 +1539,22 @@
 }
 
 double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+}
+
+double
 speed_mpn_gcd (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_GCD (mpn_gcd);
diff -r 486e72d44afe -r 33eecaf4ebca tune/hgcd_reduce_1.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd_reduce_1.c	Fri Nov 11 14:59:14 2011 +0100
@@ -0,0 +1,30 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_reduce  mpn_hgcd_reduce_1
+#define __gmpn_hgcd_reduce_itch  mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
diff -r 486e72d44afe -r 33eecaf4ebca tune/hgcd_reduce_2.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd_reduce_2.c	Fri Nov 11 14:59:14 2011 +0100
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 0
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
diff -r 486e72d44afe -r 33eecaf4ebca tune/speed.c
--- a/tune/speed.c	Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/speed.c	Fri Nov 11 14:59:14 2011 +0100
@@ -279,6 +279,10 @@
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
 
+  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
+  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
+  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
+  
   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
 
diff -r 486e72d44afe -r 33eecaf4ebca tune/speed.h
--- a/tune/speed.h	Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/speed.h	Fri Nov 11 14:59:14 2011 +0100
@@ -198,6 +198,9 @@
 double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
@@ -488,6 +491,16 @@
   __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
 #define MPN_HGCD_LEHMER_ITCH(n) (n)
 
+mp_size_t mpn_hgcd_reduce_1
+  __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_1_itch
+  __GMP_PROTO ((mp_size_t, mp_size_t));
+
+mp_size_t mpn_hgcd_reduce_2
+  __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
+mp_size_t mpn_hgcd_reduce_2_itch
+  __GMP_PROTO ((mp_size_t, mp_size_t));
+
 mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
 mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
 
@@ -2706,6 +2719,57 @@
     return t;								\
   }
 
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc)		\
+  {									\
+    mp_size_t hgcd_init_itch, hgcd_step_itch;				\
+    mp_ptr ap, bp, wp, tmp1;						\
+    struct hgcd_matrix hgcd;						\
+    mp_size_t p = s->size/2;						\
+    int res;								\
+    unsigned i;								\
+    double t;								\
+    TMP_DECL;								\
+    									\
+    if (s->size < 2)							\
+      return -1;							\
+    									\
+    TMP_MARK;								\
+    									\
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
+    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
+    									\
+    s->xp[s->size - 1] |= 1;						\
+    s->yp[s->size - 1] |= 1;						\
+    									\
+    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
+    hgcd_step_itch = itchfunc (s->size, p);				\
+    									\
+    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp);			\
+    									\
+    speed_operand_src (s, s->xp, s->size);				\
+    speed_operand_src (s, s->yp, s->size);				\
+    speed_operand_dst (s, ap, s->size + 1);				\
+    speed_operand_dst (s, bp, s->size + 1);				\
+    speed_operand_dst (s, wp, hgcd_step_itch);				\
+    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
+    speed_cache_fill (s);						\
+    									\
+    speed_starttime ();							\
+    i = s->reps;							\
+    do									\
+      {									\
+	MPN_COPY (ap, s->xp, s->size);					\
+	MPN_COPY (bp, s->yp, s->size);					\
+	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
+	res = func (&hgcd, ap, bp, s->size, p, wp);			\
+      }									\
+    while (--i != 0);							\
+    t = speed_endtime ();						\
+    TMP_FREE;								\
+    return t;								\
+  }
+
 /* Run some GCDs of s->size limbs each.  The number of different data values
    is decreased as s->size**2, since GCD is a quadratic algorithm.
    SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
diff -r 486e72d44afe -r 33eecaf4ebca tune/tuneup.c
--- a/tune/tuneup.c	Fri Nov 11 14:13:49 2011 +0100
+++ b/tune/tuneup.c	Fri Nov 11 14:59:14 2011 +0100
@@ -195,6 +195,8 @@
 mp_size_t  powm_threshold               = MP_SIZE_T_MAX;
 mp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;
 mp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;
+mp_size_t  hgcd_appr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  hgcd_reduce_threshold        = MP_SIZE_T_MAX;
 mp_size_t  gcd_accel_threshold          = MP_SIZE_T_MAX;
 mp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;
 mp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;
@@ -1755,6 +1757,27 @@
 }
 
 void
+tune_hgcd_appr (void)
+{
+  static struct param_t  param;
+  param.name = "HGCD_APPR_THRESHOLD";
+  param.function = speed_mpn_hgcd_appr;
+  /* We seem to get strange results for small sizes */
+  param.min_size = 30;
+  one (&hgcd_appr_threshold, &param);
+}
+
+void
+tune_hgcd_reduce (void)
+{
+  static struct param_t  param;
+  param.name = "HGCD_REDUCE_THRESHOLD";
+  param.function = speed_mpn_hgcd_reduce;
+  param.min_size = 30;
+  one (&hgcd_reduce_threshold, &param);
+}
+
+void
 tune_gcd_dc (void)
 {
   static struct param_t  param;
@@ -2579,6 +2602,8 @@
 
   tune_matrix22_mul ();
   tune_hgcd ();
+  tune_hgcd_appr ();
+  tune_hgcd_reduce();
   tune_gcd_dc ();
   tune_gcdext_dc ();


More information about the gmp-commit mailing list