[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Dec 3 15:00:09 CET 2009


details:   /home/hgfiles/gmp/rev/b66af9e72b19
changeset: 12963:b66af9e72b19
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 03 14:59:26 2009 +0100
description:
Tune DC_DIVAPPR_Q_THRESHOLD.  Rewrite DC_DIV_QR_THRESHOLD tuning code.

details:   /home/hgfiles/gmp/rev/5f710b3ff845
changeset: 12964:5f710b3ff845
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 03 15:00:04 2009 +0100
description:
Trivial merge.

diffstat:

 ChangeLog        |  11 ++++++++++-
 doc/gmp.texi     |   4 ++--
 gmp-impl.h       |   8 ++++++++
 tune/Makefile.am |   2 +-
 tune/common.c    |  22 ++++++++++++++++++++++
 tune/speed.h     |  49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 tune/tuneup.c    |  35 +++++++++++++++++++++++------------
 7 files changed, 114 insertions(+), 17 deletions(-)

diffs (246 lines):

diff -r e31f9b520313 -r 5f710b3ff845 ChangeLog
--- a/ChangeLog	Thu Dec 03 11:19:55 2009 +0100
+++ b/ChangeLog	Thu Dec 03 15:00:04 2009 +0100
@@ -1,5 +1,14 @@
 2009-12-03  Torbjorn Granlund  <tege at gmplib.org>
 
+	* tune/tuneup.c: Tune DC_DIVAPPR_Q_THRESHOLD.  Rewrite
+	DC_DIV_QR_THRESHOLD tuning code.
+	(tune_dc_div): Rewrite.
+	* tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV): New macro.
+	* tune/common.c (speed_mpn_sbpi1_div_qr, speed_mpn_dcpi1_div_qr,
+	speed_mpn_sbpi1_divappr_q, speed_mpn_dcpi1_divappr_q): New functions.
+	* gmp-impl.h: Provide declarations for corresponding threshold vars.
+	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add dcpi1_divappr_q.c.
+
 	* tune/tuneup.c (tune_binvert): Up max_size.
 
 2009-12-02  Marco Bodrato <bodrato at mail.dm.unipi.it>
@@ -17,7 +26,7 @@
 
 	* tune/tuneup.c: Tune BINV_NEWTON_THRESHOLD.
 	(tune_binvert): New function.
-	* tune/speed.h (SPEED_ROUTINE_MPN_BINVERT): New macros.
+	* tune/speed.h (SPEED_ROUTINE_MPN_BINVERT): New macro.
 	* tune/common.c (speed_mpn_binvert): New function.
 	* gmp-impl.h: Provide declarations for corresponding threshold vars.
 	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add binvert.c.
diff -r e31f9b520313 -r 5f710b3ff845 doc/gmp.texi
--- a/doc/gmp.texi	Thu Dec 03 11:19:55 2009 +0100
+++ b/doc/gmp.texi	Thu Dec 03 15:00:04 2009 +0100
@@ -5177,9 +5177,9 @@
 Compute the square of @{@var{s1p}, @var{n}@} and write the 2*@var{n}-limb
 result to @var{rp}.
 
-The destination has to have space for 2*@var{n} limbs, even if the product's
+The destination has to have space for 2*@var{n} limbs, even if the result's
 most significant limb is zero.  No overlap is permitted between the
-destination and either source.
+destination and the source.
 @end deftypefun
 
 @deftypefun mp_limb_t mpn_mul_1 (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n}, mp_limb_t @var{s2limb})
diff -r e31f9b520313 -r 5f710b3ff845 gmp-impl.h
--- a/gmp-impl.h	Thu Dec 03 11:19:55 2009 +0100
+++ b/gmp-impl.h	Thu Dec 03 15:00:04 2009 +0100
@@ -4146,6 +4146,14 @@
 #define DC_DIV_QR_THRESHOLD          dc_div_qr_threshold
 extern mp_size_t                     dc_div_qr_threshold;
 
+#undef  DC_DIVAPPR_Q_THRESHOLD
+#define DC_DIVAPPR_Q_THRESHOLD       dc_divappr_q_threshold
+extern mp_size_t                     dc_divappr_q_threshold;
+
+#undef  DC_DIV_Q_THRESHOLD
+#define DC_DIV_Q_THRESHOLD           dc_div_q_threshold
+extern mp_size_t                     dc_div_q_threshold;
+
 #undef  DC_BDIV_Q_THRESHOLD
 #define DC_BDIV_Q_THRESHOLD          dc_bdiv_q_threshold
 extern mp_size_t                     dc_bdiv_q_threshold;
diff -r e31f9b520313 -r 5f710b3ff845 tune/Makefile.am
--- a/tune/Makefile.am	Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/Makefile.am	Thu Dec 03 15:00:04 2009 +0100
@@ -122,7 +122,7 @@
 # recompiled object will be rebuilt if that file changes.
 
 TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
+TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
   binvert.c divrem_2.c gcd.c gcdext.c get_str.c				\
   set_str.c matrix22_mul.c hgcd.c mul_n.c sqr_n.c			\
   mullow_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c			\
diff -r e31f9b520313 -r 5f710b3ff845 tune/common.c
--- a/tune/common.c	Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/common.c	Thu Dec 03 15:00:04 2009 +0100
@@ -738,6 +738,27 @@
 }
 
 double
+speed_mpn_sbpi1_div_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32);
+}
+double
+speed_mpn_dcpi1_div_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv);
+}
+double
+speed_mpn_sbpi1_divappr_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32);
+}
+double
+speed_mpn_dcpi1_divappr_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv);
+}
+
+double
 speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
@@ -757,6 +778,7 @@
 {
   SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
 }
+
 double
 speed_mpn_binvert (struct speed_params *s)
 {
diff -r e31f9b520313 -r 5f710b3ff845 tune/speed.h
--- a/tune/speed.h	Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/speed.h	Thu Dec 03 15:00:04 2009 +0100
@@ -235,6 +235,10 @@
 double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_div_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
@@ -1077,7 +1081,7 @@
 #define SPEED_ROUTINE_MPN_TOOM42_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
     (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),		\
-     mpn_toom42_mul_itch (s->size, s->size/2),			\
+     mpn_toom42_mul_itch (s->size, s->size/2),				\
      MPN_TOOM42_MUL_MINSIZE)
 
 #define SPEED_ROUTINE_MPN_SQR_CALL(call)				\
@@ -1303,6 +1307,49 @@
     return speed_endtime ();						\
   }
 
+#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV)			\
+  {									\
+    unsigned   i;							\
+    mp_ptr     dp, tp, ap, qp;						\
+    gmp_pi1_t  inv;							\
+    double     t;							\
+    TMP_DECL;								\
+									\
+    SPEED_RESTRICT_COND (s->size >= 1);					\
+									\
+    TMP_MARK;								\
+    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp);			\
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
+    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2);		\
+									\
+    MPN_COPY (ap,         s->xp, s->size);				\
+    MPN_COPY (ap+s->size, s->xp, s->size);				\
+									\
+    /* normalize the data */						\
+    dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
+    ap[2*s->size-1] = dp[s->size-1] - 1;				\
+									\
+    invert_pi1 (inv, dp[s->size-1], dp[s->size-2]);			\
+									\
+    speed_operand_src (s, ap, 2*s->size);				\
+    speed_operand_dst (s, tp, 2*s->size);				\
+    speed_operand_src (s, dp, s->size);					\
+    speed_operand_dst (s, qp, s->size);					\
+    speed_cache_fill (s);						\
+									\
+    speed_starttime ();							\
+    i = s->reps;							\
+    do {								\
+      MPN_COPY (tp, ap, 2*s->size);					\
+      function (qp, tp, 2*s->size, dp, s->size, INV);			\
+    } while (--i != 0);							\
+    t = speed_endtime ();						\
+									\
+    TMP_FREE;								\
+    return t;								\
+  }
+
 #define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function)				\
   {									\
     unsigned   i;							\
diff -r e31f9b520313 -r 5f710b3ff845 tune/tuneup.c
--- a/tune/tuneup.c	Thu Dec 03 11:19:55 2009 +0100
+++ b/tune/tuneup.c	Thu Dec 03 15:00:04 2009 +0100
@@ -165,8 +165,9 @@
 mp_size_t  mulmod_bnm1_threshold        = MP_SIZE_T_MAX;
 mp_size_t  div_sb_preinv_threshold      = MP_SIZE_T_MAX;
 mp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  dc_divappr_q_threshold       = MP_SIZE_T_MAX;
+mp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;
 mp_size_t  dc_bdiv_q_threshold          = MP_SIZE_T_MAX;
-mp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;
 mp_size_t  binv_newton_threshold        = MP_SIZE_T_MAX;
 mp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;
 mp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;
@@ -998,19 +999,24 @@
 void
 tune_dc_div (void)
 {
-  static struct param_t  param;
-  param.name = "DC_DIV_QR_THRESHOLD";
-  param.function = speed_mpn_dc_tdiv_qr;
-  param.step_factor = 0.02;
-  one (&dc_div_qr_threshold, &param);
+  {
+    static struct param_t  param;
+    param.name = "DC_DIV_QR_THRESHOLD";
+    param.function = speed_mpn_sbpi1_div_qr;
+    param.function2 = speed_mpn_dcpi1_div_qr;
+    param.min_size = 4;
+    one (&dc_div_qr_threshold, &param);
+  }
+  {
+    static struct param_t  param;
+    param.name = "DC_DIVAPPR_Q_THRESHOLD";
+    param.function = speed_mpn_sbpi1_divappr_q;
+    param.function2 = speed_mpn_dcpi1_divappr_q;
+    param.min_size = 4;
+    one (&dc_divappr_q_threshold, &param);
+  }
 }
 
-
-#define TUNE_REDC_2_MAX 100
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
-
 void
 tune_dc_bdiv (void)
 {
@@ -1048,6 +1054,11 @@
 void
 tune_redc (void)
 {
+#define TUNE_REDC_2_MAX 100
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
 #if WANT_REDC_2
   {
     static struct param_t  param;


More information about the gmp-commit mailing list