[Gmp-commit] /home/hgfiles/gmp: Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.

mercurial at gmplib.org mercurial at gmplib.org
Wed Dec 2 02:01:19 CET 2009


details:   /home/hgfiles/gmp/rev/4c2a39802af2
changeset: 12956:4c2a39802af2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Dec 02 02:01:15 2009 +0100
description:
Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.

diffstat:

 ChangeLog        |  13 ++++++
 gmp-impl.h       |   8 ++++
 tune/Makefile.am |   3 +-
 tune/common.c    |  22 +++++++++++
 tune/speed.h     |  82 +++++++++++++++++++++++++++++++++++++++++
 tune/tuneup.c    |  29 +++++++++++++-
 6 files changed, 153 insertions(+), 4 deletions(-)

diffs (249 lines):

diff -r 68caff9d2bf8 -r 4c2a39802af2 ChangeLog
--- a/ChangeLog	Tue Dec 01 21:17:22 2009 +0100
+++ b/ChangeLog	Wed Dec 02 02:01:15 2009 +0100
@@ -1,3 +1,16 @@
+2009-12-02  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c: Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.
+	(tune_dc_bdiv): New function.
+	(tune_dc_div): New name for tune_dc.
+	* tune/speed.h (SPEED_ROUTINE_PI1_BDIV_QR, SPEED_ROUTINE_PI1_BDIV_Q):
+	New macros.
+	* tune/common.c (speed_mpn_sbpi1_bdiv_qr, speed_mpn_dcpi1_bdiv_qr,
+	speed_mpn_sbpi1_bdiv_q, speed_mpn_dcpi1_bdiv_q): New functions.
+	* gmp-impl.h: Provide declarations for corresponding threshold vars.
+	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add dcpi1_bdiv_qr.c and
+	dcpi1_bdiv_q.c.
+	
 2009-12-01  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/toom53_mul.c: Removed double computation of vinf.
diff -r 68caff9d2bf8 -r 4c2a39802af2 gmp-impl.h
--- a/gmp-impl.h	Tue Dec 01 21:17:22 2009 +0100
+++ b/gmp-impl.h	Wed Dec 02 02:01:15 2009 +0100
@@ -4146,6 +4146,14 @@
 #define DC_DIV_QR_THRESHOLD          dc_div_qr_threshold
 extern mp_size_t                     dc_div_qr_threshold;
 
+#undef  DC_BDIV_Q_THRESHOLD
+#define DC_BDIV_Q_THRESHOLD          dc_bdiv_q_threshold
+extern mp_size_t                     dc_bdiv_q_threshold;  /* defined in tune/tuneup.c */
+
+#undef  DC_BDIV_QR_THRESHOLD
+#define DC_BDIV_QR_THRESHOLD         dc_bdiv_qr_threshold
+extern mp_size_t                     dc_bdiv_qr_threshold;  /* defined in tune/tuneup.c */
+
 #undef  REDC_1_TO_REDC_2_THRESHOLD
 #define REDC_1_TO_REDC_2_THRESHOLD   redc_1_to_redc_2_threshold
 extern mp_size_t                     redc_1_to_redc_2_threshold;
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/Makefile.am
--- a/tune/Makefile.am	Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/Makefile.am	Wed Dec 02 02:01:15 2009 +0100
@@ -122,7 +122,8 @@
 # recompiled object will be rebuilt if that file changes.
 
 TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c divrem_2.c gcd.c gcdext.c get_str.c \
+TUNE_MPN_SRCS_BASIC = dcpi1_div_qr.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
+  divrem_2.c gcd.c gcdext.c get_str.c					\
   set_str.c matrix22_mul.c hgcd.c mul_n.c sqr_n.c			\
   mullow_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c			\
   toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/common.c
--- a/tune/common.c	Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/common.c	Wed Dec 02 02:01:15 2009 +0100
@@ -736,6 +736,28 @@
 {
   SPEED_ROUTINE_MPZ_MOD (mpz_mod);
 }
+
+double
+speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);  /* macro is the whole body: uses s by name, returns the time */
+}
+double
+speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
+{
+  SPEED_ROUTINE_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);  /* macro is the whole body: uses s by name, returns the time */
+}
+double
+speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);  /* macro is the whole body: uses s by name, returns the time */
+}
+double
+speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
+{
+  SPEED_ROUTINE_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);  /* macro is the whole body: uses s by name, returns the time */
+}
+
 double
 speed_mpn_redc_1 (struct speed_params *s)
 {
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/speed.h
--- a/tune/speed.h	Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/speed.h	Wed Dec 02 02:01:15 2009 +0100
@@ -235,6 +235,10 @@
 double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dcpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_redc_2 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_redc_n __GMP_PROTO ((struct speed_params *s));
@@ -1298,6 +1302,84 @@
     return speed_endtime ();						\
   }
 
+#define SPEED_ROUTINE_PI1_BDIV_QR(function)				\
+  {									\
+    unsigned   i;							\
+    mp_ptr     dp, tp, ap, qp;						\
+    mp_limb_t  inv;							\
+    double     t;							\
+    TMP_DECL;								\
+    /* Expands to a whole function body; reads s, returns t.  */	\
+    SPEED_RESTRICT_COND (s->size >= 1);					\
+									\
+    TMP_MARK;								\
+    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp);			\
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
+    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2);		\
+    /* Dividend ap: s->xp copied twice, giving 2*size limbs.  */	\
+    MPN_COPY (ap,         s->xp, s->size);				\
+    MPN_COPY (ap+s->size, s->xp, s->size);				\
+									\
+    /* divisor must be odd */						\
+    MPN_COPY (dp, s->yp, s->size);					\
+    dp[0] |= 1;								\
+    binvert_limb (inv, dp[0]);						\
+									\
+    speed_operand_src (s, ap, 2*s->size);				\
+    speed_operand_dst (s, tp, 2*s->size);				\
+    speed_operand_src (s, dp, s->size);					\
+    speed_operand_dst (s, qp, s->size);					\
+    speed_cache_fill (s);						\
+    /* Each rep reloads tp from ap between starttime and endtime.  */	\
+    speed_starttime ();							\
+    i = s->reps;							\
+    do {								\
+      MPN_COPY (tp, ap, 2*s->size);					\
+      function (qp, tp, 2*s->size, dp, s->size, inv);			\
+    } while (--i != 0);							\
+    t = speed_endtime ();						\
+									\
+    TMP_FREE;								\
+    return t;								\
+  }
+#define SPEED_ROUTINE_PI1_BDIV_Q(function)				\
+  {									\
+    unsigned   i;							\
+    mp_ptr     dp, tp, qp;						\
+    mp_limb_t  inv;							\
+    double     t;							\
+    TMP_DECL;								\
+    /* Expands to a whole function body; reads s, returns t.  */	\
+    SPEED_RESTRICT_COND (s->size >= 1);					\
+									\
+    TMP_MARK;								\
+    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
+    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
+    SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2);			\
+									\
+    /* divisor must be odd */						\
+    MPN_COPY (dp, s->yp, s->size);					\
+    dp[0] |= 1;								\
+    binvert_limb (inv, dp[0]);						\
+    /* Dividend is s->xp itself: size limbs, same length as dp.  */	\
+    speed_operand_src (s, s->xp, s->size);				\
+    speed_operand_dst (s, tp, s->size);					\
+    speed_operand_src (s, dp, s->size);					\
+    speed_operand_dst (s, qp, s->size);					\
+    speed_cache_fill (s);						\
+    /* Each rep reloads tp from s->xp between starttime and endtime. */ \
+    speed_starttime ();							\
+    i = s->reps;							\
+    do {								\
+      MPN_COPY (tp, s->xp, s->size);					\
+      function (qp, tp, s->size, dp, s->size, inv);			\
+    } while (--i != 0);							\
+    t = speed_endtime ();						\
+									\
+    TMP_FREE;								\
+    return t;								\
+  }
 
 #define SPEED_ROUTINE_REDC_1(function)					\
   {									\
diff -r 68caff9d2bf8 -r 4c2a39802af2 tune/tuneup.c
--- a/tune/tuneup.c	Tue Dec 01 21:17:22 2009 +0100
+++ b/tune/tuneup.c	Wed Dec 02 02:01:15 2009 +0100
@@ -165,6 +165,8 @@
 mp_size_t  mulmod_bnm1_threshold        = MP_SIZE_T_MAX;
 mp_size_t  div_sb_preinv_threshold      = MP_SIZE_T_MAX;
 mp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  dc_bdiv_q_threshold          = MP_SIZE_T_MAX;  /* read via DC_BDIV_Q_THRESHOLD (gmp-impl.h) */
+mp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;  /* read via DC_BDIV_QR_THRESHOLD (gmp-impl.h) */
 mp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;
 mp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;
 mp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;
@@ -993,7 +995,7 @@
 
 
 void
-tune_dc (void)
+tune_dc_div (void)
 {
   static struct param_t  param;
   param.name = "DC_DIV_QR_THRESHOLD";
@@ -1009,6 +1011,27 @@
 #endif
 
 void
+tune_dc_bdiv (void)  /* hands dc_bdiv_qr_threshold, then dc_bdiv_q_threshold, to one() */
+{
+  {
+    static struct param_t  param;  /* static storage: fields not assigned below start as zero */
+    param.name = "DC_BDIV_QR_THRESHOLD";
+    param.function = speed_mpn_sbpi1_bdiv_qr;
+    param.function2 = speed_mpn_dcpi1_bdiv_qr;
+    param.min_size = 2;
+    one (&dc_bdiv_qr_threshold, &param);  /* NOTE(review): one() not shown; presumably stores the measured crossover -- confirm */
+  }
+  {
+    static struct param_t  param;  /* separate object from the QR block's param above */
+    param.name = "DC_BDIV_Q_THRESHOLD";
+    param.function = speed_mpn_sbpi1_bdiv_q;
+    param.function2 = speed_mpn_dcpi1_bdiv_q;
+    param.min_size = 4;
+    one (&dc_bdiv_q_threshold, &param);
+  }
+}
+
+void
 tune_redc (void)
 {
 #if WANT_REDC_2
@@ -1051,7 +1074,6 @@
     one (&redc_1_to_redc_n_threshold, &param);
   }
 #endif
-
 }
 
 void
@@ -1860,7 +1882,8 @@
   tune_mulmod_bnm1 ();
   printf("\n");
 
-  tune_dc ();
+  tune_dc_div ();
+  tune_dc_bdiv ();
   tune_redc ();
   printf("\n");
 


More information about the gmp-commit mailing list