[Gmp-commit] /home/hgfiles/gmp: 5 new changesets

Thu Dec 24 23:42:22 CET 2009

details:   /home/hgfiles/gmp/rev/611a9d0b331e
changeset: 13212:611a9d0b331e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 24 11:14:38 2009 +0100
description:
(nodist_EXTRA_libmpn_la_SOURCES): Add missing division files.

details:   /home/hgfiles/gmp/rev/6e2403ec58e2
changeset: 13213:6e2403ec58e2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 24 12:17:14 2009 +0100
description:
Get ASSERT right.

details:   /home/hgfiles/gmp/rev/849ad411525a
changeset: 13214:849ad411525a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 24 23:12:44 2009 +0100
description:
Trivial merge.

details:   /home/hgfiles/gmp/rev/eba295d509c7
changeset: 13215:eba295d509c7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 24 23:34:31 2009 +0100
description:
Remove declarations of some unused speed_* variables.

details:   /home/hgfiles/gmp/rev/7443ba3eacc7
changeset: 13216:7443ba3eacc7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 24 23:42:19 2009 +0100
description:
Completely respin tuning of mod_1 function family.

diffstat:

 ChangeLog             |   31 +++++++++++++++
 gmp-impl.h            |   91 +++++++++++++++++++++++++------------------
 mpn/Makefile.am       |   10 +++-
 mpn/generic/mod_1.c   |   24 ++++++----
 mpn/generic/mod_1_2.c |    2 +-
 mpn/generic/mod_1_3.c |    2 +-
 mpn/generic/mod_1_4.c |    2 +-
 mpn/generic/mul_n.c   |   36 +++++++++--------
 mpn/generic/perfsqr.c |   24 +++++-----
 mpn/generic/sqr_n.c   |   36 +++++++++--------
 tune/mod_1_div.c      |    4 +
 tune/mod_1_inv.c      |    4 +
 tune/speed.h          |    2 -
 tune/tuneup.c         |  103 ++++++++++++++++++++++++++++++++++++++++---------
 14 files changed, 249 insertions(+), 122 deletions(-)

diffs (truncated from 703 to 300 lines):

diff -r 4689c4513d05 -r 7443ba3eacc7 ChangeLog

--- a/ChangeLog	Thu Dec 24 11:09:20 2009 +0100
+++ b/ChangeLog	Thu Dec 24 23:42:19 2009 +0100
@@ -1,5 +1,36 @@
 2009-12-24  Torbjorn Granlund  <tege at gmplib.org>
 
+	* tune/mod_1_div.c (MOD_1N_TO_MOD_1_1_THRESHOLD)
+	(MOD_1U_TO_MOD_1_1_THRESHOLD): Set.
+	* tune/mod_1_inv.c (MOD_1N_TO_MOD_1_1_THRESHOLD)
+	(MOD_1U_TO_MOD_1_1_THRESHOLD): Set.
+
+	* gmp-impl.h (USE_PREINV_MOD_1): Remove.
+	(MPN_MOD_OR_PREINV_MOD_1): Define to choose functions dynamically in
+	terms of PREINV_MOD_1_TO_MOD_1_THRESHOLD (used to choose statically
+	using USE_PREINV_MOD_1).
+	* mpn/generic/perfsqr.c (PERFSQR_MOD_PP): Corresponding updates.
+
+	* tune/tuneup.c (tune_mod_1): Rewrite.
+	* gmp-impl.h (MOD_1N_TO_MOD_1_1_THRESHOLD): New.
+	(MOD_1U_TO_MOD_1_1_THRESHOLD): New name for MOD_1_1_THRESHOLD.
+	(MOD_1_1_TO_MOD_1_2_THRESHOLD): New name for MOD_1_2_THRESHOLD.
+	(MOD_1_2_TO_MOD_1_4_THRESHOLD): New name for MOD_1_4_THRESHOLD.
+	* mpn/generic/mod_1.c: Corresponding updates.
+
+2009-12-24  Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/generic/mul_n.c: Use also toom6h and toom8h.
+	* mpn/generic/sqr_n.c: Use also toom6 and toom8.
+	* gmp-impl.h: Initial support for tuning of Toom-6half and Toom-8half.
+	* tune/tuneup.c: Tune Toom-6half and Toom-8half thresholds.
+
+2009-12-24  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/mod_1_4.c: Get ASSERT right.
+	* mpn/generic/mod_1_3.c: Likewise.
+	* mpn/generic/mod_1_2.c: Likewise.
+
 	* mpn/generic/powm_sec.c: Use SQR_TOOM2_THRESHOLD as limit for a native
 	mpn_sqr_basecase, not TUNE_SQR_TOOM2_MAX.
 
diff -r 4689c4513d05 -r 7443ba3eacc7 gmp-impl.h
--- a/gmp-impl.h	Thu Dec 24 11:09:20 2009 +0100
+++ b/gmp-impl.h	Thu Dec 24 23:42:19 2009 +0100
@@ -589,7 +589,6 @@
 #undef MOD_1_NORM_THRESHOLD
 #undef MOD_1_UNNORM_THRESHOLD
 #undef USE_PREINV_DIVREM_1
-#undef USE_PREINV_MOD_1
 #undef DIVREM_2_THRESHOLD
 #undef DIVEXACT_1_THRESHOLD
 #undef MODEXACT_1_ODD_THRESHOLD
@@ -598,7 +597,6 @@
 #define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
 #define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
 #define USE_PREINV_DIVREM_1               0  /* no preinv */
-#define USE_PREINV_MOD_1                  0  /* no preinv */
 #define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
 
 /* mpn/generic/mul_fft.c is not nails-capable. */
@@ -1035,10 +1033,6 @@
        && (size) >= (thresh)))
 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
 
-#if WANT_FFT
-#define MPN_TOOM44_MAX_N 285405
-#endif /* WANT_FFT */
-
 #define MPN_TOOM22_MUL_MINSIZE    4
 #define MPN_TOOM2_SQR_MINSIZE     4
 
@@ -2753,7 +2747,7 @@
 
 
    NOTE: Output variables are updated multiple times. Only some inputs
-   and outputs may overlap.                                              
+   and outputs may overlap.
 */
 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
   do {									\
@@ -2789,16 +2783,12 @@
 #endif
 
 
-/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
-   the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
-   mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
-   the few CISC chips where preinv is not good have defines saying so.  */
+/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
+   plain mpn_divrem_1.  The default is yes, since the few CISC chips where
+   preinv is not good have defines saying so.  */
 #ifndef USE_PREINV_DIVREM_1
 #define USE_PREINV_DIVREM_1   1
 #endif
-#ifndef USE_PREINV_MOD_1
-#define USE_PREINV_MOD_1   1
-#endif
 
 #if USE_PREINV_DIVREM_1
 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
@@ -2808,13 +2798,16 @@
   mpn_divrem_1 (qp, xsize, ap, size, d)
 #endif
 
-#if USE_PREINV_MOD_1
-#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
-  mpn_preinv_mod_1 (src, size, divisor, inverse)
-#else
-#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
-  mpn_mod_1 (src, size, divisor)
-#endif
+#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#endif
+
+/* This selection may seem backwards.  The reason mpn_mod_1 typically takes
+   over for larger sizes is that it uses the mod_1_1 function.  */
+#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       	\
+  (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD)		\
+   ? mpn_preinv_mod_1 (src, size, divisor, inverse)			\
+   : mpn_mod_1 (src, size, divisor))
 
 
 #ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
@@ -4207,6 +4200,14 @@
 #define MUL_TOOM44_THRESHOLD         mul_toom44_threshold
 extern mp_size_t                     mul_toom44_threshold;
 
+#undef  MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD         mul_toom6h_threshold
+extern mp_size_t                     mul_toom6h_threshold;
+
+#undef  MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD         mul_toom8h_threshold
+extern mp_size_t                     mul_toom8h_threshold;
+
 #undef  MUL_TOOM32_TO_TOOM43_THRESHOLD
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
 extern mp_size_t                       mul_toom32_to_toom43_threshold;
@@ -4259,6 +4260,14 @@
 #define SQR_TOOM4_THRESHOLD          sqr_toom4_threshold
 extern mp_size_t                     sqr_toom4_threshold;
 
+#undef  SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD          sqr_toom6_threshold
+extern mp_size_t                     sqr_toom6_threshold;
+
+#undef  SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD          sqr_toom8_threshold
+extern mp_size_t                     sqr_toom8_threshold;
+
 #undef SQR_FFT_THRESHOLD
 #define SQR_FFT_THRESHOLD            sqr_fft_threshold
 extern mp_size_t                     sqr_fft_threshold;
@@ -4369,29 +4378,29 @@
 #define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold
 extern mp_size_t                     divrem_1_unnorm_threshold;
 
-#undef MOD_1_NORM_THRESHOLD
+#undef  MOD_1_NORM_THRESHOLD
 #define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold
 extern mp_size_t                     mod_1_norm_threshold;
 
-#undef MOD_1_UNNORM_THRESHOLD
+#undef  MOD_1_UNNORM_THRESHOLD
 #define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold
 extern mp_size_t                     mod_1_unnorm_threshold;
 
-#undef MOD_1_1_THRESHOLD
-#define MOD_1_1_THRESHOLD            mod_1_1_threshold
-extern mp_size_t                     mod_1_1_threshold;
-
-#undef MOD_1_2_THRESHOLD
-#define MOD_1_2_THRESHOLD            mod_1_2_threshold
-extern mp_size_t                     mod_1_2_threshold;
-
-#undef MOD_1_3_THRESHOLD
-#define MOD_1_3_THRESHOLD            mod_1_3_threshold
-extern mp_size_t                     mod_1_3_threshold;
-
-#undef MOD_1_4_THRESHOLD
-#define MOD_1_4_THRESHOLD            mod_1_4_threshold
-extern mp_size_t                     mod_1_4_threshold;
+#undef  MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD  mod_1n_to_mod_1_1_threshold
+extern mp_size_t                     mod_1n_to_mod_1_1_threshold;
+
+#undef  MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD  mod_1u_to_mod_1_1_threshold
+extern mp_size_t                     mod_1u_to_mod_1_1_threshold;
+
+#undef  MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
+extern mp_size_t                     mod_1_1_to_mod_1_2_threshold;
+
+#undef  MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
+extern mp_size_t                     mod_1_2_to_mod_1_4_threshold;
 
 #if ! UDIV_PREINV_ALWAYS
 #undef  DIVREM_2_THRESHOLD
@@ -4442,6 +4451,10 @@
 #define SQR_TOOM3_THRESHOLD_LIMIT       400
 #define MUL_TOOM44_THRESHOLD_LIMIT     1000
 #define SQR_TOOM4_THRESHOLD_LIMIT      1000
+#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
+#define SQR_TOOM6_THRESHOLD_LIMIT      1100
+#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
+#define SQR_TOOM8_THRESHOLD_LIMIT      1200
 #define MULLO_BASECASE_THRESHOLD_LIMIT  200
 #define GET_STR_THRESHOLD_LIMIT         150
 
@@ -4478,7 +4491,7 @@
 #define mpn_toom4_sqr_itch(an) \
   (3 * (an) + GMP_NUMB_BITS)
 
-#define mpn_toom6_sqr_itch(n)					\
+#define mpn_toom6_sqr_itch(n)						\
 ( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
    MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
        mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
diff -r 4689c4513d05 -r 7443ba3eacc7 mpn/Makefile.am
--- a/mpn/Makefile.am	Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/Makefile.am	Thu Dec 24 23:42:19 2009 +0100
@@ -36,7 +36,11 @@
   addmul_7.c addmul_8.c							    \
   and_n.c andn_n.c							    \
   cmp.c com_n.c copyd.c copyi.c						    \
-  dc_divrem_n.c dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c   \
+  dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c		    \
+  sbpi1_bdiv_qr.c sbpi1_bdiv_q.c					    \
+  sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c			    \
+  dcpi1_bdiv_qr.c dcpi1_bdiv_q.c					    \
+  dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c			    \
   dump.c fib2_ui.c gcd.c						    \
   gcd_1.c gcdext.c get_d.c get_str.c					    \
   hamdist.c hgcd2.c hgcd.c invert_limb.c				    \
@@ -50,7 +54,7 @@
   toom44_mul.c								    \
   toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c			    \
   toom_couple_handling.c						    \
-  sqr_toom2.c sqr_toom3.c sqr_toom4.c					    \
+  toom2_sqr.c toom3_sqr.c toom4_sqr.c					    \
   toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c				    \
   toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c    \
   toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c   \
@@ -58,7 +62,7 @@
   invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c		    \
   mullo_n.c mullo_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c	    \
   popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
-  rootrem.c sb_divrem_mn.c scan0.c scan1.c set_str.c			    \
+  rootrem.c scan0.c scan1.c set_str.c			    \
   sqr_basecase.c sqr_diagonal.c						    \
   sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c				    \
   tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
diff -r 4689c4513d05 -r 7443ba3eacc7 mpn/generic/mod_1.c
--- a/mpn/generic/mod_1.c	Thu Dec 24 11:09:20 2009 +0100
+++ b/mpn/generic/mod_1.c	Thu Dec 24 23:42:19 2009 +0100
@@ -42,16 +42,20 @@
 #define MOD_1_UNNORM_THRESHOLD  0
 #endif
 
-#ifndef MOD_1_1_THRESHOLD
-#define MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
 #endif
 
-#ifndef MOD_1_2_THRESHOLD
-#define MOD_1_2_THRESHOLD  10
+#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
+#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
 #endif
 
-#ifndef MOD_1_4_THRESHOLD
-#define MOD_1_4_THRESHOLD  120
+#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
+#endif
+
+#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
 #endif
 
 
@@ -207,7 +211,7 @@
 
   if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
     {
-      if (BELOW_THRESHOLD (n, MOD_1_1_THRESHOLD))
+      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
 	{
 	  return mpn_mod_1_norm (ap, n, b);
 	}
@@ -220,17 +224,17 @@
     }
   else
     {
-      if (BELOW_THRESHOLD (n, MOD_1_1_THRESHOLD))
+      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
 	{