[Gmp-commit] /var/hg/gmp: Tune fac_ui thresholds.

mercurial at gmplib.org mercurial at gmplib.org
Sat Dec 31 10:21:24 CET 2011


details:   /var/hg/gmp/rev/e82b232a5764
changeset: 14555:e82b232a5764
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Dec 31 10:21:18 2011 +0100
description:
Tune fac_ui thresholds.

diffstat:

 ChangeLog        |  14 ++++++++++++++
 gmp-impl.h       |  17 +++++++++++++++++
 mpz/fac_ui.c     |  11 -----------
 tune/Makefile.am |  12 +++++++++---
 tune/tuneup.c    |  33 ++++++++++++++++++++++++++++++++-
 5 files changed, 72 insertions(+), 15 deletions(-)

diffs (208 lines):

diff -r 10f35b73be0d -r e82b232a5764 ChangeLog
--- a/ChangeLog	Fri Dec 30 22:10:14 2011 +0100
+++ b/ChangeLog	Sat Dec 31 10:21:18 2011 +0100
@@ -1,3 +1,17 @@
+2011-12-31  Marco Bodrato  <bodrato at mail.dm.unipi.it>
+
+	* tune/Makefile.am (fac_ui.c): New target.
+	(nodist_tuneup_SOURCES,CLEANFILES): Add fac_ui.c.
+	* tune/tuneup.c (mpz_fac_ui_tune): Declare prototype.
+	(fac_odd_threshold,fac_dsc_threshold): New global variables.
+	(speed_mpz_fac_ui_tune,tune_fac_ui): New functions.
+	(all): Call tune_fac_ui.
+	* gmp-impl.h (FAC_ODD_THRESHOLD,FAC_DSC_THRESHOLD):
+	New thresholds: default values, and setup for tuning.
+	(FAC_DSC_THRESHOLD_LIMIT): Define (when tuning).
+	* mpz/fac_ui.c (FAC_ODD_THRESHOLD,FAC_DSC_THRESHOLD):
+	Default values removed.
+
 2011-12-30  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpz/hamdist.c: Fix typo in a return statement.
diff -r 10f35b73be0d -r e82b232a5764 gmp-impl.h
--- a/gmp-impl.h	Fri Dec 30 22:10:14 2011 +0100
+++ b/gmp-impl.h	Sat Dec 31 10:21:18 2011 +0100
@@ -2148,6 +2148,14 @@
 #define SET_STR_PRECOMPUTE_THRESHOLD   2000
 #endif
 
+#ifndef FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD   100
+#endif
+
+#ifndef FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD   300
+#endif
+
 /* Return non-zero if xp,xsize and yp,ysize overlap.
    If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
    overlap.  If both these are false, there's an overlap. */
@@ -4805,6 +4813,14 @@
 #define SET_STR_PRECOMPUTE_THRESHOLD	set_str_precompute_threshold
 extern mp_size_t			set_str_precompute_threshold;
 
+#undef  FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD		fac_odd_threshold
+extern  mp_size_t     			fac_odd_threshold;
+
+#undef  FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD		fac_dsc_threshold
+extern  mp_size_t 			fac_dsc_threshold;
+
 #undef  FFT_TABLE_ATTRS
 #define FFT_TABLE_ATTRS
 extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
@@ -4828,6 +4844,7 @@
 #define SQR_TOOM8_THRESHOLD_LIMIT      1200
 #define MULLO_BASECASE_THRESHOLD_LIMIT  200
 #define GET_STR_THRESHOLD_LIMIT         150
+#define FAC_DSC_THRESHOLD_LIMIT        2048
 
 #endif /* TUNE_PROGRAM_BUILD */
 
diff -r 10f35b73be0d -r e82b232a5764 mpz/fac_ui.c
--- a/mpz/fac_ui.c	Fri Dec 30 22:10:14 2011 +0100
+++ b/mpz/fac_ui.c	Sat Dec 31 10:21:18 2011 +0100
@@ -432,13 +432,7 @@
 /* Section oddfac: odd factorial, needed also by binomial*/
 /*********************************************************/
 
-/* FIXME: should be tuned */
-#ifndef FAC_DSC_THRESHOLD
-#define FAC_DSC_THRESHOLD 210
-#endif
-
 #if TUNE_PROGRAM_BUILD
-#define FAC_DSC_THRESHOLD_LIMIT 1024
 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
 #else
 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
@@ -507,11 +501,6 @@
 /* Section factorial: fast factorial implementations     */
 /*********************************************************/
 
-/* FIXME: should be tuned */
-#ifndef FAC_ODD_THRESHOLD
-#define FAC_ODD_THRESHOLD 93
-#endif
-
 /* mpz_dsc_oddfac_1 computes the odd part of the factorial of the parameter n.
    I.e. n! = x 2^a, the result x is an odd positive integer.
 
diff -r 10f35b73be0d -r e82b232a5764 tune/Makefile.am
--- a/tune/Makefile.am	Fri Dec 30 22:10:14 2011 +0100
+++ b/tune/Makefile.am	Sat Dec 31 10:21:18 2011 +0100
@@ -1,6 +1,7 @@
 ## Process this file with automake to generate Makefile.in
 
-# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
 #
 # This file is part of the GNU MP Library.
 #
@@ -83,7 +84,7 @@
 speed_ext_LDFLAGS = $(STATIC)
 
 tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
 tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
 tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
 tuneup_LDFLAGS = $(STATIC)
@@ -101,7 +102,7 @@
 
 # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
 CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
-	$(TUNE_MPN_SRCS) sqr_asm.asm \
+	$(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
 	stg.gnuplot stg.data \
 	mtg.gnuplot mtg.data \
 	fibg.gnuplot fibg.data \
@@ -158,5 +159,10 @@
 	echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
 	echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
 
+# FIXME: Should it depend on $(top_builddir)/mpz/fac_ui.h too?
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+	echo "#define TUNE_PROGRAM_BUILD 1"          >fac_ui.c
+	echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+	echo "#include \"mpz/fac_ui.c\""             >>fac_ui.c
 
 include ../mpn/Makeasm.am
diff -r 10f35b73be0d -r e82b232a5764 tune/tuneup.c
--- a/tune/tuneup.c	Fri Dec 30 22:10:14 2011 +0100
+++ b/tune/tuneup.c	Sat Dec 31 10:21:18 2011 +0100
@@ -214,6 +214,8 @@
 mp_size_t  get_str_precompute_threshold = MP_SIZE_T_MAX;
 mp_size_t  set_str_dc_threshold         = MP_SIZE_T_MAX;
 mp_size_t  set_str_precompute_threshold = MP_SIZE_T_MAX;
+mp_size_t  fac_odd_threshold            = MP_SIZE_T_MAX;
+mp_size_t  fac_dsc_threshold            = FAC_DSC_THRESHOLD_LIMIT;
 
 mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
 mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
@@ -366,12 +368,15 @@
 }
 
 
-/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */
+/* Measuring for recompiled mpn/generic/divrem_1.c, mpn/generic/mod_1.c
+ * and mpz/fac_ui.c */
 
 mp_limb_t mpn_divrem_1_tune
   __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
 mp_limb_t mpn_mod_1_tune
    __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+void mpz_fac_ui_tune
+   __GMP_PROTO ((mpz_ptr, unsigned long));
 
 double
 speed_mpn_mod_1_tune (struct speed_params *s)
@@ -383,6 +388,11 @@
 {
   SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
 }
+double
+speed_mpz_fac_ui_tune (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);
+}
 
 
 double
@@ -2612,6 +2622,24 @@
 }
 
 void
+tune_fac_ui (void)
+{
+  static struct param_t  param;
+
+  param.function = speed_mpz_fac_ui_tune;
+
+  param.name = "FAC_ODD_THRESHOLD";
+  param.min_size = 3;
+  param.min_is_always = 0;
+  one (&fac_odd_threshold, &param);
+
+  param.name = "FAC_DSC_THRESHOLD";
+  param.min_size = MAX (32, fac_odd_threshold);
+  param.max_size = FAC_DSC_THRESHOLD_LIMIT;
+  one (&fac_dsc_threshold, &param);
+}
+
+void
 all (void)
 {
   time_t  start_time, end_time;
@@ -2747,6 +2775,9 @@
   tune_set_str ();
   printf("\n");
 
+  tune_fac_ui ();
+  printf("\n");
+
   time (&end_time);
   printf ("/* Tuneup completed successfully, took %ld seconds */\n",
           (long) (end_time - start_time));


More information about the gmp-commit mailing list