[Gmp-commit] /home/hgfiles/gmp: 5 new changesets

Thu Dec 17 15:23:31 CET 2009

details:   /home/hgfiles/gmp/rev/fc2c984642b3
changeset: 13106:fc2c984642b3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 17 14:36:34 2009 +0100
description:
More files added.

details:   /home/hgfiles/gmp/rev/34a6709c3145
changeset: 13107:34a6709c3145
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 17 15:08:25 2009 +0100
description:
Trivial merge.

details:   /home/hgfiles/gmp/rev/694b9fec1869
changeset: 13108:694b9fec1869
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 17 15:10:37 2009 +0100
description:
Renew default values for all THRESHOLDs.

details:   /home/hgfiles/gmp/rev/6bf655072bec
changeset: 13109:6bf655072bec
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 17 15:16:59 2009 +0100
description:
Update References section.  Update Contributors section.  Misc updates.

details:   /home/hgfiles/gmp/rev/054bd0e275bf
changeset: 13110:054bd0e275bf
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Dec 17 15:23:26 2009 +0100
description:
Trivial merge.

diffstat:

 AUTHORS                      |    8 ++-
 ChangeLog                    |   31 +++++++++++-
 configure.in                 |    3 +-
 doc/gmp.texi                 |   45 ++++++++++++------
 gmp-impl.h                   |   77 ++++++++++++++++---------------
 mpn/generic/dcpi1_div_qr.c   |  103 ++++++++++++++++++++++++++++++++----------
 mpn/generic/nussbaumer_mul.c |   53 ++++++++++++++++++++++
 tune/Makefile.am             |    2 +-
 tune/common.c                |    7 ++
 tune/speed.c                 |    1 +
 tune/speed.h                 |    1 +
 11 files changed, 242 insertions(+), 89 deletions(-)

diffs (truncated from 605 to 300 lines):

diff -r 654a87b4c936 -r 054bd0e275bf AUTHORS

--- a/AUTHORS	Thu Dec 17 09:06:08 2009 +0100
+++ b/AUTHORS	Thu Dec 17 15:23:26 2009 +0100
@@ -34,12 +34,14 @@
 			gcdext_subdiv_step.c, gcdext_lehmer.c,
 			toom_interpolate_7pts, mulmod_bnm1.c, dcpi1_bdiv_qr.c,
 			dcpi1_bdiv_q.c, sbpi1_bdiv_qr.c, sbpi1_bdiv_q.c,
-			mpz/nextprime.c.
+			toom_eval_dgr3_pm1.c, toom_eval_dgr3_pm2.c,
+			toom_eval_pm1.c, toom_eval_pm2.c, toom_eval_pm2exp.c,
+			divexact.c, mpz/nextprime.c.
 
 Marco Bodrato		mpn/generic/toom44_mul.c, toom4_sqr.c, toom53_mul.c,
 			toom62_mul.c, toom43_mul.c, toom52_mul.c,
-			toom_interpolate_6pts.c, mulmod_bnm1.c, invert.c,
-			invertappr.c.
+			toom_interpolate_6pts.c, mulmod_bnm1.c,
+			toom_eval_pm2.c, invert.c, invertappr.c.
 
 David Harvey		mpn/x86_64/mul_basecase.asm
 
diff -r 654a87b4c936 -r 054bd0e275bf ChangeLog
--- a/ChangeLog	Thu Dec 17 09:06:08 2009 +0100
+++ b/ChangeLog	Thu Dec 17 15:23:26 2009 +0100
@@ -1,10 +1,33 @@
+2009-12-17  Torbjorn Granlund  <tege at gmplib.org>
+
+	* doc/gmp.texi: Update References section.  Update Contributors
+	section.  Misc updates.
+
+	* gmp-impl.h: Renew default values for all THRESHOLDs.
+
+2009-12-17  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/dcpi1_div_qr.c (mpn_dcpi1_div_qr): Added some input
+	asserts.
+
+	* mpn/generic/dcpi1_div_qr.c (mpn_dcpi1_div_qr): In the case that
+	the initial quotient block is a single limb, use 3/2 division,
+	thereby eliminating the only use of gmp_pi1_t->inv21.
+
 2009-12-15  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/invert.c: Added some comment.
 	* mpn/generic/invertappr.c: Slightly better threshold handling.
-
 	* gmp-impl.h (INV_NEWTON_THRESHOLD): Default to 200.
 
+	* mpn/generic/nussbaumer_mul.c: New file.
+	* configure.in (gmp_mpn_functions): Add nussbaumer_mul.
+	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add nussbaumer_mul.
+	* gmp-impl.h (mpn_nussbaumer_mul): Added prototype and name-mangling.
+	* tune/speed.h (speed_mpn_nussbaumer_mul): Declare function.
+	* tune/common.c (speed_mpn_nussbaumer_mul): New function.
+	* tune/speed.c (routine): Add speed_mpn_nussbaumer_mul.
+
 2009-12-17  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/bdiv_q.c (mpn_bdiv_q_itch): Rewrite.
@@ -119,7 +142,7 @@
 	(tune_invertappr): Min for INV_NEWTON_THRESHOLD.
 
 	* tune/speed.h (SPEED_ROUTINE_MPN_NI_INVERTAPPR): New macro.
-	(speed_mpn_ni_invertappr): New function.
+	(speed_mpn_ni_invertappr): Declare function.
 	* tune/common.c (speed_mpn_ni_invertappr): New function.
 	* tune/speed.c (routine): Add speed_mpn_ni_invertappr.
 
@@ -140,7 +163,7 @@
 	(mpn_invertappr_itch): Added prototype and name-mangling.
 	(INV_APPR_THRESHOLD): Support for a new tunable const.
 	* tune/speed.h (SPEED_ROUTINE_MPN_INVERTAPPR): New macro.
-	(speed_mpn_invertappr): New function.
+	(speed_mpn_invertappr): Declare function.
 	* tune/common.c (speed_mpn_invertappr): New function.
 	* tune/speed.c (routine): Add speed_mpn_invertappr.
 	* tune/tuneup.c (tune_invertappr): New function: was tune_invert.
@@ -280,7 +303,7 @@
 
 2009-12-06  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
-	* tests/devel/try.c(try_one): DATA_SRC0_HIGHBIT sets the high bit.
+	* tests/devel/try.c (try_one): DATA_SRC0_HIGHBIT sets the high bit.
 
 2009-12-05  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
diff -r 654a87b4c936 -r 054bd0e275bf configure.in
--- a/configure.in	Thu Dec 17 09:06:08 2009 +0100
+++ b/configure.in	Thu Dec 17 15:23:26 2009 +0100
@@ -2481,7 +2481,8 @@
   submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2     \
   fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump		   \
   mod_1_1 mod_1_2 mod_1_3 mod_1_4					   \
-  mul mul_fft mul_n sqr_n mul_basecase sqr_basecase random random2 pow_1   \
+  mul mul_fft mul_n sqr_n mul_basecase sqr_basecase nussbaumer_mul	   \
+  random random2 pow_1							   \
   rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp	   \
   perfsqr perfpow							   \
   bdivmod gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step		   \
diff -r 654a87b4c936 -r 054bd0e275bf doc/gmp.texi
--- a/doc/gmp.texi	Thu Dec 17 09:06:08 2009 +0100
+++ b/doc/gmp.texi	Thu Dec 17 15:23:26 2009 +0100
@@ -2998,7 +2998,7 @@
 @end deftypefun
 
 @deftypefun void mpz_init2 (mpz_t @var{x}, unsigned long @var{n})
-Initialize @var{x}, with space for @var{n} bit numbers, and set its value to 0.
+Initialize @var{x}, with space for @var{n}-bit numbers, and set its value to 0.
 Calling this function instead of @code{mpz_init} or @code{mpz_inits} is never
 necessary; reallocation is handled automatically by GMP when needed.
 
@@ -4190,8 +4190,8 @@
 @end deftypefun
 
 @deftypefun void mpq_clear (mpq_t @var{x})
-Free the space occupied by @var{x}.  Make sure to call this
-function for all @code{mpq_t} variables when you are done with them.
+Free the space occupied by @var{x}.  Make sure to call this function for all
+ at code{mpq_t} variables when you are done with them.
 @end deftypefun
 
 @deftypefun void mpq_clears (mpq_t @var{x}, ...)
@@ -5169,8 +5169,7 @@
 product's most significant limb is zero.  No overlap is permitted between the
 destination and either source.
 
-This function requires that @var{s1n} is greater than or equal to
- at var{s2n}.
+This function requires that @var{s1n} is greater than or equal to @var{s2n}.
 @end deftypefun
 
 @deftypefun void mpn_sqr_n (mp_limb_t *@var{rp}, const mp_limb_t *@var{s1p}, mp_size_t @var{n})
@@ -8154,14 +8153,14 @@
 high to low, either with a hardware divide instruction or a multiplication by
 inverse, whichever is best on a given CPU.
 
-The multiply by inverse follows section 8 of ``Division by Invariant Integers
-using Multiplication'' by Granlund and Montgomery (@pxref{References}) and is
-implemented as @code{udiv_qrnnd_preinv} in @file{gmp-impl.h}.  The idea is to
-have a fixed-point approximation to @math{1/d} (see @code{invert_limb}) and
-then multiply by the high limb (plus one bit) of the dividend to get a
-quotient @math{q}.  With @math{d} normalized (high bit set), @math{q} is no
-more than 1 too small.  Subtracting @m{qd,q*d} from the dividend gives a
-remainder, and reveals whether @math{q} or @math{q-1} is correct.
+The multiply by inverse follows ``Improved division by invariant integers'' by
+M@"oller and Granlund (@pxref{References}) and is implemented as
+ at code{udiv_qrnnd_preinv} in @file{gmp-impl.h}.  The idea is to have a
+fixed-point approximation to @math{1/d} (see @code{invert_limb}) and then
+multiply by the high limb (plus one bit) of the dividend to get a quotient
+ at math{q}.  With @math{d} normalized (high bit set), @math{q} is no more than 1
+too small.  Subtracting @m{qd,q*d} from the dividend gives a remainder, and
+reveals whether @math{q} or @math{q-1} is correct.
 
 The result is a division done with two multiplications and four or five
 arithmetic operations.  On CPUs with low latency multipliers this can be much
@@ -10399,6 +10398,11 @@
 Analytic Number Theory and Computational Complexity'', Wiley, 1998.
 
 @item
+Richard Crandall and Carl Pomerance, ``Prime Numbers: A Computational
+Perspective'', 2nd edition, Springer-Verlag, 2005.
+ at texlinebreak{} @uref{http://math.dartmouth.edu/~carlp/}
+
+ at item
 Henri Cohen, ``A Course in Computational Algebraic Number Theory'', Graduate
 Texts in Mathematics number 138, Springer-Verlag, 1993.
 @texlinebreak{} @uref{http://www.math.u-bordeaux.fr/~cohen/}
@@ -10417,9 +10421,10 @@
 Applied Cryptography'', @uref{http://www.cacr.math.uwaterloo.ca/hac/}
 
 @item
-Richard M. Stallman, ``Using and Porting GCC'', Free Software Foundation, 1999,
-available online @uref{http://gcc.gnu.org/onlinedocs/}, and in
-the GCC package @uref{ftp://ftp.gnu.org/gnu/gcc/}
+Richard M. Stallman and the GCC Developer Community, ``Using the GNU Compiler
+Collection'', Free Software Foundation, 2008, available online
+ at uref{http://gcc.gnu.org/onlinedocs/}, and in the GCC package
+ at uref{ftp://ftp.gnu.org/gnu/gcc/}
 @end itemize
 
 @section Papers
@@ -10443,6 +10448,14 @@
 (and .psl.gz).
 
 @item
+Niels M"oller and Torbj@"orn Granlund, ``Improved division by invariant
+integers'', to appear.
+
+ at item
+Torbj@"orn Granlund and Niels M"oller, ``Division of integers large and
+small'', to appear.
+
+ at item
 Tudor Jebelean,
 ``An algorithm for exact division'',
 Journal of Symbolic Computation,
diff -r 654a87b4c936 -r 054bd0e275bf gmp-impl.h
--- a/gmp-impl.h	Thu Dec 17 09:06:08 2009 +0100
+++ b/gmp-impl.h	Thu Dec 17 15:23:26 2009 +0100
@@ -1119,6 +1119,9 @@
 #define   mpn_mul_fft_full __MPN(mul_fft_full)
 __GMP_DECLSPEC void      mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
 
+#define   mpn_nussbaumer_mul __MPN(nussbaumer_mul)
+__GMP_DECLSPEC void      mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
 #define   mpn_fft_next_size __MPN(fft_next_size)
 __GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
 
@@ -1624,27 +1627,22 @@
 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
 
 
-/* If MUL_TOOM22_THRESHOLD is not already defined, define it to a
-   value which is good on most machines.  */
 #ifndef MUL_TOOM22_THRESHOLD
-#define MUL_TOOM22_THRESHOLD 32
+#define MUL_TOOM22_THRESHOLD             30
 #endif
 
-/* If MUL_TOOM33_THRESHOLD is not already defined, define it to a
-   value which is good on most machines.  */
 #ifndef MUL_TOOM33_THRESHOLD
-#define MUL_TOOM33_THRESHOLD 128
+#define MUL_TOOM33_THRESHOLD            100
 #endif
 
 #ifndef MUL_TOOM44_THRESHOLD
-#define MUL_TOOM44_THRESHOLD 500
+#define MUL_TOOM44_THRESHOLD            300
 #endif
 
-/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.
-   In a normal build MUL_TOOM22_THRESHOLD is a constant and we use that.
-   In a fat binary or tune program build MUL_TOOM22_THRESHOLD is a
-   variable and a separate hard limit will have been defined.  Similarly for
-   TOOM3.  */
+/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
+   normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
+   binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
+   separate hard limit will have been defined.  Similarly for TOOM3.  */
 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
 #define MUL_TOOM22_THRESHOLD_LIMIT  MUL_TOOM22_THRESHOLD
 #endif
@@ -1656,30 +1654,29 @@
 #endif
 
 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
-   mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
-   always.  (Note that we certainly always want it if there's a native
-   assembler mpn_sqr_basecase.)
+   mpn_mul_basecase.  Default is to use mpn_sqr_basecase from 0.  (Note that we
+   certainly always want it if there's a native assembler mpn_sqr_basecase.)
 
    If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
-   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
-   toom2 threshold and SQR_TOOM2_THRESHOLD is 0.  This oddity arises
-   more or less because SQR_TOOM2_THRESHOLD represents the size up to
-   which mpn_sqr_basecase should be used, and that may be never.  */
+   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
+   threshold and SQR_TOOM2_THRESHOLD is 0.  This oddity arises more or less
+   because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
+   should be used, and that may be never.  */
 
 #ifndef SQR_BASECASE_THRESHOLD
-#define SQR_BASECASE_THRESHOLD 0
+#define SQR_BASECASE_THRESHOLD            0
 #endif
 
 #ifndef SQR_TOOM2_THRESHOLD
-#define SQR_TOOM2_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
+#define SQR_TOOM2_THRESHOLD              50
 #endif
 
 #ifndef SQR_TOOM3_THRESHOLD
-#define SQR_TOOM3_THRESHOLD 128
+#define SQR_TOOM3_THRESHOLD             120
 #endif
 
 #ifndef SQR_TOOM4_THRESHOLD
-#define SQR_TOOM4_THRESHOLD 500
+#define SQR_TOOM4_THRESHOLD             400
 #endif
 
 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT.  */
@@ -1688,66 +1685,70 @@
 #endif
 
 #ifndef DC_DIVAPPR_Q_THRESHOLD
-#define DC_DIVAPPR_Q_THRESHOLD   208
+#define DC_DIVAPPR_Q_THRESHOLD          200
 #endif
 
+#ifndef DC_DIV_QR_THRESHOLD
+#define DC_DIV_QR_THRESHOLD              50
+#endif
+
 #ifndef DC_DIV_Q_THRESHOLD
-#define DC_DIV_Q_THRESHOLD       228
+#define DC_DIV_Q_THRESHOLD              228