[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

Tue Jan 12 00:18:29 CET 2010

details:   /home/hgfiles/gmp/rev/326b78c81569
changeset: 13363:326b78c81569
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Jan 11 23:53:07 2010 +0100
description:
Fix typos.

details:   /home/hgfiles/gmp/rev/8963f9df7de4
changeset: 13364:8963f9df7de4
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jan 12 00:00:21 2010 +0100
description:
Declare mpn_dcpi1* and mpn_scpi1* size restrictions.

details:   /home/hgfiles/gmp/rev/a8d8fb5c86ac
changeset: 13365:a8d8fb5c86ac
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jan 12 00:01:16 2010 +0100
description:
*** empty log message ***

details:   /home/hgfiles/gmp/rev/bc2c19e257f1
changeset: 13366:bc2c19e257f1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jan 12 00:18:22 2010 +0100
description:
Fix fat build problems.

diffstat:

 ChangeLog          |   16 +++++++
 doc/gmp.texi       |   12 ++--
 mpn/generic/powm.c |  121 ++++++++++++++++++++++++++++++++++------------------
 tune/common.c      |    8 +-
 tune/speed.c       |    4 +-
 tune/speed.h       |    6 +-
 6 files changed, 110 insertions(+), 57 deletions(-)

diffs (truncated from 346 to 300 lines):

diff -r a9d6e23c045b -r bc2c19e257f1 ChangeLog

--- a/ChangeLog	Fri Jan 08 11:19:53 2010 +0100
+++ b/ChangeLog	Tue Jan 12 00:18:22 2010 +0100
@@ -1,3 +1,19 @@
+2010-01-12  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/powm.c: Change some #if to plain 'if' to avoid fat build
+	problems.
+
+2010-01-11  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV): Accept arguments for size
+	restrictions.
+	* tune/common.c (speed_mpn_sbpi1_div_qr, speed_mpn_dcpi1_div_qr,
+	(speed_mpn_sbpi1_divappr_q, speed_mpn_dcpi1_divappr_q): Pass size
+	limits for SPEED_ROUTINE_MPN_PI1_DIV.
+
+	* tune/speed.c (routine): Allow .r argument for mpn_sbpi1_divappr_q and
+	mpn_dcpi1_divappr_q.
+
 2010-01-08  Torbjorn Granlund  <tege at gmplib.org>
 
 	* Version 5.0.0 released.
diff -r a9d6e23c045b -r bc2c19e257f1 doc/gmp.texi
--- a/doc/gmp.texi	Fri Jan 08 11:19:53 2010 +0100
+++ b/doc/gmp.texi	Tue Jan 12 00:18:22 2010 +0100
@@ -8013,13 +8013,13 @@
 @math{2^k-1} where @m{g=2^{2N'/2^k}, g=2^(2N'/2^k)}.  @math{g} is a
 @m{2^k,2^k'}th root of unity mod @m{2^{N'}+1,2^N'+1}, which produces necessary
 cancellations at the interpolation stage, and it's also a power of 2 so the
-fast fourier transforms used for the evaluation and interpolation do only
+fast Fourier transforms used for the evaluation and interpolation do only
 shifts, adds and negations.
 
 The pointwise multiplications are done modulo @m{2^{N'}+1, 2^N'+1} and either
 recurse into a further FFT or use a plain multiplication (Toom-3, Karatsuba or
 basecase), whichever is optimal at the size @math{N'}.  The interpolation is
-an inverse fast fourier transform.  The resulting set of sums of @m{x_iy_j,
+an inverse fast Fourier transform.  The resulting set of sums of @m{x_iy_j,
 x[i]*y[j]} are added at appropriate offsets to give the final result.
 
 Squaring is the same, but @math{x} is the only input so it's one transform at
@@ -8111,10 +8111,10 @@
 thought of as using @math{-1} as a square root of unity.  If a 4th root of
 unity was available then a further split and speedup would be possible, but no
 such root exists for plain integers.  Going to complex integers with
- at m{i=\sqrt{-1}, i=sqrt(-1)} doesn't help, essentially because in cartesian
+ at m{i=\sqrt{-1}, i=sqrt(-1)} doesn't help, essentially because in Cartesian
 form it takes three real multiplies to do a complex multiply.  The existence
 of @m{2^k,2^k'}th roots of unity in a suitable ring or field lets the fast
-fourier transform keep splitting and get to @m{O(N \log r), O(N*log(r))}.
+Fourier transform keep splitting and get to @m{O(N \log r), O(N*log(r))}.
 
 Floating point FFTs use complex numbers approximating Nth roots of unity.
 Some processors have special support for such FFTs.  But these are not used in
@@ -8313,7 +8313,7 @@
 divisions are saved, or if Q is small then the crossproducts reduce to a small
 number.
 
-The modular inverse used is calculated efficiently by @code{modlimb_invert} in
+The modular inverse used is calculated efficiently by @code{binvert_limb} in
 @file{gmp-impl.h}.  This does four multiplies for a 32-bit limb, or six for a
 64-bit limb.  @file{tune/modlinv.c} has some alternate implementations that
 might suit processors better at bit twiddling than multiplying.
@@ -8377,7 +8377,7 @@
 than a normal division.
 
 The factor of @math{b^n} on @math{r} can be ignored in a GCD when @math{d} is
-odd, hencethe use of @code{mpn_modexact_1_odd} by @code{mpn_gcd_1} and
+odd, hence the use of @code{mpn_modexact_1_odd} by @code{mpn_gcd_1} and
 @code{mpz_kronecker_ui} etc (@pxref{Greatest Common Divisor Algorithms}).
 
 Montgomery's REDC method for modular multiplications uses operands of the form
diff -r a9d6e23c045b -r bc2c19e257f1 mpn/generic/powm.c
--- a/mpn/generic/powm.c	Fri Jan 08 11:19:53 2010 +0100
+++ b/mpn/generic/powm.c	Tue Jan 12 00:18:22 2010 +0100
@@ -300,123 +300,160 @@
 
 
 #if WANT_REDC_2
-#if REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
     {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-  else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
-    {
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
-      INNERLOOP;
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
     }
-#else
-  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+  else
     {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-  else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    {
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-#endif  /* REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD */
-  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    {
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
-      INNERLOOP;
-    }
-  else
-    {
+	  INNERLOOP;
+	}
+      else
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-      INNERLOOP;
+	  INNERLOOP;
+	}
     }
 
 #else  /* WANT_REDC_2 */
-#if REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+
+  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
     {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-  else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
-    {
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-      INNERLOOP;
+	  INNERLOOP;
+	}
+      else
+	{
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP;
+	}
     }
-#else
-  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+  else
     {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-  else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    {
+	  INNERLOOP;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-#endif
-  else
-    {
+	  INNERLOOP;
+	}
+      else
+	{
 #undef MPN_MUL_N
 #undef MPN_SQR
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-      INNERLOOP;
+	  INNERLOOP;
+	}
     }
 #endif  /* WANT_REDC_2 */
 
diff -r a9d6e23c045b -r bc2c19e257f1 tune/common.c
--- a/tune/common.c	Fri Jan 08 11:19:53 2010 +0100
+++ b/tune/common.c	Tue Jan 12 00:18:22 2010 +0100
@@ -765,22 +765,22 @@
 double
 speed_mpn_sbpi1_div_qr (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32);
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
 }
 double
 speed_mpn_dcpi1_div_qr (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv);
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
 }
 double
 speed_mpn_sbpi1_divappr_q (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32);
+  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);