[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

Wed Dec 16 11:14:32 CET 2009

details:   /home/hgfiles/gmp/rev/e2c7ba84e61e
changeset: 13086:e2c7ba84e61e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Dec 16 11:13:29 2009 +0100
description:
Split invert.c into two files for better incremental linking.

details:   /home/hgfiles/gmp/rev/348d56624275
changeset: 13087:348d56624275
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Dec 16 11:14:10 2009 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                  |   13 +
 configure.in               |    2 +-
 gmp-impl.h                 |    6 +-
 mpn/generic/invert.c       |  311 +--------------------------------------------
 mpn/generic/invertappr.c   |  289 +++++++++++++++++++++++++++++++++++++++++
 mpn/generic/sbpi1_div_qr.c |    2 +-
 tune/Makefile.am           |    7 +-
 7 files changed, 313 insertions(+), 317 deletions(-)

diffs (truncated from 731 to 300 lines):

diff -r 716b4fb9bc78 -r 348d56624275 ChangeLog

--- a/ChangeLog	Tue Dec 15 20:53:19 2009 +0100
+++ b/ChangeLog	Wed Dec 16 11:14:10 2009 +0100
@@ -1,7 +1,20 @@
+2009-12-16  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/invertappr.c: New file, meat from invert.c.
+	* mpn/generic/invert.c: Leave just mpn_invert.c.
+	* configure.in (gmp_mpn_functions): Add invertappr.
+	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add invertappr.c.
+	* gmp-impl.h (mpn_invert_itch, mpn_invertappr_itch): New macros.
+
 2009-12-15  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/gcdext_subdiv_step.c: Get an ASSERT right.
 
+2009-12-15  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/sbpi1_div_qr.c (mpn_sbpi1_div_qr): A very small step
+	towards nail support.
+
 2009-12-15  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* gmp-impl.h (mpn_ni_invertappr): Added prototype and name-mangling.
diff -r 716b4fb9bc78 -r 348d56624275 configure.in
--- a/configure.in	Tue Dec 15 20:53:19 2009 +0100
+++ b/configure.in	Wed Dec 16 11:14:10 2009 +0100
@@ -2495,7 +2495,7 @@
   toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 				   \
   toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp	   			   \
   toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts	   \
-  invert binvert mulmod_bnm1						   \
+  invertappr invert binvert mulmod_bnm1					   \
   sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q				   \
   dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q				   \
   mu_div_qr mu_divappr_q mu_div_q					   \
diff -r 716b4fb9bc78 -r 348d56624275 gmp-impl.h
--- a/gmp-impl.h	Tue Dec 15 20:53:19 2009 +0100
+++ b/gmp-impl.h	Wed Dec 16 11:14:10 2009 +0100
@@ -1169,15 +1169,13 @@
 
 #define   mpn_invert __MPN(invert)
 __GMP_DECLSPEC void      mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-#define   mpn_invert_itch __MPN(invert_itch)
-__GMP_DECLSPEC mp_size_t mpn_invert_itch __GMP_PROTO ((mp_size_t));
+#define mpn_invert_itch(n)  mpn_invertappr_itch(n)
 
 #define   mpn_ni_invertappr __MPN(ni_invertappr)
 __GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 #define   mpn_invertappr __MPN(invertappr)
 __GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-#define   mpn_invertappr_itch __MPN(invertappr_itch)
-__GMP_DECLSPEC mp_size_t mpn_invertappr_itch __GMP_PROTO ((mp_size_t));
+#define mpn_invertappr_itch(n)  (3 * (n) + 2)
 
 #define   mpn_binvert __MPN(binvert)
 __GMP_DECLSPEC void      mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
diff -r 716b4fb9bc78 -r 348d56624275 mpn/generic/invert.c
--- a/mpn/generic/invert.c	Tue Dec 15 20:53:19 2009 +0100
+++ b/mpn/generic/invert.c	Wed Dec 16 11:14:10 2009 +0100
@@ -1,5 +1,4 @@
-/* Compute {up,n}^(-1). Inversion using ApproximateReciprocal
-   algorithm, returning either the correct result, or one less.
+/* invert.c -- Compute floor((B^{2n}-1)/U).
 
    Contributed to the GNU project by Marco Bodrato.
 
@@ -24,311 +23,14 @@
 You should have received a copy of the GNU Lesser General Public License
 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define DEBUG 0
-#include <stdlib.h>
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-#if DEBUG
-#include <stdio.h>
-int count = 0, cc = 0 ;
-#endif
-
-#ifndef INV_MULMOD_BNM1_THRESHOLD
-#define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD)
-#endif
-
 #ifndef INV_APPR_THRESHOLD
 #define INV_APPR_THRESHOLD (INV_NEWTON_THRESHOLD)
 #endif
 
-/* FIXME: The iterative version splits the operand in two slighty
-   unbalanced parts, the use of log_2 (or counting the bits)
-   underestimate the maximum number of iterations.
-*/
-
-/* This is intended for constant THRESHOLDs only, where the compiler
-   can completely fold the result.  */
-#define LOG2C(n) \
- (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
-  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
-  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
-  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-
-#if TUNE_PROGRAM_BUILD
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
-#define MAYBE_dcpi1_divappr   1
-#else
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
-#define MAYBE_dcpi1_divappr \
-  (INV_NEWTON_THRESHOLD < 2 * DC_DIVAPPR_Q_THRESHOLD)
-#endif
-
-mp_size_t
-mpn_invertappr_itch (mp_size_t n)
-{
-  return 3 * n + 2;
-}
-
-/*
- All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch),
- take the strictly normalised value {dp,n} (i.e. most significant bit
- must be set) as an input, and compute {ip,n}: the approximate
- reciprocal of {dp,n}.
-
- Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
- following conditions are satisfied by the output:
-   0 <= e <= 1;
-   {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
- I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
-      e=1 means that the result _may_ be one less than expected.
-
- The _bc version returns e=1 most of the times.
- The _ni version should return e=0 most of the time; only 1% of
- possible random input should give e=1.
-
- When the exact approximation is needed, i.e. e=0 in the relation above:
-   {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
- the function mpn_invert (ip, dp, n, scratch) should be used instead.
-*/
-
-/* Maximum scratch needed by this branch (at tp): 3*n + 2 */
-static mp_limb_t
-mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp)
-{
-  mp_ptr xp;
-
-  ASSERT (n > 0);
-  ASSERT (dp[n-1] & GMP_LIMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
-  ASSERT (! MPN_OVERLAP_P (ip, n, tp, mpn_invertappr_itch(n)));
-  ASSERT (! MPN_OVERLAP_P (dp, n, tp, mpn_invertappr_itch(n)));
-
-  /* Compute a base value of r limbs. */
-  if (n == 1)
-    invert_limb (*(ip),*(dp));
-  else {
-    mp_size_t i;
-    xp = tp + n + 2;				/* 2 * n limbs */
-    for (i = n - 1; i >= 0; i--)
-      xp[i] = ~CNST_LIMB(0);
-    mpn_com_n (xp + n, dp, n);
-    if (n == 2) {
-      mpn_tdiv_qr (tp, ip, 0, xp, 2 * n, dp, n);
-      MPN_COPY (ip, tp, n);
-    } else {
-      gmp_pi1_t inv;
-      invert_pi1 (inv, dp[n-1], dp[n-2]);
-      if (! MAYBE_dcpi1_divappr
-	  || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
-	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
-      else
-	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
-      MPN_DECR_U(ip, n, 1);
-      return 1;
-    }
-  }
-  return 0;
-}
-
-/*
- mpn_ni_invertappr: computes the approximate reciprocal using Newton's
- iterations (at least one).
-
- Inspired by Algorithm "ApproximateReciprocal", published in
- "Modern Computer Arithmetic" by Richard P. Brent and Paul Zimmermann,
- algorithm 3.5, page 121 in version 0.4 of the book.
-
- Some adaptation were introduced, to allow product mod B^m-1 and
- return the value e.
-
- The iterative structure is copied from T.Granlund's binvert.c.
-
- With USE_MUL_N = 0 and WRAP_MOD_BNM1 = 0, the iteration is conformant
- to the algorithm described in the book.
- 
- USE_MUL_N = 1 introduce a correction in such a way that "the value of
- B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
- "2B^n-1"). This correction should not require to modify the proof.
-
- WRAP_MOD_BNM1 = 1 enables the wrapped product modulo B^m-1.
- NOTE: is there any normalisation problem for the [0] class?
- It shouldn't: we compute 2*|A*X_h - B^{n+h}| < B^m-1.
- We may get [0] if and only if we get AX_h = B^{n+h}.
- This can happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1
- i.e. AX_h = B^{n+h} - A, then we get into the "negative" branch,
- where X_h is not incremented (because A < B^n).
-
- FIXME: the scratch for mulmod_bnm1 does not currently fit in the
- scratch, it is allocated apart.
-
- Acknowledgements: Thanks to Paul Zimmermann for his very valuable
- suggestions on all the theoretical aspects.
-*/
-
-#define USE_MUL_N 1
-#define WRAP_MOD_BNM1 1
-
-mp_limb_t
-mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
-{
-  mp_limb_t cy;
-  mp_ptr xp;
-  mp_size_t rn, mn;
-  mp_size_t sizes[NPOWS], *sizp;
-  mp_ptr tp;
-  TMP_DECL;
-#define rp scratch
-
-  ASSERT (n > 2);
-  ASSERT (dp[n-1] & GMP_LIMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
-  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
-  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
-  /* Compute the computation precisions from highest to lowest, leaving the
-     base case size in 'rn'.  */
-  sizp = sizes;
-  rn = n;
-  do {
-    *sizp = rn;
-    rn = ((rn) >> 1) + 1;
-    sizp ++;
-  } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
-
-  /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
-  dp += n;
-  ip += n;
-
-  /* Compute a base value of rn limbs. */
-  mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
-
-  TMP_MARK;
-
-  if (WRAP_MOD_BNM1 && ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)) 
-    tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mpn_mulmod_bnm1_next_size (n + 1)));
-
-  /* Use Newton's iterations to get the desired precision.*/
-
-  /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3  limbs */
-  /* Maximum scratch needed by this branch <= 3*n + 2	*/
-  xp = scratch + n + 3;				/*  n + rn limbs */
-  while (1) {
-    mp_limb_t method;
-
-    n = *--sizp;
-    /*
-      v    n  v
-      +----+--+
-      ^ rn ^
-    */
-
-    /* Compute i_jd . */
-    if (!WRAP_MOD_BNM1 || BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
-	|| ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
-      /* FIXME: We do only need {xp,n+1}*/
-      mpn_mul (xp, dp - n, n, ip - rn, rn);
-      mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
-      method = 1; /* Remember we used (truncated) product */
-      /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
-    } else { /* Use B^n-1 wraparound */
-      mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
-      /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
-      /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
-      /* Add dp*B^rn mod (B^mn-1) */
-      ASSERT (n >= mn - rn);
-      xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
-      cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
-      MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
-      ASSERT (n + rn >=  mn);
-      /* Subtract B^{rn+n}	*/
-      MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
-      if (xp[mn])
-	MPN_INCR_U (xp, mn, xp[mn] - 1);
-      else
-	MPN_DECR_U (xp, mn, 1);
-      method = 0; /* Remember we are working Mod B^m-1 */
-    }
-#if WRAP_MOD_BNP1 && WANT_FFT && 0