[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 16 11:14:32 CET 2009
details: /home/hgfiles/gmp/rev/e2c7ba84e61e
changeset: 13086:e2c7ba84e61e
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Dec 16 11:13:29 2009 +0100
description:
Split invert.c into two files for better incremental linking.
details: /home/hgfiles/gmp/rev/348d56624275
changeset: 13087:348d56624275
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Dec 16 11:14:10 2009 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 13 +
configure.in | 2 +-
gmp-impl.h | 6 +-
mpn/generic/invert.c | 311 +--------------------------------------------
mpn/generic/invertappr.c | 289 +++++++++++++++++++++++++++++++++++++++++
mpn/generic/sbpi1_div_qr.c | 2 +-
tune/Makefile.am | 7 +-
7 files changed, 313 insertions(+), 317 deletions(-)
diffs (truncated from 731 to 300 lines):
diff -r 716b4fb9bc78 -r 348d56624275 ChangeLog
--- a/ChangeLog Tue Dec 15 20:53:19 2009 +0100
+++ b/ChangeLog Wed Dec 16 11:14:10 2009 +0100
@@ -1,7 +1,20 @@
+2009-12-16 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/invertappr.c: New file, meat from invert.c.
+ * mpn/generic/invert.c: Leave just mpn_invert.c.
+ * configure.in (gmp_mpn_functions): Add invertappr.
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add invertappr.c.
+ * gmp-impl.h (mpn_invert_itch, mpn_invertappr_itch): New macros.
+
2009-12-15 Torbjorn Granlund <tege at gmplib.org>
* mpn/generic/gcdext_subdiv_step.c: Get an ASSERT right.
+2009-12-15 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/sbpi1_div_qr.c (mpn_sbpi1_div_qr): A very small step
+ towards nail support.
+
2009-12-15 Marco Bodrato <bodrato at mail.dm.unipi.it>
* gmp-impl.h (mpn_ni_invertappr): Added prototype and name-mangling.
diff -r 716b4fb9bc78 -r 348d56624275 configure.in
--- a/configure.in Tue Dec 15 20:53:19 2009 +0100
+++ b/configure.in Wed Dec 16 11:14:10 2009 +0100
@@ -2495,7 +2495,7 @@
toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 \
toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp \
toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
- invert binvert mulmod_bnm1 \
+ invertappr invert binvert mulmod_bnm1 \
sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
mu_div_qr mu_divappr_q mu_div_q \
diff -r 716b4fb9bc78 -r 348d56624275 gmp-impl.h
--- a/gmp-impl.h Tue Dec 15 20:53:19 2009 +0100
+++ b/gmp-impl.h Wed Dec 16 11:14:10 2009 +0100
@@ -1169,15 +1169,13 @@
#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-#define mpn_invert_itch __MPN(invert_itch)
-__GMP_DECLSPEC mp_size_t mpn_invert_itch __GMP_PROTO ((mp_size_t));
+#define mpn_invert_itch(n) mpn_invertappr_itch(n)
#define mpn_ni_invertappr __MPN(ni_invertappr)
__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr __MPN(invertappr)
__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-#define mpn_invertappr_itch __MPN(invertappr_itch)
-__GMP_DECLSPEC mp_size_t mpn_invertappr_itch __GMP_PROTO ((mp_size_t));
+#define mpn_invertappr_itch(n) (3 * (n) + 2)
#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
diff -r 716b4fb9bc78 -r 348d56624275 mpn/generic/invert.c
--- a/mpn/generic/invert.c Tue Dec 15 20:53:19 2009 +0100
+++ b/mpn/generic/invert.c Wed Dec 16 11:14:10 2009 +0100
@@ -1,5 +1,4 @@
-/* Compute {up,n}^(-1). Inversion using ApproximateReciprocal
- algorithm, returning either the correct result, or one less.
+/* invert.c -- Compute floor((B^{2n}-1)/U).
Contributed to the GNU project by Marco Bodrato.
@@ -24,311 +23,14 @@
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define DEBUG 0
-#include <stdlib.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#if DEBUG
-#include <stdio.h>
-int count = 0, cc = 0 ;
-#endif
-
-#ifndef INV_MULMOD_BNM1_THRESHOLD
-#define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD)
-#endif
-
#ifndef INV_APPR_THRESHOLD
#define INV_APPR_THRESHOLD (INV_NEWTON_THRESHOLD)
#endif
-/* FIXME: The iterative version splits the operand in two slighty
- unbalanced parts, the use of log_2 (or counting the bits)
- underestimate the maximum number of iterations.
-*/
-
-/* This is intended for constant THRESHOLDs only, where the compiler
- can completely fold the result. */
-#define LOG2C(n) \
- (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
- ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
- ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
- ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-
-#if TUNE_PROGRAM_BUILD
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
-#define MAYBE_dcpi1_divappr 1
-#else
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
-#define MAYBE_dcpi1_divappr \
- (INV_NEWTON_THRESHOLD < 2 * DC_DIVAPPR_Q_THRESHOLD)
-#endif
-
-mp_size_t
-mpn_invertappr_itch (mp_size_t n)
-{
- return 3 * n + 2;
-}
-
-/*
- All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch),
- take the strictly normalised value {dp,n} (i.e. most significant bit
- must be set) as an input, and compute {ip,n}: the approximate
- reciprocal of {dp,n}.
-
- Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
- following conditions are satisfied by the output:
- 0 <= e <= 1;
- {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
- I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
- e=1 means that the result _may_ be one less than expected.
-
- The _bc version returns e=1 most of the times.
- The _ni version should return e=0 most of the time; only 1% of
- possible random input should give e=1.
-
- When the exact approximation is needed, i.e. e=0 in the relation above:
- {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
- the function mpn_invert (ip, dp, n, scratch) should be used instead.
-*/
-
-/* Maximum scratch needed by this branch (at tp): 3*n + 2 */
-static mp_limb_t
-mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp)
-{
- mp_ptr xp;
-
- ASSERT (n > 0);
- ASSERT (dp[n-1] & GMP_LIMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
- ASSERT (! MPN_OVERLAP_P (ip, n, tp, mpn_invertappr_itch(n)));
- ASSERT (! MPN_OVERLAP_P (dp, n, tp, mpn_invertappr_itch(n)));
-
- /* Compute a base value of r limbs. */
- if (n == 1)
- invert_limb (*(ip),*(dp));
- else {
- mp_size_t i;
- xp = tp + n + 2; /* 2 * n limbs */
- for (i = n - 1; i >= 0; i--)
- xp[i] = ~CNST_LIMB(0);
- mpn_com_n (xp + n, dp, n);
- if (n == 2) {
- mpn_tdiv_qr (tp, ip, 0, xp, 2 * n, dp, n);
- MPN_COPY (ip, tp, n);
- } else {
- gmp_pi1_t inv;
- invert_pi1 (inv, dp[n-1], dp[n-2]);
- if (! MAYBE_dcpi1_divappr
- || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
- mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
- else
- mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
- MPN_DECR_U(ip, n, 1);
- return 1;
- }
- }
- return 0;
-}
-
-/*
- mpn_ni_invertappr: computes the approximate reciprocal using Newton's
- iterations (at least one).
-
- Inspired by Algorithm "ApproximateReciprocal", published in
- "Modern Computer Arithmetic" by Richard P. Brent and Paul Zimmermann,
- algorithm 3.5, page 121 in version 0.4 of the book.
-
- Some adaptation were introduced, to allow product mod B^m-1 and
- return the value e.
-
- The iterative structure is copied from T.Granlund's binvert.c.
-
- With USE_MUL_N = 0 and WRAP_MOD_BNM1 = 0, the iteration is conformant
- to the algorithm described in the book.
-
- USE_MUL_N = 1 introduce a correction in such a way that "the value of
- B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
- "2B^n-1"). This correction should not require to modify the proof.
-
- WRAP_MOD_BNM1 = 1 enables the wrapped product modulo B^m-1.
- NOTE: is there any normalisation problem for the [0] class?
- It shouldn't: we compute 2*|A*X_h - B^{n+h}| < B^m-1.
- We may get [0] if and only if we get AX_h = B^{n+h}.
- This can happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1
- i.e. AX_h = B^{n+h} - A, then we get into the "negative" branch,
- where X_h is not incremented (because A < B^n).
-
- FIXME: the scratch for mulmod_bnm1 does not currently fit in the
- scratch, it is allocated apart.
-
- Acknowledgements: Thanks to Paul Zimmermann for his very valuable
- suggestions on all the theoretical aspects.
-*/
-
-#define USE_MUL_N 1
-#define WRAP_MOD_BNM1 1
-
-mp_limb_t
-mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
-{
- mp_limb_t cy;
- mp_ptr xp;
- mp_size_t rn, mn;
- mp_size_t sizes[NPOWS], *sizp;
- mp_ptr tp;
- TMP_DECL;
-#define rp scratch
-
- ASSERT (n > 2);
- ASSERT (dp[n-1] & GMP_LIMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
- ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
- ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
- /* Compute the computation precisions from highest to lowest, leaving the
- base case size in 'rn'. */
- sizp = sizes;
- rn = n;
- do {
- *sizp = rn;
- rn = ((rn) >> 1) + 1;
- sizp ++;
- } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
-
- /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
- dp += n;
- ip += n;
-
- /* Compute a base value of rn limbs. */
- mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
-
- TMP_MARK;
-
- if (WRAP_MOD_BNM1 && ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
- tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mpn_mulmod_bnm1_next_size (n + 1)));
-
- /* Use Newton's iterations to get the desired precision.*/
-
- /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3 limbs */
- /* Maximum scratch needed by this branch <= 3*n + 2 */
- xp = scratch + n + 3; /* n + rn limbs */
- while (1) {
- mp_limb_t method;
-
- n = *--sizp;
- /*
- v n v
- +----+--+
- ^ rn ^
- */
-
- /* Compute i_jd . */
- if (!WRAP_MOD_BNM1 || BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
- || ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
- /* FIXME: We do only need {xp,n+1}*/
- mpn_mul (xp, dp - n, n, ip - rn, rn);
- mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
- method = 1; /* Remember we used (truncated) product */
- /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
- } else { /* Use B^n-1 wraparound */
- mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
- /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
- /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
- /* Add dp*B^rn mod (B^mn-1) */
- ASSERT (n >= mn - rn);
- xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
- cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
- MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
- ASSERT (n + rn >= mn);
- /* Subtract B^{rn+n} */
- MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
- if (xp[mn])
- MPN_INCR_U (xp, mn, xp[mn] - 1);
- else
- MPN_DECR_U (xp, mn, 1);
- method = 0; /* Remember we are working Mod B^m-1 */
- }
-#if WRAP_MOD_BNP1 && WANT_FFT && 0
More information about the gmp-commit
mailing list