[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri May 15 06:15:12 UTC 2015
details: /var/hg/gmp/rev/339e72465081
changeset: 16623:339e72465081
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri May 15 07:52:56 2015 +0200
description:
mpn/generic/invertappr.c (mpn_ni_invertappr): Always USE_MUL_N
details: /var/hg/gmp/rev/fbeca2813724
changeset: 16624:fbeca2813724
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri May 15 07:57:20 2015 +0200
description:
mpn/generic/invertappr.c (mpn_ni_invertappr): Use sublsh1
details: /var/hg/gmp/rev/4986f13cb779
changeset: 16625:4986f13cb779
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri May 15 08:00:27 2015 +0200
description:
mpn/generic/invertappr.c (mpn_ni_invertappr): Reduce memory used (and related updates)
details: /var/hg/gmp/rev/84ee9d5cd0d3
changeset: 16626:84ee9d5cd0d3
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri May 15 08:05:37 2015 +0200
description:
mpn/generic/mu_div*: Pass the scratch parameter to mpn_invertappr
details: /var/hg/gmp/rev/6e856cdf492b
changeset: 16627:6e856cdf492b
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri May 15 08:06:24 2015 +0200
description:
mpn/generic/invertappr.c: Stop supporting scratch==NULL
details: /var/hg/gmp/rev/ff2b31ba754f
changeset: 16628:ff2b31ba754f
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri May 15 08:12:32 2015 +0200
description:
ChangeLog
diffstat:
ChangeLog | 9 ++++
gmp-impl.h | 2 +-
mpn/generic/invertappr.c | 92 ++++++++++++++++++++--------------------------
mpn/generic/mu_div_qr.c | 11 +++--
mpn/generic/mu_divappr_q.c | 11 +++--
tune/tuneup.c | 6 +-
6 files changed, 66 insertions(+), 65 deletions(-)
diffs (truncated from 321 to 300 lines):
diff -r f904b57a39d8 -r ff2b31ba754f ChangeLog
--- a/ChangeLog Tue May 12 21:43:24 2015 +0200
+++ b/ChangeLog Fri May 15 08:12:32 2015 +0200
@@ -1,3 +1,12 @@
+2015-05-15 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/generic/invertappr.c: Reduce memory usage.
+ * gmp-impl.h (mpn_invertappr_itch): Update accordingly.
+ * tune/tuneup.c (tune_invertappr, tune_invert): Update min_size.
+
+ * mpn/generic/mu_div_qr.c: Pass scratch memory to mpn_invertappr.
+ * mpn/generic/mu_divappr_q.c: Likewise.
+
2015-05-12 Felix Janda <felix.janda at posteo.de>
* mpn/powerpc32/elf.m4 (LEA): Adopt to new ABI.
diff -r f904b57a39d8 -r ff2b31ba754f gmp-impl.h
--- a/gmp-impl.h Tue May 12 21:43:24 2015 +0200
+++ b/gmp-impl.h Fri May 15 08:12:32 2015 +0200
@@ -1466,7 +1466,7 @@
__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invertappr __MPN(invertappr)
__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
-#define mpn_invertappr_itch(n) (2 * (n) + 4)
+#define mpn_invertappr_itch(n) (2 * (n))
#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
diff -r f904b57a39d8 -r ff2b31ba754f mpn/generic/invertappr.c
--- a/mpn/generic/invertappr.c Tue May 12 21:43:24 2015 +0200
+++ b/mpn/generic/invertappr.c Fri May 15 08:12:32 2015 +0200
@@ -40,10 +40,6 @@
GNU Lesser General Public License along with the GNU MP Library. If not,
see https://www.gnu.org/licenses/. */
-/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
- allocate and pass the scratch to the function. */
-#include <stdlib.h> /* for NULL */
-
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
@@ -140,9 +136,12 @@
Some adaptations were introduced, to allow product mod B^m-1 and return the
value e.
- USE_MUL_N = 1 (default) introduces a correction in such a way that "the
- value of B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
- "2B^n-1"). This correction should not require to modify the proof.
+ We introduced a correction in such a way that "the value of
+ B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
+ "2B^n-1").
+
+ Maximum scratch needed by this branch <= 2*n, but have to fit 3*rn
+ in the scratch, i.e. 3*rn <= 2*n: we require n>4.
We use a wrapped product modulo B^m-1. NOTE: is there any normalisation
problem for the [0] class? It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
@@ -152,22 +151,20 @@
incremented (because A < B^n).
FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
- is allocated apart. */
-
-#define USE_MUL_N 1
+ is allocated apart.
+ */
mp_limb_t
mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
mp_limb_t cy;
- mp_ptr rp;
mp_size_t rn, mn;
mp_size_t sizes[NPOWS], *sizp;
mp_ptr tp;
TMP_DECL;
#define xp scratch
- ASSERT (n > 2);
+ ASSERT (n > 4);
ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
@@ -199,12 +196,7 @@
}
/* Use Newton's iterations to get the desired precision.*/
- /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3 limbs */
- /* Maximum scratch needed by this branch <= 2*n + 4 - USE_MUL_N */
- rp = xp + n + 1 - USE_MUL_N; /* n + 3 limbs */
while (1) {
- mp_limb_t method;
-
n = *--sizp;
/*
v n v
@@ -218,7 +210,7 @@
/* FIXME: We do only need {xp,n+1}*/
mpn_mul (xp, dp - n, n, ip - rn, rn);
mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
- method = CNST_LIMB(1); /* Remember we truncated, Mod B^(n+1) */
+ cy = CNST_LIMB(1); /* Remember we truncated, Mod B^(n+1) */
/* We computed (truncated) {xp,n+1} <- 1.{ip,rn} * 0.{dp,n} */
} else { /* Use B^mn-1 wraparound */
mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
@@ -232,62 +224,61 @@
xp[mn] = CNST_LIMB (1); /* set a limit for DECR_U */
MPN_DECR_U (xp + rn + n - mn, 2 * mn + 1 - rn - n, CNST_LIMB (1) - cy);
MPN_DECR_U (xp, mn, CNST_LIMB (1) - xp[mn]); /* if DECR_U eroded xp[mn] */
- method = CNST_LIMB(0); /* Remember we are working Mod B^mn-1 */
+ cy = CNST_LIMB(0); /* Remember we are working Mod B^mn-1 */
}
if (xp[n] < CNST_LIMB (2)) { /* "positive" residue class */
cy = xp[n]; /* 0 <= cy <= 1 here. */
-#if ! USE_MUL_N
- xp[n] = CNST_LIMB (0);
-#endif
+#if HAVE_NATIVE_mpn_sublsh1_n
+ if (cy++) {
+ if (mpn_cmp (xp, dp - n, n) > 0) {
+ mp_limb_t chk;
+ chk = mpn_sublsh1_n (xp, xp, dp - n, n);
+ ASSERT (chk == xp[n]);
+ ++ cy;
+ } else
+ ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
+ }
+#else /* no mpn_sublsh1_n*/
if (cy++ && !mpn_sub_n (xp, xp, dp - n, n)) {
ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
++cy;
- } /* 1 <= cy <= 2 here. */
+ }
+#endif
+ /* 1 <= cy <= 3 here. */
#if HAVE_NATIVE_mpn_rsblsh1_n
if (mpn_cmp (xp, dp - n, n) > 0) {
- ASSERT_NOCARRY (mpn_rsblsh1_n (xp, xp, dp - n, n));
+ ASSERT_NOCARRY (mpn_rsblsh1_n (xp + n, xp, dp - n, n));
++cy;
} else
- ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
-#else
+ ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
+#else /* no mpn_rsblsh1_n*/
if (mpn_cmp (xp, dp - n, n) > 0) {
ASSERT_NOCARRY (mpn_sub_n (xp, xp, dp - n, n));
++cy;
}
- ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
+ ASSERT_NOCARRY (mpn_sub_nc (xp + 2 * n - rn, dp - rn, xp + n - rn, rn, mpn_cmp (xp, dp - n, n - rn) > 0));
#endif
- MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 3 here. */
+ MPN_DECR_U(ip - rn, rn, cy); /* 1 <= cy <= 4 here. */
} else { /* "negative" residue class */
- MPN_DECR_U(xp, n + 1, method);
-#if USE_MUL_N
+ ASSERT (xp[n] >= GMP_NUMB_MAX - CNST_LIMB(1));
+ MPN_DECR_U(xp, n + 1, cy);
if (xp[n] != GMP_NUMB_MAX) {
MPN_INCR_U(ip - rn, rn, CNST_LIMB (1));
ASSERT_CARRY (mpn_add_n (xp, xp, dp - n, n));
}
-#endif
- mpn_com (xp + n - rn, xp + n - rn, rn + 1 - USE_MUL_N);
- ASSERT (USE_MUL_N || xp[n] <= CNST_LIMB (1));
+ mpn_com (xp + 2 * n - rn, xp + n - rn, rn);
}
- /* Compute x_ju_j. FIXME:We need {rp+rn,rn}, mulhi? */
-#if USE_MUL_N
- mpn_mul_n (rp, xp + n - rn, ip - rn, rn);
-#else
- rp[2*rn] = 0;
- mpn_mul (rp, xp + n - rn, rn + xp[n], ip - rn, rn);
-#endif
- cy = mpn_add_n (rp + rn, rp + rn, xp + n - rn, 2*rn - n);
- cy = mpn_add_nc (ip - n, rp + 3*rn - n, xp + rn, n - rn, cy);
-#if USE_MUL_N
+ /* Compute x_ju_j. FIXME:We need {xp+rn,rn}, mulhi? */
+ mpn_mul_n (xp, xp + 2 * n - rn, ip - rn, rn);
+ cy = mpn_add_n (xp + rn, xp + rn, xp + 2 * n - rn, 2 * rn - n);
+ cy = mpn_add_nc (ip - n, xp + 3 * rn - n, xp + n + rn, n - rn, cy);
MPN_INCR_U (ip - rn, rn, cy);
-#else
- MPN_INCR_U (ip - rn, rn, cy + rp[2*rn] + xp[n]);
-#endif
if (sizp == sizes) { /* Get out of the cycle */
/* Check for possible carry propagation from below. */
- cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
-/* cy = mpn_add_1 (rp + rn, rp + rn, 2*rn - n, 4); */
+ cy = xp[3 * rn - n - 1] > GMP_NUMB_MAX - CNST_LIMB (7); /* Be conservative. */
+ /* cy = mpn_add_1 (xp + rn, xp + rn, 2*rn - n, 4); */
break;
}
rn = n;
@@ -295,7 +286,7 @@
TMP_FREE;
return cy;
-#undef rp
+#undef xp
}
mp_limb_t
@@ -306,9 +297,6 @@
TMP_MARK;
- if (scratch == NULL)
- scratch = TMP_ALLOC_LIMBS (mpn_invertappr_itch (n));
-
ASSERT (n > 0);
ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
diff -r f904b57a39d8 -r ff2b31ba754f mpn/generic/mu_div_qr.c
--- a/mpn/generic/mu_div_qr.c Tue May 12 21:43:24 2015 +0200
+++ b/mpn/generic/mu_div_qr.c Fri May 15 08:12:32 2015 +0200
@@ -187,7 +187,7 @@
{
MPN_COPY (tp + 1, dp, in);
tp[0] = 1;
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
MPN_COPY_INCR (ip, ip + 1, in);
}
else
@@ -197,7 +197,7 @@
MPN_ZERO (ip, in);
else
{
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
MPN_COPY_INCR (ip, ip + 1, in);
}
}
@@ -399,11 +399,12 @@
mp_size_t
mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
- mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+ mp_size_t itch_preinv = mpn_preinv_mu_div_qr_itch (nn, dn, in);
+ mp_size_t itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
- return in + itch_local + itch_out;
+ ASSERT (itch_preinv >= itch_invapp);
+ return in + MAX (itch_invapp, itch_preinv);
}
mp_size_t
diff -r f904b57a39d8 -r ff2b31ba754f mpn/generic/mu_divappr_q.c
--- a/mpn/generic/mu_divappr_q.c Tue May 12 21:43:24 2015 +0200
+++ b/mpn/generic/mu_divappr_q.c Fri May 15 08:12:32 2015 +0200
@@ -113,7 +113,7 @@
{
MPN_COPY (tp + 1, dp, in);
tp[0] = 1;
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
MPN_COPY_INCR (ip, ip + 1, in);
}
else
@@ -123,7 +123,7 @@
MPN_ZERO (ip, in);
else
{
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invertappr (ip, tp, in + 1, tp + in + 1);
MPN_COPY_INCR (ip, ip + 1, in);
}
}
@@ -348,7 +348,7 @@
mp_size_t
mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
- mp_size_t qn, in, itch_local, itch_out;
+ mp_size_t qn, in, itch_local, itch_out, itch_invapp;
qn = nn - dn;
if (qn + 1 < dn)
@@ -359,5 +359,8 @@
itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
- return in + dn + itch_local + itch_out;
+ itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
+
+ ASSERT (dn + itch_local + itch_out >= itch_invapp);
+ return in + MAX (dn + itch_local + itch_out, itch_invapp);
}
diff -r f904b57a39d8 -r ff2b31ba754f tune/tuneup.c
--- a/tune/tuneup.c Tue May 12 21:43:24 2015 +0200
+++ b/tune/tuneup.c Fri May 15 08:12:32 2015 +0200
@@ -1694,12 +1694,12 @@
param.function = speed_mpn_ni_invertappr;
More information about the gmp-commit
mailing list