[Gmp-commit] /home/hgfiles/gmp: Reduce memory usage for {mul,sqr}mod_bnm1.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Jan 26 21:48:16 CET 2010
details: /home/hgfiles/gmp/rev/962b60c834c7
changeset: 13400:962b60c834c7
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Jan 26 21:48:01 2010 +0100
description:
Reduce memory usage for {mul,sqr}mod_bnm1.
diffstat:
ChangeLog | 25 ++++++++
gmp-impl.h | 19 ++++++-
mpn/generic/binvert.c | 7 ++-
mpn/generic/invertappr.c | 6 +-
mpn/generic/mu_bdiv_q.c | 14 ++++-
mpn/generic/mu_bdiv_qr.c | 14 ++++-
mpn/generic/mu_div_qr.c | 7 ++-
mpn/generic/mu_divappr_q.c | 7 ++-
mpn/generic/mulmod_bnm1.c | 121 +++++++++++++++++++++++-------------------
mpn/generic/nussbaumer_mul.c | 15 +++-
mpn/generic/redc_n.c | 4 +-
mpn/generic/sqrmod_bnm1.c | 93 +++++++++++++++++++++-----------
tests/mpn/t-mulmod_bnm1.c | 6 +-
tests/mpn/t-sqrmod_bnm1.c | 40 +++++++------
tune/speed.h | 4 +-
15 files changed, 252 insertions(+), 130 deletions(-)
diffs (truncated from 734 to 300 lines):
diff -r 24e8cb9950c5 -r 962b60c834c7 ChangeLog
--- a/ChangeLog Tue Jan 26 12:26:32 2010 +0100
+++ b/ChangeLog Tue Jan 26 21:48:01 2010 +0100
@@ -1,3 +1,28 @@
+2010-01-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Partial rewrite to
+ reduce memory usage.
+ * mpn/generic/sqrmod_bnm1.c (mpn_sqrmod_bnm1): Likewise.
+ (mpn_sqrmod_bnm1_next_size): New function.
+
+ * gmp-impl.h (mpn_mulmod_bnm1_itch): Accepts 3 parameters now.
+ (mpn_sqrmod_bnm1_itch): New inline function.
+ (mpn_sqrmod_bnm1_next_size): Declaration and mangling.
+ * mpn/generic/nussbaumer_mul.c: Use the new functions.
+
+ * mpn/generic/invertappr.c (mpn_ni_invertappr): Use new syntax for
+ mpn_mulmod_bnm1_itch.
+ * mpn/generic/mu_divappr_q.c (mpn_mu_divappr_q_itch): Likewise.
+ * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Likewise.
+ * mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Likewise.
+ * mpn/generic/mu_div_qr.c (mpn_mu_div_qr_itch): Likewise.
+ * mpn/generic/binvert.c (mpn_binvert_itch): Likewise.
+ * tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL): Likewise.
+ (SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED): Likewise.
+
+ * tests/mpn/t-sqrmod_bnm1.c, tests/mpn/t-mulmod_bnm1.c: Test
+ reduced memory usage.
+
2010-01-25 Torbjorn Granlund <tege at gmplib.org>
* tune/tuneup.c (INSERT_FFTTAB): New macro, like old insertion code but
diff -r 24e8cb9950c5 -r 962b60c834c7 gmp-impl.h
--- a/gmp-impl.h Tue Jan 26 12:26:32 2010 +0100
+++ b/gmp-impl.h Tue Jan 26 21:48:01 2010 +0100
@@ -948,10 +948,27 @@
__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
-#define mpn_mulmod_bnm1_itch(n) (2*(n) + 2*GMP_LIMB_BITS +2)
+static inline mp_size_t
+mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
+ mp_size_t n, itch;
+ n = rn >> 1;
+ itch = rn + 4 +
+ (an > n ? (bn > n ? rn : n) : 0);
+ return itch;
+}
#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
+__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+static inline mp_size_t
+mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
+ mp_size_t n, itch;
+ n = rn >> 1;
+ itch = rn + 3 +
+ (an > n ? an : 0);
+ return itch;
+}
typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/binvert.c
--- a/mpn/generic/binvert.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/binvert.c Tue Jan 26 21:48:01 2010 +0100
@@ -52,7 +52,12 @@
mpn_binvert_itch (mp_size_t n)
{
mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 0
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
+#else
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, itch_local, itch_local);
+#endif
return itch_local + itch_out;
}
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/invertappr.c
--- a/mpn/generic/invertappr.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/invertappr.c Tue Jan 26 21:48:01 2010 +0100
@@ -190,8 +190,10 @@
TMP_MARK;
if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
- tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mpn_mulmod_bnm1_next_size (n + 1)));
-
+ {
+ mn = mpn_mulmod_bnm1_next_size (n + 1);
+ tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
+ }
/* Use Newton's iterations to get the desired precision.*/
/* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3 limbs */
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_bdiv_q.c
--- a/mpn/generic/mu_bdiv_q.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_bdiv_q.c Tue Jan 26 21:48:01 2010 +0100
@@ -234,7 +234,12 @@
else
{
tn = mpn_mulmod_bnm1_next_size (dn);
- itch_out = mpn_mulmod_bnm1_itch (tn);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 1
+ itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+#else
+ itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
}
itch_binvert = mpn_binvert_itch (in);
itches = dn + tn + itch_out;
@@ -251,7 +256,12 @@
else
{
tn = mpn_mulmod_bnm1_next_size (qn);
- itch_out = mpn_mulmod_bnm1_itch (tn);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 1
+ itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
+#else
+ itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
}
itch_binvert = mpn_binvert_itch (in);
itches = tn + itch_out;
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_bdiv_qr.c
--- a/mpn/generic/mu_bdiv_qr.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_bdiv_qr.c Tue Jan 26 21:48:01 2010 +0100
@@ -254,7 +254,12 @@
else
{
tn = mpn_mulmod_bnm1_next_size (dn);
- itch_out = mpn_mulmod_bnm1_itch (tn);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 1
+ itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+#else
+ itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
}
itch_binvert = mpn_binvert_itch (in);
itches = tn + itch_out;
@@ -271,7 +276,12 @@
else
{
tn = mpn_mulmod_bnm1_next_size (dn);
- itch_out = mpn_mulmod_bnm1_itch (tn);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 1
+ itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+#else
+ itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
}
}
itch_binvert = mpn_binvert_itch (in);
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_div_qr.c
--- a/mpn/generic/mu_div_qr.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_div_qr.c Tue Jan 26 21:48:01 2010 +0100
@@ -403,8 +403,13 @@
mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local);
mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 1
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+#else
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, itch_local, itch_local);
+#endif
return in + itch_local + itch_out;
}
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_divappr_q.c
--- a/mpn/generic/mu_divappr_q.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_divappr_q.c Tue Jan 26 21:48:01 2010 +0100
@@ -356,8 +356,13 @@
mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local);
mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+ /* FIXME: check for the correct estimate and remove #if */
+#if 1
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+#else
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, itch_local, itch_local);
+#endif
return in + dn + itch_local + itch_out;
}
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mulmod_bnm1.c Tue Jan 26 21:48:01 2010 +0100
@@ -82,9 +82,9 @@
* implies (B^an-1)(B^bn-1) < (B^rn-1) .
*
* Requires 0 < bn <= an <= rn and an + bn > rn/2
- * Scratch need: rn + 2 + (need for recursive call OR rn + 2). This gives
+ * Scratch need: rn + (need for recursive call OR rn + 4). This gives
*
- * S(n) <= rn + 2 + MAX (rn + 2, S(n/2)) <= 2rn + 2 log2 rn + 2
+ * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
*/
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
@@ -139,40 +139,47 @@
#define b0 bp
#define b1 (bp + n)
- /* FIXME: See if the need for temporary storage can be reduced
- * in case an <= n or bn <= n. This matters for calls from
- * mpn_nussbaumer_mul, since then bn <= n always and an <= n for
- * balanced products. */
#define xp tp /* 2n + 2 */
/* am1 maybe in {xp, n} */
/* bm1 maybe in {xp + n, n} */
-#define so (tp + 2*n + 2)
- /* ap1 maybe in {so, n + 1} */
- /* bp1 maybe in {so + n + 1, n + 1} */
+#define sp1 (tp + 2*n + 2)
+ /* ap1 maybe in {sp1, n + 1} */
+ /* bp1 maybe in {sp1 + n + 1, n + 1} */
{
mp_srcptr am1, bm1;
mp_size_t anm, bnm;
+ mp_ptr so;
- if( an > n ) {
- am1 = xp;
- cy = mpn_add (xp, a0, n, a1, an - n);
- MPN_INCR_U (xp, n, cy);
- anm = n;
- } else {
- am1 = a0;
- anm = an;
- }
-
- if( bn > n ) {
- bm1 = xp + n;
- cy = mpn_add (xp + n, b0, n, b1, bn - n);
- MPN_INCR_U (xp + n, n, cy);
- bnm = n;
- } else {
- bm1 = b0;
- bnm = bn;
- }
+ if (LIKELY (an > n))
+ {
+ am1 = xp;
+ cy = mpn_add (xp, a0, n, a1, an - n);
+ MPN_INCR_U (xp, n, cy);
+ anm = n;
+ if (LIKELY (bn > n))
+ {
+ bm1 = xp + n;
+ cy = mpn_add (xp + n, b0, n, b1, bn - n);
+ MPN_INCR_U (xp + n, n, cy);
+ bnm = n;
+ so = xp + 2*n;
+ }
+ else
+ {
+ so = xp + n;
+ bm1 = b0;
+ bnm = bn;
+ }
+ }
+ else
+ {
+ so = xp;
+ am1 = a0;
+ anm = an;
+ bm1 = b0;
+ bnm = bn;
+ }
mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
}
@@ -182,22 +189,22 @@
mp_srcptr ap1, bp1;
mp_size_t anp, bnp;
- if( an > n ) {
- ap1 = so;
- cy = mpn_sub (so, a0, n, a1, an - n);
- so[n] = 0;
- MPN_INCR_U (so, n + 1, cy);
+ if (LIKELY (an > n)) {
+ ap1 = sp1;
+ cy = mpn_sub (sp1, a0, n, a1, an - n);
+ sp1[n] = 0;
+ MPN_INCR_U (sp1, n + 1, cy);
anp = n + ap1[n];
} else {
More information about the gmp-commit
mailing list