[Gmp-commit] /home/hgfiles/gmp: Reduce memory usage for {mul,sqr}mod_bnm1.

mercurial at gmplib.org mercurial at gmplib.org
Tue Jan 26 21:48:16 CET 2010


details:   /home/hgfiles/gmp/rev/962b60c834c7
changeset: 13400:962b60c834c7
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Jan 26 21:48:01 2010 +0100
description:
Reduce memory usage for {mul,sqr}mod_bnm1.

diffstat:

 ChangeLog                    |   25 ++++++++
 gmp-impl.h                   |   19 ++++++-
 mpn/generic/binvert.c        |    7 ++-
 mpn/generic/invertappr.c     |    6 +-
 mpn/generic/mu_bdiv_q.c      |   14 ++++-
 mpn/generic/mu_bdiv_qr.c     |   14 ++++-
 mpn/generic/mu_div_qr.c      |    7 ++-
 mpn/generic/mu_divappr_q.c   |    7 ++-
 mpn/generic/mulmod_bnm1.c    |  121 +++++++++++++++++++++++-------------------
 mpn/generic/nussbaumer_mul.c |   15 +++-
 mpn/generic/redc_n.c         |    4 +-
 mpn/generic/sqrmod_bnm1.c    |   93 +++++++++++++++++++++-----------
 tests/mpn/t-mulmod_bnm1.c    |    6 +-
 tests/mpn/t-sqrmod_bnm1.c    |   40 +++++++------
 tune/speed.h                 |    4 +-
 15 files changed, 252 insertions(+), 130 deletions(-)

diffs (truncated from 734 to 300 lines):

diff -r 24e8cb9950c5 -r 962b60c834c7 ChangeLog
--- a/ChangeLog	Tue Jan 26 12:26:32 2010 +0100
+++ b/ChangeLog	Tue Jan 26 21:48:01 2010 +0100
@@ -1,3 +1,28 @@
+2010-01-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): Partial rewrite to
+	reduce memory usage.
+	* mpn/generic/sqrmod_bnm1.c (mpn_sqrmod_bnm1): Likewise.
+	(mpn_sqrmod_bnm1_next_size): New function.
+
+	* gmp-impl.h (mpn_mulmod_bnm1_itch): Accepts 3 parameters now.
+	(mpn_sqrmod_bnm1_itch): New inline function.
+	(mpn_sqrmod_bnm1_next_size): Declaration and mangling.
+	* mpn/generic/nussbaumer_mul.c: Use the new functions.
+
+	* mpn/generic/invertappr.c (mpn_ni_invertappr): Use new syntax for
+	mpn_mulmod_bnm1_itch.	
+	* mpn/generic/mu_divappr_q.c (mpn_mu_divappr_q_itch): Likewise.
+	* mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Likewise.
+	* mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Likewise.
+	* mpn/generic/mu_div_qr.c (mpn_mu_div_qr_itch): Likewise.
+	* mpn/generic/binvert.c (mpn_binvert_itch): Likewise.
+	* tune/speed.h (SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL): Likewise.
+	(SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED): Likewise.
+
+	* tests/mpn/t-sqrmod_bnm1.c, tests/mpn/t-mulmod_bnm1.c: Test
+	reduced memory usage.
+
 2010-01-25  Torbjorn Granlund  <tege at gmplib.org>
 
 	* tune/tuneup.c (INSERT_FFTTAB): New macro, like old insertion code but
diff -r 24e8cb9950c5 -r 962b60c834c7 gmp-impl.h
--- a/gmp-impl.h	Tue Jan 26 12:26:32 2010 +0100
+++ b/gmp-impl.h	Tue Jan 26 21:48:01 2010 +0100
@@ -948,10 +948,27 @@
 __GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
 #define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
 __GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
-#define mpn_mulmod_bnm1_itch(n) (2*(n) + 2*GMP_LIMB_BITS +2)
+static inline mp_size_t
+mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
+  mp_size_t n, itch;
+  n = rn >> 1;
+  itch = rn + 4 +
+    (an > n ? (bn > n ? rn : n) : 0);
+  return itch;
+}
 
 #define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
 __GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
+__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+static inline mp_size_t
+mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
+  mp_size_t n, itch;
+  n = rn >> 1;
+  itch = rn + 3 +
+    (an > n ? an : 0);
+  return itch;
+}
 
 typedef __gmp_randstate_struct *gmp_randstate_ptr;
 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/binvert.c
--- a/mpn/generic/binvert.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/binvert.c	Tue Jan 26 21:48:01 2010 +0100
@@ -52,7 +52,12 @@
 mpn_binvert_itch (mp_size_t n)
 {
   mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
-  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local);
+  /* FIXME: check for the correct estimate and remove #if */
+#if 0
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
+#else
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, itch_local, itch_local);
+#endif
   return itch_local + itch_out;
 }
 
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/invertappr.c
--- a/mpn/generic/invertappr.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/invertappr.c	Tue Jan 26 21:48:01 2010 +0100
@@ -190,8 +190,10 @@
   TMP_MARK;
 
   if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
-    tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mpn_mulmod_bnm1_next_size (n + 1)));
-
+    {
+      mn = mpn_mulmod_bnm1_next_size (n + 1);
+      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
+    }
   /* Use Newton's iterations to get the desired precision.*/
 
   /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3  limbs */
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_bdiv_q.c
--- a/mpn/generic/mu_bdiv_q.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_bdiv_q.c	Tue Jan 26 21:48:01 2010 +0100
@@ -234,7 +234,12 @@
       else
 	{
 	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn);
+	  /* FIXME: check for the correct estimate and remove #if */
+#if 1
+	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+#else
+	  itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
 	}
       itch_binvert = mpn_binvert_itch (in);
       itches = dn + tn + itch_out;
@@ -251,7 +256,12 @@
       else
 	{
 	  tn = mpn_mulmod_bnm1_next_size (qn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn);
+	  /* FIXME: check for the correct estimate and remove #if */
+#if 1
+	  itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
+#else
+	  itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
 	}
       itch_binvert = mpn_binvert_itch (in);
       itches = tn + itch_out;
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_bdiv_qr.c
--- a/mpn/generic/mu_bdiv_qr.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_bdiv_qr.c	Tue Jan 26 21:48:01 2010 +0100
@@ -254,7 +254,12 @@
       else
 	{
 	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn);
+	  /* FIXME: check for the correct estimate and remove #if */
+#if 1
+	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+#else
+	  itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
 	}
       itch_binvert = mpn_binvert_itch (in);
       itches = tn + itch_out;
@@ -271,7 +276,12 @@
       else
 	{
 	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn);
+	  /* FIXME: check for the correct estimate and remove #if */
+#if 1
+	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
+#else
+	  itch_out = mpn_mulmod_bnm1_itch (tn, tn, tn);
+#endif
 	}
     }
   itch_binvert = mpn_binvert_itch (in);
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_div_qr.c
--- a/mpn/generic/mu_div_qr.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_div_qr.c	Tue Jan 26 21:48:01 2010 +0100
@@ -403,8 +403,13 @@
 mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
 {
   mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
-  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local);
   mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+  /* FIXME: check for the correct estimate and remove #if */
+#if 1
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+#else
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, itch_local, itch_local);
+#endif
 
   return in + itch_local + itch_out;
 }
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mu_divappr_q.c
--- a/mpn/generic/mu_divappr_q.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mu_divappr_q.c	Tue Jan 26 21:48:01 2010 +0100
@@ -356,8 +356,13 @@
 mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
 {
   mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
-  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local);
   mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
+  /* FIXME: check for the correct estimate and remove #if */
+#if 1
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+#else
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, itch_local, itch_local);
+#endif
 
   return in + dn + itch_local + itch_out;
 }
diff -r 24e8cb9950c5 -r 962b60c834c7 mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c	Tue Jan 26 12:26:32 2010 +0100
+++ b/mpn/generic/mulmod_bnm1.c	Tue Jan 26 21:48:01 2010 +0100
@@ -82,9 +82,9 @@
  * implies (B^an-1)(B^bn-1) < (B^rn-1) .
  *
  * Requires 0 < bn <= an <= rn and an + bn > rn/2
- * Scratch need: rn + 2 + (need for recursive call OR rn + 2). This gives
+ * Scratch need: rn + (need for recursive call OR rn + 4). This gives
  *
- * S(n) <= rn + 2 + MAX (rn + 2, S(n/2)) <= 2rn + 2 log2 rn + 2
+ * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
  */
 void
 mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
@@ -139,40 +139,47 @@
 #define b0 bp
 #define b1 (bp + n)
 
-      /* FIXME: See if the need for temporary storage can be reduced
-       * in case an <= n or bn <= n. This matters for calls from
-       * mpn_nussbaumer_mul, since then bn <= n always and an <= n for
-       * balanced products. */
 #define xp  tp	/* 2n + 2 */
       /* am1  maybe in {xp, n} */
       /* bm1  maybe in {xp + n, n} */
-#define so (tp + 2*n + 2)
-      /* ap1  maybe in {so, n + 1} */
-      /* bp1  maybe in {so + n + 1, n + 1} */
+#define sp1 (tp + 2*n + 2)
+      /* ap1  maybe in {sp1, n + 1} */
+      /* bp1  maybe in {sp1 + n + 1, n + 1} */
 
       {
 	mp_srcptr am1, bm1;
 	mp_size_t anm, bnm;
+	mp_ptr so;
 
-	if( an > n ) {
-	  am1 = xp;
-	  cy = mpn_add (xp, a0, n, a1, an - n);
-	  MPN_INCR_U (xp, n, cy);
-	  anm = n;
-	} else {
-	  am1 = a0;
-	  anm = an;
-	}
-
-	if( bn > n ) {
-	  bm1 = xp + n;
-	  cy = mpn_add (xp + n, b0, n, b1, bn - n);
-	  MPN_INCR_U (xp + n, n, cy);
-	  bnm = n;
-	} else {
-	  bm1 = b0;
-	  bnm = bn;
-	}
+	if (LIKELY (an > n))
+	  {
+	    am1 = xp;
+	    cy = mpn_add (xp, a0, n, a1, an - n);
+	    MPN_INCR_U (xp, n, cy);
+	    anm = n;
+	    if (LIKELY (bn > n))
+	      {
+		bm1 = xp + n;
+		cy = mpn_add (xp + n, b0, n, b1, bn - n);
+		MPN_INCR_U (xp + n, n, cy);
+		bnm = n;
+		so = xp + 2*n;
+	      }
+	    else
+	      {
+		so = xp + n;
+		bm1 = b0;
+		bnm = bn;
+	      }
+	  }
+	else
+	  {
+	    so = xp;
+	    am1 = a0;
+	    anm = an;
+	    bm1 = b0;
+	    bnm = bn;
+	  }
 
 	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
       }
@@ -182,22 +189,22 @@
 	mp_srcptr ap1, bp1;
 	mp_size_t anp, bnp;
 
-	if( an > n ) {
-	  ap1 = so;
-	  cy = mpn_sub (so, a0, n, a1, an - n);
-	  so[n] = 0;
-	  MPN_INCR_U (so, n + 1, cy);
+	if (LIKELY (an > n)) {
+	  ap1 = sp1;
+	  cy = mpn_sub (sp1, a0, n, a1, an - n);
+	  sp1[n] = 0;
+	  MPN_INCR_U (sp1, n + 1, cy);
 	  anp = n + ap1[n];
 	} else {


More information about the gmp-commit mailing list