[Gmp-commit] /home/hgfiles/gmp: Reduce temporary allocated memory in mpn_fib2...

mercurial at gmplib.org mercurial at gmplib.org
Tue Dec 1 19:26:46 CET 2009


details:   /home/hgfiles/gmp/rev/539c0a302182
changeset: 12952:539c0a302182
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Dec 01 19:26:40 2009 +0100
description:
Reduce temporary allocated memory in mpn_fib2_ui.

diffstat:

 ChangeLog             |   2 +
 mpn/generic/fib2_ui.c |  76 +++++++++++++++++++++-----------------
 2 files changed, 44 insertions(+), 34 deletions(-)

diffs (151 lines):

diff -r f7be00af25be -r 539c0a302182 ChangeLog
--- a/ChangeLog	Tue Dec 01 18:18:17 2009 +0100
+++ b/ChangeLog	Tue Dec 01 19:26:40 2009 +0100
@@ -2,6 +2,8 @@
 
 	* mpn/generic/toom53_mul.c: Removed double computation of vinf.
 
+	* mpn/generic/fib2_ui.c: Reduce the amount of temporary storage.
+
 2009-12-01  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/dcpi1_bdiv_qr.c (mpn_dcpi1_bdiv_qr_n_itch): Add pi1 also
diff -r f7be00af25be -r 539c0a302182 mpn/generic/fib2_ui.c
--- a/mpn/generic/fib2_ui.c	Tue Dec 01 18:18:17 2009 +0100
+++ b/mpn/generic/fib2_ui.c	Tue Dec 01 19:26:40 2009 +0100
@@ -4,7 +4,7 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -50,17 +50,11 @@
    This property of F[4m+3] can be verified by induction on F[4m+3] =
    7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
    identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
-
-   Enhancements:
-
-   If there was an mpn_addlshift, it'd be possible to eliminate the yp
-   temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
-   fp+=2*(-1)^n, etc.  */
+*/
 
 mp_size_t
 mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
 {
-  mp_ptr         xp, yp;
   mp_size_t      size;
   unsigned long  nfirst, mask;
 
@@ -82,16 +76,15 @@
   if (mask != 1)
     {
       mp_size_t  alloc;
+      mp_ptr        xp;
       TMP_DECL;
 
       TMP_MARK;
       alloc = MPN_FIB2_SIZE (n);
-      TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
+      xp = TMP_ALLOC_LIMBS (alloc);
 
       do
 	{
-	  mp_limb_t  c;
-
 	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
 	     n&mask upwards.
 
@@ -113,50 +106,65 @@
 	     worth bothering checking for it */
 	  ASSERT (alloc >= 2*size);
 	  mpn_sqr_n (xp, fp,  size);
-	  mpn_sqr_n (yp, f1p, size);
+	  mpn_sqr_n (fp, f1p, size);
 	  size *= 2;
 
 	  /* Shrink if possible.  Since fp was normalized there'll be at
 	     most one high zero on xp (and if there is then there's one on
 	     yp too).  */
-	  ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
+	  ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
 	  size -= (xp[size-1] == 0);
 	  ASSERT (xp[size-1] != 0);  /* only one xp high zero */
 
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
+	  f1p[size] = mpn_add_n (f1p, xp, fp, size);
+
 	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
 	     n&mask is the low bit of our implied k.  */
+	  /* FIXME: enable rsblsh_n if correct */
+#if HAVE_NATIVE_mpn_rsblsh2_n || (HAVE_NATIVE_mpn_rsblsh_n && 0)
 #if HAVE_NATIVE_mpn_rsblsh2_n
-	  fp[size] = mpn_rsblsh2_n (fp, yp, xp, size);
-	  if ((n & mask) == 0) MPN_INCR_U(fp, size + 1, 2);
-	  c = fp[size];
+	  fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
+#else /* HAVE_NATIVE_mpn_rsblsh_n */
+	  fp[size] = mpn_rsblsh_n (fp, fp, xp, size, 2);
+#endif
+	  if ((n & mask) == 0)
+	    MPN_INCR_U(fp, size + 1, 2);	/* possible +2 */
+	  else
+	  {
+	    ASSERT (fp[0] <= 2);
+	    fp[0] -= 2;				/* possible -2 */
+	  }
 #else
-	  c = mpn_lshift (fp, xp, size, 2);
-	  fp[0] |= (n & mask ? 0 : 2);	 /* possible +2 */
-	  c -= mpn_sub_n (fp, fp, yp, size);
-	  fp[size] = c;
+	  {
+	    mp_limb_t  c;
+
+	    c = mpn_lshift (xp, xp, size, 2);
+	    xp[0] |= (n & mask ? 0 : 2);	/* possible +2 */
+	    c -= mpn_sub_n (fp, xp, fp, size);
+	    ASSERT (n & mask ? fp[0] != 0 && fp[0] != 1 : 1);
+	    fp[0] -= (n & mask ? 2 : 0);	/* possible -2 */
+	    fp[size] = c;
+	  }
 #endif
-	  ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
-	  fp[0] -= (n & mask ? 2 : 0);	 /* possible -2 */
 	  ASSERT (alloc >= size+1);
-	  xp[size] = 0;
-	  yp[size] = 0;
-	  size += (c != 0);
-
-	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
-	     F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
-	  ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
+	  size += (fp[size] != 0);
 
 	  /* now n&mask is the new bit of n being considered */
 	  mask >>= 1;
 
 	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
 	     F[2k+1] and F[2k-1].  */
-	  ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
+	  if (n & mask)
+	    ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
+	  else {
+	    ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
 
-	  /* Can have a high zero after replacing F[2k+1] with F[2k].
-	     f1p will have a high zero if fp does. */
-	  ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
-	  size -= (fp[size-1] == 0);
+	    /* Can have a high zero after replacing F[2k+1] with F[2k].
+	       f1p will have a high zero if fp does. */
+	    ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+	    size -= (fp[size-1] == 0);
+	  }
 	}
       while (mask != 1);
 


More information about the gmp-commit mailing list