[PATCH] Fibonacci/Lucas tweaks

Fri Feb 13 05:36:23 CET 2026

Here's the results of my poking at the Lucas number evaluation code.

----8<----
Patch 1/3: mpz_lucnum_ui: Use a simpler Lucas formula.
    
The code previously used
L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*{-1)^k
        = 5*F[k-1]*F[k+2]          - 4*{-1)^k
But it's much simpler to ask for the next Fibonacci pair and use
L[2k+1] = 5*F[k]*F[k+1] + (-1)^k
    
The diff is larger that expected because this also swaps the
order of tests in the loop, so the test for the FIB_TABLE comes first.
    
The first hunk shrinks the temporary buffer size slightly, saving
one limb much of the time.  (It's still a bit generous.  Should I
write a dedicated MPN_LUCAS_SIZE() macro?)

diff --git a/mpz/lucnum_ui.c b/mpz/lucnum_ui.c
index 4213bb790..c6c4d074d 100644
--- a/mpz/lucnum_ui.c
+++ b/mpz/lucnum_ui.c
@@ -72,10 +72,10 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
       return;
     }
 
-  /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
-     since square or mul used below might need an extra limb over the true
-     size */
-  lalloc = MPN_FIB2_SIZE (n) + 2;
+  /* Inner +2 since L[n] = F[n+1] + F[n-1] < F[n+1] + F[n] = F[n+2].
+     Outer +1 since square or mul used below might need an extra limb
+     over the true size.  */
+  lalloc = MPN_FIB2_SIZE (n+2) + 1;
   lp = MPZ_NEWALLOC (ln, lalloc);
 
   TMP_MARK;
@@ -85,45 +85,46 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
   /* Strip trailing zeros from n, until either an odd number is reached
      where the L[2k+1] formula can be used, or until n fits within the
      FIB_TABLE data.  The table is preferred of course.  */
-  zeros = 0;
-  for (;;)
+  for (zeros = 0;; zeros++)
     {
-      if (n & 1)
+      if (n <= FIB_TABLE_LUCNUM_LIMIT)
 	{
-	  /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
+	  /* L[n] = F[n] + 2F[n-1] */
+	  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
+	  lsize = 1;
+
+	  TRACE (printf ("  initial small n=%lu\n", n);
+		 mpn_trace ("  l",lp, lsize));
+	  break;
+	}
+      else if (n % 2 == 0)
+        {
+          MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
+          n /= 2;
+	}
+      else
+	{
+	  /* L[2k+1] = 5*F[k]*F[k+1] + (-1)^k */
 
 	  mp_size_t  yalloc, ysize;
 	  mp_ptr     yp;
 
 	  TRACE (printf ("  initial odd n=%lu\n", n));
 
-	  yalloc = MPN_FIB2_SIZE (n/2);
+	  yalloc = MPN_FIB2_SIZE ((n+1)/2);
 	  yp = TMP_ALLOC_LIMBS (yalloc);
 	  ASSERT (xalloc >= yalloc);
 
-	  xsize = mpn_fib2_ui (xp, yp, n/2);
+	  xsize = mpn_fib2_ui (xp, yp, (n+1)/2);
+	  ASSERT (xsize <= yalloc);
 
 	  /* possible high zero on F[k-1] */
-	  ysize = xsize;
-	  ysize -= (yp[ysize-1] == 0);
+	  ysize = xsize - (yp[xsize-1] == 0);
 	  ASSERT (yp[ysize-1] != 0);
 
-	  /* xp = 2*F[k] + F[k-1] */
-#if HAVE_NATIVE_mpn_addlsh1_n
-	  c = mpn_addlsh1_n (xp, yp, xp, xsize);
-#else
-	  c = mpn_lshift (xp, xp, xsize, 1);
-	  c += mpn_add_n (xp, xp, yp, xsize);
-#endif
-	  ASSERT (xalloc >= xsize+1);
-	  xp[xsize] = c;
-	  xsize += (c != 0);
-	  ASSERT (xp[xsize-1] != 0);
-
 	  ASSERT (lalloc >= xsize + ysize);
 	  c = mpn_mul (lp, xp, xsize, yp, ysize);
-	  lsize = xsize + ysize;
-	  lsize -= (c == 0);
+	  lsize = xsize + ysize - (c == 0);
 
 	  /* lp = 5*lp */
 #if HAVE_NATIVE_mpn_addlsh2_n
@@ -137,37 +138,22 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
 	  lp[lsize] = c;
 	  lsize += (c != 0);
 
-	  /* lp = lp - 4*(-1)^k */
+	  /* lp = lp + (-1)^k */
 	  if (n & 2)
 	    {
-	      /* no overflow, see comments above */
-	      ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
-	      lp[0] += 4;
+	      /* won't go negative */
+	      MPN_DECR_U (lp, lsize, CNST_LIMB(1));
 	    }
 	  else
 	    {
-	      /* won't go negative */
-	      MPN_DECR_U (lp, lsize, CNST_LIMB(4));
+	      /* no overflow, result is never == 0 mod 8 */
+	      ASSERT (lp[0] % 8 != 7);
+	      lp[0]++;
 	    }
 
 	  TRACE (mpn_trace ("  l",lp, lsize));
 	  break;
 	}
-
-      MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
-      zeros++;
-      n /= 2;
-
-      if (n <= FIB_TABLE_LUCNUM_LIMIT)
-	{
-	  /* L[n] = F[n] + 2F[n-1] */
-	  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
-	  lsize = 1;
-
-	  TRACE (printf ("  initial small n=%lu\n", n);
-		 mpn_trace ("  l",lp, lsize));
-	  break;
-	}
     }
 
   for ( ; zeros != 0; zeros--)

----8<----
Patch 2/3: MPN_FIB2_SIZE: Prepare for tighter approximation.

There are a couple of places where the currenr code would
not work except that MPN_FIB2_SIZE is overgenerous:

In mpz_lucnum2_ui, the allocated result buffer was MPN_FIB2_SIZE(n).
This is large enough for F[n], but not L[n] = F[n+1] + F[n-1].
Instead, alocate MPN_FIB2_SIZE(n+2), which solves the problem.

L[n] = F[n+1] + F[n-1] < F[n+1] + F[n] = F[n+2].

More controversially, this changes the t-fib_ui self-test to not
test MPN_FIB2_SIZE(1).  The upcoming tighter approximation is
wrong in that case (it returns 0 when it should be 1), but this
is never actually used in GMP; single-limb Fibonacci and Lucas
numbers use the FIB_TABLE.

So it's not worth loosening the approximation, or adding special-
case handling for this case.

diff --git a/mpz/lucnum2_ui.c b/mpz/lucnum2_ui.c
index 3cc3d7b75..554aba0b0 100644
--- a/mpz/lucnum2_ui.c
+++ b/mpz/lucnum2_ui.c
@@ -61,7 +61,7 @@ mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
     }
 
   TMP_MARK;
-  size = MPN_FIB2_SIZE (n);
+  size = MPN_FIB2_SIZE (n+2);
   f1p = TMP_ALLOC_LIMBS (size);
 
   lp  = MPZ_NEWALLOC (ln,     size+1);
diff --git a/tests/mpz/t-fib_ui.c b/tests/mpz/t-fib_ui.c
index ec9425cdd..76417507f 100644
--- a/tests/mpz/t-fib_ui.c
+++ b/tests/mpz/t-fib_ui.c
@@ -102,15 +102,17 @@ main (int argc, char *argv[])
         }
 
       /* check MPN_FIB2_SIZE seems right, compared to actual size and
-         compared to our float formula */
-      if (MPN_FIB2_SIZE (n) < MPZ_FIB_SIZE_FLOAT (n))
+	 compared to our float formula.  MPN_FIB2_SIZE has a known problem
+	 when n == 1 (it returns 0 rather than 1) which doesn't affect
+	 any real caller, so we accomodate it in the test. */
+      if (MPN_FIB2_SIZE (n) + (n == 1) < MPZ_FIB_SIZE_FLOAT (n))
         {
           printf ("MPN_FIB2_SIZE wrong at n=%lu\n", n);
           printf ("  MPN_FIB2_SIZE       %ld\n", MPN_FIB2_SIZE (n));
           printf ("  MPZ_FIB_SIZE_FLOAT  %ld\n", MPZ_FIB_SIZE_FLOAT (n));
           abort ();
         }
-      if (MPN_FIB2_SIZE (n) < SIZ(want_fn))
+      if (MPN_FIB2_SIZE (n) + (n == 1) < SIZ(want_fn))
         {
           printf ("MPN_FIB2_SIZE wrong at n=%lu\n", n);
           printf ("  MPN_FIB2_SIZE  %ld\n", MPN_FIB2_SIZE (n));

----8<----
Patch 3/3:    MPN_FIB2_SIZE: Tighten approximation.

It's the same basic approximation based on F[n] ~= phi^n/sqrt(5),
so log2(F[n]) ~= n*0.69424191363 - 1.16096404744, but rather
than approximating that as n*23/32 = n*0.71787, this now uses
(n*711 - 1188)/1024 = n*0.6943359375 - 1.16015625.  Two
adjustments to this are made:
1) The return value is in limbs, which is 1+floor(log2/GMP_NUMB_BITS)
2) The fib code internally computes F[2n] via F[2n+1], so always
   set the lsbit of n to leave room for this extra.

The division by GMP_NUMB_BITS is folded into the numerator to ensure
the denominator is a power of 2, so if there are nail bits, the
multiplier will be slightly different (but still correct).

The one huge gotcha about including the negative constatnt term is
that F[1]=1 is estimated with a valus less than 1, so the log2 is
less than 0, which rounds up to 0 limbs.  Rather than fix this at
a cost in accuracy or speed, observe that this never happens (all
one-limb Fibonacci computations are special-cased to a lookup table)
and just document that the estimate is wrong in this case.

If this turns out to be unacceptable, the constant term can be reduced
to equal the negative of the linear term, which will waste 0.46582 bits
but produce the correct result.

diff --git a/gmp-impl.h b/gmp-impl.h
index c347169f1..7cf528ca2 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -2078,35 +2078,62 @@ _mpz_newalloc (mpz_ptr z, mp_size_t n)
 #define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
 
 
-/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
-   f1p.
+/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp
+   and f1p.  It is WRONG (returns 0 rather than 1) for n == 1, but we
+   never actually use those the mpn functions for that; the mpz wrapper
+   takes small values come from the FIB_TABLE instead.
 
    From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
-   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
-   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
+   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So
+   log_2(F[n]) = n * log_2((1+sqrt(5))/2) - log_2(sqrt(5)
+               = n * 0.6942419            - 1.160964,
+   and with two exceptions the number of bits required is 1 more than the
+   floor of that.
 
-   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
-   without good floating point.  There's +2 for rounding up, and a further
-   +2 since at the last step x limbs are doubled into a 2x+1 limb region
-   whereas the actual F[2k] value might be only 2x-1 limbs.
+   The exceptions are F[1] = 1 and F[3] = 2, which round up to a power
+   of 2 so the logarithm of the unrounded approximation is too low.
 
-   Note that a division is done first, since on a 32-bit system it's at
-   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
-   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
-   whatever a multiply of two 190Mbyte numbers takes.)
+   This can't be an issue anwhere else because only odd n round up
+   (even n round down), and F[2k+1] is always congruent to 1 or 2 mod 4,
+   so no other F[2k+1] can be a power of 2.  (F[n] mod 4 is determined
+   by n mod 6, and only F[6k] == 0 mod 4.)
 
-   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
-   worked into the multiplier.  */
+   F[3] = 2 can't be a problem because we still compute 1 bit, and since
+   GMP_NUMB_BITS > 1, that rounds up to 1 limb, which is correct.
 
-#define MPN_FIB2_SIZE(n) \
-  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
+   F[1] = 1, however, *is* a problem.  The approximation returns 0.7236 < 1,
+   which is -0.4667 bits, and rounds up to 0 limbs.  Fortunately, we don't
+   need this macro for that; n <= FIB_TABLE_LIMIT is special-cased in the
+   code.  But it's definitely a gotcha when changing the code.
 
+   We use a rational approximation of (n*711 - 1188)/1024 = n*0.6943359375
+   - 1.16015625 for efficient calculation on CPUs without good floating
+   point.  But we also include the division by GMP_NUMB_BITS, so on a 64-bit
+   system, it's (n*711 - 1188)/65536.  For other limb sizes, the denominator
+   of 65536 is kept, and the numerator is adjusted accordingly.
 
-/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
-   -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
+   Then we add 1 limb to correct for division rounding down, and we always
+   round n up to odd because our algorithm for computing even Fibonacci
+   numbers F[2k] computes F[2k+1] first.
 
-   FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
-   F[n] + 2*F[n-1] fits in a limb.  */
+   This must be done in two parts to avoid intermediate overflow, since
+   on a 32-bit system it's at least conceivable to go right up to
+   n==ULONG_MAX.  (F[2^32-1] would be about 373Mbytes, plus temporary
+   workspace of about 1.2Gbytes in the Fibonacci computation and whatever
+   a multiply of two 186Mbyte numbers takes.)  */
+#define MPN_FIB2_MULT \
+  ((unsigned)(0.6942419136306173017387902668986 * 65536 / GMP_NUMB_BITS) + 1)
+#define MPN_FIB2_SUB \
+   (unsigned)(1.1609640474436811739351597147447 * 65536 / GMP_NUMB_BITS)
+#define MPN_FIB2_SIZE(n) (1 + \
+   (mp_size_t)( (n)    / 65536 * MPN_FIB2_MULT) + \
+   (mp_size_t)(((n)|1) % 65536 * MPN_FIB2_MULT + 65536-MPN_FIB2_SUB) / 65536)
+
+/* FIB_TABLE(n) returns the 1-limb Fibonacci number F[n].  Must have n in
+   the range -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
+
+   FIB_TABLE_LUCNUM_LIMIT (also in fib_table.h) is the largest n for which
+   L[n] = F[n+1] + F[n-1] fits in a limb.  */
 
 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
 #define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])