[Gmp-commit] /var/hg/gmp: mpn/generic/mul_fft.c: Remove an unused branch and ...

mercurial at gmplib.org mercurial at gmplib.org
Sun Mar 6 15:40:58 CET 2022


details:   /var/hg/gmp/rev/c4f5f7fe466b
changeset: 18320:c4f5f7fe466b
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Mar 06 15:38:39 2022 +0100
description:
mpn/generic/mul_fft.c: Remove an unused branch and slightly improve carry propagation

diffstat:

 mpn/generic/mul_fft.c |  58 +++++++++++++++++++++++++++++---------------------
 1 files changed, 34 insertions(+), 24 deletions(-)

diffs (160 lines):

diff -r 6ff25532f2a4 -r c4f5f7fe466b mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c	Fri Mar 04 10:03:38 2022 +0100
+++ b/mpn/generic/mul_fft.c	Sun Mar 06 15:38:39 2022 +0100
@@ -237,14 +237,14 @@
 
       r[n] = 0;
       /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
-      cc++;
-      mpn_incr_u (r, cc);
+      ++cc;
+      MPN_INCR_U (r, n + 1, cc);
 
-      rd++;
+      ++rd;
       /* rd might overflow when sh=GMP_NUMB_BITS-1 */
-      cc = (rd == 0) ? 1 : rd;
+      cc = rd + (rd == 0);
       r = r + m + (rd == 0);
-      mpn_incr_u (r, cc);
+      MPN_INCR_U (r, n + 1 - m - (rd == 0), cc);
     }
   else
     {
@@ -284,7 +284,10 @@
       r[n] = -mpn_sub_1 (r + m, r + m, n - m, cc);
       r[n] -= mpn_sub_1 (r + m, r + m, n - m, rd);
       if (r[n] & GMP_LIMB_HIGHBIT)
-	r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
+	{
+	  r[n] = 0;
+	  MPN_INCR_U (r, n + 1, CNST_LIMB(1));
+	}
     }
 }
 
@@ -560,7 +563,9 @@
 	      */
 	      tp[0] += cc;
 	    }
-	  a[n] = mpn_sub_n (a, tp, tpn, n) && mpn_add_1 (a, a, n, CNST_LIMB(1));
+	  cc = mpn_sub_n (a, tp, tpn, n);
+	  a[n] = 0;
+	  MPN_INCR_U (a, n + 1, cc);
 	}
     }
   TMP_FREE;
@@ -657,8 +662,7 @@
     }
 
   /* remains to subtract {ap+n, l} from {rp, n+1} */
-  cc = mpn_sub_n (rp, rp, ap + n, l);
-  rpn -= mpn_sub_1 (rp + l, rp + l, n - l, cc);
+  rpn -= mpn_sub (rp, rp, n, ap + n, l);
   if (rpn < 0) /* necessarily rpn = -1 */
     rpn = mpn_add_1 (rp, rp, n, CNST_LIMB(1));
   return rpn;
@@ -684,12 +688,18 @@
   if (nl > Kl) /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+1 */
     {
       mp_size_t dif = nl - Kl;
-      mp_limb_signed_t cy;
 
       tmp = TMP_BALLOC_LIMBS(Kl + 1);
+      tmp[Kl] = 0;
 
-      if (dif > Kl)
+#if ! WANT_OLD_FFT_FULL
+      ASSERT_ALWAYS (dif <= Kl);
+#else
+      /* The comment "We must have nl <= 2*K*l." says that
+	 ((dif = nl - Kl) > Kl) should never happen. */
+      if (UNLIKELY (dif > Kl))
 	{
+	  mp_limb_signed_t cy;
 	  int subp = 0;
 
 	  cy = mpn_sub_n (tmp, n, n + Kl, Kl);
@@ -713,16 +723,20 @@
 	  else
 	    cy -= mpn_add (tmp, tmp, Kl, n, dif);
 	  if (cy >= 0)
-	    cy = mpn_add_1 (tmp, tmp, Kl, cy);
+	    MPN_INCR_U (tmp, Kl + 1, cy);
 	  else
-	    cy = mpn_sub_1 (tmp, tmp, Kl, -cy);
+	    {
+	      tmp[Kl] = 1;
+	      MPN_DECR_U (tmp, Kl + 1, -cy - 1);
+	    }
 	}
       else /* dif <= Kl, i.e. nl <= 2 * Kl */
+#endif
 	{
+	  mp_limb_t cy;
 	  cy = mpn_sub (tmp, n, Kl, n + Kl, dif);
-	  cy = mpn_add_1 (tmp, tmp, Kl, cy);
+	  MPN_INCR_U (tmp, Kl + 1, cy);
 	}
-      tmp[Kl] = cy;
       nl = Kl + 1;
       n = tmp;
     }
@@ -755,7 +769,7 @@
 
 static mp_limb_t
 mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
-		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr unusedA, mp_ptr B,
 		      mp_size_t nprime, mp_size_t l, mp_size_t Mp,
 		      int **fft_l, mp_ptr T, int sqr)
 {
@@ -797,9 +811,7 @@
 
       j = (K - i) & (K - 1);
 
-      if (mpn_add_n (n, n, Bp[j], nprime + 1))
-	cc += mpn_add_1 (n + nprime + 1, n + nprime + 1,
-			  pla - sh - nprime - 1, CNST_LIMB(1));
+      cc += mpn_add (n, n, pla - sh, Bp[j], nprime + 1);
       T[2 * l] = i + 1; /* T = (i + 1)*2^(2*M) */
       if (mpn_cmp (Bp[j], T, nprime + 1) > 0)
 	{ /* subtract 2^N'+1 */
@@ -825,8 +837,7 @@
 	}
       else
 	{
-	  cc = mpn_sub_1 (p + pla - pl, p + pla - pl, pl, cc);
-	  ASSERT (cc == 0);
+	  MPN_DECR_U (p + pla - pl, pl, cc);
 	}
     }
   else
@@ -918,18 +929,17 @@
 
   A = TMP_BALLOC_LIMBS (K * (nprime + 1));
   Ap = TMP_BALLOC_MP_PTRS (K);
+  Bp = TMP_BALLOC_MP_PTRS (K);
   mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
   if (sqr)
     {
       mp_size_t pla;
       pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
       B = TMP_BALLOC_LIMBS (pla);
-      Bp = TMP_BALLOC_MP_PTRS (K);
     }
   else
     {
       B = TMP_BALLOC_LIMBS (K * (nprime + 1));
-      Bp = TMP_BALLOC_MP_PTRS (K);
       mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
     }
   h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
@@ -1039,6 +1049,6 @@
   ASSERT_MPN_ZERO_P (pad_op + pl - pl3, pl2 + pl3 - pl);
   __GMP_FREE_FUNC_LIMBS (pad_op, pl2);
   /* since the final result has at most pl limbs, no carry out below */
-  mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
+  MPN_INCR_U (op + pl2, pl - pl2, (mp_limb_t) c2);
 }
 #endif


More information about the gmp-commit mailing list