[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Jun 21 16:52:00 CEST 2026


details:   /var/hg/gmp/rev/d12777c7a6cf
changeset: 18519:d12777c7a6cf
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sat Jun 20 09:40:20 2026 +0200
description:
mpn/generic/bsqrtinv.c: Use the specialized step a little bit more often

details:   /var/hg/gmp/rev/4624f5b6682b
changeset: 18520:4624f5b6682b
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Jun 21 08:58:33 2026 +0200
description:
mpn/generic/bsqrtinv.c: ASSERTs and indentation

details:   /var/hg/gmp/rev/ad4ec26348e7
changeset: 18521:ad4ec26348e7
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Jun 21 16:42:48 2026 +0200
description:
mpn/generic/bsqrtinv.c: Yet another special case.

details:   /var/hg/gmp/rev/afed04e71f87
changeset: 18522:afed04e71f87
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Jun 21 16:51:47 2026 +0200
description:
mpn/generic/bsqrtinv.c: Rearrange branches and indent.

diffstat:

 mpn/generic/bsqrtinv.c |  116 ++++++++++++++++++++++++++++++------------------
 1 files changed, 73 insertions(+), 43 deletions(-)

diffs (154 lines):

diff -r 0d88f51aeda5 -r afed04e71f87 mpn/generic/bsqrtinv.c
--- a/mpn/generic/bsqrtinv.c	Fri Jun 19 08:59:38 2026 +0200
+++ b/mpn/generic/bsqrtinv.c	Sun Jun 21 16:51:47 2026 +0200
@@ -80,6 +80,9 @@
 {
   ASSERT (bnb > 0);
 
+#ifndef BSQRTINV_RP_NOT_ZEROED
+  ASSERT (mpn_zero_p (rp, 1 + bnb / GMP_NUMB_BITS));
+#endif
   if (bnb == 1)
     {
       if ((yp[0] & 3) != 1)
@@ -147,16 +150,17 @@
 #endif
 
       i = 0;
-      for (; bnb > GMP_NUMB_BITS; bnb = (bnb + 2) >> 1)
+      for (; bnb > GMP_NUMB_BITS + 1; bnb = (bnb + 2) >> 1)
 	order[i++] = bnb;
       if (bnb > precomputed_bits) {
-	if (bnb == GMP_NUMB_BITS) {
+	if (bnb >= GMP_NUMB_BITS) {
 	  mp_limb_t r0h = r0 >> 1;
-	  mp_limb_t r0sqm1 = r0h * (r0h + 1);
+	  /* We could gain the third bit with (r0h|1)*((r0h+1)>>1) */
+	  mp_limb_t r0sqm1 = r0h * (r0h + 1); /* r0*r0 >> 2 */
 	  mp_limb_t yh = (y0 >> 2) + (yp[1] << (GMP_NUMB_BITS - 2));
-	  mp_limb_t yrrm1d4 = y0 * r0sqm1 + yh;
+	  mp_limb_t yrrm1d4 = y0 * r0sqm1 + yh; /* (r0*r0*y0-1) >> 2 */
 	  ASSERT ((yrrm1d4 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits + 1))) == 0);
-	  mp_limb_t rt = r0 * yrrm1d4 - r0h - 1;
+	  mp_limb_t rt = r0 * yrrm1d4 - r0h - 1;  /* (r*(r0*r0*y0-1)/2 - r) >>1 */
 	  r0 = (rt << 1) ^ ((rt & GMP_LIMB_HIGHBIT) ? GMP_NUMB_MAX : CNST_LIMB(1));
 	} else {
 	  t0 = r0 * r0 * y0 >> 1;
@@ -164,53 +168,79 @@
 	  ASSERT ((t0 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits))) == 0);
 	}
       }
+
       if (i) {
 	mp_limb_t t4, t3, t2, t1, r1;
+
+	umul_ppmm (t1, t0, r0, r0); /* [t1,t0] <- r^2 */
+	if (bnb <= GMP_NUMB_BITS) {
+	  umul_ppmm (t3, t2, y0, t0);
+	  t3 += y0 * t1 + yp[1] * t0;
+	  t2 = ((t2 >> 1) | (t3 << (GMP_NUMB_BITS - 1))) & GMP_NUMB_MAX;
+	  t3 = t3 >> 1 ; /* [t3,t2] <- (r^2 y - 1) / 2 */
+
+	  /* [r1,t4] <- r (r^2 y - 1) / 2 */
+	  umul_ppmm (r1, t4, r0, t2);
+	  r1 += r0 * t3;
+
+	  /* r (r^2 y - 1) / 2 - r */
+	  sub_ddmmss(rp[1], rp[0], r1, t4, 0, r0);
+	} else {
+	  t0 = (t0 >> 2) | (t1 << (GMP_NUMB_BITS -2));
+	  t1 = (t1 >> 2); /* [t1,t0] = r0*r0 >> 2 */
+	  umul_ppmm (t3, t2, y0, t0);
+	  t3 += y0 * t1 + yp[1] * t0;
+	  t3 += (yp[1] >> 2) + (yp[2] << (GMP_NUMB_BITS - 2)) + (t2 != 0);
+	  ASSERT (t2 + (y0 >> 2) + (yp[1] << (GMP_NUMB_BITS - 2)) == 0);
+	  /* [t3,0] <- (r0^2 y - 1) / 2 / 2 */
+
+	  t4 = t3 * r0 - 1;
+	  /* [2*t4+1,-r0] <- r0*(r0^2 y-1)/2 - r0 */
+	  if (t4 & GMP_LIMB_HIGHBIT) {
+	    *rp = r0 & GMP_NUMB_MAX;
+	    rp[1] = ~t4 << 1;
+	  } else {
+	    *rp = -r0 & GMP_NUMB_MAX;
+	    rp[1] = (t4 << 1) ^ 1;
+	  }
+#ifdef BSQRTINV_RP_NOT_ZEROED
+	  rp[2] = 0;
+#endif
+	}
 	--i;
 
-	umul_ppmm (t1, t0, r0, r0);
-	umul_ppmm (t3, t2, y0, t0);
-	t3 += y0 * t1 + yp[1] * t0;
-	t2 = ((t2 >> 1) | (t3 << (GMP_NUMB_BITS - 1))) & GMP_NUMB_MAX;
-	t3 = t3 >> 1; /* [t3,t2] <- (rp^2 y - 1) / 2 */
+	for (bn = 2 + (bnb > GMP_NUMB_BITS); --i >= 0;)
+	  {
+	    mp_size_t pbn = bn;
+	    mpn_sqr (tp, rp, bn); /* tp <- r^2 */
+
+	    bnb = order[i];
+	    bn = 1 + bnb / GMP_LIMB_BITS;
+
+	    mpn_mullo_n (tp2, yp, tp, bn); /* tp2 <- rp^2 y */
+	    ASSERT (tp2[0] == CNST_LIMB (1));
+	    ASSERT (pbn == 2 || mpn_zero_p (tp2 + 1, pbn - 2));
+	    /* tp2 <- (rp^2 y - 1) / 2 (skip the lowest limbs) */
+	    ASSERT_NOCARRY (mpn_rshift (tp2 + pbn - 1, tp2 + pbn - 1, bn - pbn + 1, 1));
 
-	/* [r1,t4] <- r (r^2 y - 1) / 2 */
-	umul_ppmm (r1, t4, r0, t2);
-	r1 += r0 * t3;
+	    /* tp <- r (r^2 y - 1) / 2 (only the relevant limbs) */
+#ifdef BSQRTINV_RP_NOT_ZEROED
+	    rp [pbn] = 0;
+#endif
+	    ASSERT (pbn >= bn - pbn + 1 || (pbn == bn - pbn && rp [pbn] == 0));
+	    mpn_mullo_n (tp, rp, tp2 + pbn - 1, bn - pbn + 1);
 
-	/* r (r^2 y - 1) / 2 - r */
-	sub_ddmmss(rp[1], rp[0], r1, t4, 0, r0);
+	    if (rp[pbn - 1] < tp[0])
+	      mpn_com (rp + pbn, tp + 1, bn - pbn);
+	    else
+	      mpn_neg (rp + pbn, tp + 1, bn - pbn);
+
+	    rp[pbn - 1] -= tp[0];
+	    /* rp <- r - r (r^2 y - 1) / 2 */
+	  }
       } else {
 	*rp = r0 & GMP_NUMB_MAX;
-	return 1;
       }
-
-      for (bn = 2; --i >= 0;)
-	{
-	  mp_size_t pbn = bn;
-	  mpn_sqr (tp, rp, bn); /* tp <- r^2 */
-
-	  bnb = order[i];
-	  bn = 1 + bnb / GMP_LIMB_BITS;
-
-	  mpn_mullo_n (tp2, yp, tp, bn); /* tp2 <- rp^2 y */
-	  ASSERT (tp2[0] == CNST_LIMB (1));
-	  ASSERT (pbn == 2 || mpn_zero_p (tp2 + 1, pbn - 2));
-	  /* tp2 <- (rp^2 y - 1) / 2 (skip the lowest limbs) */
-	  mpn_rshift (tp2 + pbn - 1, tp2 + pbn - 1, bn - pbn + 1, 1);
-
-	  /* tp <- r (r^2 y - 1) / 2 (only the relevant limbs) */
-	  rp [pbn] = 0;
-	  mpn_mullo_n (tp, rp, tp2 + pbn - 1, bn - pbn + 1);
-
-	  if (rp[pbn - 1] < tp[0])
-	    mpn_com (rp + pbn, tp + 1, bn - pbn);
-	  else
-	    mpn_neg (rp + pbn, tp + 1, bn - pbn);
-
-	  rp[pbn - 1] -= tp[0];
-	  /* rp <- r - r (r^2 y - 1) / 2 */
-	}
     }
   return 1;
 }


More information about the gmp-commit mailing list