[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Jun 19 09:01:37 CEST 2026


details:   /var/hg/gmp/rev/73dc22bc5eb7
changeset: 18513:73dc22bc5eb7
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jun 18 08:20:53 2026 +0200
description:
mpn/generic/bsqrtinv.c: Copyright and cosmetic changes

details:   /var/hg/gmp/rev/4bdafaa1f0cf
changeset: 18514:4bdafaa1f0cf
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Jun 19 07:35:09 2026 +0200
description:
tune: Copyright years

details:   /var/hg/gmp/rev/472b830c116c
changeset: 18515:472b830c116c
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Jun 19 07:59:13 2026 +0200
description:
mpn/generic/bsqrtinv.c: Use r' <-- -(r-r(r^2 y-1)/2) sometimes

details:   /var/hg/gmp/rev/eeb8830e47e2
changeset: 18516:eeb8830e47e2
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Jun 19 08:11:23 2026 +0200
description:
mpn/generic/bsqrtinv.c: Use mullo to avoid compute zeros.

details:   /var/hg/gmp/rev/1df826dbcdf1
changeset: 18517:1df826dbcdf1
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Jun 19 08:38:52 2026 +0200
description:
mpn/generic/bsqrtinv.c: Another special case for (bnb == GMP_NUMB_BITS)...

details:   /var/hg/gmp/rev/0d88f51aeda5
changeset: 18518:0d88f51aeda5
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Jun 19 08:59:38 2026 +0200
description:
ChangeLog

diffstat:

 ChangeLog              |   11 +++++
 mpn/generic/bsqrtinv.c |  102 +++++++++++++++++++++++++-----------------------
 tune/common.c          |    3 +-
 tune/speed.c           |    2 +-
 tune/speed.h           |    4 +-
 5 files changed, 69 insertions(+), 53 deletions(-)

diffs (226 lines):

diff -r 331e66376226 -r 0d88f51aeda5 ChangeLog
--- a/ChangeLog	Thu Jun 18 01:23:30 2026 +0200
+++ b/ChangeLog	Fri Jun 19 08:59:38 2026 +0200
@@ -1,3 +1,14 @@
+2026-06-19  Marco Bodrato <marco.bodrato at protonmail.com>
+
+	* mpn/generic/perfpow.c (is_kth_power): Check only one possible
+	square root.
+
+	* tune/speed.c: Recognise mpn_bsqrtinv.
+	* tune/speed.h: Declare speed_mpn_bsqrtinv and the macro.
+	* tune/common.c (speed_mpn_bsqrtinv): Implement the function.
+
+	* mpn/generic/bsqrtinv.c: Rewrite with a differen iteration.
+
 2026-04-04  Marc Glisse <marc.glisse at inria.fr>
 
 	* gmpxx.h (operator""): Remove deprecated space.
diff -r 331e66376226 -r 0d88f51aeda5 mpn/generic/bsqrtinv.c
--- a/mpn/generic/bsqrtinv.c	Thu Jun 18 01:23:30 2026 +0200
+++ b/mpn/generic/bsqrtinv.c	Fri Jun 19 08:59:38 2026 +0200
@@ -1,8 +1,9 @@
 /* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
 
-   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c),
+   optimized by Marco Bodrato.
 
-Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012, 2015, 2026 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -37,48 +38,41 @@
    Return non-zero if such an integer r exists.
 
    Iterates
-     r' <-- (3r - r^3 y) / 2
+     r' <-- r - r (r^2 y - 1) / 2 , (or its negation, sometimes)
    using Hensel lifting.  Since we divide by two, the Hensel lifting is
    somewhat degenerates.  Therefore, we lift from 2^b to 2^{b+1}-1.
 
    FIXME:
      (1) Simplify to do precision book-keeping in limbs rather than bits.
 
-     (2) Rewrite iteration as
-	   r' <-- r - r (r^2 y - 1) / 2  [Done]
-	 and take advantage of zero low part of r^2 y - 1. [Done]
+     (2) Take advantage of zero low part of r^2 y - 1.
 
      (3) Use wrap-around trick.
-
-     (4) Use a small table to get starting value. [Done]
 */
 
+#ifndef BSQRTINV_DONT_USE_TABLE
 /* Generated with GP-Pari:
    b=8;v=Vecsmall(-binary(2^2^b-1));
    forstep(i=1,2^(b+1),2,v[lift(Mod(i,2^(b+3))^-2)>>3+1]=i\2);
    for(i=1,2^b,print1(v[i],",");if(i%16==0,print(),print1(" ")))
  */
-
-#ifndef BSQRTINV_DONT_USE_TABLE
 static const unsigned char binvsqrttab[256] = /* The least significant 1 was removed */
-  {
-    0, 170, 172, 102, 184, 253, 219, 129, 240, 218, 227, 22, 168, 50, 148, 46,
-    31, 245, 115, 57, 152, 157, 4, 222, 208, 197, 3, 137, 136, 146, 139, 113,
-    63, 149, 108, 217, 120, 61, 228, 62, 176, 101, 220, 214, 104, 242, 84, 238,
-    95, 53, 179, 134, 88, 34, 59, 97, 144, 5, 67, 54, 72, 173, 203, 78,
-    127, 42, 44, 25, 56, 130, 164, 254, 112, 90, 156, 105, 40, 77, 20, 81,
-    159, 138, 243, 185, 24, 226, 123, 94, 80, 186, 131, 246, 8, 18, 244, 241,
-    191, 234, 19, 166, 7, 189, 100, 65, 48, 229, 92, 86, 23, 114, 43, 110,
-    223, 181, 204, 6, 39, 93, 187, 225, 16, 133, 195, 73, 55, 210, 180, 49,
-    255, 85, 83, 153, 71, 2, 36, 126, 15, 37, 28, 233, 87, 205, 107, 209,
-    224, 10, 140, 198, 103, 98, 251, 33, 47, 58, 252, 118, 119, 109, 116, 142,
-    192, 106, 147, 38, 135, 194, 27, 193, 79, 154, 35, 41, 151, 13, 171, 17,
-    160, 202, 76, 121, 167, 221, 196, 158, 111, 250, 188, 201, 183, 82, 52, 177,
-    128, 213, 211, 230, 199, 125, 91, 1, 143, 165, 99, 150, 215, 178, 235, 174,
-    96, 117, 12, 70, 231, 29, 132, 161, 175, 69, 124, 9, 247, 237, 11, 14,
-    64, 21, 236, 89, 248, 66, 155, 190, 207, 26, 163, 169, 232, 141, 212, 145,
-    32, 74, 51, 249, 216, 162, 68, 30, 239, 122, 60, 182, 200, 45, 75, 206,
-  };
+  {  0, 170, 172, 102, 184, 253, 219, 129, 240, 218, 227,  22, 168,  50, 148,  46,
+    31, 245, 115,  57, 152, 157,   4, 222, 208, 197,   3, 137, 136, 146, 139, 113,
+    63, 149, 108, 217, 120,  61, 228,  62, 176, 101, 220, 214, 104, 242,  84, 238,
+    95,  53, 179, 134,  88,  34,  59,  97, 144,   5,  67,  54,  72, 173, 203,  78,
+   127,  42,  44,  25,  56, 130, 164, 254, 112,  90, 156, 105,  40,  77,  20,  81,
+   159, 138, 243, 185,  24, 226, 123,  94,  80, 186, 131, 246,   8,  18, 244, 241,
+   191, 234,  19, 166,   7, 189, 100,  65,  48, 229,  92,  86,  23, 114,  43, 110,
+   223, 181, 204,   6,  39,  93, 187, 225,  16, 133, 195,  73,  55, 210, 180,  49,
+   255,  85,  83, 153,  71,   2,  36, 126,  15,  37,  28, 233,  87, 205, 107, 209,
+   224,  10, 140, 198, 103,  98, 251,  33,  47,  58, 252, 118, 119, 109, 116, 142,
+   192, 106, 147,  38, 135, 194,  27, 193,  79, 154,  35,  41, 151,  13, 171,  17,
+   160, 202,  76, 121, 167, 221, 196, 158, 111, 250, 188, 201, 183,  82,  52, 177,
+   128, 213, 211, 230, 199, 125,  91,   1, 143, 165,  99, 150, 215, 178, 235, 174,
+    96, 117,  12,  70, 231,  29, 132, 161, 175,  69, 124,   9, 247, 237,  11,  14,
+    64,  21, 236,  89, 248,  66, 155, 190, 207,  26, 163, 169, 232, 141, 212, 145,
+    32,  74,  51, 249, 216, 162,  68,  30, 239, 122,  60, 182, 200,  45,  75, 206};
 #endif
 
 int
@@ -86,7 +80,6 @@
 {
   ASSERT (bnb > 0);
 
-  rp[0] = 1;
   if (bnb == 1)
     {
       if ((yp[0] & 3) != 1)
@@ -153,17 +146,26 @@
 #endif
 #endif
 
-      mpn_zero (rp + 1, bnb / GMP_NUMB_BITS);
       i = 0;
-      for (; bnb >= GMP_NUMB_BITS; bnb = (bnb + 2) >> 1)
+      for (; bnb > GMP_NUMB_BITS; bnb = (bnb + 2) >> 1)
 	order[i++] = bnb;
       if (bnb > precomputed_bits) {
-	t0 = r0 * r0 * y0 >> 1;
-	r0 -= r0 * t0;
-	ASSERT ((t0 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits))) == 0);
+	if (bnb == GMP_NUMB_BITS) {
+	  mp_limb_t r0h = r0 >> 1;
+	  mp_limb_t r0sqm1 = r0h * (r0h + 1);
+	  mp_limb_t yh = (y0 >> 2) + (yp[1] << (GMP_NUMB_BITS - 2));
+	  mp_limb_t yrrm1d4 = y0 * r0sqm1 + yh;
+	  ASSERT ((yrrm1d4 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits + 1))) == 0);
+	  mp_limb_t rt = r0 * yrrm1d4 - r0h - 1;
+	  r0 = (rt << 1) ^ ((rt & GMP_LIMB_HIGHBIT) ? GMP_NUMB_MAX : CNST_LIMB(1));
+	} else {
+	  t0 = r0 * r0 * y0 >> 1;
+	  r0 -= r0 * t0;
+	  ASSERT ((t0 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits))) == 0);
+	}
       }
       if (i) {
-	mp_limb_t t5, t4, t3, t2, t1, r1;
+	mp_limb_t t4, t3, t2, t1, r1;
 	--i;
 
 	umul_ppmm (t1, t0, r0, r0);
@@ -172,10 +174,12 @@
 	t2 = ((t2 >> 1) | (t3 << (GMP_NUMB_BITS - 1))) & GMP_NUMB_MAX;
 	t3 = t3 >> 1; /* [t3,t2] <- (rp^2 y - 1) / 2 */
 
-	umul_ppmm (t5, t4, r0, t2);
-	t5 += r0 * t3;
+	/* [r1,t4] <- r (r^2 y - 1) / 2 */
+	umul_ppmm (r1, t4, r0, t2);
+	r1 += r0 * t3;
 
-	sub_ddmmss(rp[1], rp[0], 0, r0, t5, t4);
+	/* r (r^2 y - 1) / 2 - r */
+	sub_ddmmss(rp[1], rp[0], r1, t4, 0, r0);
       } else {
 	*rp = r0 & GMP_NUMB_MAX;
 	return 1;
@@ -190,20 +194,20 @@
 	  bn = 1 + bnb / GMP_LIMB_BITS;
 
 	  mpn_mullo_n (tp2, yp, tp, bn); /* tp2 <- rp^2 y */
-	  mpn_rshift (tp2+pbn-1, tp2+pbn-1, bn-pbn+1, 1); /* tp2 <- (rp^2 y - 1) / 2 */
+	  ASSERT (tp2[0] == CNST_LIMB (1));
+	  ASSERT (pbn == 2 || mpn_zero_p (tp2 + 1, pbn - 2));
+	  /* tp2 <- (rp^2 y - 1) / 2 (skip the lowest limbs) */
+	  mpn_rshift (tp2 + pbn - 1, tp2 + pbn - 1, bn - pbn + 1, 1);
 
-	  if (pbn <= bn - pbn)
-	    mpn_mul (tp, tp2 + pbn - 1, bn - pbn + 1, rp, pbn);
+	  /* tp <- r (r^2 y - 1) / 2 (only the relevant limbs) */
+	  rp [pbn] = 0;
+	  mpn_mullo_n (tp, rp, tp2 + pbn - 1, bn - pbn + 1);
+
+	  if (rp[pbn - 1] < tp[0])
+	    mpn_com (rp + pbn, tp + 1, bn - pbn);
 	  else
-	    mpn_mul (tp, rp, pbn, tp2 + pbn - 1, bn - pbn + 1);
-	  /* tp <- r (r^2 y - 1) / 2 */
+	    mpn_neg (rp + pbn, tp + 1, bn - pbn);
 
-	  if (bn > pbn) {
-	    if (rp[pbn - 1] < tp[0])
-	      mpn_com (rp + pbn, tp + 1, bn - pbn);
-	    else
-	      mpn_neg (rp + pbn, tp + 1, bn - pbn);
-	  }
 	  rp[pbn - 1] -= tp[0];
 	  /* rp <- r - r (r^2 y - 1) / 2 */
 	}
diff -r 331e66376226 -r 0d88f51aeda5 tune/common.c
--- a/tune/common.c	Thu Jun 18 01:23:30 2026 +0200
+++ b/tune/common.c	Fri Jun 19 08:59:38 2026 +0200
@@ -1,6 +1,7 @@
 /* Shared speed subroutines.
 
-Copyright 1999-2006, 2008-2017, 2019-2022 Free Software Foundation, Inc.
+Copyright 1999-2006, 2008-2017, 2019-2022, 2026 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
diff -r 331e66376226 -r 0d88f51aeda5 tune/speed.c
--- a/tune/speed.c	Thu Jun 18 01:23:30 2026 +0200
+++ b/tune/speed.c	Fri Jun 19 08:59:38 2026 +0200
@@ -1,6 +1,6 @@
 /* Speed measuring program.
 
-Copyright 1999-2003, 2005, 2006, 2008-2022 Free Software Foundation, Inc.
+Copyright 1999-2003, 2005, 2006, 2008-2022, 2026 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
diff -r 331e66376226 -r 0d88f51aeda5 tune/speed.h
--- a/tune/speed.h	Thu Jun 18 01:23:30 2026 +0200
+++ b/tune/speed.h	Fri Jun 19 08:59:38 2026 +0200
@@ -1,7 +1,7 @@
 /* Header for speed and threshold things.
 
-Copyright 1999-2003, 2005, 2006, 2008-2017, 2019-2022 Free Software
-Foundation, Inc.
+Copyright 1999-2003, 2005, 2006, 2008-2017, 2019-2022, 2026 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 


More information about the gmp-commit mailing list