[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Jun 19 09:01:37 CEST 2026
details: /var/hg/gmp/rev/73dc22bc5eb7
changeset: 18513:73dc22bc5eb7
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Jun 18 08:20:53 2026 +0200
description:
mpn/generic/bsqrtinv.c: Copyright and cosmetic changes
details: /var/hg/gmp/rev/4bdafaa1f0cf
changeset: 18514:4bdafaa1f0cf
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Jun 19 07:35:09 2026 +0200
description:
tune: Copyright years
details: /var/hg/gmp/rev/472b830c116c
changeset: 18515:472b830c116c
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Jun 19 07:59:13 2026 +0200
description:
mpn/generic/bsqrtinv.c: Use r' <-- -(r-r(r^2 y-1)/2) sometimes
details: /var/hg/gmp/rev/eeb8830e47e2
changeset: 18516:eeb8830e47e2
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Jun 19 08:11:23 2026 +0200
description:
mpn/generic/bsqrtinv.c: Use mullo to avoid compute zeros.
details: /var/hg/gmp/rev/1df826dbcdf1
changeset: 18517:1df826dbcdf1
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Jun 19 08:38:52 2026 +0200
description:
mpn/generic/bsqrtinv.c: Another special case for (bnb == GMP_NUMB_BITS)...
details: /var/hg/gmp/rev/0d88f51aeda5
changeset: 18518:0d88f51aeda5
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Jun 19 08:59:38 2026 +0200
description:
ChangeLog
diffstat:
ChangeLog | 11 +++++
mpn/generic/bsqrtinv.c | 102 +++++++++++++++++++++++++-----------------------
tune/common.c | 3 +-
tune/speed.c | 2 +-
tune/speed.h | 4 +-
5 files changed, 69 insertions(+), 53 deletions(-)
diffs (226 lines):
diff -r 331e66376226 -r 0d88f51aeda5 ChangeLog
--- a/ChangeLog Thu Jun 18 01:23:30 2026 +0200
+++ b/ChangeLog Fri Jun 19 08:59:38 2026 +0200
@@ -1,3 +1,14 @@
+2026-06-19 Marco Bodrato <marco.bodrato at protonmail.com>
+
+ * mpn/generic/perfpow.c (is_kth_power): Check only one possible
+ square root.
+
+ * tune/speed.c: Recognise mpn_bsqrtinv.
+ * tune/speed.h: Declare speed_mpn_bsqrtinv and the macro.
+ * tune/common.c (speed_mpn_bsqrtinv): Implement the function.
+
+ * mpn/generic/bsqrtinv.c: Rewrite with a differen iteration.
+
2026-04-04 Marc Glisse <marc.glisse at inria.fr>
* gmpxx.h (operator""): Remove deprecated space.
diff -r 331e66376226 -r 0d88f51aeda5 mpn/generic/bsqrtinv.c
--- a/mpn/generic/bsqrtinv.c Thu Jun 18 01:23:30 2026 +0200
+++ b/mpn/generic/bsqrtinv.c Fri Jun 19 08:59:38 2026 +0200
@@ -1,8 +1,9 @@
/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
- Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+ Contributed to the GNU project by Martin Boij (as part of perfpow.c),
+ optimized by Marco Bodrato.
-Copyright 2009, 2010, 2012, 2015 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012, 2015, 2026 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -37,48 +38,41 @@
Return non-zero if such an integer r exists.
Iterates
- r' <-- (3r - r^3 y) / 2
+ r' <-- r - r (r^2 y - 1) / 2 , (or its negation, sometimes)
using Hensel lifting. Since we divide by two, the Hensel lifting is
somewhat degenerates. Therefore, we lift from 2^b to 2^{b+1}-1.
FIXME:
(1) Simplify to do precision book-keeping in limbs rather than bits.
- (2) Rewrite iteration as
- r' <-- r - r (r^2 y - 1) / 2 [Done]
- and take advantage of zero low part of r^2 y - 1. [Done]
+ (2) Take advantage of zero low part of r^2 y - 1.
(3) Use wrap-around trick.
-
- (4) Use a small table to get starting value. [Done]
*/
+#ifndef BSQRTINV_DONT_USE_TABLE
/* Generated with GP-Pari:
b=8;v=Vecsmall(-binary(2^2^b-1));
forstep(i=1,2^(b+1),2,v[lift(Mod(i,2^(b+3))^-2)>>3+1]=i\2);
for(i=1,2^b,print1(v[i],",");if(i%16==0,print(),print1(" ")))
*/
-
-#ifndef BSQRTINV_DONT_USE_TABLE
static const unsigned char binvsqrttab[256] = /* The least significant 1 was removed */
- {
- 0, 170, 172, 102, 184, 253, 219, 129, 240, 218, 227, 22, 168, 50, 148, 46,
- 31, 245, 115, 57, 152, 157, 4, 222, 208, 197, 3, 137, 136, 146, 139, 113,
- 63, 149, 108, 217, 120, 61, 228, 62, 176, 101, 220, 214, 104, 242, 84, 238,
- 95, 53, 179, 134, 88, 34, 59, 97, 144, 5, 67, 54, 72, 173, 203, 78,
- 127, 42, 44, 25, 56, 130, 164, 254, 112, 90, 156, 105, 40, 77, 20, 81,
- 159, 138, 243, 185, 24, 226, 123, 94, 80, 186, 131, 246, 8, 18, 244, 241,
- 191, 234, 19, 166, 7, 189, 100, 65, 48, 229, 92, 86, 23, 114, 43, 110,
- 223, 181, 204, 6, 39, 93, 187, 225, 16, 133, 195, 73, 55, 210, 180, 49,
- 255, 85, 83, 153, 71, 2, 36, 126, 15, 37, 28, 233, 87, 205, 107, 209,
- 224, 10, 140, 198, 103, 98, 251, 33, 47, 58, 252, 118, 119, 109, 116, 142,
- 192, 106, 147, 38, 135, 194, 27, 193, 79, 154, 35, 41, 151, 13, 171, 17,
- 160, 202, 76, 121, 167, 221, 196, 158, 111, 250, 188, 201, 183, 82, 52, 177,
- 128, 213, 211, 230, 199, 125, 91, 1, 143, 165, 99, 150, 215, 178, 235, 174,
- 96, 117, 12, 70, 231, 29, 132, 161, 175, 69, 124, 9, 247, 237, 11, 14,
- 64, 21, 236, 89, 248, 66, 155, 190, 207, 26, 163, 169, 232, 141, 212, 145,
- 32, 74, 51, 249, 216, 162, 68, 30, 239, 122, 60, 182, 200, 45, 75, 206,
- };
+ { 0, 170, 172, 102, 184, 253, 219, 129, 240, 218, 227, 22, 168, 50, 148, 46,
+ 31, 245, 115, 57, 152, 157, 4, 222, 208, 197, 3, 137, 136, 146, 139, 113,
+ 63, 149, 108, 217, 120, 61, 228, 62, 176, 101, 220, 214, 104, 242, 84, 238,
+ 95, 53, 179, 134, 88, 34, 59, 97, 144, 5, 67, 54, 72, 173, 203, 78,
+ 127, 42, 44, 25, 56, 130, 164, 254, 112, 90, 156, 105, 40, 77, 20, 81,
+ 159, 138, 243, 185, 24, 226, 123, 94, 80, 186, 131, 246, 8, 18, 244, 241,
+ 191, 234, 19, 166, 7, 189, 100, 65, 48, 229, 92, 86, 23, 114, 43, 110,
+ 223, 181, 204, 6, 39, 93, 187, 225, 16, 133, 195, 73, 55, 210, 180, 49,
+ 255, 85, 83, 153, 71, 2, 36, 126, 15, 37, 28, 233, 87, 205, 107, 209,
+ 224, 10, 140, 198, 103, 98, 251, 33, 47, 58, 252, 118, 119, 109, 116, 142,
+ 192, 106, 147, 38, 135, 194, 27, 193, 79, 154, 35, 41, 151, 13, 171, 17,
+ 160, 202, 76, 121, 167, 221, 196, 158, 111, 250, 188, 201, 183, 82, 52, 177,
+ 128, 213, 211, 230, 199, 125, 91, 1, 143, 165, 99, 150, 215, 178, 235, 174,
+ 96, 117, 12, 70, 231, 29, 132, 161, 175, 69, 124, 9, 247, 237, 11, 14,
+ 64, 21, 236, 89, 248, 66, 155, 190, 207, 26, 163, 169, 232, 141, 212, 145,
+ 32, 74, 51, 249, 216, 162, 68, 30, 239, 122, 60, 182, 200, 45, 75, 206};
#endif
int
@@ -86,7 +80,6 @@
{
ASSERT (bnb > 0);
- rp[0] = 1;
if (bnb == 1)
{
if ((yp[0] & 3) != 1)
@@ -153,17 +146,26 @@
#endif
#endif
- mpn_zero (rp + 1, bnb / GMP_NUMB_BITS);
i = 0;
- for (; bnb >= GMP_NUMB_BITS; bnb = (bnb + 2) >> 1)
+ for (; bnb > GMP_NUMB_BITS; bnb = (bnb + 2) >> 1)
order[i++] = bnb;
if (bnb > precomputed_bits) {
- t0 = r0 * r0 * y0 >> 1;
- r0 -= r0 * t0;
- ASSERT ((t0 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits))) == 0);
+ if (bnb == GMP_NUMB_BITS) {
+ mp_limb_t r0h = r0 >> 1;
+ mp_limb_t r0sqm1 = r0h * (r0h + 1);
+ mp_limb_t yh = (y0 >> 2) + (yp[1] << (GMP_NUMB_BITS - 2));
+ mp_limb_t yrrm1d4 = y0 * r0sqm1 + yh;
+ ASSERT ((yrrm1d4 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits + 1))) == 0);
+ mp_limb_t rt = r0 * yrrm1d4 - r0h - 1;
+ r0 = (rt << 1) ^ ((rt & GMP_LIMB_HIGHBIT) ? GMP_NUMB_MAX : CNST_LIMB(1));
+ } else {
+ t0 = r0 * r0 * y0 >> 1;
+ r0 -= r0 * t0;
+ ASSERT ((t0 & (GMP_NUMB_MAX >> (GMP_NUMB_BITS - precomputed_bits))) == 0);
+ }
}
if (i) {
- mp_limb_t t5, t4, t3, t2, t1, r1;
+ mp_limb_t t4, t3, t2, t1, r1;
--i;
umul_ppmm (t1, t0, r0, r0);
@@ -172,10 +174,12 @@
t2 = ((t2 >> 1) | (t3 << (GMP_NUMB_BITS - 1))) & GMP_NUMB_MAX;
t3 = t3 >> 1; /* [t3,t2] <- (rp^2 y - 1) / 2 */
- umul_ppmm (t5, t4, r0, t2);
- t5 += r0 * t3;
+ /* [r1,t4] <- r (r^2 y - 1) / 2 */
+ umul_ppmm (r1, t4, r0, t2);
+ r1 += r0 * t3;
- sub_ddmmss(rp[1], rp[0], 0, r0, t5, t4);
+ /* r (r^2 y - 1) / 2 - r */
+ sub_ddmmss(rp[1], rp[0], r1, t4, 0, r0);
} else {
*rp = r0 & GMP_NUMB_MAX;
return 1;
@@ -190,20 +194,20 @@
bn = 1 + bnb / GMP_LIMB_BITS;
mpn_mullo_n (tp2, yp, tp, bn); /* tp2 <- rp^2 y */
- mpn_rshift (tp2+pbn-1, tp2+pbn-1, bn-pbn+1, 1); /* tp2 <- (rp^2 y - 1) / 2 */
+ ASSERT (tp2[0] == CNST_LIMB (1));
+ ASSERT (pbn == 2 || mpn_zero_p (tp2 + 1, pbn - 2));
+ /* tp2 <- (rp^2 y - 1) / 2 (skip the lowest limbs) */
+ mpn_rshift (tp2 + pbn - 1, tp2 + pbn - 1, bn - pbn + 1, 1);
- if (pbn <= bn - pbn)
- mpn_mul (tp, tp2 + pbn - 1, bn - pbn + 1, rp, pbn);
+ /* tp <- r (r^2 y - 1) / 2 (only the relevant limbs) */
+ rp [pbn] = 0;
+ mpn_mullo_n (tp, rp, tp2 + pbn - 1, bn - pbn + 1);
+
+ if (rp[pbn - 1] < tp[0])
+ mpn_com (rp + pbn, tp + 1, bn - pbn);
else
- mpn_mul (tp, rp, pbn, tp2 + pbn - 1, bn - pbn + 1);
- /* tp <- r (r^2 y - 1) / 2 */
+ mpn_neg (rp + pbn, tp + 1, bn - pbn);
- if (bn > pbn) {
- if (rp[pbn - 1] < tp[0])
- mpn_com (rp + pbn, tp + 1, bn - pbn);
- else
- mpn_neg (rp + pbn, tp + 1, bn - pbn);
- }
rp[pbn - 1] -= tp[0];
/* rp <- r - r (r^2 y - 1) / 2 */
}
diff -r 331e66376226 -r 0d88f51aeda5 tune/common.c
--- a/tune/common.c Thu Jun 18 01:23:30 2026 +0200
+++ b/tune/common.c Fri Jun 19 08:59:38 2026 +0200
@@ -1,6 +1,7 @@
/* Shared speed subroutines.
-Copyright 1999-2006, 2008-2017, 2019-2022 Free Software Foundation, Inc.
+Copyright 1999-2006, 2008-2017, 2019-2022, 2026 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
diff -r 331e66376226 -r 0d88f51aeda5 tune/speed.c
--- a/tune/speed.c Thu Jun 18 01:23:30 2026 +0200
+++ b/tune/speed.c Fri Jun 19 08:59:38 2026 +0200
@@ -1,6 +1,6 @@
/* Speed measuring program.
-Copyright 1999-2003, 2005, 2006, 2008-2022 Free Software Foundation, Inc.
+Copyright 1999-2003, 2005, 2006, 2008-2022, 2026 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
diff -r 331e66376226 -r 0d88f51aeda5 tune/speed.h
--- a/tune/speed.h Thu Jun 18 01:23:30 2026 +0200
+++ b/tune/speed.h Fri Jun 19 08:59:38 2026 +0200
@@ -1,7 +1,7 @@
/* Header for speed and threshold things.
-Copyright 1999-2003, 2005, 2006, 2008-2017, 2019-2022 Free Software
-Foundation, Inc.
+Copyright 1999-2003, 2005, 2006, 2008-2017, 2019-2022, 2026 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
More information about the gmp-commit
mailing list