[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon May 23 22:38:44 CEST 2011
details: /var/hg/gmp/rev/92a6ac96b9ad
changeset: 14198:92a6ac96b9ad
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon May 23 10:12:16 2011 +0200
description:
Simplified mpz_jacobi using reciprocity.
details: /var/hg/gmp/rev/38d657553015
changeset: 14199:38d657553015
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon May 23 10:57:57 2011 +0200
description:
Moved (asize < bsize) check and swap before (bsize == 0) check.
diffstat:
ChangeLog | 6 +
mpz/jacobi.c | 224 ++++++++++++++++-------------------------------------
tests/mpz/t-jac.c | 4 +
3 files changed, 79 insertions(+), 155 deletions(-)
diffs (280 lines):
diff -r 6d7da908a2ef -r 38d657553015 ChangeLog
--- a/ChangeLog Sun May 22 20:31:06 2011 +0200
+++ b/ChangeLog Mon May 23 10:57:57 2011 +0200
@@ -1,3 +1,9 @@
+2011-05-23 Niels Möller <nisse at lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): Simplied by swapping operands when
+ needed, to get asize >= bsize. Use the reciprocity law generalized
+ to work when one operand is even.
+
2011-05-22 Niels Möller <nisse at lysator.liu.se>
* mpz/jacobi.c (mpz_jacobi): Another bugfix for the asize == 1
diff -r 6d7da908a2ef -r 38d657553015 mpz/jacobi.c
--- a/mpz/jacobi.c Sun May 22 20:31:06 2011 +0200
+++ b/mpz/jacobi.c Mon May 23 10:57:57 2011 +0200
@@ -98,9 +98,9 @@
if (bsize > 1 && btwos > 0)
{
- mp_limb_t second = bsrcp[1];
- blow |= second << (GMP_NUMB_BITS - btwos);
- if (bsize == 2 && (second >> btwos) == 0)
+ mp_limb_t b1 = bsrcp[1];
+ blow |= b1 << (GMP_NUMB_BITS - btwos);
+ if (bsize == 2 && (b1 >> btwos) == 0)
bsize = 1;
}
@@ -113,6 +113,41 @@
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
+ /* Ensure asize >= bsize. Take advantage of the generalized
+ reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
+
+ if (asize < bsize)
+ {
+ MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
+ MP_LIMB_T_SWAP (alow, blow);
+
+ /* NOTE: The value of alow (old blow) is a bit subtle. For this
+ code path, we get alow as the low, always odd, limb of
+ shifted A. Which is what we need for the reciprocity update
+ below.
+
+ However, all other uses of alow assumes that it is *not*
+ shifted. Luckily, alow matters only when either
+
+ + btwos > 0, in which case A is always odd
+
+ + asize == bsize == 1, in which case this code path is never
+ taken. */
+
+ count_trailing_zeros (btwos, blow);
+ blow >>= btwos;
+
+ if (bsize > 1 && btwos > 0)
+ {
+ mp_limb_t b1 = bsrcp[1];
+ blow |= b1 << (GMP_NUMB_BITS - btwos);
+ if (bsize == 2 && (b1 >> btwos) == 0)
+ bsize = 1;
+ }
+
+ result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
+ }
+
if (bsize == 1)
{
result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
@@ -126,163 +161,42 @@
return mpn_jacobi_base (alow, blow, result_bit1);
}
- if (asize == 1)
+ /* Allocation strategy: For A, we allocate a working copy only for A
+ % B, but when A is much larger than B, we have to allocate space
+ for the large quotient. We use the same area, pointed to by bp,
+ for both the quotient A/B and the working copy of B. */
+
+ TMP_MARK;
+
+ if (asize >= 2*bsize)
+ TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
+ else
+ TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
+
+ /* In the case of even B, we conceptually shift out the powers of
+ two first, and then divide A mod B. Hence, when taking those
+ powers of two into account, we must use alow *before* the the
+ division. Doing the actual division first is ok, because the
+ point is to remove multiples of B from A, and multiples of 2^k B
+ are good enough. */
+ if (asize > bsize)
+ mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
+ else
+ MPN_COPY (ap, asrcp, bsize);
+
+ if (btwos > 0)
{
- /* Logic copied from mpz_ui_kronecker */
- if (alow == 1)
- return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */
+ result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
- if ( (alow & 1) == 0)
- {
- unsigned atwos;
- ASSERT (btwos == 0);
- count_trailing_zeros (atwos, alow);
- alow >>= atwos;
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
- }
-
- if (alow == 1)
- return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */
-
- /* (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
- result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
- JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
- return mpn_jacobi_base (blow, alow, result_bit1);
- }
- result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
-
- bits = mpn_jacobi_init (alow, blow, (result_bit1>>1) & 1);
-
- /* Allocation strategy: When one operand is much larger than the
- other, we currently have to allocate space for the entire
- quotient, even though we need juste the lowest few bits. But we
- at least avoid allocating a copy of th larger input.
-
- We put the reduction of the larger operand first in the scratch
- area, followed by an area that holds first the quotient, and then
- the working copy of the smaller operand. */
-
- if (asize > bsize)
- {
- n = bsize;
-
- if (asize >= 2*bsize)
- itch = asize + 1;
- else
- itch = 2*bsize;
+ ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
+ bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;
}
else
- {
- n = asize;
+ MPN_COPY (bp, bsrcp, bsize);
- if (bsize >= 2*asize)
- itch = bsize + 1;
- else
- itch = 2*asize;
- }
-
- TMP_MARK;
-
- scratch = TMP_ALLOC_LIMBS (itch);
-
- if (n < asize)
- {
- mp_limb_t q0;
- ap = scratch;
- bp = scratch + n;
-
- mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, n);
- q0 = bp[0];
-
- if (btwos > 0)
- {
- ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, n, btwos));
- n -= (bp[n-1] | ap[n-1]) == 0;
-
- /* We have reduced a -= q * 2^k b */
- q0 <<= btwos;
- }
- else
- MPN_COPY (bp, bsrcp, n);
-
- bits = mpn_jacobi_update (bits, 1, q0 & 3);
- if (mpn_zero_p (ap, n))
- {
- /* FIXME: n > 1 always? */
- if (n > 1 || bp[0] != 1)
- {
- TMP_FREE;
- return 0;
- }
-
- TMP_FREE;
- return mpn_jacobi_finish (bits);
- }
- }
- else if (n < bsize)
- {
- mp_limb_t q0;
- mp_limb_t cy;
- bp = scratch;
- ap = scratch + n;
-
- mpn_tdiv_qr (ap, bp, 0, bsrcp, bsize, asrcp, n);
- q0 = ap[0];
-
- if (btwos > 0)
- {
- /* Let b be the correctly shifted, odd, value, and b' = 2^k
- b (k = btwos). We have divided
-
- b' = q' a + r'
-
- Let q' = 2^k q + ql, then we can recover the correct
- division as
-
- b = q a + r
-
- where the remainder is
-
- r = (ql a + r')/2^k
- */
- mp_limb_t ql, hi;
-
- ql = q0 & ((CNST_LIMB(1) << btwos) - 1);
- q0 = (q0 >> btwos) | (ap[1] << (GMP_LIMB_BITS - btwos));
- hi = mpn_addmul_1 (bp, asrcp, n, ql);
-
- ASSERT_NOCARRY (mpn_rshift (bp, bp, n, btwos));
- bp[n-1] |= hi << (GMP_LIMB_BITS - btwos);
- }
-
- bits = mpn_jacobi_update (bits, 0, q0 & 3);
-
- if (mpn_zero_p (bp, n))
- {
- TMP_FREE;
-
- /* FIXME: n > 1 always? */
- if (n > 1 || asrcp[0] != 1)
- return 0;
- else
- return mpn_jacobi_finish (bits);
- }
-
- MPN_COPY (ap, asrcp, n);
- }
- else
- {
- ap = scratch;
- bp = scratch + n;
-
- MPN_COPY (ap, asrcp, n);
- if (btwos > 0)
- ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, n, btwos));
- else
- MPN_COPY (bp, bsrcp, n);
- }
-
- res = mpn_jacobi_n (ap, bp, n, bits);
+ ASSERT (blow == bp[0]);
+ res = mpn_jacobi_n (ap, bp, bsize,
+ mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
TMP_FREE;
return res;
diff -r 6d7da908a2ef -r 38d657553015 tests/mpz/t-jac.c
--- a/tests/mpz/t-jac.c Sun May 22 20:31:06 2011 +0200
+++ b/tests/mpz/t-jac.c Mon May 23 10:57:57 2011 +0200
@@ -642,6 +642,10 @@
(relevant when GMP_LIMB_BITS == 64). */
{ "17311973299000934401", "1675975991242824637446753124775689449936871337036614677577044717424700351103148799107651171694863695242089956242888229458836426332300124417011114380886016", 1 },
{ "3220569220116583677", "41859917623035396746", -1 },
+
+ /* Other test cases that triggered bugs during development. */
+ { "37200210845139167613356125645445281805", "340116213441272389607827434472642576514", -1 },
+ { "74400421690278335226712251290890563610", "451716845976689892447895811408978421929", -1 },
};
int i;
More information about the gmp-commit
mailing list