[Gmp-commit] /home/hgfiles/gmp: New Jacobi algorithm, based on Lehmer's algor...
mercurial at gmplib.org
mercurial at gmplib.org
Mon Apr 19 21:42:07 CEST 2010
details: /home/hgfiles/gmp/rev/efcac1ec4a81
changeset: 13537:efcac1ec4a81
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Apr 19 21:16:09 2010 +0200
description:
New Jacobi algorithm, based on Lehmer's algorithm.
diffstat:
configure.in | 2 +-
gmp-impl.h | 68 +++
mpn/Makefile.am | 2 +-
mpn/generic/jacbase.c | 12 +-
mpn/generic/jacobi_lehmer.c | 927 ++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 1003 insertions(+), 8 deletions(-)
diffs (truncated from 1094 to 300 lines):
diff -r 08ef31c0390d -r efcac1ec4a81 configure.in
--- a/configure.in Wed Apr 14 21:49:00 2010 +0200
+++ b/configure.in Mon Apr 19 21:16:09 2010 +0200
@@ -2515,7 +2515,7 @@
perfsqr perfpow \
gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step \
gcdext_lehmer gcdext_subdiv_step \
- div_q tdiv_qr jacbase get_d \
+ div_q tdiv_qr jacbase jacobi_lehmer get_d \
matrix22_mul matrix22_mul1_inverse_vector \
hgcd2 hgcd mullo_n mullo_basecase \
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
diff -r 08ef31c0390d -r efcac1ec4a81 gmp-impl.h
--- a/gmp-impl.h Wed Apr 14 21:49:00 2010 +0200
+++ b/gmp-impl.h Mon Apr 19 21:16:09 2010 +0200
@@ -876,6 +876,9 @@
#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
+#define mpn_jacobi_lehmer __MPN(jacobi_lehmer)
+__GMP_DECLSPEC int mpn_jacobi_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, unsigned, mp_ptr));
+
#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
@@ -3570,6 +3573,7 @@
#endif
+/* FIXME: Macros for old jacobi code, review what's still needed. */
/* BIT1 means a result value in bit 1 (second least significant bit), with a
zero bit representing +1 and a one bit representing -1. Bits other than
@@ -3715,6 +3719,70 @@
} \
} while (0)
+/* For new jacobi code. */
+#define jacobi_table __gmp_jacobi_table
+__GMP_DECLSPEC extern const unsigned char jacobi_table[208];
+
+/* Bit layout for the initial state. b must be odd.
+
+ 3 2 1 0
+ +--+--+--+--+
+ |a1|a0|b1| s|
+ +--+--+--+--+
+
+ */
+static inline unsigned
+mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
+{
+ ASSERT (b & 1);
+ ASSERT (s <= 1);
+ return ((a & 3) << 2) + (b & 2) + s;
+}
+
+static inline int
+mpn_jacobi_finish (unsigned bits)
+{
+ /* (a, b) = (1,0) or (0,1) */
+ ASSERT ( (bits & 14) == 0);
+
+ return 1-2*(bits & 1);
+}
+
+static inline unsigned
+mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
+{
+ /* FIXME: Could halve table size by not including the e bit in the
+ * index, and instead xor when updating. Then the lookup would be
+ * like
+ *
+ * bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
+ */
+
+ ASSERT (bits < 26);
+ ASSERT (denominator < 2);
+ ASSERT (q < 4);
+
+ /* For almost all calls, denominator is constant and quite often q
+ is constant too. So use addition rather than or, so the compiler
+ can put the constant part can into the offset of an indexed
+ addressing instruction.
+
+ With constant denominator, the below table lookup is compiled to
+
+ C Constant q = 1, constant denominator = 1
+ movzbl table+5(%eax,8), %eax
+
+ or
+
+ C q in %edx, constant denominator = 1
+ movzbl table+4(%edx,%eax,8), %eax
+
+ One could maintain the state preshifted 3 bits, to save a shift
+ here, but at least on x86, that's no real saving.
+ */
+ return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
+}
+
/* Matrix multiplication */
#define mpn_matrix22_mul __MPN(matrix22_mul)
__GMP_DECLSPEC void mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
diff -r 08ef31c0390d -r efcac1ec4a81 mpn/Makefile.am
--- a/mpn/Makefile.am Wed Apr 14 21:49:00 2010 +0200
+++ b/mpn/Makefile.am Mon Apr 19 21:16:09 2010 +0200
@@ -44,7 +44,7 @@
dump.c fib2_ui.c gcd.c \
gcd_1.c gcdext.c get_d.c get_str.c \
hamdist.c hgcd2.c hgcd.c invert_limb.c \
- ior_n.c iorn_n.c jacbase.c lshift.c \
+ ior_n.c iorn_n.c jacbase.c jacobi_lehmer.c lshift.c \
matrix22_mul.c matrix22_mul1_inverse_vector.c mod_1.c mod_34lsub1.c mode1o.c \
mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c \
mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c \
diff -r 08ef31c0390d -r efcac1ec4a81 mpn/generic/jacbase.c
--- a/mpn/generic/jacbase.c Wed Apr 14 21:49:00 2010 +0200
+++ b/mpn/generic/jacbase.c Mon Apr 19 21:16:09 2010 +0200
@@ -110,7 +110,7 @@
#if JACOBI_BASE_METHOD < 4
/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
- with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
+ with a restricted range of inputs accepted, namely b>1, b odd.
The initial result_bit1 is taken as a parameter for the convenience of
mpz_kronecker_ui() et al. The sign changes both here and in those
@@ -122,17 +122,13 @@
Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
possible, but a couple of tests suggest it's not a significant speedup,
- and may even be a slowdown, so what's here is good enough for now.
-
- Future: The code doesn't demand a<=b actually, so maybe this could be
- relaxed. All the places this is used currently call with a<=b though. */
+ and may even be a slowdown, so what's here is good enough for now. */
int
mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
{
ASSERT (b & 1); /* b odd */
ASSERT (b != 1);
- ASSERT (a <= b);
if (a == 0)
return 0;
@@ -141,11 +137,15 @@
if (a == 1)
goto done;
+ if (a >= b)
+ goto a_gt_b;
+
for (;;)
{
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
MP_LIMB_T_SWAP (a, b);
+ a_gt_b:
do
{
/* working on (a/b), a,b odd, a>=b */
diff -r 08ef31c0390d -r efcac1ec4a81 mpn/generic/jacobi_lehmer.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/jacobi_lehmer.c Mon Apr 19 21:16:09 2010 +0200
@@ -0,0 +1,927 @@
+/* jacobi_lehmer.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: Bad name for this symbol. */
+#ifndef JACOBI_2_METHOD
+#define JACOBI_2_METHOD 2
+#endif
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
+ possibly be renamed. */
+static inline mp_limb_t
+div1 (mp_ptr rp,
+ mp_limb_t n0,
+ mp_limb_t d0)
+{
+ mp_limb_t q = 0;
+
+ if ((mp_limb_signed_t) n0 < 0)
+ {
+ int cnt;
+ for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
+ {
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ q <<= 1;
+ if (n0 >= d0)
+ {
+ n0 = n0 - d0;
+ q |= 1;
+ }
+ d0 = d0 >> 1;
+ cnt--;
+ }
+ }
+ else
+ {
+ int cnt;
+ for (cnt = 0; n0 >= d0; cnt++)
+ {
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ d0 = d0 >> 1;
+ q <<= 1;
+ if (n0 >= d0)
+ {
+ n0 = n0 - d0;
+ q |= 1;
+ }
+ cnt--;
+ }
+ }
+ *rp = n0;
+ return q;
+}
+
+/* Two-limb division optimized for small quotients. */
+static inline mp_limb_t
+div2 (mp_ptr rp,
+ mp_limb_t nh, mp_limb_t nl,
+ mp_limb_t dh, mp_limb_t dl)
+{
+ mp_limb_t q = 0;
+
+ if ((mp_limb_signed_t) nh < 0)
+ {
+ int cnt;
+ for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
+ {
+ dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+ dl = dl << 1;
+ }
+
+ while (cnt)
+ {
+ q <<= 1;
+ if (nh > dh || (nh == dh && nl >= dl))
+ {
+ sub_ddmmss (nh, nl, nh, nl, dh, dl);
+ q |= 1;
+ }
+ dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+ dh = dh >> 1;
+ cnt--;
+ }
+ }
+ else
+ {
+ int cnt;
+ for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
+ {
+ dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+ dl = dl << 1;
+ }
+
+ while (cnt)
+ {
More information about the gmp-commit
mailing list