[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 7 18:29:28 CET 2011
details: /var/hg/gmp/rev/b42a961db3cb
changeset: 14531:b42a961db3cb
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Dec 07 18:27:29 2011 +0100
description:
gen-fac_ui: two more tables computed.
details: /var/hg/gmp/rev/018161d99ceb
changeset: 14532:018161d99ceb
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Dec 07 18:29:12 2011 +0100
description:
mpz/fac_ui.c: Rewrite.
diffstat:
ChangeLog | 6 +
gen-fac_ui.c | 53 ++-
mpz/fac_ui.c | 905 ++++++++++++++++++++++++++++++++++++----------------------
3 files changed, 613 insertions(+), 351 deletions(-)
diffs (truncated from 1056 to 300 lines):
diff -r d60545d62c22 -r 018161d99ceb ChangeLog
--- a/ChangeLog Tue Dec 06 15:16:48 2011 +0100
+++ b/ChangeLog Wed Dec 07 18:29:12 2011 +0100
@@ -1,3 +1,9 @@
+2011-12-07 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * gen-fac_ui.c: Generate two more tables: odd factorial, swing.
+
+ * mpz/fac_ui.c: Rewrite.
+
2011-12-06 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/hgcd.c (mpn_hgcd): Use hgcd_reduce for first
diff -r d60545d62c22 -r 018161d99ceb gen-fac_ui.c
--- a/gen-fac_ui.c Tue Dec 06 15:16:48 2011 +0100
+++ b/gen-fac_ui.c Wed Dec 07 18:29:12 2011 +0100
@@ -1,6 +1,6 @@
/* Generate mpz_fac_ui data.
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -45,7 +45,7 @@
gen_consts (int numb, int nail, int limb)
{
mpz_t x, y, z, t;
- unsigned long a, b, first = 1;
+ unsigned long a, b;
printf ("/* This file is automatically generated by gen-fac_ui.c */\n\n");
printf ("#if GMP_NUMB_BITS != %d\n", numb);
@@ -58,22 +58,51 @@
printf
("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
printf
- ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),");
+ ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2");
mpz_init_set_ui (x, 2);
for (b = 3;; b++)
{
mpz_mul_ui (x, x, b); /* so b!=a */
if (mpz_sizeinbase (x, 2) > numb)
break;
- if (first)
- {
- first = 0;
- }
- else
- {
- printf ("),");
- }
- printf ("CNST_LIMB(0x");
+ printf ("),CNST_LIMB(0x");
+ mpz_out_str (stdout, 16, x);
+ }
+ printf (")\n");
+
+ printf
+ ("\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n");
+ printf
+ ("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
+ printf
+ ("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
+ mpz_init_set_ui (x, 1);
+ for (b = 3;; b++)
+ {
+ for (a = b; (a & 1) == 0; a >>= 1);
+ mpz_mul_ui (x, x, a);
+ if (mpz_sizeinbase (x, 2) > numb)
+ break;
+ printf ("),CNST_LIMB(0x");
+ mpz_out_str (stdout, 16, x);
+ }
+ printf (")\n");
+
+
+ printf
+ ("\n/* This table is 0$,1$,2$,3$,...,n$ where n$=binomial(2n,n) */\n");
+ printf
+ ("/* is the swing factorial, and n$ has <= GMP_NUMB_BITS bits */\n");
+ printf
+ ("#define ONE_LIMB_EVEN_SWING_TABLE CNST_LIMB(0x1),CNST_LIMB(0x2");
+ mpz_init_set_ui (x, 2);
+ for (b = 2;; b++)
+ {
+ mpz_tdiv_q_ui (x, x, b);
+ mpz_mul_ui (x, x, 4*b-2);
+ if (mpz_sizeinbase (x, 2) > numb)
+ break;
+ printf ("),CNST_LIMB(0x");
mpz_out_str (stdout, 16, x);
}
printf (")\n");
diff -r d60545d62c22 -r 018161d99ceb mpz/fac_ui.c
--- a/mpz/fac_ui.c Tue Dec 06 15:16:48 2011 +0100
+++ b/mpz/fac_ui.c Wed Dec 07 18:29:12 2011 +0100
@@ -1,7 +1,9 @@
/* mpz_fac_ui(result, n) -- Set RESULT to N!.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -20,383 +22,608 @@
#include "gmp.h"
#include "gmp-impl.h"
-#include "longlong.h"
#include "fac_ui.h"
+/* TODO:
+ - write the code for tuning thresholds;
+ - split this file in smaller parts with functions that can be recycled for different computations.
+ */
-static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
-static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+/* This is intended for constant THRESHOLDs only, where the compiler
+ can completely fold the result. */
+#define LOG2C(n) \
+ (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
+ ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
+ ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
+ ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-/* must be >=2 */
-#define APCONST 5
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = 1; \
+ }
-/* for single non-zero limb */
-#define MPZ_SET_1_NZ(z,n) \
- do { \
- mpz_ptr __z = (z); \
- ASSERT ((n) != 0); \
- PTR(__z)[0] = (n); \
- SIZ(__z) = 1; \
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = (P); \
+ } else \
+ (PR) *= (P); \
} while (0)
-/* for src>0 and n>0 */
-#define MPZ_MUL_1_POS(dst,src,n) \
- do { \
- mpz_ptr __dst = (dst); \
- mpz_srcptr __src = (src); \
- mp_size_t __size = SIZ(__src); \
- mp_ptr __dst_p; \
- mp_limb_t __c; \
- \
- ASSERT (__size > 0); \
- ASSERT ((n) != 0); \
- \
- MPZ_REALLOC (__dst, __size+1); \
- __dst_p = PTR(__dst); \
- \
- __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n); \
- __dst_p[__size] = __c; \
- SIZ(__dst) = __size + (__c != 0); \
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
+ do { \
+ mp_limb_t __mask, __index, __max_i, __i; \
+ \
+ __i = (start)-(off); \
+ __index = __i / GMP_LIMB_BITS; \
+ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
+ __i += (off); \
+ \
+ __max_i = (end); \
+ \
+ do { \
+ ++__i; \
+ if (((sieve)[__index] & __mask) == 0) \
+ { \
+ (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_END \
+ } \
+ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+ __index += __mask & 1; \
+ } while (__i <= __max_i); \
} while (0)
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
-#if BITS_PER_ULONG == GMP_LIMB_BITS
-#define BSWAP_ULONG(x,y) BSWAP_LIMB(x,y)
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+
+/* id_to_n (x) = bit_to_n (x-1) = id*3+1+(id&1)*/
+static mp_limb_t
+id_to_n (mp_limb_t id) { return (id*3+1)|1; }
+
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+/* FIXME: also for 8... */
+#if GMP_LIMB_BITS > 30
+#if GMP_LIMB_BITS < 62
+#define SIEVE_SEED CNST_LIMB(0x69128480)
+#define SEED_LIMIT 114
+#else
+#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
+#define SEED_LIMIT 202
+#endif
+#else
+#if GMP_LIMB_BITS > 15
+#define SIEVE_SEED CNST_LIMB(0x8480)
+#define SEED_LIMIT 54
+#else
+#error Not implemented
+#endif
#endif
-/* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
- by a shift down to get the high part. But it provoked incorrect code
- from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode. This
- case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
- can get that directly muxing a 4-byte ulong if it matters enough. */
+static void
+first_block_primesieve (mp_limb_t *bit_array, mp_limb_t n)
+{
+ mp_size_t bits, limbs;
-#if ! defined (BSWAP_ULONG)
-#define BSWAP_ULONG(dst, src) \
- do { \
- unsigned long __bswapl_src = (src); \
- unsigned long __bswapl_dst = 0; \
- int __i; \
- for (__i = 0; __i < sizeof(unsigned long); __i++) \
- { \
- __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF); \
- __bswapl_src >>= 8; \
- } \
- (dst) = __bswapl_dst; \
- } while (0)
+ ASSERT (n > 4);
+
+ bits = n_to_bit(n);
+ limbs = bits / GMP_LIMB_BITS + 1;
+
+ /* FIXME: We can skip 5 too, filling with a 5-part pattern. */
+ MPN_ZERO (bit_array, limbs);
+ bit_array[0] = SIEVE_SEED;
+
+ if ((bits + 1) % GMP_LIMB_BITS != 0)
+ bit_array[limbs-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+ if (n > SEED_LIMIT) {
+ mp_limb_t mask, index, i;
+
+ ASSERT (n > 49);
+
+ mask = 1;
+ index = 0;
+ i = 1;
+ do {
+ if ((bit_array[index] & mask) == 0)
+ {
+ mp_size_t step, lindex;
+ mp_limb_t lmask;
+ int maskrot;
+
+ step = id_to_n(i);
+/* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+ lindex = i*(step+1+(i&1))-1+(i&1);
+/* lindex = i*(i*3+2+(i&1)*2)-1+(i&1); */
+ if (lindex > bits)
+ break;
+
+ step <<= 1;
+ maskrot = step % GMP_LIMB_BITS;
+
More information about the gmp-commit
mailing list