[Gmp-commit] /var/hg/gmp: 2 new changesets

Wed Dec 7 18:29:28 CET 2011

details:   /var/hg/gmp/rev/b42a961db3cb
changeset: 14531:b42a961db3cb
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Dec 07 18:27:29 2011 +0100
description:
gen-fac_ui: two more tables computed.

details:   /var/hg/gmp/rev/018161d99ceb
changeset: 14532:018161d99ceb
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Dec 07 18:29:12 2011 +0100
description:
mpz/fac_ui.c: Rewrite.

diffstat:

 ChangeLog    |    6 +
 gen-fac_ui.c |   53 ++-
 mpz/fac_ui.c |  905 ++++++++++++++++++++++++++++++++++++----------------------
 3 files changed, 613 insertions(+), 351 deletions(-)

diffs (truncated from 1056 to 300 lines):

diff -r d60545d62c22 -r 018161d99ceb ChangeLog

--- a/ChangeLog	Tue Dec 06 15:16:48 2011 +0100
+++ b/ChangeLog	Wed Dec 07 18:29:12 2011 +0100
@@ -1,3 +1,9 @@
+2011-12-07 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* gen-fac_ui.c: Generate two more tables: odd factorial, swing.
+
+	* mpz/fac_ui.c: Rewrite.
+	
 2011-12-06  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/hgcd.c (mpn_hgcd): Use hgcd_reduce for first
diff -r d60545d62c22 -r 018161d99ceb gen-fac_ui.c
--- a/gen-fac_ui.c	Tue Dec 06 15:16:48 2011 +0100
+++ b/gen-fac_ui.c	Wed Dec 07 18:29:12 2011 +0100
@@ -1,6 +1,6 @@
 /* Generate mpz_fac_ui data.
 
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -45,7 +45,7 @@
 gen_consts (int numb, int nail, int limb)
 {
   mpz_t x, y, z, t;
-  unsigned long a, b, first = 1;
+  unsigned long a, b;
 
   printf ("/* This file is automatically generated by gen-fac_ui.c */\n\n");
   printf ("#if GMP_NUMB_BITS != %d\n", numb);
@@ -58,22 +58,51 @@
   printf
     ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
   printf
-    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),");
+    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2");
   mpz_init_set_ui (x, 2);
   for (b = 3;; b++)
     {
       mpz_mul_ui (x, x, b);	/* so b!=a       */
       if (mpz_sizeinbase (x, 2) > numb)
 	break;
-      if (first)
-	{
-	  first = 0;
-	}
-      else
-	{
-	  printf ("),");
-	}
-      printf ("CNST_LIMB(0x");
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  printf
+    ("\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n");
+  printf
+    ("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_init_set_ui (x, 1);
+  for (b = 3;; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);
+      mpz_mul_ui (x, x, a);
+      if (mpz_sizeinbase (x, 2) > numb)
+	break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+
+  printf
+    ("\n/* This table is 0$,1$,2$,3$,...,n$ where n$=binomial(2n,n) */\n");
+  printf
+    ("/* is the swing factorial, and n$ has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_EVEN_SWING_TABLE CNST_LIMB(0x1),CNST_LIMB(0x2");
+  mpz_init_set_ui (x, 2);
+  for (b = 2;; b++)
+    {
+      mpz_tdiv_q_ui (x, x, b);
+      mpz_mul_ui (x, x, 4*b-2);
+      if (mpz_sizeinbase (x, 2) > numb)
+	break;
+      printf ("),CNST_LIMB(0x");
       mpz_out_str (stdout, 16, x);
     }
   printf (")\n");
diff -r d60545d62c22 -r 018161d99ceb mpz/fac_ui.c
--- a/mpz/fac_ui.c	Tue Dec 06 15:16:48 2011 +0100
+++ b/mpz/fac_ui.c	Wed Dec 07 18:29:12 2011 +0100
@@ -1,7 +1,9 @@
 /* mpz_fac_ui(result, n) -- Set RESULT to N!.
 
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -20,383 +22,608 @@
 
 #include "gmp.h"
 #include "gmp-impl.h"
-#include "longlong.h"
 
 #include "fac_ui.h"
 
+/* TODO:
+   - write the code for tuning thresholds;
+   - split this file in smaller parts with functions that can be recycled for different computations.
+ */
 
-static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
-static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
 
+/* This is intended for constant THRESHOLDs only, where the compiler
+   can completely fold the result.  */
+#define LOG2C(n) \
+ (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
+  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
+  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
+  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
 
-/* must be >=2	*/
-#define APCONST	5
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)	\
+  if ((PR) > (MAX_PR)) {			\
+    (VEC)[(I)++] = (PR);			\
+    (PR) = 1;					\
+  }
 
-/* for single non-zero limb */
-#define MPZ_SET_1_NZ(z,n)	\
-  do {				\
-    mpz_ptr  __z = (z);		\
-    ASSERT ((n) != 0);		\
-    PTR(__z)[0] = (n);		\
-    SIZ(__z) = 1;		\
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)	\
+  do {						\
+    if ((PR) > (MAX_PR)) {			\
+      (VEC)[(I)++] = (PR);			\
+      (PR) = (P);				\
+    } else					\
+      (PR) *= (P);				\
   } while (0)
 
-/* for src>0 and n>0 */
-#define MPZ_MUL_1_POS(dst,src,n)			\
-  do {							\
-    mpz_ptr    __dst = (dst);				\
-    mpz_srcptr __src = (src);				\
-    mp_size_t  __size = SIZ(__src);			\
-    mp_ptr     __dst_p;					\
-    mp_limb_t  __c;					\
-							\
-    ASSERT (__size > 0);				\
-    ASSERT ((n) != 0);					\
-							\
-    MPZ_REALLOC (__dst, __size+1);			\
-    __dst_p = PTR(__dst);				\
-							\
-    __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n);	\
-    __dst_p[__size] = __c;				\
-    SIZ(__dst) = __size + (__c != 0);			\
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
+  do {								\
+    mp_limb_t __mask, __index, __max_i, __i;			\
+								\
+    __i = (start)-(off);					\
+    __index = __i / GMP_LIMB_BITS;				\
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
+    __i += (off);						\
+								\
+    __max_i = (end);						\
+								\
+    do {							\
+      ++__i;							\
+      if (((sieve)[__index] & __mask) == 0)			\
+	{							\
+      (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_END					\
+	}							\
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
+      __index += __mask & 1;					\
+    }  while (__i <= __max_i);				\
   } while (0)
 
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
 
-#if BITS_PER_ULONG == GMP_LIMB_BITS
-#define BSWAP_ULONG(x,y)	BSWAP_LIMB(x,y)
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+
+/* id_to_n (x) = bit_to_n (x-1) = id*3+1+(id&1)*/
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return (id*3+1)|1; }
+
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+
+/* FIXME: also for 8... */
+#if GMP_LIMB_BITS > 30
+#if GMP_LIMB_BITS < 62
+#define SIEVE_SEED CNST_LIMB(0x69128480)
+#define SEED_LIMIT 114
+#else
+#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
+#define SEED_LIMIT 202
+#endif
+#else
+#if GMP_LIMB_BITS > 15
+#define SIEVE_SEED CNST_LIMB(0x8480)
+#define SEED_LIMIT 54
+#else
+#error Not implemented 
+#endif
 #endif
 
-/* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
-   by a shift down to get the high part.  But it provoked incorrect code
-   from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode.  This
-   case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
-   can get that directly muxing a 4-byte ulong if it matters enough.  */
+static void
+first_block_primesieve (mp_limb_t *bit_array, mp_limb_t n)
+{
+  mp_size_t bits, limbs;
 
-#if ! defined (BSWAP_ULONG)
-#define BSWAP_ULONG(dst, src)						\
-  do {									\
-    unsigned long  __bswapl_src = (src);				\
-    unsigned long  __bswapl_dst = 0;					\
-    int	       __i;							\
-    for (__i = 0; __i < sizeof(unsigned long); __i++)			\
-      {									\
-	__bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF);	\
-	__bswapl_src >>= 8;						\
-      }									\
-    (dst) = __bswapl_dst;						\
-  } while (0)
+  ASSERT (n > 4);
+
+  bits  = n_to_bit(n);
+  limbs = bits / GMP_LIMB_BITS + 1;
+
+  /* FIXME: We can skip 5 too, filling with a 5-part pattern. */
+  MPN_ZERO (bit_array, limbs);
+  bit_array[0] = SIEVE_SEED;
+
+  if ((bits + 1) % GMP_LIMB_BITS != 0)
+    bit_array[limbs-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+  if (n > SEED_LIMIT) {
+    mp_limb_t mask, index, i;
+
+    ASSERT (n > 49);
+
+    mask = 1;
+    index = 0;
+    i = 1;
+    do {
+      if ((bit_array[index] & mask) == 0)
+	{
+	  mp_size_t step, lindex;
+	  mp_limb_t lmask;
+	  int maskrot;
+
+	  step = id_to_n(i);
+/* 	  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+	  lindex = i*(step+1+(i&1))-1+(i&1);
+/* 	  lindex = i*(i*3+2+(i&1)*2)-1+(i&1); */
+	  if (lindex > bits)
+	    break;
+
+	  step <<= 1;
+	  maskrot = step % GMP_LIMB_BITS;
+