[Gmp-commit] /var/hg/gmp: fac_ui: micro-optimisations (the real thing).

mercurial at gmplib.org mercurial at gmplib.org
Thu Jan 26 22:43:00 CET 2012


details:   /var/hg/gmp/rev/3c33c8fe53c8
changeset: 14585:3c33c8fe53c8
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jan 26 22:41:13 2012 +0100
description:
fac_ui: micro-optimisations (the real thing).

diffstat:

 mpz/fac_ui.c |  89 +++++++++++++++++++++++++++++++++++------------------------
 1 files changed, 53 insertions(+), 36 deletions(-)

diffs (212 lines):

diff -r 60e765a525e8 -r 3c33c8fe53c8 mpz/fac_ui.c
--- a/mpz/fac_ui.c	Thu Jan 26 22:21:46 2012 +0100
+++ b/mpz/fac_ui.c	Thu Jan 26 22:41:13 2012 +0100
@@ -1,4 +1,4 @@
-/* mpz_fac_ui(result, n) -- Set RESULT to N!.
+/* mpz_fac_ui(RESULT, N) -- Set RESULT to N!.
 
 Contributed to the GNU project by Marco Bodrato.
 
@@ -58,6 +58,15 @@
       (PR) *= (P);						\
   } while (0)
 
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)			\
+    __max_i = (end);						\
+								\
+    do {							\
+      ++__i;							\
+      if (((sieve)[__index] & __mask) == 0)			\
+	{							\
+	  (prime) = id_to_n(__i)
+
 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
   do {								\
     mp_limb_t __mask, __index, __max_i, __i;			\
@@ -67,19 +76,16 @@
     __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
     __i += (off);						\
 								\
-    __max_i = (end);						\
-								\
-    do {							\
-      ++__i;							\
-      if (((sieve)[__index] & __mask) == 0)			\
-	{							\
-	  (prime) = id_to_n(__i)
+    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
 
-#define LOOP_ON_SIEVE_END					\
+#define LOOP_ON_SIEVE_STOP					\
 	}							\
       __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
       __index += __mask & 1;					\
-    }  while (__i <= __max_i);					\
+    }  while (__i <= __max_i)					\
+
+#define LOOP_ON_SIEVE_END					\
+    LOOP_ON_SIEVE_STOP;						\
   } while (0)
 
 /*********************************************************/
@@ -89,10 +95,11 @@
 static mp_limb_t
 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
 
-/* id_to_n (x) = bit_to_n (x-1) = id*3+1+(id&1)*/
+/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
 static mp_limb_t
-id_to_n  (mp_limb_t id)  { return (id*3+1)|1; }
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
 
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
 static mp_limb_t
 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
 
@@ -123,7 +130,7 @@
 #endif /* 61 */
 
 static void
-first_block_primesieve (mp_limb_t *bit_array, mp_limb_t n)
+first_block_primesieve (mp_ptr bit_array, mp_limb_t n)
 {
   mp_size_t bits, limbs;
 
@@ -152,12 +159,12 @@
 	{
 	  mp_size_t step, lindex;
 	  mp_limb_t lmask;
-	  int maskrot;
+	  unsigned  maskrot;
 
 	  step = id_to_n(i);
 /*	  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
-	  lindex = i*(step+1+(i&1))-1+(i&1);
-/*	  lindex = i*(i*3+2+(i&1)*2)-1+(i&1); */
+	  lindex = i*(step+1)-1+(-(i&1)&(i+1));
+/*	  lindex = i*(step+1+(i&1))-1+(i&1); */
 	  if (lindex > bits)
 	    break;
 
@@ -204,11 +211,11 @@
   {
     mp_size_t lindex;
     mp_limb_t lmask;
-    int maskrot;
+    unsigned  maskrot;
 
 /*  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
-    lindex = __i*(step+1+(__i&1))-1+(__i&1);
-/*  lindex = i*(i*3+2+(i&1)*2)-1+(i&1); */
+    lindex = __i*(step+1)-1+(-(__i&1)&(__i+1));
+/*  lindex = __i*(step+1+(__i&1))-1+(__i&1); */
     if (lindex > bits + offset)
       break;
 
@@ -342,17 +349,19 @@
  */
 
 static void
-mpz_oddswing_1 (mpz_ptr x, mp_limb_t n, mp_limb_t *sieve, mp_limb_t *factors)
+mpz_oddswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
 {
   mp_limb_t prod, max_prod;
-  mp_limb_t j;
+  mp_size_t j;
 
-  ASSERT (n > 24);
+  ASSERT (n >= 15);
 
   j = 0;
+  prod  = -(n & 1);
+  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
 
-  prod = 1;
-  max_prod = GMP_NUMB_MAX / n;
+  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
+  max_prod = GMP_NUMB_MAX / (n-1);
 
   /* Handle prime = 3 separately. */
   SWING_A_PRIME (3, n, prod, max_prod, factors, j);
@@ -365,13 +374,14 @@
       mp_limb_t prime;
 
       s = limb_apprsqrt(n);
+      ASSERT (s >= 5);
       s = n_to_bit (s);
-      LOOP_ON_SIEVE_BEGIN (prime, 0, s, 0,sieve);
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
       SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
       LOOP_ON_SIEVE_END;
       s++;
     } else
-      s = 0;
+      s = n_to_bit (5);
 
     ASSERT (max_prod <= GMP_NUMB_MAX / 3);
     max_prod *= 3;
@@ -456,15 +466,15 @@
     }
   else
     {
-      unsigned   s;
-      mp_limb_t *factors;
+      unsigned s;
+      mp_ptr   factors;
 
       ASSERT (n <= GMP_NUMB_MAX);
 
       s = 0;
       {
 	mp_limb_t tn;
-	unsigned  j;
+	mp_size_t j;
 	TMP_SDECL;
 
 #if TUNE_PROGRAM_BUILD
@@ -519,23 +529,28 @@
 	*/
 	{
 	  mpz_t swing;
-	  mp_limb_t *sieve;
+	  mp_ptr sieve;
 	  mp_size_t size;
 	  TMP_DECL;
 
 	  TMP_MARK;
-	  sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
 
-	  size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
+	  ASSERT (primesieve_size (n - 1) <= (n / GMP_NUMB_BITS + 4) - (n / GMP_NUMB_BITS / 2 + 3));
+	  /* swing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS); one
+	     more can be overwritten by mul, another for the sieve */
+	  MPZ_TMP_INIT (swing, n / GMP_NUMB_BITS + 4);
+	  /* Put the sieve on the second half, it will be overwritten by the last swing. */
+	  sieve = PTR(swing) + n / GMP_NUMB_BITS / 2 + 3;
 
-	  MPZ_TMP_INIT (swing, size + n / GMP_NUMB_BITS + 4);
-	  factors = PTR(swing) + n / GMP_NUMB_BITS + 4;
+	  size = (bitwise_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
+
+	  factors = TMP_ALLOC_LIMBS (size);
 	  do {
 	    mpz_t square;
 	    TMP_DECL;
 
 	    s--;
-	    mpz_oddswing_1 (swing, n>>s, sieve, factors);
+	    mpz_oddswing_1 (swing, n >> s, sieve, factors);
 
 	    TMP_MARK;
 	    size = SIZ (x);
@@ -572,7 +587,9 @@
     }
   else if (BELOW_THRESHOLD (n, FAC_ODD_THRESHOLD))
     {
-      mp_limb_t *factors, prod, max_prod, j;
+      mp_limb_t prod, max_prod;
+      mp_size_t j;
+      mp_ptr    factors;
       TMP_SDECL;
 
       TMP_SMARK;


More information about the gmp-commit mailing list