[Gmp-commit] /var/hg/gmp: mpz_fac_ui: Less memory, less divisions.

mercurial at gmplib.org mercurial at gmplib.org
Mon Jan 16 20:36:32 CET 2012


details:   /var/hg/gmp/rev/e654dd4a9626
changeset: 14569:e654dd4a9626
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Jan 16 20:36:28 2012 +0100
description:
mpz_fac_ui: Less memory, less divisions.

diffstat:

 ChangeLog                  |   8 +++
 mpn/minithres/gmp-mparam.h |   6 +-
 mpz/fac_ui.c               |  95 ++++++++++++++++++++++++++++++++++-----------
 tune/tuneup.c              |   2 +-
 4 files changed, 84 insertions(+), 27 deletions(-)

diffs (235 lines):

diff -r ddc781726d67 -r e654dd4a9626 ChangeLog
--- a/ChangeLog	Sun Jan 15 23:34:29 2012 +0100
+++ b/ChangeLog	Mon Jan 16 20:36:28 2012 +0100
@@ -1,3 +1,11 @@
+2012-01-16 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpz/fac_ui.h (SIEVE_SEED): Define value for small limb size.
+	(mpz_oddswing_1): Reduce the number of divisions.
+	(mpz_oddfac_1): Reduce memory usage.
+	* mpn/minithres/gmp-mparam.h: Correct minimum for FAC_DSC_.
+	* tune/tuneup.c (tune_fac_ui): Likewise.
+
 2012-01-15  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
diff -r ddc781726d67 -r e654dd4a9626 mpn/minithres/gmp-mparam.h
--- a/mpn/minithres/gmp-mparam.h	Sun Jan 15 23:34:29 2012 +0100
+++ b/mpn/minithres/gmp-mparam.h	Mon Jan 16 20:36:28 2012 +0100
@@ -1,7 +1,7 @@
 /* Minimal values gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010, 2012 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -95,4 +95,4 @@
 #define SET_STR_PRECOMPUTE_THRESHOLD       100
 
 #define FAC_ODD_THRESHOLD                    0  /* always */
-#define FAC_DSC_THRESHOLD                   16
+#define FAC_DSC_THRESHOLD                   25
diff -r ddc781726d67 -r e654dd4a9626 mpz/fac_ui.c
--- a/mpz/fac_ui.c	Sun Jan 15 23:34:29 2012 +0100
+++ b/mpz/fac_ui.c	Mon Jan 16 20:36:28 2012 +0100
@@ -22,11 +22,12 @@
 
 #include "gmp.h"
 #include "gmp-impl.h"
+#include "longlong.h"
 
 #include "fac_ui.h"
 
 /* TODO:
-   - write the code for tuning thresholds;
+   - write the code for tuning more thresholds;
    - split this file in smaller parts with functions that can be recycled for different computations.
  */
 
@@ -98,23 +99,28 @@
 static mp_size_t
 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
 
-/* FIXME: also for 8... */
+#if GMP_LIMB_BITS > 61
+#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
+#define SEED_LIMIT 202
+#else
 #if GMP_LIMB_BITS > 30
-#if GMP_LIMB_BITS < 62
 #define SIEVE_SEED CNST_LIMB(0x69128480)
 #define SEED_LIMIT 114
 #else
-#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
-#define SEED_LIMIT 202
-#endif
-#else
 #if GMP_LIMB_BITS > 15
 #define SIEVE_SEED CNST_LIMB(0x8480)
 #define SEED_LIMIT 54
 #else
-#error Not implemented
-#endif
-#endif
+#if GMP_LIMB_BITS > 7
+#define SIEVE_SEED CNST_LIMB(0x80)
+#define SEED_LIMIT 34
+#else
+#define SIEVE_SEED CNST_LIMB(0x0)
+#define SEED_LIMIT 24
+#endif /* 7 */
+#endif /* 15 */
+#endif /* 30 */
+#endif /* 61 */
 
 static void
 first_block_primesieve (mp_limb_t *bit_array, mp_limb_t n)
@@ -358,18 +364,39 @@
 /* Section swing: swing factorial                        */
 /*********************************************************/
 
-#define SWING_A_PRIME(P, PR, MAX_PR, VEC, I)	\
+/* Returns an approximation of the sqare root of x.  *
+ * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  count_leading_zeros (s, x - 1);
+  s = GMP_LIMB_BITS - 1 - s;
+  return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
+}
+
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
   do {						\
     mp_limb_t __q, __prime;			\
     __prime = (P);				\
     FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
-    __q = n;					\
+    __q = (N);					\
     do {					\
       __q /= __prime;				\
       if ((__q & 1) != 0) (PR) *= __prime;	\
     } while (__q >= __prime);			\
   } while (0)
 
+#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
+  do {							\
+    mp_limb_t __prime;					\
+    __prime = (P);					\
+    if ((((N) / __prime) & 1) != 0)			\
+      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);	\
+  } while (0)
+
 /* mpz_oddswing_1 computes the odd part of the swing factorial of the parameter n.
    The result x is an odd positive integer so that n$ = x 2^a.
 
@@ -387,7 +414,7 @@
   mp_limb_t prod, max_prod;
   mp_limb_t j;
 
-  ASSERT (n > 15);
+  ASSERT (n > 24);
 
   j = 0;
 
@@ -395,16 +422,37 @@
   max_prod = GMP_NUMB_MAX / n;
 
   /* Handle prime = 3 separately. */
-  SWING_A_PRIME (3, prod, max_prod, factors, j);
+  SWING_A_PRIME (3, n, prod, max_prod, factors, j);
 
   /* Swing primes from 5 to n/3 */
-  if (1 || n > 5*3)
+  if (1 || n > 5*3) {
+    mp_limb_t s;
+
+    if (1 || n >= 5*5) {
+      mp_limb_t prime;
+
+      s = limb_apprsqrt(n);
+      s = n_to_bit (s);
+      LOOP_ON_SIEVE_BEGIN (prime, 0, s, 0,sieve);
+      SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_END;
+      s++;
+    } else
+      s = 0;
+
+    ASSERT (max_prod <= GMP_NUMB_MAX / 3);
+    max_prod *= 3;
+    ASSERT (bit_to_n (s) * bit_to_n (s) > n);
+    ASSERT (s <= n_to_bit (n / 3));
     {
       mp_limb_t prime;
-      LOOP_ON_SIEVE_BEGIN (prime, 0, n_to_bit (n/3), 0,sieve);
-      SWING_A_PRIME (prime, prod, max_prod, factors, j);
+
+      LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
+      SH_SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
       LOOP_ON_SIEVE_END;
     }
+    max_prod /= 3;
+  }
 
   /* Store primes from (n+1)/2 to n */
   {
@@ -428,6 +476,7 @@
 }
 
 #undef SWING_A_PRIME
+#undef SH_SWING_A_PRIME
 
 /*********************************************************/
 /* Section oddfac: odd factorial, needed also by binomial*/
@@ -458,10 +507,6 @@
   return log;
 }
 
-/*********************************************************/
-/* Section factorial: fast factorial implementations     */
-/*********************************************************/
-
 /* mpz_oddfac_1 computes the odd part of the factorial of the
    parameter n.  I.e. n! = x 2^a, where x is the returned value: an
    odd positive integer.
@@ -539,8 +584,8 @@
 
 	  size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
 
-	  MPZ_TMP_INIT (swing, size << 1);
-	  factors = PTR(swing) + size;
+	  MPZ_TMP_INIT (swing, size + n / GMP_NUMB_BITS + 4);
+	  factors = PTR(swing) + n / GMP_NUMB_BITS + 4;
 	  do {
 	    mpz_t square;
 	    TMP_DECL;
@@ -562,6 +607,10 @@
     }
 }
 
+/*********************************************************/
+/* Section factorial: fast factorial implementations     */
+/*********************************************************/
+
 /* Computes n!, the factorial of n.
    WARNING: it assumes that n fits in a limb!
  */
diff -r ddc781726d67 -r e654dd4a9626 tune/tuneup.c
--- a/tune/tuneup.c	Sun Jan 15 23:34:29 2012 +0100
+++ b/tune/tuneup.c	Mon Jan 16 20:36:28 2012 +0100
@@ -2629,7 +2629,7 @@
   param.function = speed_mpz_fac_ui_tune;
 
   param.name = "FAC_DSC_THRESHOLD";
-  param.min_size = 16;
+  param.min_size = 25;
   param.max_size = FAC_DSC_THRESHOLD_LIMIT;
   one (&fac_dsc_threshold, &param);
 


More information about the gmp-commit mailing list