[Gmp-commit] /var/hg/gmp: mpz_fac_ui: Less memory, less divisions.
mercurial at gmplib.org
mercurial at gmplib.org
Mon Jan 16 20:36:32 CET 2012
details: /var/hg/gmp/rev/e654dd4a9626
changeset: 14569:e654dd4a9626
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Jan 16 20:36:28 2012 +0100
description:
mpz_fac_ui: Less memory, less divisions.
diffstat:
ChangeLog | 8 +++
mpn/minithres/gmp-mparam.h | 6 +-
mpz/fac_ui.c | 95 ++++++++++++++++++++++++++++++++++-----------
tune/tuneup.c | 2 +-
4 files changed, 84 insertions(+), 27 deletions(-)
diffs (235 lines):
diff -r ddc781726d67 -r e654dd4a9626 ChangeLog
--- a/ChangeLog Sun Jan 15 23:34:29 2012 +0100
+++ b/ChangeLog Mon Jan 16 20:36:28 2012 +0100
@@ -1,3 +1,11 @@
+2012-01-16 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpz/fac_ui.h (SIEVE_SEED): Define value for small limb size.
+ (mpz_oddswing_1): Reduce the number of divisions.
+ (mpz_oddfac_1): Reduce memory usage.
+ * mpn/minithres/gmp-mparam.h: Correct minimum for FAC_DSC_.
+ * tune/tuneup.c (tune_fac_ui): Likewise.
+
2012-01-15 Niels Möller <nisse at lysator.liu.se>
* mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
diff -r ddc781726d67 -r e654dd4a9626 mpn/minithres/gmp-mparam.h
--- a/mpn/minithres/gmp-mparam.h Sun Jan 15 23:34:29 2012 +0100
+++ b/mpn/minithres/gmp-mparam.h Mon Jan 16 20:36:28 2012 +0100
@@ -1,7 +1,7 @@
/* Minimal values gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -95,4 +95,4 @@
#define SET_STR_PRECOMPUTE_THRESHOLD 100
#define FAC_ODD_THRESHOLD 0 /* always */
-#define FAC_DSC_THRESHOLD 16
+#define FAC_DSC_THRESHOLD 25
diff -r ddc781726d67 -r e654dd4a9626 mpz/fac_ui.c
--- a/mpz/fac_ui.c Sun Jan 15 23:34:29 2012 +0100
+++ b/mpz/fac_ui.c Mon Jan 16 20:36:28 2012 +0100
@@ -22,11 +22,12 @@
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
#include "fac_ui.h"
/* TODO:
- - write the code for tuning thresholds;
+ - write the code for tuning more thresholds;
- split this file in smaller parts with functions that can be recycled for different computations.
*/
@@ -98,23 +99,28 @@
static mp_size_t
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
-/* FIXME: also for 8... */
+#if GMP_LIMB_BITS > 61
+#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
+#define SEED_LIMIT 202
+#else
#if GMP_LIMB_BITS > 30
-#if GMP_LIMB_BITS < 62
#define SIEVE_SEED CNST_LIMB(0x69128480)
#define SEED_LIMIT 114
#else
-#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
-#define SEED_LIMIT 202
-#endif
-#else
#if GMP_LIMB_BITS > 15
#define SIEVE_SEED CNST_LIMB(0x8480)
#define SEED_LIMIT 54
#else
-#error Not implemented
-#endif
-#endif
+#if GMP_LIMB_BITS > 7
+#define SIEVE_SEED CNST_LIMB(0x80)
+#define SEED_LIMIT 34
+#else
+#define SIEVE_SEED CNST_LIMB(0x0)
+#define SEED_LIMIT 24
+#endif /* 7 */
+#endif /* 15 */
+#endif /* 30 */
+#endif /* 61 */
static void
first_block_primesieve (mp_limb_t *bit_array, mp_limb_t n)
@@ -358,18 +364,39 @@
/* Section swing: swing factorial */
/*********************************************************/
-#define SWING_A_PRIME(P, PR, MAX_PR, VEC, I) \
+/* Returns an approximation of the sqare root of x. *
+ * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+ int s;
+
+ ASSERT (x > 2);
+ count_leading_zeros (s, x - 1);
+ s = GMP_LIMB_BITS - 1 - s;
+ return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
+}
+
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
do { \
mp_limb_t __q, __prime; \
__prime = (P); \
FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
- __q = n; \
+ __q = (N); \
do { \
__q /= __prime; \
if ((__q & 1) != 0) (PR) *= __prime; \
} while (__q >= __prime); \
} while (0)
+#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
+ do { \
+ mp_limb_t __prime; \
+ __prime = (P); \
+ if ((((N) / __prime) & 1) != 0) \
+ FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
+ } while (0)
+
/* mpz_oddswing_1 computes the odd part of the swing factorial of the parameter n.
The result x is an odd positive integer so that n$ = x 2^a.
@@ -387,7 +414,7 @@
mp_limb_t prod, max_prod;
mp_limb_t j;
- ASSERT (n > 15);
+ ASSERT (n > 24);
j = 0;
@@ -395,16 +422,37 @@
max_prod = GMP_NUMB_MAX / n;
/* Handle prime = 3 separately. */
- SWING_A_PRIME (3, prod, max_prod, factors, j);
+ SWING_A_PRIME (3, n, prod, max_prod, factors, j);
/* Swing primes from 5 to n/3 */
- if (1 || n > 5*3)
+ if (1 || n > 5*3) {
+ mp_limb_t s;
+
+ if (1 || n >= 5*5) {
+ mp_limb_t prime;
+
+ s = limb_apprsqrt(n);
+ s = n_to_bit (s);
+ LOOP_ON_SIEVE_BEGIN (prime, 0, s, 0,sieve);
+ SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ s++;
+ } else
+ s = 0;
+
+ ASSERT (max_prod <= GMP_NUMB_MAX / 3);
+ max_prod *= 3;
+ ASSERT (bit_to_n (s) * bit_to_n (s) > n);
+ ASSERT (s <= n_to_bit (n / 3));
{
mp_limb_t prime;
- LOOP_ON_SIEVE_BEGIN (prime, 0, n_to_bit (n/3), 0,sieve);
- SWING_A_PRIME (prime, prod, max_prod, factors, j);
+
+ LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
+ SH_SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
LOOP_ON_SIEVE_END;
}
+ max_prod /= 3;
+ }
/* Store primes from (n+1)/2 to n */
{
@@ -428,6 +476,7 @@
}
#undef SWING_A_PRIME
+#undef SH_SWING_A_PRIME
/*********************************************************/
/* Section oddfac: odd factorial, needed also by binomial*/
@@ -458,10 +507,6 @@
return log;
}
-/*********************************************************/
-/* Section factorial: fast factorial implementations */
-/*********************************************************/
-
/* mpz_oddfac_1 computes the odd part of the factorial of the
parameter n. I.e. n! = x 2^a, where x is the returned value: an
odd positive integer.
@@ -539,8 +584,8 @@
size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
- MPZ_TMP_INIT (swing, size << 1);
- factors = PTR(swing) + size;
+ MPZ_TMP_INIT (swing, size + n / GMP_NUMB_BITS + 4);
+ factors = PTR(swing) + n / GMP_NUMB_BITS + 4;
do {
mpz_t square;
TMP_DECL;
@@ -562,6 +607,10 @@
}
}
+/*********************************************************/
+/* Section factorial: fast factorial implementations */
+/*********************************************************/
+
/* Computes n!, the factorial of n.
WARNING: it assumes that n fits in a limb!
*/
diff -r ddc781726d67 -r e654dd4a9626 tune/tuneup.c
--- a/tune/tuneup.c Sun Jan 15 23:34:29 2012 +0100
+++ b/tune/tuneup.c Mon Jan 16 20:36:28 2012 +0100
@@ -2629,7 +2629,7 @@
param.function = speed_mpz_fac_ui_tune;
param.name = "FAC_DSC_THRESHOLD";
- param.min_size = 16;
+ param.min_size = 25;
param.max_size = FAC_DSC_THRESHOLD_LIMIT;
one (&fac_dsc_threshold, ¶m);
More information about the gmp-commit
mailing list