[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Jan 5 01:52:46 CET 2012
details: /var/hg/gmp/rev/fff41cd2c041
changeset: 14559:fff41cd2c041
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Jan 05 01:46:28 2012 +0100
description:
mpz_fac_ui: faster (odd)basecase.
details: /var/hg/gmp/rev/9335072bbae2
changeset: 14560:9335072bbae2
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Jan 05 01:52:27 2012 +0100
description:
tune_fac_ui: FAC_ODD_THRESHOLD can be "always".
diffstat:
ChangeLog | 10 ++
gen-fac_ui.c | 21 ++---
mpz/fac_ui.c | 200 ++++++++++++++++++++++++++++++---------------------------
tune/tuneup.c | 3 +-
4 files changed, 125 insertions(+), 109 deletions(-)
diffs (truncated from 359 to 300 lines):
diff -r 38f6ba0ae094 -r 9335072bbae2 ChangeLog
--- a/ChangeLog Mon Jan 02 17:19:55 2012 +0100
+++ b/ChangeLog Thu Jan 05 01:52:27 2012 +0100
@@ -1,3 +1,13 @@
+2012-01-05 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * gen-fac_ui.c: Remove currently unused constants; add new odd
+ double factorial table.
+ * mpz/fac_ui.h (RECURSIVE_PROD_THRESHOLD): Increase default.
+ (mpz_oddfac_1): New function: a merge of _bc_odd and _dsc_odd.
+ (mpz_prodlimbs): More in-place computations.
+
+ * tune/tuneup.c (tune_fac_ui): min_is_always for FAC_ODD_.
+
2012-01-02 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tune/tuneup.c (tune_fac_ui): Compute FAC_DSC before FAC_ODD.
diff -r 38f6ba0ae094 -r 9335072bbae2 gen-fac_ui.c
--- a/gen-fac_ui.c Mon Jan 02 17:19:55 2012 +0100
+++ b/gen-fac_ui.c Thu Jan 05 01:52:27 2012 +0100
@@ -1,6 +1,6 @@
/* Generate mpz_fac_ui data.
-Copyright 2002, 2011 Free Software Foundation, Inc.
+Copyright 2002, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -76,7 +76,7 @@
("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
printf
("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
- mpz_init_set_ui (x, 1);
+ mpz_set_ui (x, 1);
for (b = 3;; b++)
{
for (a = b; (a & 1) == 0; a >>= 1);
@@ -88,18 +88,14 @@
}
printf (")\n");
-
printf
- ("\n/* This table is 0$,1$,2$,3$,...,n$ where n$=binomial(2n,n) */\n");
+ ("\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! has <= GMP_NUMB_BITS bits */\n");
printf
- ("/* is the swing factorial, and n$ has <= GMP_NUMB_BITS bits */\n");
- printf
- ("#define ONE_LIMB_EVEN_SWING_TABLE CNST_LIMB(0x1),CNST_LIMB(0x2");
- mpz_init_set_ui (x, 2);
- for (b = 2;; b++)
+ ("#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1");
+ mpz_set_ui (x, 1);
+ for (b = 3;; b+=2)
{
- mpz_tdiv_q_ui (x, x, b);
- mpz_mul_ui (x, x, 4*b-2);
+ mpz_mul_ui (x, x, b);
if (mpz_sizeinbase (x, 2) > numb)
break;
printf ("),CNST_LIMB(0x");
@@ -107,7 +103,7 @@
}
printf (")\n");
-
+#if 0
mpz_set_ui (x, 1);
mpz_mul_2exp (x, x, limb + 1); /* x=2^(limb+1) */
mpz_init (y);
@@ -160,6 +156,7 @@
mpz_out_str (stdout, 16, x);
printf (")\n");
}
+#endif
return 0;
}
diff -r 38f6ba0ae094 -r 9335072bbae2 mpz/fac_ui.c
--- a/mpz/fac_ui.c Mon Jan 02 17:19:55 2012 +0100
+++ b/mpz/fac_ui.c Thu Jan 05 01:52:27 2012 +0100
@@ -2,8 +2,8 @@
Contributed to the GNU project by Marco Bodrato.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -293,11 +293,12 @@
/* FIXME: should be tuned */
#ifndef RECURSIVE_PROD_THRESHOLD
-#define RECURSIVE_PROD_THRESHOLD MUL_TOOM22_THRESHOLD
+#define RECURSIVE_PROD_THRESHOLD (MUL_TOOM22_THRESHOLD|(MUL_TOOM22_THRESHOLD>>1))
#endif
/* Computes the product of the j>1 limbs pointed by factors, puts the
result in x.
+ The list in {factors, j} is overwritten.
*/
static void
@@ -313,21 +314,21 @@
if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {
mp_limb_t cy;
- PTR (x)[0] = factors[0];
- SIZ (x) = 1;
- prod = MPZ_REALLOC (x, j);
+ j--;
size = 1;
- i = 1;
- do
+ for (i = 1; i < j; i++)
{
- cy = mpn_mul_1 (prod, prod, size, factors[i]);
- prod[size] = cy;
+ cy = mpn_mul_1 (factors, factors, size, factors[i]);
+ factors[size] = cy;
size += cy != 0;
- i++;
- } while (i < j);
+ };
- SIZ (x) = size;
+ prod = MPZ_REALLOC (x, size + 1);
+
+ cy = mpn_mul_1 (prod, factors, size, factors[i]);
+ prod[size] = cy;
+ SIZ (x) = size + (cy != 0);;
} else {
mpz_t x1, x2;
TMP_DECL;
@@ -361,7 +362,7 @@
do { \
mp_limb_t __q, __prime; \
__prime = (P); \
- FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
+ FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
__q = n; \
do { \
__q /= __prime; \
@@ -457,98 +458,108 @@
return log;
}
-/* mpz_bc_oddfac_1 computes the odd part of the factorial of the parameter n.
- I.e. n! = x 2^a, the result x is an odd positive integer.
- */
-
-static void
-mpz_bc_oddfac_1 (mpz_ptr x, mp_limb_t n)
-{
- static const mp_limb_t table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE };
-
- mp_limb_t *factors, prod, max_prod, i, j;
- TMP_SDECL;
-
- ASSERT (numberof (table) > 3);
-#if TUNE_PROGRAM_BUILD
- ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
-#endif
-
- j = 0;
- prod = 1;
-
- TMP_SMARK;
- factors = TMP_SALLOC_LIMBS (1 + n / FACTORS_PER_LIMB);
-
- do {
- i = 3;
- max_prod = GMP_NUMB_MAX / n;
- do {
- FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
- i += 2;
- } while (i <= n);
- n >>= 1;
- } while (n >= numberof (table));
-
- factors[j++] = prod;
- factors[j++] = table[n];
- mpz_prodlimbs (x, factors, j);
-
- TMP_SFREE;
-}
-
/*********************************************************/
/* Section factorial: fast factorial implementations */
/*********************************************************/
-/* mpz_dsc_oddfac_1 computes the odd part of the factorial of the parameter n.
- I.e. n! = x 2^a, the result x is an odd positive integer.
+/* mpz_oddfac_1 computes the odd part of the factorial of the
+ parameter n. I.e. n! = x 2^a, where x is the returned value: an
+ odd positive integer.
+ */
+static void
+mpz_oddfac_1 (mpz_ptr x, mp_limb_t n)
+{
+ static const mp_limb_t tablef[] = { ONE_LIMB_ODD_FACTORIAL_TABLE };
+ static const mp_limb_t tabled[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
- Uses the algorithm described by Peter Luschny in "Divide, Swing and
- Conquer the Factorial!".
- */
+ if (n < numberof (tablef))
+ {
+ PTR (x)[0] = tablef[n];
+ SIZ (x) = 1;
+ }
+ else
+ {
+ unsigned s;
+ mp_limb_t *factors;
-static void
-mpz_dsc_oddfac_1 (mpz_ptr x, mp_limb_t n)
-{
- mpz_t swing;
- mp_limb_t *sieve, *factors;
- mp_size_t size;
- unsigned s;
- TMP_DECL;
+ ASSERT (n <= GMP_NUMB_MAX);
- ASSERT (n <= GMP_NUMB_MAX);
+ s = 0;
+ {
+ mp_limb_t tn;
+ mp_limb_t prod, max_prod, i, j;
+ TMP_SDECL;
- s = 1;
- size = n;
- for ( ; (size >>= 1) > FAC_DSC_THRESHOLD; s++)
- ;
- mpz_bc_oddfac_1 (x, size);
+ ASSERT (numberof (tablef) > 3);
+#if TUNE_PROGRAM_BUILD
+ ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
+#endif
- TMP_MARK;
- sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+ /* Compute the number of recursive steps for the DSC algorithm. */
+ for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
+ tn >>= 1;
- size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
+ j = 0;
+ prod = 1;
- MPZ_TMP_INIT (swing, size << 1);
- factors = PTR(swing) + size;
- do {
- mpz_t square;
- TMP_DECL;
+ TMP_SMARK;
+ factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
- s--;
- mpz_oddswing_1 (swing, n>>s, sieve, factors);
+ for (; (tn - 1) >> 1 >= numberof (tabled); tn >>= 1) {
+ i = numberof (tabled) * 2 + 1;
+ max_prod = GMP_NUMB_MAX / tn;
+ factors[j++] = tabled[numberof (tabled) - 1];
+ do {
+ FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
+ i += 2;
+ } while (i <= tn);
+ }
- TMP_MARK;
- size = SIZ (x);
- MPZ_TMP_INIT (square, size << 1);
- mpn_sqr (PTR (square), PTR (x), size);
- SIZ (square) = (size << 1) - (PTR (square)[(size << 1) - 1] == 0);
- mpz_mul (x, square, swing); /* n!= n$ * floor(n/2)!^2 */
+ factors[j] = prod;
+ j += prod > 1;
+ factors[j++] = tabled[(tn - 1) >> 1];
+ factors[j++] = tablef[tn >> 1];
+ mpz_prodlimbs (x, factors, j);
- TMP_FREE;
- } while (s != 0);
- TMP_FREE;
+ TMP_SFREE;
+ }
+
+ if (s != 0)
+ /* Use the algorithm described by Peter Luschny in "Divide,
+ Swing and Conquer the Factorial!".
+ */
+ {
More information about the gmp-commit
mailing list