[Gmp-commit] /var/hg/gmp: 2 new changesets

Thu Jan 5 01:52:46 CET 2012

details:   /var/hg/gmp/rev/fff41cd2c041
changeset: 14559:fff41cd2c041
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jan 05 01:46:28 2012 +0100
description:
mpz_fac_ui: faster (odd)basecase.

details:   /var/hg/gmp/rev/9335072bbae2
changeset: 14560:9335072bbae2
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jan 05 01:52:27 2012 +0100
description:
tune_fac_ui: FAC_ODD_THRESHOLD can be "always".

diffstat:

 ChangeLog     |   10 ++
 gen-fac_ui.c  |   21 ++---
 mpz/fac_ui.c  |  200 ++++++++++++++++++++++++++++++---------------------------
 tune/tuneup.c |    3 +-
 4 files changed, 125 insertions(+), 109 deletions(-)

diffs (truncated from 359 to 300 lines):

diff -r 38f6ba0ae094 -r 9335072bbae2 ChangeLog

--- a/ChangeLog	Mon Jan 02 17:19:55 2012 +0100
+++ b/ChangeLog	Thu Jan 05 01:52:27 2012 +0100
@@ -1,3 +1,13 @@
+2012-01-05 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* gen-fac_ui.c: Remove currently unused constants; add new odd
+	double factorial table.
+	* mpz/fac_ui.h (RECURSIVE_PROD_THRESHOLD): Increase default.
+	(mpz_oddfac_1): New function: a merge of _bc_odd and _dsc_odd.
+	(mpz_prodlimbs): More in-place computations.
+
+	* tune/tuneup.c (tune_fac_ui): min_is_always for FAC_ODD_.
+
 2012-01-02 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* tune/tuneup.c (tune_fac_ui): Compute FAC_DSC before FAC_ODD.
diff -r 38f6ba0ae094 -r 9335072bbae2 gen-fac_ui.c
--- a/gen-fac_ui.c	Mon Jan 02 17:19:55 2012 +0100
+++ b/gen-fac_ui.c	Thu Jan 05 01:52:27 2012 +0100
@@ -1,6 +1,6 @@
 /* Generate mpz_fac_ui data.
 
-Copyright 2002, 2011 Free Software Foundation, Inc.
+Copyright 2002, 2011, 2012 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -76,7 +76,7 @@
     ("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
   printf
     ("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
-  mpz_init_set_ui (x, 1);
+  mpz_set_ui (x, 1);
   for (b = 3;; b++)
     {
       for (a = b; (a & 1) == 0; a >>= 1);
@@ -88,18 +88,14 @@
     }
   printf (")\n");
 
-
   printf
-    ("\n/* This table is 0$,1$,2$,3$,...,n$ where n$=binomial(2n,n) */\n");
+    ("\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! has <= GMP_NUMB_BITS bits */\n");
   printf
-    ("/* is the swing factorial, and n$ has <= GMP_NUMB_BITS bits */\n");
-  printf
-    ("#define ONE_LIMB_EVEN_SWING_TABLE CNST_LIMB(0x1),CNST_LIMB(0x2");
-  mpz_init_set_ui (x, 2);
-  for (b = 2;; b++)
+    ("#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b+=2)
     {
-      mpz_tdiv_q_ui (x, x, b);
-      mpz_mul_ui (x, x, 4*b-2);
+      mpz_mul_ui (x, x, b);
       if (mpz_sizeinbase (x, 2) > numb)
 	break;
       printf ("),CNST_LIMB(0x");
@@ -107,7 +103,7 @@
     }
   printf (")\n");
 
-
+#if 0
   mpz_set_ui (x, 1);
   mpz_mul_2exp (x, x, limb + 1);	/* x=2^(limb+1)        */
   mpz_init (y);
@@ -160,6 +156,7 @@
       mpz_out_str (stdout, 16, x);
       printf (")\n");
     }
+#endif
 
   return 0;
 }
diff -r 38f6ba0ae094 -r 9335072bbae2 mpz/fac_ui.c
--- a/mpz/fac_ui.c	Mon Jan 02 17:19:55 2012 +0100
+++ b/mpz/fac_ui.c	Thu Jan 05 01:52:27 2012 +0100
@@ -2,8 +2,8 @@
 
 Contributed to the GNU project by Marco Bodrato.
 
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -293,11 +293,12 @@
 
 /* FIXME: should be tuned */
 #ifndef RECURSIVE_PROD_THRESHOLD
-#define RECURSIVE_PROD_THRESHOLD MUL_TOOM22_THRESHOLD
+#define RECURSIVE_PROD_THRESHOLD (MUL_TOOM22_THRESHOLD|(MUL_TOOM22_THRESHOLD>>1))
 #endif
 
 /* Computes the product of the j>1 limbs pointed by factors, puts the
    result in x.
+   The list in {factors, j} is overwritten.
 */
 
 static void
@@ -313,21 +314,21 @@
   if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {
     mp_limb_t cy;
 
-    PTR (x)[0] = factors[0];
-    SIZ (x) = 1;
-    prod = MPZ_REALLOC (x, j);
+    j--;
     size = 1;
 
-    i = 1;
-    do
+    for (i = 1; i < j; i++)
       {
-	cy = mpn_mul_1 (prod, prod, size, factors[i]);
-	prod[size] = cy;
+	cy = mpn_mul_1 (factors, factors, size, factors[i]);
+	factors[size] = cy;
 	size += cy != 0;
- 	i++;
-      } while (i < j);
+      };
 
-    SIZ (x) = size;
+    prod = MPZ_REALLOC (x, size + 1);
+
+    cy = mpn_mul_1 (prod, factors, size, factors[i]);
+    prod[size] = cy;
+    SIZ (x) = size + (cy != 0);;
   } else {
     mpz_t x1, x2;
     TMP_DECL;
@@ -361,7 +362,7 @@
   do {						\
     mp_limb_t __q, __prime;			\
     __prime = (P);				\
-    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);		\
+    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
     __q = n;					\
     do {					\
       __q /= __prime;				\
@@ -457,98 +458,108 @@
   return log;
 }
 
-/* mpz_bc_oddfac_1 computes the odd part of the factorial of the parameter n.
-   I.e. n! = x 2^a, the result x is an odd positive integer.
- */
-
-static void
-mpz_bc_oddfac_1 (mpz_ptr x, mp_limb_t n)
-{
-  static const mp_limb_t table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE };
-
-  mp_limb_t *factors, prod, max_prod, i, j;
-  TMP_SDECL;
-
-  ASSERT (numberof (table) > 3);
-#if TUNE_PROGRAM_BUILD
-  ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
-#endif
-
-  j = 0;
-  prod = 1;
-
-  TMP_SMARK;
-  factors = TMP_SALLOC_LIMBS (1 + n / FACTORS_PER_LIMB);
-
-  do {
-    i = 3;
-    max_prod = GMP_NUMB_MAX / n;
-    do {
-      FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
-      i += 2;
-    } while (i <= n);
-    n >>= 1;
-  } while (n >= numberof (table));
-
-  factors[j++] = prod;
-  factors[j++] = table[n];
-  mpz_prodlimbs (x, factors, j);
-
-  TMP_SFREE;
-}
-
 /*********************************************************/
 /* Section factorial: fast factorial implementations     */
 /*********************************************************/
 
-/* mpz_dsc_oddfac_1 computes the odd part of the factorial of the parameter n.
-   I.e. n! = x 2^a, the result x is an odd positive integer.
+/* mpz_oddfac_1 computes the odd part of the factorial of the
+   parameter n.  I.e. n! = x 2^a, where x is the returned value: an
+   odd positive integer.
+ */
+static void
+mpz_oddfac_1 (mpz_ptr x, mp_limb_t n)
+{
+  static const mp_limb_t tablef[] = { ONE_LIMB_ODD_FACTORIAL_TABLE };
+  static const mp_limb_t tabled[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
 
-   Uses the algorithm described by Peter Luschny in "Divide, Swing and
-   Conquer the Factorial!".
- */
+  if (n < numberof (tablef))
+    {
+      PTR (x)[0] = tablef[n];
+      SIZ (x) = 1;
+    }
+  else
+    {
+      unsigned s;
+      mp_limb_t *factors;
 
-static void
-mpz_dsc_oddfac_1 (mpz_ptr x, mp_limb_t n)
-{
-  mpz_t swing;
-  mp_limb_t *sieve, *factors;
-  mp_size_t size;
-  unsigned s;
-  TMP_DECL;
+      ASSERT (n <= GMP_NUMB_MAX);
 
-  ASSERT (n <= GMP_NUMB_MAX);
+      s = 0;
+      {
+	mp_limb_t tn;
+	mp_limb_t prod, max_prod, i, j;
+	TMP_SDECL;
 
-  s = 1;
-  size = n;
-  for ( ; (size >>= 1) > FAC_DSC_THRESHOLD; s++)
-    ;
-  mpz_bc_oddfac_1 (x, size);
+	ASSERT (numberof (tablef) > 3);
+#if TUNE_PROGRAM_BUILD
+	ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
+#endif
 
-  TMP_MARK;
-  sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+	/* Compute the number of recursive steps for the DSC algorithm. */
+	for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
+	  tn >>= 1;
 
-  size = (bitwise_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
+	j = 0;
+	prod = 1;
 
-  MPZ_TMP_INIT (swing, size << 1);
-  factors = PTR(swing) + size;
-  do {
-    mpz_t square;
-    TMP_DECL;
+	TMP_SMARK;
+	factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
 
-    s--;
-    mpz_oddswing_1 (swing, n>>s, sieve, factors);
+	for (; (tn - 1) >> 1 >= numberof (tabled); tn >>= 1) {
+	  i = numberof (tabled) * 2 + 1;
+	  max_prod = GMP_NUMB_MAX / tn;
+	  factors[j++] = tabled[numberof (tabled) - 1];
+	  do {
+	    FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
+	    i += 2;
+	  } while (i <= tn);
+	}
 
-    TMP_MARK;
-    size = SIZ (x);
-    MPZ_TMP_INIT (square, size << 1);
-    mpn_sqr (PTR (square), PTR (x), size);
-    SIZ (square) = (size << 1) - (PTR (square)[(size << 1) - 1] == 0);
-    mpz_mul (x, square, swing); /* n!= n$ * floor(n/2)!^2 */
+	factors[j] = prod;
+	j += prod > 1;
+	factors[j++] = tabled[(tn - 1) >> 1];
+	factors[j++] = tablef[tn >> 1];
+	mpz_prodlimbs (x, factors, j);
 
-    TMP_FREE;
-  } while (s != 0);
-  TMP_FREE;
+	TMP_SFREE;
+      }
+
+      if (s != 0)
+	/* Use the algorithm described by Peter Luschny in "Divide,
+	   Swing and Conquer the Factorial!".
+	*/
+	{