[Gmp-commit] /var/hg/gmp: 10 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Sep 1 07:20:27 UTC 2014


details:   /var/hg/gmp/rev/f3e3f042a09f
changeset: 16484:f3e3f042a09f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 07:40:28 2014 +0200
description:
gmp-impl.h (TMP_ALLOC_LIMBS_3): New macro to allocate 3 blocks.

details:   /var/hg/gmp/rev/4ec3d2853b19
changeset: 16485:4ec3d2853b19
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 07:52:30 2014 +0200
description:
mpn/generic/remove.c: Use mp_srcptr for source operands.

details:   /var/hg/gmp/rev/75c3b1124f0f
changeset: 16486:75c3b1124f0f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 07:53:39 2014 +0200
description:
mpn/generic/remove.c: Use TMP_ALLOC_LIMBS_3.

details:   /var/hg/gmp/rev/a8261b6c0fc8
changeset: 16487:a8261b6c0fc8
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 08:01:20 2014 +0200
description:
mpn/generic/perfpow.c (pow_equals): TMP_DECL only where needed.

details:   /var/hg/gmp/rev/4acd301bdfe8
changeset: 16488:4acd301bdfe8
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 08:02:25 2014 +0200
description:
mpn/generic/perfpow.c (perfpow): Use TMP_ALLOC_LIMBS_3.

details:   /var/hg/gmp/rev/7afe5fe70c9f
changeset: 16489:7afe5fe70c9f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 08:59:47 2014 +0200
description:
mpn/generic/perfpow.c (mpn_perfect_power_p): Skip useless allocations. Use mpn_remove.

details:   /var/hg/gmp/rev/e4f5770eebd9
changeset: 16490:e4f5770eebd9
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 09:11:17 2014 +0200
description:
mpn/generic/divis.c: Share a count.

details:   /var/hg/gmp/rev/3ae2ce501e56
changeset: 16491:3ae2ce501e56
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 09:12:25 2014 +0200
description:
mpn/generic/divis.c: Use TMP_ALLOC_LIMBS_2

details:   /var/hg/gmp/rev/2412c47a54ba
changeset: 16492:2412c47a54ba
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 09:18:39 2014 +0200
description:
tests/mpz/t-perfpow.c (check_random): Check more perfect powers.

details:   /var/hg/gmp/rev/08c7152bfab8
changeset: 16493:08c7152bfab8
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Sep 01 09:20:16 2014 +0200
description:
ChangeLog

diffstat:

 ChangeLog             |   13 ++++
 gmp-impl.h            |   20 +++++-
 mpn/generic/divis.c   |   10 +-
 mpn/generic/perfpow.c |  159 +++++++++++++------------------------------------
 mpn/generic/remove.c  |   21 ++++--
 tests/mpz/t-perfpow.c |   11 ++-
 6 files changed, 100 insertions(+), 134 deletions(-)

diffs (truncated from 466 to 300 lines):

diff -r 6931b74acfb8 -r 08c7152bfab8 ChangeLog
--- a/ChangeLog	Sun Aug 31 21:31:22 2014 +0200
+++ b/ChangeLog	Mon Sep 01 09:20:16 2014 +0200
@@ -1,3 +1,16 @@
+2014-09-01 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* gmp-impl.h (TMP_ALLOC_LIMBS_3): New macro to allocate 3 blocks.
+	(mpn_remove): Update declaration with mp_srcptr arguments.
+	* mpn/generic/remove.c: Use TMP_ALLOC_LIMBS_3. mp_srcptr for args.
+
+	* mpn/generic/perfpow.c (pow_equals): TMP_DECL only where needed.
+	(perfpow): Use TMP_ALLOC_LIMBS_3.
+	(mpn_perfect_power_p): Skip useless allocations. Use mpn_remove.
+	* tests/mpz/t-perfpow.c (check_random): Check more perfect powers.
+
+	* mpn/generic/divis.c: Use TMP_ALLOC_LIMBS_2. Share a count.
+
 2014-08-27  Niels Möller  <nisse at lysator.liu.se>
 
 	* mini-gmp/mini-gmp.c (mpz_abs_sub_bit): Do full normalization,
diff -r 6931b74acfb8 -r 08c7152bfab8 gmp-impl.h
--- a/gmp-impl.h	Sun Aug 31 21:31:22 2014 +0200
+++ b/gmp-impl.h	Mon Sep 01 09:20:16 2014 +0200
@@ -490,7 +490,7 @@
 #define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
 #define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
 
-/* It's more efficient to allocate one block than two.  This is certainly
+/* It's more efficient to allocate one block than many.  This is certainly
    true of the malloc methods, but it can even be true of alloca if that
    involves copying a chunk of stack (various RISCs), or a call to a stack
    bounds check (mingw).  In any case, when debugging keep separate blocks
@@ -508,7 +508,21 @@
 	(yp) = (xp) + (xsize);						\
       }									\
   } while (0)
-
+#define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize)			\
+  do {									\
+    if (WANT_TMP_DEBUG)							\
+      {									\
+	(xp) = TMP_ALLOC_LIMBS (xsize);					\
+	(yp) = TMP_ALLOC_LIMBS (ysize);					\
+	(zp) = TMP_ALLOC_LIMBS (zsize);					\
+      }									\
+    else								\
+      {									\
+	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize));		\
+	(yp) = (xp) + (xsize);						\
+	(zp) = (yp) + (ysize);						\
+      }									\
+  } while (0)
 
 /* From gmp.h, nicer names for internal use. */
 #define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
@@ -2546,7 +2560,7 @@
 __GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
 
 #define mpn_remove __MPN(remove)
-__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t);
+__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t);
 
 
 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
diff -r 6931b74acfb8 -r 08c7152bfab8 mpn/generic/divis.c
--- a/mpn/generic/divis.c	Sun Aug 31 21:31:22 2014 +0200
+++ b/mpn/generic/divis.c	Mon Sep 01 09:20:16 2014 +0200
@@ -4,7 +4,7 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005, 2009, 2014 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -113,12 +113,12 @@
       return mpn_modexact_1_odd (ap, an, dlow) == 0;
     }
 
+  count_trailing_zeros (twos, dlow);
   if (dn == 2)
     {
       mp_limb_t  dsecond = dp[1];
       if (dsecond <= dmask)
 	{
-	  count_trailing_zeros (twos, dlow);
 	  dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
 	  ASSERT_LIMB (dlow);
 	  return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
@@ -135,10 +135,8 @@
 
   TMP_MARK;
 
-  rp = TMP_ALLOC_LIMBS (an + 1);
-  qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this? */
-
-  count_trailing_zeros (twos, dp[0]);
+  TMP_ALLOC_LIMBS_2 (rp, an + 1,
+		     qp, an - dn + 1); /* FIXME: Could we avoid this? */
 
   if (twos != 0)
     {
diff -r 6931b74acfb8 -r 08c7152bfab8 mpn/generic/perfpow.c
--- a/mpn/generic/perfpow.c	Sun Aug 31 21:31:22 2014 +0200
+++ b/mpn/generic/perfpow.c	Mon Sep 01 09:20:16 2014 +0200
@@ -54,12 +54,9 @@
 	    mp_limb_t k, mp_bitcnt_t f,
 	    mp_ptr tp)
 {
-  mp_limb_t *tp2;
   mp_bitcnt_t y, z;
-  mp_size_t i, bn;
-  int ans;
+  mp_size_t bn;
   mp_limb_t h, l;
-  TMP_DECL;
 
   ASSERT (n > 1 || (n == 1 && np[0] > 1));
   ASSERT (np[n - 1] > 0);
@@ -76,8 +73,6 @@
 	return 0;
     }
 
-  TMP_MARK;
-
   /* Final check. Estimate the size of {xp,xn}^k before computing the power
      with full precision.  Optimization: It might pay off to make a more
      accurate estimation of the logarithm of {xp,xn}, rather than using the
@@ -92,10 +87,16 @@
   z = f - 1; /* msb_index (np, n) */
   if (h == 0 && l <= z)
     {
+      mp_limb_t *tp2;
+      mp_size_t i;
+      int ans;
       mp_limb_t size;
+      TMP_DECL;
+
       size = l + k;
       ASSERT_ALWAYS (size >= k);
 
+      TMP_MARK;
       y = 2 + size / GMP_LIMB_BITS;
       tp2 = TMP_ALLOC_LIMBS (y);
 
@@ -104,14 +105,11 @@
 	ans = 1;
       else
 	ans = 0;
-    }
-  else
-    {
-      ans = 0;
+      TMP_FREE;
+      return ans;
     }
 
-  TMP_FREE;
-  return ans;
+  return 0;
 }
 
 
@@ -186,10 +184,14 @@
   gmp_init_primesieve (&ps);
   b = (f + 3) >> 1;
 
+#ifdef TMP_ALLOC_LIMBS_3
+  TMP_ALLOC_LIMBS_3 (ip, n, rp, n, tp, 5 * n);
+#else
   tmp = TMP_ALLOC_LIMBS (7 * n);
   ip = tmp; tmp += n;
   rp = tmp; tmp += n;
   tp = tmp; tmp += 5 * n;
+#endif
 
   MPN_ZERO (rp, n);
 
@@ -248,142 +250,73 @@
 int
 mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
 {
-  mp_size_t ncn, s, pn, xn;
   mp_limb_t *nc, factor, g;
-  mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
+  mp_limb_t exp, d;
   mp_bitcnt_t twos, count;
   int ans, where, neg, trial;
-  mp_ptr tmp;
   TMP_DECL;
 
-  nc = (mp_ptr) np;
-
-  neg = 0;
-  if (n < 0)
+  neg = n < 0;
+  if (neg)
     {
-      neg = 1;
       n = -n;
     }
 
-  if (n == 0 || (n == 1 && np[0] == 1))
+  if (n == 0 || (n == 1 && np[0] == 1)) /* Valgrind doesn't like
+					   (n <= (np[0] == 1)) */
     return 1;
 
   TMP_MARK;
 
-  g = 0;
+  count = 0;
 
-  ncn = n;
   twos = mpn_scan1 (np, 0);
-  if (twos > 0)
+  if (twos != 0)
     {
+      mp_size_t s;
       if (twos == 1)
 	{
-	  ans = 0;
-	  goto ret;
+	  return 0;
 	}
       s = twos / GMP_LIMB_BITS;
       if (s + 1 == n && POW2_P (np[s]))
 	{
-	  ans = ! (neg && POW2_P (twos));
-	  goto ret;
+	  return ! (neg && POW2_P (twos));
 	}
       count = twos % GMP_LIMB_BITS;
-      ncn = n - s;
-      nc = TMP_ALLOC_LIMBS (ncn);
+      n -= s;
+      np += s;
       if (count > 0)
 	{
-	  mpn_rshift (nc, np + s, ncn, count);
-	  ncn -= (nc[ncn - 1] == 0);
+	  nc = TMP_ALLOC_LIMBS (n);
+	  mpn_rshift (nc, np, n, count);
+	  n -= (nc[n - 1] == 0);
+	  np = nc;
 	}
-      else
-	{
-	  MPN_COPY (nc, np + s, ncn);
-	}
-      g = twos;
     }
+  g = twos;
 
-  if (ncn <= SMALL)
-    trial = 0;
-  else if (ncn <= MEDIUM)
-    trial = 1;
-  else
-    trial = 2;
+  trial = (n > SMALL) + (n > MEDIUM);
 
   where = 0;
-  factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
+  factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
 
   if (factor != 0)
     {
-      if (twos == 0)
+      if (count == 0) /* We did not allocate nc yet. */
 	{
-	  nc = TMP_ALLOC_LIMBS (ncn);
-	  MPN_COPY (nc, np, ncn);
+	  nc = TMP_ALLOC_LIMBS (n);
 	}
 
-      /* Remove factors found by trialdiv.  Optimization: Perhaps better to use
-	 the strategy in mpz_remove ().  */
-      tmp = TMP_ALLOC_LIMBS (6 * ncn + 4);
-      prev = tmp; tmp += ncn + 2;
-      next = tmp; tmp += ncn + 2;
-      tp = tmp; tmp += 4 * ncn;
+      /* Remove factors found by trialdiv.  Optimization: If remove
+	 define _itch, we can allocate its scratch just once */
 
       do
 	{
 	  binvert_limb (d, factor);
-	  prev[0] = d;
-	  pn = 1;
-	  exp = 1;
-	  while (2 * pn - 1 <= ncn)
-	    {
-	      mpn_sqr (next, prev, pn);
-	      xn = 2 * pn;
-	      xn -= (next[xn - 1] == 0);
 
-	      if (mpn_divisible_p (nc, ncn, next, xn) == 0)
-		break;
-


More information about the gmp-commit mailing list