[Gmp-commit] /home/hgfiles/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Dec 27 00:30:14 CET 2009


details:   /home/hgfiles/gmp/rev/4f8840c98c7e
changeset: 13229:4f8840c98c7e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Dec 26 12:42:48 2009 +0100
description:
(TOOM22_MUL_REC): New name for TOOM22_MUL_MN_REC.

details:   /home/hgfiles/gmp/rev/40248ad41cab
changeset: 13230:40248ad41cab
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Dec 27 00:25:58 2009 +0100
description:
Tune BMOD_1_TO_MOD_1_THRESHOLD instead of MODEXACT_1_ODD_THRESHOLD.

details:   /home/hgfiles/gmp/rev/b3a07d48af77
changeset: 13231:b3a07d48af77
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Dec 27 00:30:10 2009 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                     |  29 ++++++++++++++
 gmp-impl.h                    |  11 ++--
 mpn/generic/divis.c           |   2 +-
 mpn/generic/sbpi1_divappr_q.c |   5 +-
 mpn/generic/toom22_mul.c      |   4 +-
 mpn/generic/toom32_mul.c      |  88 ++++++++++++++++++++++--------------------
 mpz/cong.c                    |   2 +-
 mpz/cong_ui.c                 |   2 +-
 mpz/divis_ui.c                |   2 +-
 tests/mpn/t-bdiv.c            |   2 +-
 tests/mpn/t-toom32.c          |   6 +-
 tune/tuneup.c                 |  19 ++------
 12 files changed, 98 insertions(+), 74 deletions(-)

diffs (truncated from 440 to 300 lines):

diff -r 2f7eab694ee9 -r b3a07d48af77 ChangeLog
--- a/ChangeLog	Sat Dec 26 12:39:10 2009 +0100
+++ b/ChangeLog	Sun Dec 27 00:30:10 2009 +0100
@@ -1,3 +1,32 @@
+2009-12-27  Torbjorn Granlund  <tege at gmplib.org>
+
+	* gmp-impl.h (MODEXACT_1_ODD_THRESHOLD): Remove.
+	(BMOD_1_TO_MOD_1_THRESHOLD): New parameter, with the reverse meaning of
+	MODEXACT_1_ODD_THRESHOLD.
+	(MPN_MOD_OR_MODEXACT_1_ODD): Use BMOD_1_TO_MOD_1_THRESHOLD.
+	* mpn/generic/divis.c, mpz/{cong.c,cong_ui.c,divis_ui.c}: Likewise.
+	* tune/tuneup.c (tune_modexact_1_odd): Tune BMOD_1_TO_MOD_1_THRESHOLD;
+	Do not assume native mpn_modexact_1_odd is faster than mpn_mod_1.
+	(tuned_speed_mpn_mod_1): Remove variable.
+
+	* mpn/generic/toom22_mul.c (TOOM22_MUL_REC): New name for
+	TOOM22_MUL_MN_REC.
+
+2009-12-26  Niels Möller  <nisse at lysator.liu.se>
+
+	* tests/mpn/t-toom32.c (MIN_AN, MIN_BN, MAX_BN): Relax
+	requirements a bit.
+
+	* mpn/generic/toom32_mul.c (mpn_toom32_mul): Relax requirement on
+	input sizes, to support s+t>=n (used to be s+t>=n+2). Keep high
+	limbs of the evaluated values in scalar variables.
+
+	* mpn/generic/sbpi1_divappr_q.c (mpn_sbpi1_divappr_q): Remove
+	unused variables.
+
+	* mpn/generic/toom32_mul.c (mpn_toom32_mul): Fixed left-over use
+	of mpn_addsub_n which should be mpn_add_n_sub_n.
+
 2009-12-26  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add new toom files (spotted by Torbjorn).
diff -r 2f7eab694ee9 -r b3a07d48af77 gmp-impl.h
--- a/gmp-impl.h	Sat Dec 26 12:39:10 2009 +0100
+++ b/gmp-impl.h	Sun Dec 27 00:30:10 2009 +0100
@@ -591,7 +591,6 @@
 #undef USE_PREINV_DIVREM_1
 #undef DIVREM_2_THRESHOLD
 #undef DIVEXACT_1_THRESHOLD
-#undef MODEXACT_1_ODD_THRESHOLD
 #define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
 #define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
 #define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
@@ -2816,15 +2815,15 @@
 
 
 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
-   plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
+   plain mpn_divrem_1.  Likewise BMOD_1_TO_MOD_1_THRESHOLD for
    mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
    modexact are faster at all sizes, so the defaults are 0.  Those CPUs
    where this is not right have a tuned threshold.  */
 #ifndef DIVEXACT_1_THRESHOLD
 #define DIVEXACT_1_THRESHOLD  0
 #endif
-#ifndef MODEXACT_1_ODD_THRESHOLD
-#define MODEXACT_1_ODD_THRESHOLD  0
+#ifndef BMOD_1_TO_MOD_1_THRESHOLD
+#define BMOD_1_TO_MOD_1_THRESHOLD  10
 #endif
 
 #ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
@@ -2857,7 +2856,7 @@
 #endif
 
 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)			\
-  (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)			\
+  (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD)			\
    ? mpn_modexact_1_odd (src, size, divisor)				\
    : mpn_mod_1 (src, size, divisor))
 
@@ -3675,7 +3674,7 @@
     ASSERT (__b & 1);                                                      \
                                                                            \
     if ((GMP_NUMB_BITS % 2) != 0                                           \
-        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
+        || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD))          \
       {                                                                    \
         (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
       }                                                                    \
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/divis.c
--- a/mpn/generic/divis.c	Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/divis.c	Sun Dec 27 00:30:10 2009 +0100
@@ -96,7 +96,7 @@
 
   if (dn == 1)
     {
-      if (BELOW_THRESHOLD (an, MODEXACT_1_ODD_THRESHOLD))
+      if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
 	return mpn_mod_1 (ap, an, dlow) == 0;
 
       count_trailing_zeros (twos, dlow);
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/sbpi1_divappr_q.c
--- a/mpn/generic/sbpi1_divappr_q.c	Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/sbpi1_divappr_q.c	Sun Dec 27 00:30:10 2009 +0100
@@ -41,9 +41,8 @@
   mp_limb_t n1, n0;
   mp_limb_t d1, d0;
   mp_limb_t cy, cy1;
-  mp_limb_t q, q0;
-  mp_limb_t t1, t0;
-  mp_limb_t mask, flag;
+  mp_limb_t q;
+  mp_limb_t flag;
 
   ASSERT (dn > 2);
   ASSERT (nn >= dn);
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/toom22_mul.c
--- a/mpn/generic/toom22_mul.c	Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/toom22_mul.c	Sun Dec 27 00:30:10 2009 +0100
@@ -63,7 +63,7 @@
    each recursion (the difference s-t will be invariant over recursive calls).
    Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
    MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
-#define TOOM22_MUL_MN_REC(p, a, an, b, bn, ws)				\
+#define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
   do {									\
     if (! MAYBE_mul_toom22						\
 	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
@@ -167,7 +167,7 @@
   /* vm1, 2n limbs */
   TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
 
-  if (s > t)  TOOM22_MUL_MN_REC (vinf, a1, s, b1, t, scratch_out);
+  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
   else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
 
   /* v0, 2n limbs */
diff -r 2f7eab694ee9 -r b3a07d48af77 mpn/generic/toom32_mul.c
--- a/mpn/generic/toom32_mul.c	Sat Dec 26 12:39:10 2009 +0100
+++ b/mpn/generic/toom32_mul.c	Sun Dec 27 00:30:10 2009 +0100
@@ -61,6 +61,7 @@
   int vm1_neg;
   mp_limb_t cy;
   int hi;
+  mp_limb_t ap1_hi, bp1_hi;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -68,14 +69,8 @@
 #define b0  bp
 #define b1  (bp + n)
 
-  /* Required, to guarantee that s + t >= n + 2.
-
-     Also implies that bn >= 9
-
-     FIXME: Too strict? E.g., an = 15, bn = 9, ==> n = 5, s = 5, t = 4
-     seem to be ok. */
-  
-  ASSERT (bn + 4 <= an && an + 14 <= 3*bn);
+  /* Required, to ensure that s + t >= n. */
+  ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
 
   n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
 
@@ -84,39 +79,39 @@
 
   ASSERT (0 < s && s <= n);
   ASSERT (0 < t && t <= n);
-  ASSERT (s + t >= n + 2);
+  ASSERT (s + t >= n);
 
   /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
-#define ap1 (pp)		/* n + 1 */
-#define bp1 (pp + n + 1)	/* n + 1 */
-#define am1 (pp + 2*n + 2)	/* n, most significant bit stored elsewhere */
-#define bm1 (pp + 3*n + 2)	/* n */
+#define ap1 (pp)		/* n, most significant limb in ap1_hi */
+#define bp1 (pp + n)		/* n, most significant bit in bp1_hi */
+#define am1 (pp + 2*n)		/* n, most significant bit in hi */
+#define bm1 (pp + 3*n)		/* n */
 #define v1 (scratch)		/* 2n + 1 */
 #define vm1 (pp)		/* 2n + 1 */
-#define scratch_out (scratch + 2*n + 1) /* Currently unused. *(
+#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
 
   /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
 
-  /* FIXME: Keep ap1[n] and bp1[n] in scalar variables. */
+  /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
 
   /* Compute ap1 = a0 + a1 + a3, am1 = a0 - a1 + a3 */
-  ap1[n] = mpn_add (ap1, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_addsub_n
-  if (ap1[n] == 0 && mpn_cmp (ap1, a1, n) < 0)
+  ap1_hi = mpn_add (ap1, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_add_n_sub_n
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
     {
-      ap1[n] = mpn_addsub_n (ap1, am1, a1, ap1, n) >> 1;
+      ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
       hi = 0;
       vm1_neg = 1;
     }
   else
     {
-      cy = mpn_addsub_n (ap1, am1, ap1, a1, n);
-      hi = ap1[n] - (cy & 1);
-      ap1[n] += (cy >> 1);
+      cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
+      hi = ap1_hi - (cy & 1);
+      ap1_hi += (cy >> 1);
       vm1_neg = 0;
     }
 #else
-  if (ap1[n] == 0 && mpn_cmp (ap1, a1, n) < 0)
+  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
     {
       ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
       hi = 0;
@@ -124,28 +119,28 @@
     }
   else
     {
-      hi = ap1[n] - mpn_sub_n (am1, ap1, a1, n);
+      hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
       vm1_neg = 0;
     }
-  ap1[n] += mpn_add_n (ap1, ap1, a1, n);
+  ap1_hi += mpn_add_n (ap1, ap1, a1, n);
 #endif
 
   /* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
   if (t == n)
     {
-#if HAVE_NATIVE_mpn_addsub_n
+#if HAVE_NATIVE_mpn_add_n_sub_n
       if (mpn_cmp (b0, b1, n) < 0)
 	{
-	  cy = mpn_addsub_n (bp1, bm1, b1, b0, n);
+	  cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
 	  vm1_neg ^= 1;
 	}
       else
 	{
-	  cy = mpn_addsub_n (bp1, bm1, b0, b1, n);
+	  cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
 	}
-      bp1[n] = cy >> 1;
+      bp1_hi = cy >> 1;
 #else
-      bp1[n] = mpn_add_n (bp1, b0, b1, n);
+      bp1_hi = mpn_add_n (bp1, b0, b1, n);
 
       if (mpn_cmp (b0, b1, n) < 0)
 	{
@@ -160,8 +155,8 @@
     }
   else
     {
-      /* FIXME: Should still use addsub for the main part. */
-      bp1[n] = mpn_add (bp1, b0, n, b1, t);
+      /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
+      bp1_hi = mpn_add (bp1, b0, n, b1, t);
 
       if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
 	{
@@ -176,21 +171,21 @@
     }
 
   TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
-  if (ap1[n] == 1)
+  if (ap1_hi == 1)
     {
-      cy = bp1[n] + mpn_add_n (v1 + n, v1 + n, bp1, n);
+      cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n);
     }
-  else if (ap1[n] == 2)
+  else if (ap1_hi == 2)
     {
 #if HAVE_NATIVE_mpn_addlsh1_n
-      cy = 2 * bp1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
+      cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
 #else
-      cy = 2 * bp1[n] + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+      cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
 #endif
     }
   else
     cy = 0;
-  if (bp1[n] != 0)
+  if (bp1_hi != 0)
     cy += mpn_add_n (v1 + n, v1 + n, ap1, n);
   v1[2 * n] = cy;
   
@@ -250,6 +245,8 @@
   cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
   MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
 
+  /* FIXME: Can we get rid of this second vm1_neg conditional by
+     swapping the location of +1 and -1 values? */
   if (vm1_neg)
     {
       cy = mpn_add_n (v1, v1, vm1, n);
@@ -299,10 +296,17 @@


More information about the gmp-commit mailing list