[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

Thu Dec 17 17:06:52 CET 2009

details:   /home/hgfiles/gmp/rev/8473a9dd7980
changeset: 13114:8473a9dd7980
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Dec 17 17:06:06 2009 +0100
description:
New files I forgot to add...

details:   /home/hgfiles/gmp/rev/7ec6dba53d54
changeset: 13115:7ec6dba53d54
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Dec 17 17:06:47 2009 +0100
description:
Trivial merge

diffstat:

 ChangeLog                 |    6 +
 mpn/generic/mul.c         |   47 +++------
 mpn/generic/sqrmod_bnm1.c |  219 ++++++++++++++++++++++++++++++++++++++++++++++
 mpn/generic/toom22_mul.c  |   10 +-
 tests/mpn/t-sqrmod_bnm1.c |  192 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 442 insertions(+), 32 deletions(-)

diffs (truncated from 540 to 300 lines):

diff -r e1f87ead7745 -r 7ec6dba53d54 ChangeLog

--- a/ChangeLog	Thu Dec 17 16:42:54 2009 +0100
+++ b/ChangeLog	Thu Dec 17 17:06:47 2009 +0100
@@ -1,5 +1,11 @@
 2009-12-17  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/generic/mul.c: Move allocation of ws to where it is used.
+	Identify toom22, 32, 42, in that order (in two places).  Use midline
+	between toom22, 32, 42.
+	* mpn/generic/toom22_mul.c (TOOM22_MUL_MN_REC): Call also
+	mpn_toom32_mul.
+
 	* doc/gmp.texi: Update References section.  Update Contributors
 	section.  Misc updates.
 
diff -r e1f87ead7745 -r 7ec6dba53d54 mpn/generic/mul.c
--- a/mpn/generic/mul.c	Thu Dec 17 16:42:54 2009 +0100
+++ b/mpn/generic/mul.c	Thu Dec 17 17:06:47 2009 +0100
@@ -134,12 +134,9 @@
 	   (!TOOM33_OK (un, vn) && BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD * 3 / 2)))
     {
       /* Loop over toom42, then choose toom42, toom32, or toom22 */
-      mp_ptr ws;
       mp_ptr scratch;
       TMP_DECL; TMP_MARK;
 
-#define WSALL (4 * vn)
-      ws = TMP_SALLOC_LIMBS (WSALL + 1);
 
 #define ITCH ((un + vn) * 4 + 100)
       scratch = TMP_ALLOC_LIMBS (ITCH + 1);
@@ -147,6 +144,9 @@
       if (un >= 3 * vn)
 	{
 	  mp_limb_t cy;
+	  mp_ptr ws;
+#define WSALL (4 * vn)
+	  ws = TMP_SALLOC_LIMBS (WSALL + 1);
 
 	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
 	  un -= 2 * vn;
@@ -164,40 +164,25 @@
 	      prodp += 2 * vn;
 	    }
 
-	  /* FIXME: Test these in opposite order, following the philosophy of
-	     minimizing the relative overhead.  */
-	  if (5 * un > 9 * vn)
-	    {
-	      mpn_toom42_mul (ws, up, un, vp, vn, scratch);
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
-	  else if (9 * un > 10 * vn)
-	    {
-	      mpn_toom32_mul (ws, up, un, vp, vn, scratch);
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
+	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
 	  else
-	    {
-	      mpn_toom22_mul (ws, up, un, vp, vn, scratch);
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
+	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+	  cy = mpn_add_n (prodp, prodp, ws, vn);
+	  MPN_COPY (prodp + vn, ws + vn, un);
+	  mpn_incr_u (prodp + vn, cy);
 	}
       else
 	{
-	  /* FIXME: Test these in opposite order, following the philosophy of
-	     minimizing the relative overhead.  */
-	  if (5 * un > 9 * vn)
-	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
-	  else if (9 * un > 10 * vn)
+	  if (4 * un < 5 * vn)
+	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+	  else if (4 * un < 7 * vn)
 	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
 	  else
-	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
 	}
       TMP_FREE;
     }
diff -r e1f87ead7745 -r 7ec6dba53d54 mpn/generic/sqrmod_bnm1.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/sqrmod_bnm1.c	Thu Dec 17 17:06:47 2009 +0100
@@ -0,0 +1,219 @@
+/* sqrmod_bnm1.c -- squaring mod B^n-1.
+
+   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+   Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Input is {ap,rn}; output is {rp,rn}, computation is
+   mod B^rn - 1, and values are semi-normalised; zero is represented
+   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
+   tp==rp is allowed. */
+static void
+mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  mpn_sqr_n (tp, ap, rn);
+  cy = mpn_add_n (rp, tp, tp + rn, rn);
+  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+   * be no overflow when adding in the carry. */
+  MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Input is {ap,rn+1}; output is {rp,rn+1}, in
+   semi-normalised representation, computation is mod B^rn + 1. Needs
+   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
+   Output is normalised. */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+  mp_limb_t cy;
+
+  ASSERT (0 < rn);
+
+  mpn_sqr_n (tp, ap, rn + 1);
+  ASSERT (tp[2*rn+1] == 0);
+  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
+  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
+  rp[rn] = 0;
+  MPN_INCR_U (rp, rn+1, cy );
+}
+
+
+/* Computes {rp,rn} <- {ap,an}^2 Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1.
+ * Moreover it should not be a problem if sqrmod_bnm1 is used to
+ * compute the full square with an <= 2*rn, because this condition
+ * implies (B^an-1)^2 < (B^rn-1) .
+ *
+ * Requires 0 < an <= rn
+ * Scratch need: rn + 2 + (need for recursive call OR rn + 2). This gives
+ *
+ * S(n) <= rn + 2 + MAX (rn + 2, S(n/2)) <= 2rn + 2 log2 rn + 2
+ */
+void
+mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
+{
+  ASSERT (0 < an);
+  ASSERT (an <= rn);
+
+  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
+    {
+      if (UNLIKELY (an < rn))
+	{
+	  if (UNLIKELY (2*an <= rn))
+	    {
+	      mpn_sqr_n (rp, ap, an);
+	      MPN_ZERO (rp + 2*an, rn - 2*an);
+	    }
+	  else
+	    {
+	      mp_limb_t cy;
+	      mpn_sqr_n (tp, ap, an);
+	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
+	      MPN_INCR_U (rp, rn, cy);
+	    }
+	}
+      else
+	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
+    }
+  else
+    {
+      mp_size_t n;
+      mp_limb_t cy;
+      mp_limb_t hi;
+
+      n = rn >> 1;
+
+      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
+	 and crt together as
+
+	 x = xm + (B^n - 1) * [B^n/2 (xp - xm) mod (B^n+1)]
+      */
+
+#define a0 ap
+#define a1 (ap + n)
+
+#define xp  tp	/* 2n + 2 */
+      /* am1  maybe in {xp, n} */
+#define so (tp + 2*n + 2)
+      /* ap1  maybe in {so, n + 1} */
+
+      {
+	mp_srcptr am1;
+	mp_size_t anm;
+
+	if( an > n ) {
+	  am1 = xp;
+	  cy = mpn_add (xp, a0, n, a1, an - n);
+	  MPN_INCR_U (xp, n, cy);
+	  anm = n;
+	} else {
+	  am1 = a0;
+	  anm = an;
+	}
+
+	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
+      }
+
+      {
+	int       k;
+	mp_srcptr ap1;
+	mp_size_t anp;
+
+	if( an > n ) {
+	  ap1 = so;
+	  cy = mpn_sub (so, a0, n, a1, an - n);
+	  so[n] = 0;
+	  MPN_INCR_U (so, n + 1, cy);
+	  anp = n + ap1[n];
+	} else {
+	  ap1 = a0;
+	  anp = an;
+	}
+
+	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+	  k=0;
+	else
+	  {
+	    int mask;
+	    k = mpn_fft_best_k (n, 0);
+	    mask = (1<<k) -1;
+	    while (n & mask) {k--; mask >>=1;};
+	  }
+	if (k >= FFT_FIRST_K)
+	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
+	else
+	  {
+	    if (UNLIKELY (ap1 == a0)) {
+	      ap1 = so;
+	      MPN_COPY (so, a0, anp);
+	      MPN_ZERO (so + anp, n + 1 - anp);
+	    }
+	    mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
+	  }
+      }
+
+      /* xp = xm - xp mod (B^n + 1). Assumes normalised
+	 representation. Puts high bit in hi. */
+      if (UNLIKELY (xp[n]))
+	hi = mpn_add_1 (xp, rp, n, 1);
+      else
+	{
+	  cy = mpn_sub_n (xp, rp, xp, n);
+	  MPN_INCR_U (xp, n + 1 , cy);
+	  hi = xp[n];
+	}
+      /* Multiply by -B^n/2, using
+
+	 -B^n/2 * (2 x1 + x0) = x1 - B^n/2 x0 (mod (B^n + 1))
+      */
+