[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Dec 17 17:06:52 CET 2009
details: /home/hgfiles/gmp/rev/8473a9dd7980
changeset: 13114:8473a9dd7980
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Dec 17 17:06:06 2009 +0100
description:
New files I forgot to add...
details: /home/hgfiles/gmp/rev/7ec6dba53d54
changeset: 13115:7ec6dba53d54
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Dec 17 17:06:47 2009 +0100
description:
Trivial merge
diffstat:
ChangeLog | 6 +
mpn/generic/mul.c | 47 +++------
mpn/generic/sqrmod_bnm1.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++
mpn/generic/toom22_mul.c | 10 +-
tests/mpn/t-sqrmod_bnm1.c | 192 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 442 insertions(+), 32 deletions(-)
diffs (truncated from 540 to 300 lines):
diff -r e1f87ead7745 -r 7ec6dba53d54 ChangeLog
--- a/ChangeLog Thu Dec 17 16:42:54 2009 +0100
+++ b/ChangeLog Thu Dec 17 17:06:47 2009 +0100
@@ -1,5 +1,11 @@
2009-12-17 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/generic/mul.c: Move allocation of ws to where it is used.
+ Identify toom22, 32, 42, in that order (in two places). Use midline
+ between toom22, 32, 42.
+ * mpn/generic/toom22_mul.c (TOOM22_MUL_MN_REC): Call also
+ mpn_toom32_mul.
+
* doc/gmp.texi: Update References section. Update Contributors
section. Misc updates.
diff -r e1f87ead7745 -r 7ec6dba53d54 mpn/generic/mul.c
--- a/mpn/generic/mul.c Thu Dec 17 16:42:54 2009 +0100
+++ b/mpn/generic/mul.c Thu Dec 17 17:06:47 2009 +0100
@@ -134,12 +134,9 @@
(!TOOM33_OK (un, vn) && BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD * 3 / 2)))
{
/* Loop over toom42, then choose toom42, toom32, or toom22 */
- mp_ptr ws;
mp_ptr scratch;
TMP_DECL; TMP_MARK;
-#define WSALL (4 * vn)
- ws = TMP_SALLOC_LIMBS (WSALL + 1);
#define ITCH ((un + vn) * 4 + 100)
scratch = TMP_ALLOC_LIMBS (ITCH + 1);
@@ -147,6 +144,9 @@
if (un >= 3 * vn)
{
mp_limb_t cy;
+ mp_ptr ws;
+#define WSALL (4 * vn)
+ ws = TMP_SALLOC_LIMBS (WSALL + 1);
mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
un -= 2 * vn;
@@ -164,40 +164,25 @@
prodp += 2 * vn;
}
- /* FIXME: Test these in opposite order, following the philosophy of
- minimizing the relative overhead. */
- if (5 * un > 9 * vn)
- {
- mpn_toom42_mul (ws, up, un, vp, vn, scratch);
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, un);
- mpn_incr_u (prodp + vn, cy);
- }
- else if (9 * un > 10 * vn)
- {
- mpn_toom32_mul (ws, up, un, vp, vn, scratch);
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, un);
- mpn_incr_u (prodp + vn, cy);
- }
+ if (4 * un < 5 * vn)
+ mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+ else if (4 * un < 7 * vn)
+ mpn_toom32_mul (ws, up, un, vp, vn, scratch);
else
- {
- mpn_toom22_mul (ws, up, un, vp, vn, scratch);
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, un);
- mpn_incr_u (prodp + vn, cy);
- }
+ mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
}
else
{
- /* FIXME: Test these in opposite order, following the philosophy of
- minimizing the relative overhead. */
- if (5 * un > 9 * vn)
- mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
- else if (9 * un > 10 * vn)
+ if (4 * un < 5 * vn)
+ mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+ else if (4 * un < 7 * vn)
mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
else
- mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+ mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
}
TMP_FREE;
}
diff -r e1f87ead7745 -r 7ec6dba53d54 mpn/generic/sqrmod_bnm1.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/sqrmod_bnm1.c Thu Dec 17 17:06:47 2009 +0100
@@ -0,0 +1,219 @@
+/* sqrmod_bnm1.c -- squaring mod B^n-1.
+
+ Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
+ Marco Bodrato.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Input is {ap,rn}; output is {rp,rn}, computation is
+ mod B^rn - 1, and values are semi-normalised; zero is represented
+ as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
+ tp==rp is allowed. */
+static void
+mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+ mp_limb_t cy;
+
+ ASSERT (0 < rn);
+
+ mpn_sqr_n (tp, ap, rn);
+ cy = mpn_add_n (rp, tp, tp + rn, rn);
+ /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
+ * be no overflow when adding in the carry. */
+ MPN_INCR_U (rp, rn, cy);
+}
+
+
+/* Input is {ap,rn+1}; output is {rp,rn+1}, in
+ semi-normalised representation, computation is mod B^rn + 1. Needs
+ a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
+ Output is normalised. */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
+{
+ mp_limb_t cy;
+
+ ASSERT (0 < rn);
+
+ mpn_sqr_n (tp, ap, rn + 1);
+ ASSERT (tp[2*rn+1] == 0);
+ ASSERT (tp[2*rn] < GMP_NUMB_MAX);
+ cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
+ rp[rn] = 0;
+ MPN_INCR_U (rp, rn+1, cy );
+}
+
+
+/* Computes {rp,rn} <- {ap,an}^2 Mod(B^rn-1)
+ *
+ * The result is expected to be ZERO if and only if the operand
+ * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
+ * B^rn-1.
+ * Moreover it should not be a problem if sqrmod_bnm1 is used to
+ * compute the full square with an <= 2*rn, because this condition
+ * implies (B^an-1)^2 < (B^rn-1) .
+ *
+ * Requires 0 < an <= rn
+ * Scratch need: rn + 2 + (need for recursive call OR rn + 2). This gives
+ *
+ * S(n) <= rn + 2 + MAX (rn + 2, S(n/2)) <= 2rn + 2 log2 rn + 2
+ */
+void
+mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
+{
+ ASSERT (0 < an);
+ ASSERT (an <= rn);
+
+ if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
+ {
+ if (UNLIKELY (an < rn))
+ {
+ if (UNLIKELY (2*an <= rn))
+ {
+ mpn_sqr_n (rp, ap, an);
+ MPN_ZERO (rp + 2*an, rn - 2*an);
+ }
+ else
+ {
+ mp_limb_t cy;
+ mpn_sqr_n (tp, ap, an);
+ cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
+ MPN_INCR_U (rp, rn, cy);
+ }
+ }
+ else
+ mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
+ }
+ else
+ {
+ mp_size_t n;
+ mp_limb_t cy;
+ mp_limb_t hi;
+
+ n = rn >> 1;
+
+ /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
+ and crt together as
+
+ x = xm + (B^n - 1) * [B^n/2 (xp - xm) mod (B^n+1)]
+ */
+
+#define a0 ap
+#define a1 (ap + n)
+
+#define xp tp /* 2n + 2 */
+ /* am1 maybe in {xp, n} */
+#define so (tp + 2*n + 2)
+ /* ap1 maybe in {so, n + 1} */
+
+ {
+ mp_srcptr am1;
+ mp_size_t anm;
+
+ if( an > n ) {
+ am1 = xp;
+ cy = mpn_add (xp, a0, n, a1, an - n);
+ MPN_INCR_U (xp, n, cy);
+ anm = n;
+ } else {
+ am1 = a0;
+ anm = an;
+ }
+
+ mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
+ }
+
+ {
+ int k;
+ mp_srcptr ap1;
+ mp_size_t anp;
+
+ if( an > n ) {
+ ap1 = so;
+ cy = mpn_sub (so, a0, n, a1, an - n);
+ so[n] = 0;
+ MPN_INCR_U (so, n + 1, cy);
+ anp = n + ap1[n];
+ } else {
+ ap1 = a0;
+ anp = an;
+ }
+
+ if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
+ k=0;
+ else
+ {
+ int mask;
+ k = mpn_fft_best_k (n, 0);
+ mask = (1<<k) -1;
+ while (n & mask) {k--; mask >>=1;};
+ }
+ if (k >= FFT_FIRST_K)
+ xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
+ else
+ {
+ if (UNLIKELY (ap1 == a0)) {
+ ap1 = so;
+ MPN_COPY (so, a0, anp);
+ MPN_ZERO (so + anp, n + 1 - anp);
+ }
+ mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
+ }
+ }
+
+ /* xp = xm - xp mod (B^n + 1). Assumes normalised
+ representation. Puts high bit in hi. */
+ if (UNLIKELY (xp[n]))
+ hi = mpn_add_1 (xp, rp, n, 1);
+ else
+ {
+ cy = mpn_sub_n (xp, rp, xp, n);
+ MPN_INCR_U (xp, n + 1 , cy);
+ hi = xp[n];
+ }
+ /* Multiply by -B^n/2, using
+
+ -B^n/2 * (2 x1 + x0) = x1 - B^n/2 x0 (mod (B^n + 1))
+ */
+
More information about the gmp-commit
mailing list