toom54
Niels Möller
nisse at lysator.liu.se
Mon Feb 13 13:20:35 CET 2012
I noticed that toom54 is missing. And it's easy, since all the building
blocks already are in place. Patch below. Comments?
I'd expect it to have a place with toom43 and toom44 below, and toom6h
above, but I have no good guess on how large that place might be.
Also toom72 is missing, which would use the same interpolation function
as toom54 and toom63. Not sure how useful that might be.
Regards,
/Niels
diff -r b856752462ac configure.in
--- a/configure.in Mon Feb 13 12:38:05 2012 +0100
+++ b/configure.in Mon Feb 13 12:40:42 2012 +0100
@@ -2634,7 +2634,7 @@
hgcd2_jacobi hgcd_jacobi \
mullo_n mullo_basecase \
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
- toom33_mul toom43_mul toom53_mul toom63_mul \
+ toom33_mul toom43_mul toom53_mul toom54_mul toom63_mul \
toom44_mul \
toom6h_mul toom6_sqr toom8h_mul toom8_sqr \
toom_couple_handling \
diff -r b856752462ac gmp-impl.h
--- a/gmp-impl.h Mon Feb 13 12:38:05 2012 +0100
+++ b/gmp-impl.h Mon Feb 13 12:40:42 2012 +0100
@@ -4983,6 +4983,13 @@
return 9 * n + 3;
}
+static inline mp_size_t
+mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
+{
+ mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+ return 9 * n + 3;
+}
+
/* let S(n) = space required for input size n,
then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)). */
#define mpn_toom42_mulmid_itch(n) \
diff -r b856752462ac mpn/generic/toom54_mul.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/toom54_mul.c Mon Feb 13 12:40:42 2012 +0100
@@ -0,0 +1,135 @@
+/* Implementation of the toom54 (same points as toom63)
+
+ Contributed to the GNU project by Niels Möller.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Toom-4.5, the splitting 5x4 unbalanced version.
+ Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+ <--s-><--n--><--n--><--n--><--n-->
+ ____ ______ ______ ______ ______
+ |_a4_|__a3__|__a2__|__a1__|__a0__|
+ |b3_|__b2__|__b1__|__b0__|
+ <-t-><--n--><--n--><--n-->
+
+*/
+#define TOOM_54_MUL_N_REC(p, a, b, n, ws) \
+ do { mpn_mul_n (p, a, b, n); \
+ } while (0)
+
+#define TOOM_54_MUL_REC(p, a, na, b, nb, ws) \
+ do { mpn_mul (p, a, na, b, nb); \
+ } while (0)
+
+void
+mpn_toom54_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ mp_limb_t cy;
+ int sign;
+
+ /***************************** decomposition *******************************/
+#define a4 (ap + 4 * n)
+#define b3 (bp + 3 * n)
+
+ ASSERT (an >= bn);
+ n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+
+ s = an - 4 * n;
+ t = bn - 3 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ /* WARNING! it assumes s+t>=n */
+ ASSERT ( s + t >= n );
+ ASSERT ( s + t > 4);
+ ASSERT ( n > 2);
+
+#define r8 pp /* 2n */
+#define r7 scratch /* 3n+1 */
+#define r5 (pp + 3*n) /* 3n+1 */
+#define v0 (pp + 3*n) /* n+1 */
+#define v1 (pp + 4*n+1) /* n+1 */
+#define v2 (pp + 5*n+2) /* n+1 */
+#define v3 (pp + 6*n+3) /* n+1 */
+#define r3 (scratch + 3 * n + 1) /* 3n+1 */
+#define r1 (pp + 7*n) /* s+t <= 2*n */
+#define ws (scratch + 6 * n + 2) /* ??? */
+
+ /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+ need all of them, when DO_mpn_sublsh_n usea a scratch */
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm4$ */
+ sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp);
+ sign ^= mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+ TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+ mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4); /* FIXME: ...,2,4 ?*/
+
+ /* $\pm1$ */
+ sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s, pp);
+ sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+ TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+ mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+ /* $\pm2$ */
+ sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp);
+ sign ^= mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+ TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+ mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2); /* FIXME: ...,1,2)? */
+
+ /* A(0)*B(0) */
+ TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
+
+ /* Infinity */
+ if (s > t) {
+ TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
+ } else {
+ TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
+ };
+
+ mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a4
+#undef b3
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
+
diff -r b856752462ac tests/mpn/Makefile.am
--- a/tests/mpn/Makefile.am Mon Feb 13 12:38:05 2012 +0100
+++ b/tests/mpn/Makefile.am Mon Feb 13 12:40:42 2012 +0100
@@ -25,7 +25,7 @@
check_PROGRAMS = t-asmtype t-aors_1 t-divrem_1 t-mod_1 t-fat t-get_d \
t-instrument t-iord_u t-mp_bases t-perfsqr t-scan \
t-toom22 t-toom32 t-toom33 t-toom42 t-toom43 t-toom44 \
- t-toom52 t-toom53 t-toom62 t-toom63 t-toom6h t-toom8h \
+ t-toom52 t-toom53 t-toom54 t-toom62 t-toom63 t-toom6h t-toom8h \
t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid \
t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv
diff -r b856752462ac tests/mpn/t-toom54.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mpn/t-toom54.c Mon Feb 13 12:40:42 2012 +0100
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom54_mul
+#define mpn_toomMN_mul_itch mpn_toom54_mul_itch
+
+#define MIN_AN 31
+#define MIN_BN(an) ((3*(an) + 32) / (size_t) 5) /* 3/5 */
+#define MAX_BN(an) ((an) - 6) /* 1/1 */
+
+#include "toom-shared.h"
--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list