[Gmp-commit] /home/hgfiles/gmp: One step toward adding a temporary area param...
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 23 15:00:28 CET 2009
details: /home/hgfiles/gmp/rev/a5b28ede335f
changeset: 13201:a5b28ede335f
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Dec 23 15:00:21 2009 +0100
description:
One step toward adding a temporary area parameter to mullo.
diffstat:
ChangeLog | 2 +
mpn/generic/mullo_n.c | 166 +++++++++++++++++++++++++++++--------------------
2 files changed, 99 insertions(+), 69 deletions(-)
diffs (239 lines):
diff -r 2197cd115a90 -r a5b28ede335f ChangeLog
--- a/ChangeLog Wed Dec 23 13:37:59 2009 +0100
+++ b/ChangeLog Wed Dec 23 15:00:21 2009 +0100
@@ -1,5 +1,7 @@
2009-12-23 Marco Bodrato <bodrato at mail.dm.unipi.it>
+ * mpn/generic/mullo_n.c: Split dc_mullo_n function; ALLOC memory at once.
+
* mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Update.
* mpn/generic/toom6h_mul.c: Add prefix to toom_interpolate_12pts.
diff -r 2197cd115a90 -r a5b28ede335f mpn/generic/mullo_n.c
--- a/mpn/generic/mullo_n.c Wed Dec 23 13:37:59 2009 +0100
+++ b/mpn/generic/mullo_n.c Wed Dec 23 15:00:21 2009 +0100
@@ -37,7 +37,7 @@
#endif
#ifndef MULLO_MUL_N_THRESHOLD
-#define MULLO_MUL_N_THRESHOLD 10*MULLO_DC_THRESHOLD
+#define MULLO_MUL_N_THRESHOLD MUL_FFT_THRESHOLD
#endif
#if TUNE_PROGRAM_BUILD
@@ -45,21 +45,12 @@
#define MAYBE_range_toom22 1
#else
#define MAYBE_range_basecase \
- ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < 2 * MUL_TOOM22_THRESHOLD)
+ ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
#define MAYBE_range_toom22 \
((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
#endif
-/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0. */
-#define MUL_BASECASE_ALLOC \
- (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
-
-/*
- FIXME: This function should accept a temporary area.
- THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
- (typically thanks to mpn_addmul_2) should we unconditionally use
- mpn_mul_n?
- THINK: The DC strategy uses different constatnts in different Toom's
+/* THINK: The DC strategy uses different constants in different Toom's
ranges. Something smoother?
*/
@@ -69,9 +60,9 @@
Above the given threshold, the Divide and Conquer strategy is used.
The operands are split in two, and a full product plus two mullo
- are used to obtain the final result. The more natural stategy is to
+ are used to obtain the final result. The more natural strategy is to
split in two halves, but this is far from optimal when a
- sebquadratic multiplication is used.
+ sub-quadratic multiplication is used.
Mulders suggests an unbalanced split in favour of the full product,
split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
@@ -120,7 +111,7 @@
+----+--`
^ n2 ^n1^
- For an actual impementation, the assumption that M(n)=n^e is
+ For an actual implementation, the assumption that M(n)=n^e is
incorrect, as a consequence also the assumption that ML(n)=k*M(n)
with a constant k is wrong.
@@ -134,6 +125,80 @@
k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
*/
+static mp_size_t
+mpn_mullo_n_itch (mp_size_t n)
+{
+ return 2*n;
+}
+
+/*
+ mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
+ It accepts tp == rp.
+*/
+static void
+mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
+{
+ mp_size_t n2, n1;
+ ASSERT (n >= 2);
+ ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
+
+ /* Divide-and-conquer */
+
+ /* We need fractional approximation of the value 0 < a <= 1/2
+ giving the minimum in the function k=(1-a)^e/(1-2*a^e).
+ */
+ if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
+ n1 = n >> 1;
+ else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
+ n1 = n * 11 / (size_t) 36; /* n1 ~= n*(1-.694...) */
+ else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
+ n1 = n * 9 / (size_t) 40; /* n1 ~= n*(1-.775...) */
+ else
+ n1 = n * 7 / (size_t) 39; /* n1 ~= n*(1-.821...) */
+ /* n1 = n * 4 / (size_t) 31; // n1 ~= n*(1-.871...) [TOOM66] */
+ /* n1 = n / (size_t) 10; // n1 ~= n*(1-.899...) [TOOM88] */
+ n2 = n - n1;
+
+ /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
+ y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+
+ /* x0 * y0 */
+ mpn_mul_n (tp, xp, yp, n2);
+ MPN_COPY (rp, tp, n2);
+
+ /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
+ if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+ mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);
+ else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+ mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
+ else
+ mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n);
+ mpn_add_n (rp + n2, tp + n2, tp + n, n1);
+
+ /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+ if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
+ mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
+ else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
+ mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
+ else
+ mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
+ mpn_add_n (rp + n2, rp + n2, tp + n, n1);
+}
+
+/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0. */
+#define MUL_BASECASE_ALLOC \
+ (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
+
+/* FIXME: This function should accept a temporary area; dc_mullow_n
+ accepts a pointer tp, and handle the case tp == rp, do the same here.
+ Maybe recombine the two functions.
+ THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
+ (typically thanks to mpn_addmul_2) should we unconditionally use
+ mpn_mul_n?
+*/
+
void
mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
@@ -144,72 +209,35 @@
if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
{
/* Allocate workspace of fixed size on stack: fast! */
- mp_limb_t ws[MUL_BASECASE_ALLOC];
- mpn_mul_basecase (ws, xp, n, yp, n);
- MPN_COPY (rp, ws, n);
+ mp_limb_t tp[MUL_BASECASE_ALLOC];
+ mpn_mul_basecase (tp, xp, n, yp, n);
+ MPN_COPY (rp, tp, n);
}
else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
{
mpn_mullo_basecase (rp, xp, yp, n);
}
- else if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
- {
- /* Divide-and-conquer */
- mp_size_t n2, n1;
- mp_ptr tp;
- TMP_SDECL;
- TMP_SMARK;
- /* We need fractional approximation of the value 1/2< b <1
- giving the minimum in the function b^a/(1-2*(1-b)^a) .
-
- Constants obtained with the following gp-pari commands:
-
- */
- if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*2) )
- n1 = n >> 1;
- else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
- n1 = n * 11 / (size_t) 36; /* n1 ~= n*(1-.694...) */
- else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
- n1 = n * 9 / (size_t) 40; /* n1 ~= n*(1-.775...) */
- else
- n1 = n * 7 / (size_t) 39; /* n1 ~= n*(1-.821...) */
- /* n1 = n * 4 / (size_t) 31; // n1 ~= n*(1-.871...) [TOOM66] */
- /* n1 = n / (size_t) 10; // n1 ~= n*(1-.899...) [TOOM88] */
- n2 = n - n1;
- ASSERT (n1 < 2 * n2);
- tp = TMP_SALLOC_LIMBS (2 * n2);
-
- /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
- y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
-
- /* x0 * y0 */
- mpn_mul_n (tp, xp, yp, n2);
- MPN_COPY (rp, tp, n2);
-
- /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
- mpn_mullo_n (rp + n2, xp + n2, yp, n1);
- mpn_add_n (rp + n2, rp + n2, tp + n2, n1);
-
- /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
- mpn_mullo_n (tp, yp + n2, xp, n1);
- mpn_add_n (rp + n2, rp + n2, tp, n1);
- TMP_SFREE;
- }
else
{
- /* For really large operands, use plain mpn_mul_n but throw away upper n
- limbs of result. */
mp_ptr tp;
TMP_DECL;
TMP_MARK;
- tp = TMP_ALLOC_LIMBS (2 * n);
-
-#if !TUNE_PROGRAM_BUILD && WANT_FFT && (MULLO_MUL_N_THRESHOLD >= MUL_FFT_THRESHOLD)
- mpn_fft_mul (tp, xp, n, yp, n);
+ tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));
+ if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
+ {
+ mpn_dc_mullo_n (rp, xp, yp, n, tp);
+ }
+ else
+ {
+ /* For really large operands, use plain mpn_mul_n but throw away upper n
+ limbs of result. */
+#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
+ mpn_fft_mul (tp, xp, n, yp, n);
#else
- mpn_mul_n (tp, xp, yp, n);
+ mpn_mul_n (tp, xp, yp, n);
#endif
- MPN_COPY (rp, tp, n);
+ MPN_COPY (rp, tp, n);
+ }
TMP_FREE;
}
}
More information about the gmp-commit
mailing list