[Gmp-commit] /home/hgfiles/gmp: Move reference mpn multiply to refmpn.c.
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 16 19:23:03 CET 2009
details: /home/hgfiles/gmp/rev/ee417dae4cb4
changeset: 13092:ee417dae4cb4
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Dec 16 19:22:57 2009 +0100
description:
Move reference mpn multiply to refmpn.c.
diffstat:
ChangeLog | 7 ++
tests/mpz/t-mul.c | 135 ++----------------------------------------------------
tests/refmpn.c | 69 +++++++++++++++++++++++++++-
tests/tests.h | 1 +
4 files changed, 81 insertions(+), 131 deletions(-)
diffs (truncated from 304 to 300 lines):
diff -r 34f43c28d9f6 -r ee417dae4cb4 ChangeLog
--- a/ChangeLog Wed Dec 16 16:38:29 2009 +0100
+++ b/ChangeLog Wed Dec 16 19:22:57 2009 +0100
@@ -8,6 +8,13 @@
2009-12-16 Torbjorn Granlund <tege at gmplib.org>
+ * tests/mpz/t-mul.c: Misc cleanups.
+ (mul_basecase): Remove.
+ (ref_mpn_mul): Remove.
+ * tests/refmpn.c (refmpn_mul): New function, mainly from t-mul.c's
+ ref_mpn_mul.
+ (refmpn_mullo_n): Add a missing free.
+
* tune/speed.c (routine): Measure speed_mpn_{sb,dc}pi1_div_qr,
mpn_{sb,dc}pi1_divappr_q, mpn_{sb,dc}pi1_bdiv_qr, and
mpn_{sb,dc}pi1_bdiv_q.
diff -r 34f43c28d9f6 -r ee417dae4cb4 tests/mpz/t-mul.c
--- a/tests/mpz/t-mul.c Wed Dec 16 16:38:29 2009 +0100
+++ b/tests/mpz/t-mul.c Wed Dec 16 19:22:57 2009 +0100
@@ -27,8 +27,7 @@
#include "tests.h"
void debug_mp __GMP_PROTO ((mpz_t));
-static void ref_mpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
-static void ref_mpz_mul __GMP_PROTO ((mpz_t, const mpz_t, const mpz_t));
+static void refmpz_mul __GMP_PROTO ((mpz_t, const mpz_t, const mpz_t));
void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t, mpz_t, mpz_t));
#define FFT_MIN_BITSIZE 100000
@@ -39,15 +38,13 @@
one (int i, mpz_t multiplicand, mpz_t multiplier)
{
mpz_t product, ref_product;
- mpz_t quotient;
mpz_init (product);
mpz_init (ref_product);
- mpz_init (quotient);
/* Test plain multiplication comparing results against reference code. */
mpz_mul (product, multiplier, multiplicand);
- ref_mpz_mul (ref_product, multiplier, multiplicand);
+ refmpz_mul (ref_product, multiplier, multiplicand);
if (mpz_cmp (product, ref_product))
dump_abort (i, "incorrect plain product",
multiplier, multiplicand, product, ref_product);
@@ -62,7 +59,6 @@
mpz_clear (product);
mpz_clear (ref_product);
- mpz_clear (quotient);
}
int
@@ -141,7 +137,7 @@
}
static void
-ref_mpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
+refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
{
mp_size_t usize = u->_mp_size;
mp_size_t vsize = v->_mp_size;
@@ -169,9 +165,9 @@
wp = __GMP_ALLOCATE_FUNC_LIMBS (talloc);
if (usize > vsize)
- ref_mpn_mul (wp, up, usize, vp, vsize);
+ refmpn_mul (wp, up, usize, vp, vsize);
else
- ref_mpn_mul (wp, vp, vsize, up, usize);
+ refmpn_mul (wp, vp, vsize, up, usize);
wsize = usize + vsize;
wsize -= wp[wsize - 1] == 0;
MPZ_REALLOC (w, wsize);
@@ -181,127 +177,6 @@
__GMP_FREE_FUNC_LIMBS (wp, talloc);
}
-static void mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
-
-#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
-#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
-#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
-
-static void
-ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
-{
- mp_ptr tp;
- mp_size_t tn;
- mp_limb_t cy;
-
- if (vn < TOOM3_THRESHOLD)
- {
- /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
- mul_basecase. */
- if (vn != 0)
- mul_basecase (wp, up, un, vp, vn);
- else
- MPN_ZERO (wp, un);
- return;
- }
-
- if (vn < TOOM4_THRESHOLD)
- {
- /* In the mpn_toom33_mul range, use mpn_toom22_mul. */
- tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
- tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
- mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
- }
- else if (vn < FFT_THRESHOLD)
- {
- /* In the mpn_toom44_mul range, use mpn_toom33_mul. */
- tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
- tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
- mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
- }
- else
- {
- /* Finally, for the largest operands, use mpn_toom44_mul. */
- tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
- tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
- mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
- }
-
- if (un != vn)
- {
- if (un - vn < vn)
- ref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);
- else
- ref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);
-
- MPN_COPY (wp, tp, vn);
- cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
- mpn_incr_u (wp + 2 * vn, cy);
- }
- else
- {
- MPN_COPY (wp, tp, 2 * vn);
- }
-
- __GMP_FREE_FUNC_LIMBS (tp, tn);
-}
-
-static void
-mul_basecase (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
-{
- mp_size_t i, j;
- mp_limb_t prod_low, prod_high;
- mp_limb_t cy_dig;
- mp_limb_t v_limb;
-
- /* Multiply by the first limb in V separately, as the result can
- be stored (not added) to PROD. We also avoid a loop for zeroing. */
- v_limb = vp[0];
- cy_dig = 0;
- for (j = un; j > 0; j--)
- {
- mp_limb_t u_limb, w_limb;
- u_limb = *up++;
- umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);
- add_ssaaaa (cy_dig, w_limb, prod_high, prod_low, 0, cy_dig << GMP_NAIL_BITS);
- *wp++ = w_limb >> GMP_NAIL_BITS;
- }
-
- *wp++ = cy_dig;
- wp -= un;
- up -= un;
-
- /* For each iteration in the outer loop, multiply one limb from
- U with one limb from V, and add it to PROD. */
- for (i = 1; i < vn; i++)
- {
- v_limb = vp[i];
- cy_dig = 0;
-
- for (j = un; j > 0; j--)
- {
- mp_limb_t u_limb, w_limb;
- u_limb = *up++;
- umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);
- w_limb = *wp;
- add_ssaaaa (prod_high, prod_low, prod_high, prod_low, 0, w_limb << GMP_NAIL_BITS);
- prod_low >>= GMP_NAIL_BITS;
- prod_low += cy_dig;
-#if GMP_NAIL_BITS == 0
- cy_dig = prod_high + (prod_low < cy_dig);
-#else
- cy_dig = prod_high;
- cy_dig += prod_low >> GMP_NUMB_BITS;
-#endif
- *wp++ = prod_low & GMP_NUMB_MASK;
- }
-
- *wp++ = cy_dig;
- wp -= un;
- up -= un;
- }
-}
-
void
dump_abort (int i, char *s,
mpz_t op1, mpz_t op2, mpz_t product, mpz_t ref_product)
diff -r 34f43c28d9f6 -r ee417dae4cb4 tests/refmpn.c
--- a/tests/refmpn.c Wed Dec 16 16:38:29 2009 +0100
+++ b/tests/refmpn.c Wed Dec 16 19:22:57 2009 +0100
@@ -1450,10 +1450,76 @@
prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}
+#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
+#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
+#if WANT_FFT
+#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
+#else
+#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
+#endif
+
+void
+refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+ mp_ptr tp;
+ mp_size_t tn;
+ mp_limb_t cy;
+
+ if (vn < TOOM3_THRESHOLD)
+ {
+ /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
+ mul_basecase. */
+ if (vn != 0)
+ refmpn_mul_basecase (wp, up, un, vp, vn);
+ else
+ MPN_ZERO (wp, un);
+ return;
+ }
+
+ if (vn < TOOM4_THRESHOLD)
+ {
+ /* In the mpn_toom33_mul range, use mpn_toom22_mul. */
+ tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
+ tp = refmpn_malloc_limbs (tn);
+ mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+ }
+ else if (vn < FFT_THRESHOLD)
+ {
+ /* In the mpn_toom44_mul range, use mpn_toom33_mul. */
+ tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
+ tp = refmpn_malloc_limbs (tn);
+ mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+ }
+ else
+ {
+ /* Finally, for the largest operands, use mpn_toom44_mul. */
+ tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
+ tp = refmpn_malloc_limbs (tn);
+ mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+ }
+
+ if (un != vn)
+ {
+ if (un - vn < vn)
+ refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
+ else
+ refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
+
+ MPN_COPY (wp, tp, vn);
+ cy = refmpn_add (wp + vn, wp + vn, un, tp + vn, vn);
+ }
+ else
+ {
+ MPN_COPY (wp, tp, 2 * vn);
+ }
+
+ free (tp);
+}
+
void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
- refmpn_mul_basecase (prodp, up, size, vp, size);
+ refmpn_mul (prodp, up, size, vp, size);
}
void
@@ -1462,6 +1528,7 @@
mp_ptr tp = refmpn_malloc_limbs (2*size);
refmpn_mul_basecase (tp, up, size, vp, size);
refmpn_copyi (prodp, tp, size);
+ free (tp);
}
void
diff -r 34f43c28d9f6 -r ee417dae4cb4 tests/tests.h
--- a/tests/tests.h Wed Dec 16 16:38:29 2009 +0100
+++ b/tests/tests.h Wed Dec 16 19:22:57 2009 +0100
@@ -316,6 +316,7 @@
mp_srcptr vp, mp_size_t vsize));
void refmpn_mul_n __GMP_PROTO ((mp_ptr prodp, mp_srcptr up, mp_srcptr vp,
mp_size_t size));
More information about the gmp-commit
mailing list