[Gmp-commit] /home/hgfiles/gmp: Improvements to division speed measuring.
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 30 22:59:37 CET 2009
details: /home/hgfiles/gmp/rev/80e9f7f7b40c
changeset: 13273:80e9f7f7b40c
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Dec 30 22:59:34 2009 +0100
description:
Improvements to division speed measuring.
diffstat:
ChangeLog | 6 ++++
tune/speed.c | 6 ++-
tune/speed.h | 83 ++++++++++++++++++++++++++++++++++++-----------------------
3 files changed, 61 insertions(+), 34 deletions(-)
diffs (201 lines):
diff -r 2bb813ced168 -r 80e9f7f7b40c ChangeLog
--- a/ChangeLog Wed Dec 30 22:48:49 2009 +0100
+++ b/ChangeLog Wed Dec 30 22:59:34 2009 +0100
@@ -1,5 +1,11 @@
2009-12-30 Torbjorn Granlund <tege at gmplib.org>
+ * tune/speed.c (routine): New entries for mpn_mu_div_qr and
+ mpn_mupi_div_qr. Allow .r parameter for mpn_sbpi1_div_qr,
+ mpn_dcpi1_div_qr.
+ * tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV, SPEED_ROUTINE_MPN_MU_DIV_QR)
+ (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Handle .r parameter.
+
* tests/mpz/t-tdiv.c: Increase operands size again.
* mpn/generic/tdiv_qr.c: Attempt to choose between DC and MU cleverer.
diff -r 2bb813ced168 -r 80e9f7f7b40c tune/speed.c
--- a/tune/speed.c Wed Dec 30 22:48:49 2009 +0100
+++ b/tune/speed.c Wed Dec 30 22:59:34 2009 +0100
@@ -331,8 +331,10 @@
{ "mpn_ni_invertappr", speed_mpn_ni_invertappr },
{ "mpn_binvert", speed_mpn_binvert },
- { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr },
- { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr },
+ { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL},
{ "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q },
{ "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q },
diff -r 2bb813ced168 -r 80e9f7f7b40c tune/speed.h
--- a/tune/speed.h Wed Dec 30 22:48:49 2009 +0100
+++ b/tune/speed.h Wed Dec 30 22:59:34 2009 +0100
@@ -1475,36 +1475,43 @@
mp_ptr dp, tp, ap, qp; \
gmp_pi1_t inv; \
double t; \
+ mp_size_t size1; \
TMP_DECL; \
\
+ size1 = (s->r == 0 ? 2 * s->size : s->r); \
+ \
SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
\
TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
- \
+ SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \
+ \
+ /* we don't fill in dividend completely when size1 > 2 * s->size */ \
MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (ap+s->size, s->xp, s->size); \
+ MPN_COPY (ap + size1 - s->size, s->xp, s->size); \
+ \
+ MPN_COPY (dp, s->yp, s->size); \
\
/* normalize the data */ \
dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- ap[2*s->size-1] = dp[s->size-1] - 1; \
+ ap[size1 - 1] = dp[s->size - 1] - 1; \
\
invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \
\
- speed_operand_src (s, ap, 2*s->size); \
- speed_operand_dst (s, tp, 2*s->size); \
+ speed_operand_src (s, ap, size1); \
+ speed_operand_dst (s, tp, size1); \
speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, qp, s->size); \
+ speed_operand_dst (s, qp, size1 - s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
- MPN_COPY (tp, ap, 2*s->size); \
- function (qp, tp, 2*s->size, dp, s->size, INV); \
+ MPN_COPY (tp, ap, size1); \
+ function (qp, tp, size1, dp, s->size, INV); \
} while (--i != 0); \
t = speed_endtime (); \
\
@@ -1556,29 +1563,35 @@
unsigned i; \
mp_ptr dp, tp, qp, rp, scratch; \
double t; \
- mp_size_t itch; \
+ mp_size_t size1, itch; \
TMP_DECL; \
\
+ size1 = (s->r == 0 ? 2 * s->size : s->r); \
+ \
SPEED_RESTRICT_COND (s->size >= 2); \
- \
- itch = itchfn (2 * s->size, s->size, 0); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
+ \
+ itch = itchfn (size1, s->size, 0); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
\
+ /* we don't fill in dividend completely when size1 > 2 * s->size */ \
MPN_COPY (tp, s->xp, s->size); \
- MPN_COPY (tp+s->size, s->xp, s->size); \
+ MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
+ \
+ MPN_COPY (dp, s->yp, s->size); \
\
/* normalize the data */ \
dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- tp[2*s->size-1] = dp[s->size-1] - 1; \
- \
- speed_operand_dst (s, qp, s->size); \
+ tp[size1 - 1] = dp[s->size - 1] - 1; \
+ \
+ speed_operand_dst (s, qp, size1 - s->size); \
speed_operand_dst (s, rp, s->size); \
- speed_operand_src (s, tp, 2 * s->size); \
+ speed_operand_src (s, tp, size1); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, scratch, itch); \
speed_cache_fill (s); \
@@ -1586,7 +1599,7 @@
speed_starttime (); \
i = s->reps; \
do { \
- function (qp, rp, tp, 2 * s->size, dp, s->size, scratch); \
+ function (qp, rp, tp, size1, dp, s->size, scratch); \
} while (--i != 0); \
t = speed_endtime (); \
\
@@ -1598,32 +1611,38 @@
unsigned i; \
mp_ptr dp, tp, qp, rp, ip, scratch; \
double t; \
- mp_size_t itch; \
+ mp_size_t size1, itch; \
TMP_DECL; \
\
+ size1 = (s->r == 0 ? 2 * s->size : s->r); \
+ \
SPEED_RESTRICT_COND (s->size >= 2); \
- \
- itch = itchfn (2 * s->size, s->size, 0); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
+ \
+ itch = itchfn (size1, s->size, 0); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */ \
\
+ /* we don't fill in dividend completely when size1 > 2 * s->size */ \
MPN_COPY (tp, s->xp, s->size); \
- MPN_COPY (tp+s->size, s->xp, s->size); \
+ MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
+ \
+ MPN_COPY (dp, s->yp, s->size); \
\
/* normalize the data */ \
dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- tp[2*s->size-1] = dp[s->size-1] - 1; \
+ tp[size1 - 1] = dp[s->size-1] - 1; \
\
mpn_invert (ip, dp, s->size, NULL); \
\
- speed_operand_dst (s, qp, s->size); \
+ speed_operand_dst (s, qp, size1 - s->size); \
speed_operand_dst (s, rp, s->size); \
- speed_operand_src (s, tp, 2 * s->size); \
+ speed_operand_src (s, tp, size1); \
speed_operand_src (s, dp, s->size); \
speed_operand_src (s, ip, s->size); \
speed_operand_dst (s, scratch, itch); \
@@ -1632,7 +1651,7 @@
speed_starttime (); \
i = s->reps; \
do { \
- function (qp, rp, tp, 2 * s->size, dp, s->size, ip, s->size, scratch); \
+ function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch); \
} while (--i != 0); \
t = speed_endtime (); \
\
More information about the gmp-commit
mailing list