[Gmp-commit] /home/hgfiles/gmp: Improvements to division speed measuring.

mercurial at gmplib.org mercurial at gmplib.org
Wed Dec 30 22:59:37 CET 2009


details:   /home/hgfiles/gmp/rev/80e9f7f7b40c
changeset: 13273:80e9f7f7b40c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Dec 30 22:59:34 2009 +0100
description:
Improvements to division speed measuring.

diffstat:

 ChangeLog    |   6 ++++
 tune/speed.c |   6 ++-
 tune/speed.h |  83 ++++++++++++++++++++++++++++++++++++-----------------------
 3 files changed, 61 insertions(+), 34 deletions(-)

diffs (201 lines):

diff -r 2bb813ced168 -r 80e9f7f7b40c ChangeLog
--- a/ChangeLog	Wed Dec 30 22:48:49 2009 +0100
+++ b/ChangeLog	Wed Dec 30 22:59:34 2009 +0100
@@ -1,5 +1,11 @@
 2009-12-30  Torbjorn Granlund  <tege at gmplib.org>
 
+	* tune/speed.c (routine): New entries for mpn_mu_div_qr and
+	mpn_mupi_div_qr.  Allow .r parameter for mpn_sbpi1_div_qr,
+	mpn_dcpi1_div_qr.
+	* tune/speed.h (SPEED_ROUTINE_MPN_PI1_DIV, SPEED_ROUTINE_MPN_MU_DIV_QR)
+	(SPEED_ROUTINE_MPN_MUPI_DIV_QR): Handle .r parameter.
+
 	* tests/mpz/t-tdiv.c: Increase operands size again.
 
 	* mpn/generic/tdiv_qr.c: Attempt to choose between DC and MU cleverer.
diff -r 2bb813ced168 -r 80e9f7f7b40c tune/speed.c
--- a/tune/speed.c	Wed Dec 30 22:48:49 2009 +0100
+++ b/tune/speed.c	Wed Dec 30 22:59:34 2009 +0100
@@ -331,8 +331,10 @@
   { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
   { "mpn_binvert",             speed_mpn_binvert             },
 
-  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr        },
-  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr        },
+  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
+  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
+  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
+  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
   { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q     },
   { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q     },
 
diff -r 2bb813ced168 -r 80e9f7f7b40c tune/speed.h
--- a/tune/speed.h	Wed Dec 30 22:48:49 2009 +0100
+++ b/tune/speed.h	Wed Dec 30 22:59:34 2009 +0100
@@ -1475,36 +1475,43 @@
     mp_ptr     dp, tp, ap, qp;						\
     gmp_pi1_t  inv;							\
     double     t;							\
+    mp_size_t size1;							\
     TMP_DECL;								\
 									\
+    size1 = (s->r == 0 ? 2 * s->size : s->r);				\
+									\
     SPEED_RESTRICT_COND (s->size >= 1);					\
+    SPEED_RESTRICT_COND (size1 >= s->size);				\
 									\
     TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp);			\
+    SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp);			\
     SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2);		\
-									\
+    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2);			\
+									\
+    /* we don't fill in dividend completely when size1 > 2 * s->size */	\
     MPN_COPY (ap,         s->xp, s->size);				\
-    MPN_COPY (ap+s->size, s->xp, s->size);				\
+    MPN_COPY (ap + size1 - s->size, s->xp, s->size);			\
+									\
+    MPN_COPY (dp,         s->yp, s->size);				\
 									\
     /* normalize the data */						\
     dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    ap[2*s->size-1] = dp[s->size-1] - 1;				\
+    ap[size1 - 1] = dp[s->size - 1] - 1;				\
 									\
     invert_pi1 (inv, dp[s->size-1], dp[s->size-2]);			\
 									\
-    speed_operand_src (s, ap, 2*s->size);				\
-    speed_operand_dst (s, tp, 2*s->size);				\
+    speed_operand_src (s, ap, size1);					\
+    speed_operand_dst (s, tp, size1);					\
     speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, qp, s->size);					\
+    speed_operand_dst (s, qp, size1 - s->size);				\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
     i = s->reps;							\
     do {								\
-      MPN_COPY (tp, ap, 2*s->size);					\
-      function (qp, tp, 2*s->size, dp, s->size, INV);			\
+      MPN_COPY (tp, ap, size1);						\
+      function (qp, tp, size1, dp, s->size, INV);			\
     } while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -1556,29 +1563,35 @@
     unsigned   i;							\
     mp_ptr     dp, tp, qp, rp, scratch;					\
     double     t;							\
-    mp_size_t itch;							\
+    mp_size_t size1, itch;						\
     TMP_DECL;								\
 									\
+    size1 = (s->r == 0 ? 2 * s->size : s->r);				\
+									\
     SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    itch = itchfn (2 * s->size, s->size, 0);				\
+    SPEED_RESTRICT_COND (size1 >= s->size);				\
+									\
+    itch = itchfn (size1, s->size, 0);					\
     TMP_MARK;								\
     SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);		\
+    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);			\
     SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
     SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */	\
 									\
+    /* we don't fill in dividend completely when size1 > 2 * s->size */	\
     MPN_COPY (tp,         s->xp, s->size);				\
-    MPN_COPY (tp+s->size, s->xp, s->size);				\
+    MPN_COPY (tp + size1 - s->size, s->xp, s->size);			\
+									\
+    MPN_COPY (dp,         s->yp, s->size);				\
 									\
     /* normalize the data */						\
     dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    tp[2*s->size-1] = dp[s->size-1] - 1;				\
-									\
-    speed_operand_dst (s, qp, s->size);					\
+    tp[size1 - 1] = dp[s->size - 1] - 1;				\
+									\
+    speed_operand_dst (s, qp, size1 - s->size);				\
     speed_operand_dst (s, rp, s->size);					\
-    speed_operand_src (s, tp, 2 * s->size);				\
+    speed_operand_src (s, tp, size1);					\
     speed_operand_src (s, dp, s->size);					\
     speed_operand_dst (s, scratch, itch);				\
     speed_cache_fill (s);						\
@@ -1586,7 +1599,7 @@
     speed_starttime ();							\
     i = s->reps;							\
     do {								\
-      function (qp, rp, tp, 2 * s->size, dp, s->size, scratch);		\
+      function (qp, rp, tp, size1, dp, s->size, scratch);		\
     } while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -1598,32 +1611,38 @@
     unsigned   i;							\
     mp_ptr     dp, tp, qp, rp, ip, scratch;				\
     double     t;							\
-    mp_size_t itch;							\
+    mp_size_t size1, itch;						\
     TMP_DECL;								\
 									\
+    size1 = (s->r == 0 ? 2 * s->size : s->r);				\
+									\
     SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    itch = itchfn (2 * s->size, s->size, 0);				\
+    SPEED_RESTRICT_COND (size1 >= s->size);				\
+									\
+    itch = itchfn (size1, s->size, 0);					\
     TMP_MARK;								\
     SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);		\
+    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);			\
     SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
     SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */	\
     SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */	\
 									\
+    /* we don't fill in dividend completely when size1 > 2 * s->size */	\
     MPN_COPY (tp,         s->xp, s->size);				\
-    MPN_COPY (tp+s->size, s->xp, s->size);				\
+    MPN_COPY (tp + size1 - s->size, s->xp, s->size);			\
+									\
+    MPN_COPY (dp,         s->yp, s->size);				\
 									\
     /* normalize the data */						\
     dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    tp[2*s->size-1] = dp[s->size-1] - 1;				\
+    tp[size1 - 1] = dp[s->size-1] - 1;					\
 									\
     mpn_invert (ip, dp, s->size, NULL);					\
 									\
-    speed_operand_dst (s, qp, s->size);					\
+    speed_operand_dst (s, qp, size1 - s->size);				\
     speed_operand_dst (s, rp, s->size);					\
-    speed_operand_src (s, tp, 2 * s->size);				\
+    speed_operand_src (s, tp, size1);					\
     speed_operand_src (s, dp, s->size);					\
     speed_operand_src (s, ip, s->size);					\
     speed_operand_dst (s, scratch, itch);				\
@@ -1632,7 +1651,7 @@
     speed_starttime ();							\
     i = s->reps;							\
     do {								\
-      function (qp, rp, tp, 2 * s->size, dp, s->size, ip, s->size, scratch); \
+      function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch);	\
     } while (--i != 0);							\
     t = speed_endtime ();						\
 									\


More information about the gmp-commit mailing list