speed enhancement
Paul Zimmermann
Paul.Zimmermann at loria.fr
Tue Sep 28 10:30:10 CEST 2010
Hi,
I suggest adding mpn_sqrt and mpn_root to the "speed" program, to measure
the mpn_sqrtrem and mpn_rootrem functions with a NULL 2nd argument. Patch below.
I realized that mpn_rootrem(2) with a NULL 2nd argument is faster than
mpn_sqrtrem with a NULL 2nd argument!
Maybe mpn_sqrtrem can be improved in that case, or could it simply call mpn_rootrem(2)?
tarte% ./speed -s 1000 mpn_sqrt mpn_sqrtrem mpn_root.2 mpn_rootrem.2
overhead 0.000000002 secs, precision 10000 units of 3.53e-10 secs, CPU freq 2833.00 MHz
mpn_sqrt mpn_sqrtrem mpn_root.2 mpn_rootrem.2
1000 0.000154164 0.000154746 #0.000145061 0.000225465
Paul Zimmermann
--- common.c.orig 2010-09-28 10:08:16.000000000 +0200
+++ common.c 2010-09-28 10:10:36.000000000 +0200
@@ -1495,11 +1495,22 @@
}
double
+speed_mpn_sqrt (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQRT (mpn_sqrtrem);
+}
+
+double
speed_mpn_rootrem (struct speed_params *s)
{
SPEED_ROUTINE_MPN_ROOTREM (mpn_rootrem);
}
+double
+speed_mpn_root (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_ROOT (mpn_rootrem);
+}
double
speed_mpz_fac_ui (struct speed_params *s)
--- speed.c.orig 2010-09-28 10:12:38.000000000 +0200
+++ speed.c 2010-09-28 10:13:01.000000000 +0200
@@ -347,7 +347,9 @@
{ "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
{ "mpn_sqrtrem", speed_mpn_sqrtrem },
+ { "mpn_sqrt", speed_mpn_sqrt },
{ "mpn_rootrem", speed_mpn_rootrem, FLAG_R },
+ { "mpn_root", speed_mpn_root, FLAG_R },
{ "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA },
{ "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA },
--- speed.h.orig 2010-09-28 10:07:03.000000000 +0200
+++ speed.h 2010-09-28 10:11:10.000000000 +0200
@@ -277,7 +277,9 @@
double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqrt __GMP_PROTO ((struct speed_params *s));
double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_root __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
double speed_mpn_sublsh2_n __GMP_PROTO ((struct speed_params *s));
@@ -2751,6 +2753,33 @@
return t; \
}
+#define SPEED_ROUTINE_MPN_SQRT(function) \
+ { \
+ mp_ptr wp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, NULL, s->xp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_ROOTREM(function) \
{ \
mp_ptr wp, wp2; \
@@ -2781,6 +2810,34 @@
}
+#define SPEED_ROUTINE_MPN_ROOT(function) \
+ { \
+ mp_ptr wp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, NULL, s->xp, s->size, s->r); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
/* s->size controls the number of limbs in the input, s->r is the base, or
decimal by default. */
#define SPEED_ROUTINE_MPN_GET_STR(function) \
More information about the gmp-devel
mailing list