[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Feb 1 21:15:53 CET 2011
details: /home/hgfiles/gmp/rev/984d11fb3acc
changeset: 13814:984d11fb3acc
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Feb 01 11:13:42 2011 +0100
description:
Set up 32-bit x86 paths for new corei* CPU strings.
details: /home/hgfiles/gmp/rev/a8457c08cf20
changeset: 13815:a8457c08cf20
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Feb 01 21:15:50 2011 +0100
description:
Various changes to tune dir code.
diffstat:
ChangeLog | 13 +++++++++
configure.in | 4 +-
tune/common.c | 84 ++--------------------------------------------------------
tune/speed.h | 52 +++++++++++++++++++++++++++++++++++-
4 files changed, 69 insertions(+), 84 deletions(-)
diffs (216 lines):
diff -r e91f940edcd0 -r a8457c08cf20 ChangeLog
--- a/ChangeLog Tue Feb 01 10:17:33 2011 +0100
+++ b/ChangeLog Tue Feb 01 21:15:50 2011 +0100
@@ -1,3 +1,16 @@
+2011-02-01 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/speed.h (struct speed_params): Allow for 4 dst operands.
+ * tune/common.c (TOLERANCE): Increase from 0.5% to 1%.
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_CALL): New macro, mainly based
+ on old speed_mpn_hgcd, but with speed_operand_src calls (as suggested
+ by Niels).
+ * tune/common.c (speed_mpn_hgcd): Invoke SPEED_ROUTINE_MPN_HGCD_CALL.
+ (speed_mpn_hgcd_lehmer): Likewise.
+
+ * configure.in: Set up 32-bit x86 paths for new corei* CPU strings.
+
2011-01-31 Torbjorn Granlund <tege at gmplib.org>
* config.guess: Support 'coreinhm' and 'coreisbr'.
diff -r e91f940edcd0 -r a8457c08cf20 configure.in
--- a/configure.in Tue Feb 01 10:17:33 2011 +0100
+++ b/configure.in Tue Feb 01 21:15:50 2011 +0100
@@ -1450,7 +1450,7 @@
gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
;;
- core2 | corei*)
+ core2 | corei | coreinhm | coreiwsm | coreisbr)
gcc_cflags_cpu="-mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
;;
@@ -1472,7 +1472,7 @@
i686 | pentiumpro) path="x86/p6 x86" ;;
pentium2) path="x86/p6/mmx x86/p6 x86" ;;
pentium3) path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
- pentiumm | core2 | corei)
+ pentiumm | core2 | corei | coreinhm | coreiwsm | coreisbr)
path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
[k6[23]]) path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86" ;;
k6) path="x86/k6/mmx x86/k6 x86" ;;
diff -r e91f940edcd0 -r a8457c08cf20 tune/common.c
--- a/tune/common.c Tue Feb 01 10:17:33 2011 +0100
+++ b/tune/common.c Tue Feb 01 21:15:50 2011 +0100
@@ -125,7 +125,7 @@
speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
struct speed_params *s)
{
-#define TOLERANCE 1.005 /* 0.5% */
+#define TOLERANCE 1.01 /* 1% */
const int max_zeros = 10;
struct speed_params s_dummy;
@@ -1364,91 +1364,13 @@
double
speed_mpn_hgcd (struct speed_params *s)
{
- mp_ptr wp;
- mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
- mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
- mp_ptr ap;
- mp_ptr bp;
- mp_ptr tmp1;
-
- struct hgcd_matrix hgcd;
- int res;
- unsigned i;
- double t;
- TMP_DECL;
-
- if (s->size < 2)
- return -1;
-
- TMP_MARK;
-
- SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
-
- s->xp[s->size - 1] |= 1;
- s->yp[s->size - 1] |= 1;
-
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (bp, s->yp, s->size);
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
- res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
- }
- while (--i != 0);
- t = speed_endtime ();
- TMP_FREE;
- return t;
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
}
double
speed_mpn_hgcd_lehmer (struct speed_params *s)
{
- mp_ptr wp;
- mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
- mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
- mp_ptr ap;
- mp_ptr bp;
- mp_ptr tmp1;
-
- struct hgcd_matrix hgcd;
- int res;
- unsigned i;
- double t;
- TMP_DECL;
-
- if (s->size < 2)
- return -1;
-
- TMP_MARK;
-
- SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
-
- s->xp[s->size - 1] |= 1;
- s->yp[s->size - 1] |= 1;
-
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (bp, s->yp, s->size);
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
- res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
- }
- while (--i != 0);
- t = speed_endtime ();
- TMP_FREE;
- return t;
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, MPN_HGCD_LEHMER_ITCH);
}
double
diff -r e91f940edcd0 -r a8457c08cf20 tune/speed.h
--- a/tune/speed.h Tue Feb 01 10:17:33 2011 +0100
+++ b/tune/speed.h Tue Feb 01 21:15:50 2011 +0100
@@ -117,7 +117,7 @@
struct {
mp_ptr ptr;
mp_size_t size;
- } src[3], dst[3];
+ } src[3], dst[4];
};
typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
@@ -2434,6 +2434,56 @@
function (px[j-1], py[j-1], 0))
+#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) \
+ { \
+ mp_size_t hgcd_init_itch, hgcd_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ int res; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ /* Time s->reps calls of func; sizes below 2 limbs return -1. */ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ /* ap/bp are working copies of xp/yp, re-copied before every call. */ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ /* NOTE(review): presumably keeps each top limb nonzero -- confirm. */ \
+ s->xp[s->size - 1] |= 1; \
+ s->yp[s->size - 1] |= 1; \
+ /* tmp1 backs the hgcd matrix; wp is the workspace passed to func. */ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_itch = itchfunc (s->size); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \
+ /* Register every buffer with the size it was allocated with. */ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (ap, bp, s->size, &hgcd, wp); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s->size**2, since GCD is a quadratic algorithm.
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
More information about the gmp-commit
mailing list