[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Feb 1 21:15:53 CET 2011


details:   /home/hgfiles/gmp/rev/984d11fb3acc
changeset: 13814:984d11fb3acc
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 01 11:13:42 2011 +0100
description:
Set up 32-bit x86 paths for new corei* CPU strings.

details:   /home/hgfiles/gmp/rev/a8457c08cf20
changeset: 13815:a8457c08cf20
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Feb 01 21:15:50 2011 +0100
description:
Various changes to tune dir code.

diffstat:

 ChangeLog     |  13 +++++++++
 configure.in  |   4 +-
 tune/common.c |  84 ++--------------------------------------------------------
 tune/speed.h  |  52 +++++++++++++++++++++++++++++++++++-
 4 files changed, 69 insertions(+), 84 deletions(-)

diffs (216 lines):

diff -r e91f940edcd0 -r a8457c08cf20 ChangeLog
--- a/ChangeLog	Tue Feb 01 10:17:33 2011 +0100
+++ b/ChangeLog	Tue Feb 01 21:15:50 2011 +0100
@@ -1,3 +1,16 @@
+2011-02-01  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/speed.h (struct speed_params): Allow for 4 dst operands.
+	* tune/common.c (TOLERANCE): Increase from 0.5% to 1%.
+
+	* tune/speed.h (SPEED_ROUTINE_MPN_HGCD_CALL): New macro, mainly based
+	on old speed_mpn_hgcd, but with speed_operand_src calls (as suggested
+	by Niels).
+	* tune/common.c	(speed_mpn_hgcd): Invoke SPEED_ROUTINE_MPN_HGCD_CALL.
+	(speed_mpn_hgcd_lehmer): Likewise.
+
+	* configure.in: Set up 32-bit x86 paths for new corei* CPU strings.
+
 2011-01-31  Torbjorn Granlund  <tege at gmplib.org>
 
 	* config.guess: Support 'coreinhm' and 'coreisbr'.
diff -r e91f940edcd0 -r a8457c08cf20 configure.in
--- a/configure.in	Tue Feb 01 10:17:33 2011 +0100
+++ b/configure.in	Tue Feb 01 21:15:50 2011 +0100
@@ -1450,7 +1450,7 @@
         gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
         gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
         ;;
-      core2 | corei*)
+      core2 | corei | coreinhm | coreiwsm | coreisbr)
         gcc_cflags_cpu="-mtune=core2 -mtune=k8"
         gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
         ;;
@@ -1472,7 +1472,7 @@
       i686 | pentiumpro)    path="x86/p6 x86" ;;
       pentium2)             path="x86/p6/mmx x86/p6 x86" ;;
       pentium3)             path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
-      pentiumm | core2 | corei)
+      pentiumm | core2 | corei | coreinhm | coreiwsm | coreisbr)
                             path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86";;
       [k6[23]])             path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86" ;;
       k6)                   path="x86/k6/mmx x86/k6 x86" ;;
diff -r e91f940edcd0 -r a8457c08cf20 tune/common.c
--- a/tune/common.c	Tue Feb 01 10:17:33 2011 +0100
+++ b/tune/common.c	Tue Feb 01 21:15:50 2011 +0100
@@ -125,7 +125,7 @@
 speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
 	       struct speed_params *s)
 {
-#define TOLERANCE    1.005  /* 0.5% */
+#define TOLERANCE    1.01  /* 1% */
   const int max_zeros = 10;
 
   struct speed_params  s_dummy;
@@ -1364,91 +1364,13 @@
 double
 speed_mpn_hgcd (struct speed_params *s)
 {
-  mp_ptr wp;
-  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
-  mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
-  mp_ptr ap;
-  mp_ptr bp;
-  mp_ptr tmp1;
-
-  struct hgcd_matrix hgcd;
-  int res;
-  unsigned i;
-  double t;
-  TMP_DECL;
-
-  if (s->size < 2)
-    return -1;
-
-  TMP_MARK;
-
-  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
-  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
-
-  s->xp[s->size - 1] |= 1;
-  s->yp[s->size - 1] |= 1;
-
-  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
-  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
-
-  speed_starttime ();
-  i = s->reps;
-  do
-    {
-      MPN_COPY (ap, s->xp, s->size);
-      MPN_COPY (bp, s->yp, s->size);
-      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
-      res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
-    }
-  while (--i != 0);
-  t = speed_endtime ();
-  TMP_FREE;
-  return t;
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
 }
 
 double
 speed_mpn_hgcd_lehmer (struct speed_params *s)
 {
-  mp_ptr wp;
-  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
-  mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
-  mp_ptr ap;
-  mp_ptr bp;
-  mp_ptr tmp1;
-
-  struct hgcd_matrix hgcd;
-  int res;
-  unsigned i;
-  double t;
-  TMP_DECL;
-
-  if (s->size < 2)
-    return -1;
-
-  TMP_MARK;
-
-  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
-  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
-
-  s->xp[s->size - 1] |= 1;
-  s->yp[s->size - 1] |= 1;
-
-  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
-  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
-
-  speed_starttime ();
-  i = s->reps;
-  do
-    {
-      MPN_COPY (ap, s->xp, s->size);
-      MPN_COPY (bp, s->yp, s->size);
-      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
-      res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
-    }
-  while (--i != 0);
-  t = speed_endtime ();
-  TMP_FREE;
-  return t;
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, MPN_HGCD_LEHMER_ITCH);
 }
 
 double
diff -r e91f940edcd0 -r a8457c08cf20 tune/speed.h
--- a/tune/speed.h	Tue Feb 01 10:17:33 2011 +0100
+++ b/tune/speed.h	Tue Feb 01 21:15:50 2011 +0100
@@ -117,7 +117,7 @@
   struct {
     mp_ptr    ptr;
     mp_size_t size;
-  } src[3], dst[3];
+  } src[3], dst[4];
 };
 
 typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
@@ -2434,6 +2434,56 @@
      function (px[j-1], py[j-1], 0))
 
 
+#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc)			\
+  {  /* Times s->reps calls of func on copies of s->xp, s->yp.  */	\
+    mp_size_t hgcd_init_itch, hgcd_itch;				\
+    mp_ptr ap, bp, wp, tmp1;						\
+    struct hgcd_matrix hgcd;						\
+    int res;								\
+    unsigned i;								\
+    double t;								\
+    TMP_DECL;								\
+    /* Return -1 without timing when size < 2.  */			\
+    if (s->size < 2)							\
+      return -1;							\
+    									\
+    TMP_MARK;								\
+    /* Working copies of the operands, refreshed on each rep.  */	\
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
+    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
+    /* Set bit 0 of the last limb of each source operand.  */		\
+    s->xp[s->size - 1] |= 1;						\
+    s->yp[s->size - 1] |= 1;						\
+    /* Limb counts for the matrix-init and func scratch areas.  */	\
+    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
+    hgcd_itch = itchfunc (s->size);					\
+    									\
+    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp);			\
+    /* Sizes for ap, bp, wp, tmp1 match their allocations.  */		\
+    speed_operand_src (s, s->xp, s->size);				\
+    speed_operand_src (s, s->yp, s->size);				\
+    speed_operand_dst (s, ap, s->size + 1);				\
+    speed_operand_dst (s, bp, s->size + 1);				\
+    speed_operand_dst (s, wp, hgcd_itch);				\
+    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
+    speed_cache_fill (s);						\
+    									\
+    speed_starttime ();							\
+    i = s->reps;							\
+    do									\
+      {									\
+	MPN_COPY (ap, s->xp, s->size);					\
+	MPN_COPY (bp, s->yp, s->size);					\
+	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
+	res = func (ap, bp, s->size, &hgcd, wp);			\
+      }									\
+    while (--i != 0);							\
+    t = speed_endtime ();						\
+    TMP_FREE;								\
+    return t;								\
+  }
+
 /* Run some GCDs of s->size limbs each.  The number of different data values
    is decreased as s->size**2, since GCD is a quadratic algorithm.
    SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT


More information about the gmp-commit mailing list