[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Feb 28 22:22:10 CET 2011


details:   /var/hg/gmp/rev/8cba7be7067e
changeset: 13943:8cba7be7067e
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Feb 28 22:14:43 2011 +0100
description:
New algorithm, configured via MOD_1_1P_METHOD

details:   /var/hg/gmp/rev/505e5f964188
changeset: 13944:505e5f964188
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Feb 28 22:15:49 2011 +0100
description:
Fixed last entry

details:   /var/hg/gmp/rev/2e4de8e0a51f
changeset: 13945:2e4de8e0a51f
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Mon Feb 28 22:21:52 2011 +0100
description:
Added tuning of mod_1_1 alternatives.

diffstat:

 ChangeLog             |   21 ++++++
 mpn/generic/mod_1_1.c |  156 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tune/Makefile.am      |    2 +-
 tune/common.c         |   10 +++
 tune/mod_1_1-1.c      |   28 ++++++++
 tune/mod_1_1-2.c      |   28 ++++++++
 tune/speed.c          |    2 +
 tune/speed.h          |    8 ++
 tune/tuneup.c         |   21 ++++++
 9 files changed, 275 insertions(+), 1 deletions(-)

diffs (truncated from 364 to 300 lines):

diff -r 7af6550aaca0 -r 2e4de8e0a51f ChangeLog
--- a/ChangeLog	Mon Feb 28 21:13:25 2011 +0100
+++ b/ChangeLog	Mon Feb 28 22:21:52 2011 +0100
@@ -1,5 +1,26 @@
 2011-02-28  Niels Möller  <nisse at lysator.liu.se>
 
+	* tune/tuneup.c (tune_mod_1): Measure mpn_mod_1_1_1 and
+	mpn_mod_1_1_2, to set MOD_1_1P_METHOD.
+
+	* tune/speed.c (routine): Added mpn_mod_1_1_1 and mpn_mod_1_1_2.
+
+	* tune/speed.h: Declare speed_mpn_mod_1_1_1, speed_mpn_mod_1_1_2,
+	mpn_mod_1_1p_1, mpn_mod_1_1p_2, mpn_mod_1_1p_cps_1, and
+	mpn_mod_1_1p_cps_2.
+
+	* tune/common.c (speed_mpn_mod_1_1_1): New function.
+	(speed_mpn_mod_1_1_2): New function.
+
+	* tune/Makefile.am (libspeed_la_SOURCES): Added mod_1_1-1.c
+	mod_1_1-2.c.
+
+	* tune/mod_1_1-1.c: New file.
+	* tune/mod_1_1-2.c: New file.
+
+	* mpn/generic/mod_1_1.c: Implemented an algorithm with fewer
+	multiplications, configured via MOD_1_1P_METHOD.
+
 	* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Simplified
 	computation of B2modb, use B^2 mod (normalized b).
 	(mpn_mod_1_1p): Corresponding changes. Don't shift b.
diff -r 7af6550aaca0 -r 2e4de8e0a51f mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c	Mon Feb 28 21:13:25 2011 +0100
+++ b/mpn/generic/mod_1_1.c	Mon Feb 28 22:21:52 2011 +0100
@@ -29,6 +29,61 @@
 #include "gmp-impl.h"
 #include "longlong.h"
 
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask. */
+
+#if defined (__GNUC__)
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tsbb\t%k0, %k0"               \
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
+	   : "0"  ((USItype)(s2)),                                      \
+	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tsbb\t%q0, %q0"               \
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
+	   : "0"  ((UDItype)(s2)),                                      \
+	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+/* FIXME: How to do carry -> mask on powerpc? */
+#if 0 && HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"        \
+	   : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
+	   : "0"  ((UDItype)(s2)),                                      \
+	     "r"  ((UDItype)(a1)), "r" ((UDItype)(b1)),                 \
+	     "%2" ((UDItype)(a0)), "rI" ((UDItype)(b0)))
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
+  do {                                                                  \
+    UWtype __s0, __s1, __c0, __c1;                                      \
+    __s0 = (a0) + (b0);                                                 \
+    __s1 = (a1) + (b1);                                                 \
+    __c0 = __s0 < (a0);                                                 \
+    __c1 = __s1 < (a1);                                                 \
+    (s0) = __s0;                                                        \
+    __s1 = __s1 + __c0;                                                 \
+    (s1) = __s1;                                                        \
+    (s2) = - (__c1 + (__s1 < __c0));					\
+  } while (0)
+#endif
+
+#if MOD_1_1P_METHOD == 1
 void
 mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
 {
@@ -103,3 +158,104 @@
 
   return r >> cnt;
 }
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B2modb;
+  int cnt;
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t B1modb = -b;
+      B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+      ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+      cps[2] = B1modb >> cnt;
+    }
+  B2modb = - b * bi;
+  ASSERT (B2modb <= b);    // NB: equality iff b = B/2
+  cps[3] = B2modb;
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+{
+  int cnt;
+  mp_limb_t bi, B1modb;
+  mp_limb_t r0, r1;
+  mp_limb_t r;
+
+  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
+
+  r0 = ap[n-2];
+  r1 = ap[n-1];
+
+  if (n > 2)
+    {
+      mp_limb_t B2modb, B2mb;
+      mp_limb_t p0, p1;
+      mp_limb_t r2;
+      mp_size_t j;
+
+      B2modb = bmodb[3];
+      B2mb = B2modb - b;
+
+      umul_ppmm (p1, p0, r1, B2modb);
+      add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
+
+      for (j = n-4; j >= 0; j--)
+	{
+	  mp_limb_t cy;
+	  /* mp_limb_t t = r0 + B2mb; */
+	  umul_ppmm (p1, p0, r1, B2modb);
+
+	  ADDC_LIMB (cy, r0, r0, r2 & B2modb);
+	  /* Alternative, for cmov: if (cy) r0 = t; */
+	  r0 -= (-cy) & b;
+	  add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
+	}
+
+      r1 -= (r2 & b);
+    }
+
+  cnt = bmodb[1];
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t t;
+      mp_limb_t B1modb = bmodb[2];
+
+      umul_ppmm (r1, t, r1, B1modb);
+      r0 += t;
+      r1 += (r0 < t);
+
+      /* Normalize */
+      r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+      r0 <<= cnt;
+
+      /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows
+	 that. */
+    }
+  else
+    {
+      mp_limb_t mask = - (r1 >= b);
+      r1 -= mask & b;
+    }
+
+  bi = bmodb[0];
+
+  udiv_rnnd_preinv (r, r1, r0, b, bi);
+  return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 2 */
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/Makefile.am
--- a/tune/Makefile.am	Mon Feb 28 21:13:25 2011 +0100
+++ b/tune/Makefile.am	Mon Feb 28 22:21:52 2011 +0100
@@ -44,7 +44,7 @@
   freq.c								\
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
   hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c		\
-  mod_1_div.c mod_1_inv.c modlinv.c					\
+  mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
   set_strb.c set_strs.c set_strp.c time.c
 
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/common.c
--- a/tune/common.c	Mon Feb 28 21:13:25 2011 +0100
+++ b/tune/common.c	Mon Feb 28 22:21:52 2011 +0100
@@ -704,6 +704,16 @@
   SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
 }
 double
+speed_mpn_mod_1_1_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
+}
+double
+speed_mpn_mod_1_1_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
+}
+double
 speed_mpn_mod_1_2 (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/mod_1_1-1.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/mod_1_1-1.c	Mon Feb 28 22:21:52 2011 +0100
@@ -0,0 +1,28 @@
+/* mpn/generic/mod_1_1.c method 1.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 1
+#define __gmpn_mod_1_1p mpn_mod_1_1p_1
+#define __gmpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
+
+#include "mpn/generic/mod_1_1.c"
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/mod_1_1-2.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/mod_1_1-2.c	Mon Feb 28 22:21:52 2011 +0100
@@ -0,0 +1,28 @@
+/* mpn/generic/mod_1_1.c method 2.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 2
+#define __gmpn_mod_1_1p mpn_mod_1_1p_2
+#define __gmpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
+
+#include "mpn/generic/mod_1_1.c"
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/speed.c
--- a/tune/speed.c	Mon Feb 28 21:13:25 2011 +0100
+++ b/tune/speed.c	Mon Feb 28 22:21:52 2011 +0100
@@ -213,6 +213,8 @@


More information about the gmp-commit mailing list