[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Feb 28 22:22:10 CET 2011
details: /var/hg/gmp/rev/8cba7be7067e
changeset: 13943:8cba7be7067e
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Feb 28 22:14:43 2011 +0100
description:
New algorithm, configured via MOD_1_1P_METHOD
details: /var/hg/gmp/rev/505e5f964188
changeset: 13944:505e5f964188
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Feb 28 22:15:49 2011 +0100
description:
Fixed last entry
details: /var/hg/gmp/rev/2e4de8e0a51f
changeset: 13945:2e4de8e0a51f
user: Niels M?ller <nisse at lysator.liu.se>
date: Mon Feb 28 22:21:52 2011 +0100
description:
Added tuning of mod_1_1 alternatives.
diffstat:
ChangeLog | 21 ++++++
mpn/generic/mod_1_1.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++
tune/Makefile.am | 2 +-
tune/common.c | 10 +++
tune/mod_1_1-1.c | 28 ++++++++
tune/mod_1_1-2.c | 28 ++++++++
tune/speed.c | 2 +
tune/speed.h | 8 ++
tune/tuneup.c | 21 ++++++
9 files changed, 275 insertions(+), 1 deletions(-)
diffs (truncated from 364 to 300 lines):
diff -r 7af6550aaca0 -r 2e4de8e0a51f ChangeLog
--- a/ChangeLog Mon Feb 28 21:13:25 2011 +0100
+++ b/ChangeLog Mon Feb 28 22:21:52 2011 +0100
@@ -1,5 +1,26 @@
2011-02-28 Niels Möller <nisse at lysator.liu.se>
+ * tune/tuneup.c (tune_mod_1): Measure mpn_mod_1_1_1 and
+ mpn_mod_1_1_2, to set MOD_1_1P_METHOD.
+
+ * tune/speed.c (routine): Added mpn_mod_1_1_1 and mpn_mod_1_1_2.
+
+ * tune/speed.h: Declare speed_mpn_mod_1_1_1, speed_mpn_mod_1_1_2,
+ mpn_mod_1_1p_1, mpn_mod_1_1p_2, mpn_mod_1_1p_cps_1, and
+ mpn_mod_1_1p_cps_2.
+
+ * tune/common.c (speed_mpn_mod_1_1_1): New function.
+ (speed_mpn_mod_1_1_2): New function.
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added mod_1_1-1.c
+ mod_1_1-2.c.
+
+ * tune/mod_1_1-1.c: New file.
+ * tune/mod_1_1-2.c: New file.
+
+ * mpn/generic/mod_1_1.c: Implemented an algorithm with fewer
+ multiplications, configured via MOD_1_1P_METHOD.
+
* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Simplified
computation of B2modb, use B^2 mod (normalized b).
(mpn_mod_1_1p): Corresponding changes. Don't shift b.
diff -r 7af6550aaca0 -r 2e4de8e0a51f mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c Mon Feb 28 21:13:25 2011 +0100
+++ b/mpn/generic/mod_1_1.c Mon Feb 28 22:21:52 2011 +0100
@@ -29,6 +29,61 @@
#include "gmp-impl.h"
#include "longlong.h"
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask. */
+
+#if defined (__GNUC__)
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tsbb\t%k0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tsbb\t%q0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+/* FIXME: How to do carry -> mask on powerpc? */
+#if 0 && HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "r" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rI" ((UDItype)(b0)))
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) = - (__c1 + (__s1 < __c0)); \
+ } while (0)
+#endif
+
+#if MOD_1_1P_METHOD == 1
void
mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
{
@@ -103,3 +158,104 @@
return r >> cnt;
}
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B2modb;
+ int cnt;
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ if (LIKELY (cnt != 0))
+ {
+ mp_limb_t B1modb = -b;
+ B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+ }
+ B2modb = - b * bi;
+ ASSERT (B2modb <= b); // NB: equality iff b = B/2
+ cps[3] = B2modb;
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+{
+ int cnt;
+ mp_limb_t bi, B1modb;
+ mp_limb_t r0, r1;
+ mp_limb_t r;
+
+ ASSERT (n >= 2); /* fix tuneup.c if this is changed */
+
+ r0 = ap[n-2];
+ r1 = ap[n-1];
+
+ if (n > 2)
+ {
+ mp_limb_t B2modb, B2mb;
+ mp_limb_t p0, p1;
+ mp_limb_t r2;
+ mp_size_t j;
+
+ B2modb = bmodb[3];
+ B2mb = B2modb - b;
+
+ umul_ppmm (p1, p0, r1, B2modb);
+ add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
+
+ for (j = n-4; j >= 0; j--)
+ {
+ mp_limb_t cy;
+ /* mp_limb_t t = r0 + B2mb; */
+ umul_ppmm (p1, p0, r1, B2modb);
+
+ ADDC_LIMB (cy, r0, r0, r2 & B2modb);
+ /* Alternative, for cmov: if (cy) r0 = t; */
+ r0 -= (-cy) & b;
+ add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
+ }
+
+ r1 -= (r2 & b);
+ }
+
+ cnt = bmodb[1];
+
+ if (LIKELY (cnt != 0))
+ {
+ mp_limb_t t;
+ mp_limb_t B1modb = bmodb[2];
+
+ umul_ppmm (r1, t, r1, B1modb);
+ r0 += t;
+ r1 += (r0 < t);
+
+ /* Normalize */
+ r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+ r0 <<= cnt;
+
+ /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows
+ that. */
+ }
+ else
+ {
+ mp_limb_t mask = - (r1 >= b);
+ r1 -= mask & b;
+ }
+
+ bi = bmodb[0];
+
+ udiv_rnnd_preinv (r, r1, r0, b, bi);
+ return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 2 */
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/Makefile.am
--- a/tune/Makefile.am Mon Feb 28 21:13:25 2011 +0100
+++ b/tune/Makefile.am Mon Feb 28 22:21:52 2011 +0100
@@ -44,7 +44,7 @@
freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
hgcd_lehmer.c jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
- mod_1_div.c mod_1_inv.c modlinv.c \
+ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/common.c
--- a/tune/common.c Mon Feb 28 21:13:25 2011 +0100
+++ b/tune/common.c Mon Feb 28 22:21:52 2011 +0100
@@ -704,6 +704,16 @@
SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
}
double
+speed_mpn_mod_1_1_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
+}
+double
+speed_mpn_mod_1_1_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
+}
+double
speed_mpn_mod_1_2 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/mod_1_1-1.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/mod_1_1-1.c Mon Feb 28 22:21:52 2011 +0100
@@ -0,0 +1,28 @@
+/* mpn/generic/mod_1_1.c method 1.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 1
+#define __gmpn_mod_1_1p mpn_mod_1_1p_1
+#define __gmpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
+
+#include "mpn/generic/mod_1_1.c"
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/mod_1_1-2.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/mod_1_1-2.c Mon Feb 28 22:21:52 2011 +0100
@@ -0,0 +1,28 @@
+/* mpn/generic/mod_1_1.c method 2.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 2
+#define __gmpn_mod_1_1p mpn_mod_1_1p_2
+#define __gmpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
+
+#include "mpn/generic/mod_1_1.c"
diff -r 7af6550aaca0 -r 2e4de8e0a51f tune/speed.c
--- a/tune/speed.c Mon Feb 28 21:13:25 2011 +0100
+++ b/tune/speed.c Mon Feb 28 22:21:52 2011 +0100
@@ -213,6 +213,8 @@
More information about the gmp-commit
mailing list