[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
Thu Oct 24 23:39:18 CEST 2013
details:   /var/hg/gmp/rev/59f01a51bd44
changeset: 16079:59f01a51bd44
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 23:36:13 2013 +0200
description:
Add initial div_qr_1n_pi2 and div_qr_1u_pi2.

details:   /var/hg/gmp/rev/04cd0d8d3f36
changeset: 16080:04cd0d8d3f36
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 23:36:38 2013 +0200
description:
ChangeLog
diffstat:
 ChangeLog                   |    5 +
 mpn/generic/div_qr_1n_pi2.c |  184 +++++++++++++++++++++++++++++++++++++
 mpn/generic/div_qr_1u_pi2.c |  217 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 406 insertions(+), 0 deletions(-)
diffs (truncated from 421 to 300 lines):
diff -r 2b7cdc0258ca -r 04cd0d8d3f36 ChangeLog
--- a/ChangeLog Thu Oct 24 19:29:33 2013 +0200
+++ b/ChangeLog Thu Oct 24 23:36:38 2013 +0200
@@ -1,3 +1,8 @@
+2013-10-24  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/div_qr_1u_pi2.c: New file.
+	* mpn/generic/div_qr_1n_pi2.c: New file.
+
 2013-10-24  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/x86_64/div_qr_1n_pi1.asm: Bugfixes, for case n == 1 and
diff -r 2b7cdc0258ca -r 04cd0d8d3f36 mpn/generic/div_qr_1n_pi2.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_1n_pi2.c Thu Oct 24 23:36:38 2013 +0200
@@ -0,0 +1,184 @@
+/* mpn_div_qr_1n_pi2.
+
+ THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* ISSUES:
+
+ * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+ * Are there any problems with generating n quotient limbs in the q area? It
+ surely simplifies things.
+
+ * Not yet adequately tested.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
+ carry-out into an additional sum operand.
+*/
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) += __c1 + (__s1 < __c0); \
+ } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+ mp_limb_t dip[2];
+ mp_limb_t d;
+ int norm_cnt;
+};
+
+mp_limb_t
+mpn_div_qr_1n_pi2 (mp_ptr qp,
+ mp_srcptr up, mp_size_t un,
+ struct precomp_div_1_pi2 *pd)
+{
+ mp_limb_t most_significant_q_limb;
+ mp_size_t i;
+ mp_limb_t r, u2, u1, u0;
+ mp_limb_t d0, di1, di0;
+ mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
+ mp_limb_t cnd;
+
+ ASSERT (un >= 2);
+ ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
+ ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
+ ASSERT_MPN (up, un);
+
+#define q3 q3a
+#define q2 q2b
+#define q1 q1b
+
+ up += un - 3;
+ r = up[2];
+ d0 = pd->d;
+
+ most_significant_q_limb = (r >= d0);
+ r -= d0 & -most_significant_q_limb;
+
+ qp += un - 3;
+ qp[2] = most_significant_q_limb;
+
+ di1 = pd->dip[1];
+ di0 = pd->dip[0];
+
+ for (i = un - 3; i >= 0; i -= 2)
+ {
+ u2 = r;
+ u1 = up[1];
+ u0 = up[0];
+
+ /* Dividend in {r,u1,u0} */
+
+ umul_ppmm (q1d,q0d, u1, di0);
+ umul_ppmm (q2b,q1b, u1, di1);
+ q2b++; /* cannot spill */
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
+
+ umul_ppmm (q2c,q1c, u2, di0);
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
+ umul_ppmm (q3a,q2a, u2, di1);
+
+ add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
+
+ q3 += r;
+
+ r = u0 - q2 * d0;
+
+ cnd = (r >= q1);
+ r += d0 & -cnd;
+ sub_ddmmss (q3,q2, q3,q2, 0,cnd);
+
+ if (UNLIKELY (r >= d0))
+ {
+ r -= d0;
+ add_ssaaaa (q3,q2, q3,q2, 0,1);
+ }
+
+ qp[0] = q2;
+ qp[1] = q3;
+
+ up -= 2;
+ qp -= 2;
+ }
+
+ if ((un & 1) == 0)
+ {
+ u2 = r;
+ u1 = up[1];
+
+ udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
+ qp[1] = q3;
+ }
+
+ return r;
+
+#undef q3
+#undef q2
+#undef q1
+}
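
The generic C fallback for add_sssaaaa defined above adds the two-limb
quantity (b1,b0) into (a1,a0), delivers the sum in (s1,s0), and folds the
carry-out into the accumulator limb s2. Below is a minimal standalone sketch
of those semantics, assuming 64-bit limbs and a compiler providing unsigned
__int128 for the reference check; check_add_sssaaaa and its test values are
ad hoc names for illustration, not part of GMP.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Same semantics as the generic add_sssaaaa fallback: (s1,s0) receives the
   two-limb sum (a1,a0) + (b1,b0), and the carry out of that sum is added
   into the accumulator s2.  Checked against 128-bit arithmetic.  */
static void
check_add_sssaaaa (uint64_t s2, uint64_t a1, uint64_t a0,
                   uint64_t b1, uint64_t b0)
{
  uint64_t r2 = s2, r1, r0, c0, c1;

  r0 = a0 + b0;
  r1 = a1 + b1;
  c0 = r0 < a0;             /* carry out of the low limb */
  c1 = r1 < a1;             /* carry out of the high-limb add */
  r1 = r1 + c0;
  r2 += c1 + (r1 < c0);     /* at most one of the two carries is set */

  /* Reference computation with unsigned __int128.  */
  unsigned __int128 ua = ((unsigned __int128) a1 << 64) | a0;
  unsigned __int128 ub = ((unsigned __int128) b1 << 64) | b0;
  unsigned __int128 sum = ua + ub;

  assert (r0 == (uint64_t) sum);
  assert (r1 == (uint64_t) (sum >> 64));
  assert (r2 == s2 + (uint64_t) (sum < ua));   /* carry out of 128 bits */
}

int
main (void)
{
  check_add_sssaaaa (5, ~0ull, ~0ull, 0, 1);   /* carries into s2 */
  check_add_sssaaaa (0, 1, 2, 3, 4);           /* no carries anywhere */
  puts ("add_sssaaaa semantics ok");
  return 0;
}

Note that the two partial carries c1 and (r1 < c0) can never both be set,
so s2 grows by at most one per invocation, which is why the asm variants
can finish with a single adc/addze.
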
diff -r 2b7cdc0258ca -r 04cd0d8d3f36 mpn/generic/div_qr_1u_pi2.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_1u_pi2.c Thu Oct 24 23:36:38 2013 +0200
@@ -0,0 +1,217 @@
+/* mpn_div_qr_1u_pi2.
+
+ THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* ISSUES:
+
+ * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
+
+ * Are there any problems with generating n quotient limbs in the q area? It
+ surely simplifies things.
+
+ * Not yet adequately tested.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
+ carry-out into an additional sum operand.
+*/
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) += __c1 + (__s1 < __c0); \
+ } while (0)
+#endif
+
+struct precomp_div_1_pi2
+{
+ mp_limb_t dip[2];
+ mp_limb_t d;
+ int norm_cnt;
+};
+
+mp_limb_t
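
When un is even, the main loop in div_qr_1n_pi2 leaves one high limb, which
is divided out with udiv_qrnnd_preinv using only the high inverse limb di1
(the point the ISSUES comment questions). For reference, here is a standalone
sketch of that single-limb precomputed-inverse step in the style of Möller
and Granlund's "Improved division by invariant integers", assuming 64-bit
limbs, a normalized divisor, and unsigned __int128 for checking; the helper
names invert_limb_ref and div_2by1_preinv are hypothetical, used only here.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Reciprocal v = floor((B^2 - 1)/d) - B for a normalized divisor d
   (B = 2^64, high bit of d set), computed the slow way via __int128.  */
static uint64_t
invert_limb_ref (uint64_t d)
{
  return (uint64_t) (~(unsigned __int128) 0 / d - ((unsigned __int128) 1 << 64));
}

/* Divide u1*B + u0 (with u1 < d) by the normalized d, given the
   precomputed reciprocal v; returns the quotient and stores the
   remainder.  Same adjustment sequence as udiv_qrnnd_preinv.  */
static uint64_t
div_2by1_preinv (uint64_t *rp, uint64_t u1, uint64_t u0,
                 uint64_t d, uint64_t v)
{
  /* Candidate quotient q1:q0 = v*u1 + (u1+1)*B + u0 (mod B^2).  */
  unsigned __int128 q = (unsigned __int128) v * u1;
  q += ((unsigned __int128) (u1 + 1) << 64) + u0;
  uint64_t q1 = (uint64_t) (q >> 64);
  uint64_t q0 = (uint64_t) q;

  uint64_t r = u0 - q1 * d;     /* remainder candidate, mod B */
  if (r > q0)                   /* q1 was one too large */
    {
      q1--;
      r += d;
    }
  if (r >= d)                   /* rare: q1 was one too small */
    {
      q1++;
      r -= d;
    }
  *rp = r;
  return q1;
}

int
main (void)
{
  uint64_t d = 0x8000000000000001ull;   /* normalized: high bit set */
  uint64_t v = invert_limb_ref (d);
  uint64_t u1 = 0x0123456789abcdefull, u0 = 0xfedcba9876543210ull;
  uint64_t r, q = div_2by1_preinv (&r, u1, u0, d, v);

  /* Check against plain 128-bit division.  */
  unsigned __int128 u = ((unsigned __int128) u1 << 64) | u0;
  assert (q == (uint64_t) (u / d));
  assert (r == (uint64_t) (u % d));
  printf ("q = %llx, r = %llx\n", (unsigned long long) q, (unsigned long long) r);
  return 0;
}

The two correction steps mirror the mask trick used inside the main loop
above (r += d0 & -cnd, followed by a rare final add or subtract of d0).
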