[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Oct 20 16:36:31 CEST 2013
details: /var/hg/gmp/rev/177b28dd8ae7
changeset: 16057:177b28dd8ae7
user: Niels Möller <nisse at lysator.liu.se>
date: Sun Oct 20 08:00:13 2013 +0200
description:
New function mpn_div_qr_1.
details: /var/hg/gmp/rev/e365e0526242
changeset: 16058:e365e0526242
user: Niels Möller <nisse at lysator.liu.se>
date: Sun Oct 20 08:15:42 2013 +0200
description:
speed support for mpn_div_qr_1.
details: /var/hg/gmp/rev/066e1b19ff4f
changeset: 16059:066e1b19ff4f
user: Niels Möller <nisse at lysator.liu.se>
date: Sun Oct 20 16:34:09 2013 +0200
description:
Implemented tuning of mpn_div_qr_1.
diffstat:
ChangeLog | 38 ++++++
configure.ac | 1 +
gmp-h.in | 3 +
gmp-impl.h | 15 ++
mpn/generic/div_qr_1.c | 116 +++++++++++++++++++
mpn/generic/div_qr_1n_pi1.c | 266 ++++++++++++++++++++++++++++++++++++++++++++
tests/mpn/t-div.c | 10 +
tune/Makefile.am | 1 +
tune/common.c | 22 +++
tune/div_qr_1_tune.c | 34 +++++
tune/div_qr_1n_pi1_1.c | 28 ++++
tune/div_qr_1n_pi1_2.c | 28 ++++
tune/speed.c | 5 +
tune/speed.h | 72 +++++++++++
tune/tuneup.c | 65 ++++++++++-
15 files changed, 700 insertions(+), 4 deletions(-)
diffs (truncated from 877 to 300 lines):
diff -r e90e979ce2f7 -r 066e1b19ff4f ChangeLog
--- a/ChangeLog Thu Oct 17 17:19:19 2013 +0200
+++ b/ChangeLog Sun Oct 20 16:34:09 2013 +0200
@@ -1,3 +1,41 @@
+2013-10-20 Niels Möller <nisse at lysator.liu.se>
+
+ * tune/common.c (speed_mpn_div_qr_1): New function, replacing...
+ (speed_mpn_div_qr_1n, speed_mpn_div_qr_1u): ... deleted functions
+ (speed_mpn_div_qr_1n_pi1, speed_mpn_div_qr_1n_pi1_1)
+ (speed_mpn_div_qr_1n_pi1_2): New functions.
+ * gmp-impl.h [TUNE_PROGRAM_BUILD]: Declare div_qr_1-related tuning
+ variables.
+ * tune/tuneup.c (speed_mpn_div_qr_1_tune, tune_div_qr_1): New
+ functions.
+ (div_qr_1n_pi1_method, div_qr_1_norm_threshold)
+ (div_qr_1_unnorm_threshold): New globals.
+ * tune/speed.c (routine): Replaced mpn_div_qr_1n and mpn_div_qr_1u
+ by mpn_div_qr_1, requiring ".r" parameter. Added mpn_div_qr_1n_pi1
+ and variants.
+ * tune/speed.h (SPEED_ROUTINE_MPN_DIV_QR_1): Use the "r" parameter
+ as divisor.
+ * tune/div_qr_1n_pi1_2.c: New file.
+ * tune/div_qr_1n_pi1_1.c: New file.
+ * tune/div_qr_1_tune.c: New file.
+ * tune/Makefile.am (libspeed_la_SOURCES): Added div_qr_1n_pi1_1.c,
+ div_qr_1n_pi1_2.c, and div_qr_1_tune.c.
+
+ * tune/speed.c (routine): Added mpn_div_qr_1n and mpn_div_qr_1u.
+ * tune/speed.h (SPEED_ROUTINE_MPN_DIV_QR_1): New macro.
+ (speed_mpn_div_qr_1n, speed_mpn_div_qr_1u): Declare.
+ * tune/common.c (speed_mpn_div_qr_1n, speed_mpn_div_qr_1u): New
+ functions.
+
+ * gmp-impl.h (mpn_div_qr_1n_pi1): Declare function.
+ * gmp-h.in (mpn_div_qr_1): Declare function.
+ * configure.ac (gmp_mpn_functions): Added div_qr_1 and
+ div_qr_1n_pi1.
+ * mpn/generic/div_qr_1.c (mpn_div_qr_1): New file and function.
+ * mpn/generic/div_qr_1n_pi1.c (mpn_div_qr_1n_pi1): New file and
+ function.
+ * tests/mpn/t-div.c (main): Test mpn_div_qr_1.
+
2013-10-17 Torbjorn Granlund <tege at gmplib.org>
* configure.ac (alpha): Pass -mieee via gcc_cflags_maybe.
diff -r e90e979ce2f7 -r 066e1b19ff4f configure.ac
--- a/configure.ac Thu Oct 17 17:19:19 2013 +0200
+++ b/configure.ac Sun Oct 20 16:34:09 2013 +0200
@@ -2821,6 +2821,7 @@
toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \
invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \
+ div_qr_1 div_qr_1n_pi1 \
div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1 \
sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
diff -r e90e979ce2f7 -r 066e1b19ff4f gmp-h.in
--- a/gmp-h.in Thu Oct 17 17:19:19 2013 +0200
+++ b/gmp-h.in Sun Oct 20 16:34:09 2013 +0200
@@ -1491,6 +1491,9 @@
#define mpn_divrem_2 __MPN(divrem_2)
__GMP_DECLSPEC mp_limb_t mpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+#define mpn_div_qr_1 __MPN(div_qr_1)
+__GMP_DECLSPEC mp_limb_t mpn_div_qr_1 (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
+
#define mpn_div_qr_2 __MPN(div_qr_2)
__GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
diff -r e90e979ce2f7 -r 066e1b19ff4f gmp-impl.h
--- a/gmp-impl.h Thu Oct 17 17:19:19 2013 +0200
+++ b/gmp-impl.h Sun Oct 20 16:34:09 2013 +0200
@@ -1416,6 +1416,9 @@
#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
+#define mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
+ __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
#define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
__GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
@@ -4904,6 +4907,18 @@
#define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
extern mp_size_t gcdext_dc_threshold;
+#undef DIV_QR_1N_PI1_METHOD
+#define DIV_QR_1N_PI1_METHOD div_qr_1n_pi1_method
+extern int div_qr_1n_pi1_method;
+
+#undef DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD div_qr_1_norm_threshold
+extern mp_size_t div_qr_1_norm_threshold;
+
+#undef DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD div_qr_1_unnorm_threshold
+extern mp_size_t div_qr_1_unnorm_threshold;
+
#undef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
extern mp_size_t divrem_1_norm_threshold;
diff -r e90e979ce2f7 -r 066e1b19ff4f mpn/generic/div_qr_1.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_1.c Sun Oct 20 16:34:09 2013 +0200
@@ -0,0 +1,116 @@
+/* mpn_div_qr_1 -- mpn by limb division.
+
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund
+
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: Add proper tuning */
+#ifndef DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD 3
+#endif
+#ifndef DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD 3
+#endif
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+/* Divides {up, n} by the single limb d. Writes the n-1 low quotient limbs at {qp,
+ * n-1}, and the high quotient limb at *qh. Returns the remainder. Requires n > 0 and d > 0 (asserted). */
+mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
+ mp_limb_t d)
+{
+ unsigned cnt;
+ mp_limb_t uh;
+
+ ASSERT (n > 0);
+ ASSERT (d > 0);
+
+ if (d & GMP_NUMB_HIGHBIT)
+ {
+ /* Normalized case: d already has its high bit set, so no shifting is done. */
+ mp_limb_t dinv, q;
+
+ uh = up[--n];
+
+ q = (uh >= d); /* high quotient limb is 0 or 1 */
+ *qh = q;
+ uh -= (-q) & d; /* -q is an all-ones mask when q == 1: subtract d without branching */
+
+ if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
+ {
+ cnt = 0;
+ plain: /* shared schoolbook loop; also entered via goto from the unnormalized branch */
+ while (n > 0)
+ {
+ mp_limb_t ul = up[--n];
+ udiv_qrnnd (qp[n], uh, uh, ul, d);
+ }
+ return uh >> cnt; /* undo the divisor shift; cnt is 0 on the paths that did not shift */
+ }
+ invert_limb (dinv, d);
+ return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
+ }
+ else
+ {
+ /* Unnormalized case: the high bit of d is clear. */
+ mp_limb_t dinv, ul;
+
+ if (! UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+ {
+ uh = up[--n];
+ udiv_qrnnd (*qh, uh, 0, uh, d);
+ cnt = 0;
+ goto plain;
+ }
+
+ count_leading_zeros (cnt, d);
+ d <<= cnt;
+
+#if HAVE_NATIVE_div_qr_1u_pi1
+ /* FIXME: Call loop doing on-the-fly normalization */
+#endif
+
+ /* Shift up front, using the qp area for the shifted copy. A bit messy,
+ since we have only n-1 limbs available there, so the high
+ limb is shifted manually. */
+ uh = up[--n];
+ ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
+ uh >>= (GMP_LIMB_BITS - cnt); /* NOTE(review): relies on cnt >= 1 here (high bit of d clear, d > 0) -- confirm */
+
+ if (UDIV_NEEDS_NORMALIZATION
+ && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+ {
+ udiv_qrnnd (*qh, uh, uh, ul, d);
+ up = qp; /* the plain loop now reads the shifted copy and overwrites it in place */
+ goto plain;
+ }
+ invert_limb (dinv, d);
+
+ udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
+ return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;
+ }
+}
diff -r e90e979ce2f7 -r 066e1b19ff4f mpn/generic/div_qr_1n_pi1.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_1n_pi1.c Sun Oct 20 16:34:09 2013 +0200
@@ -0,0 +1,266 @@
+/* mpn_div_qr_1n_pi1
+
+ Contributed to the GNU project by Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+#ifndef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 2
+#endif
+
+/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
+
+#if defined (__GNUC__)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %k2\n\t" \
+ "adc %4, %k1\n\t" \
+ "sbb %k0, %k0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %q2\n\t" \
+ "adc %4, %q1\n\t" \
+ "sbb %q0, %q0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addxcc %r3, %4, %1\n\t" \
+ "subx %%g0, %%g0, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
More information about the gmp-commit
mailing list