[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Oct 20 16:36:31 CEST 2013


details:   /var/hg/gmp/rev/177b28dd8ae7
changeset: 16057:177b28dd8ae7
user:      Niels Möller <nisse at lysator.liu.se>
date:      Sun Oct 20 08:00:13 2013 +0200
description:
New function mpn_div_qr_1.

details:   /var/hg/gmp/rev/e365e0526242
changeset: 16058:e365e0526242
user:      Niels Möller <nisse at lysator.liu.se>
date:      Sun Oct 20 08:15:42 2013 +0200
description:
speed support for mpn_div_qr_1.

details:   /var/hg/gmp/rev/066e1b19ff4f
changeset: 16059:066e1b19ff4f
user:      Niels Möller <nisse at lysator.liu.se>
date:      Sun Oct 20 16:34:09 2013 +0200
description:
Implemented tuning of mpn_div_qr_1.

diffstat:

 ChangeLog                   |   38 ++++++
 configure.ac                |    1 +
 gmp-h.in                    |    3 +
 gmp-impl.h                  |   15 ++
 mpn/generic/div_qr_1.c      |  116 +++++++++++++++++++
 mpn/generic/div_qr_1n_pi1.c |  266 ++++++++++++++++++++++++++++++++++++++++++++
 tests/mpn/t-div.c           |   10 +
 tune/Makefile.am            |    1 +
 tune/common.c               |   22 +++
 tune/div_qr_1_tune.c        |   34 +++++
 tune/div_qr_1n_pi1_1.c      |   28 ++++
 tune/div_qr_1n_pi1_2.c      |   28 ++++
 tune/speed.c                |    5 +
 tune/speed.h                |   72 +++++++++++
 tune/tuneup.c               |   65 ++++++++++-
 15 files changed, 700 insertions(+), 4 deletions(-)

diffs (truncated from 877 to 300 lines):

diff -r e90e979ce2f7 -r 066e1b19ff4f ChangeLog
--- a/ChangeLog	Thu Oct 17 17:19:19 2013 +0200
+++ b/ChangeLog	Sun Oct 20 16:34:09 2013 +0200
@@ -1,3 +1,41 @@
+2013-10-20  Niels Möller  <nisse at lysator.liu.se>
+
+	* tune/common.c (speed_mpn_div_qr_1): New function, replacing...
+	(speed_mpn_div_qr_1n, speed_mpn_div_qr_1u): ... deleted functions
+	(speed_mpn_div_qr_1n_pi1, speed_mpn_div_qr_1n_pi1_1)
+	(speed_mpn_div_qr_1n_pi1_2): New functions.
+	* gmp-impl.h [TUNE_PROGRAM_BUILD]: Declare div_qr_1-related tuning
+	variables.
+	* tune/tuneup.c (speed_mpn_div_qr_1_tune, tune_div_qr_1): New
+	functions.
+	(div_qr_1n_pi1_method, div_qr_1_norm_threshold)
+	(div_qr_1_unnorm_threshold): New globals.
+	* tune/speed.c (routine): Replaced mpn_div_qr_1n and mpn_div_qr_1u
+	by mpn_div_qr_1, requiring ".r" parameter. Added mpn_div_qr_1n_pi1
+	and variants.
+	* tune/speed.h (SPEED_ROUTINE_MPN_DIV_QR_1): Use the "r" parameter
+	as divisor.
+	* tune/div_qr_1n_pi1_2.c: New file.
+	* tune/div_qr_1n_pi1_1.c: New file.
+	* tune/div_qr_1_tune.c: New file.
+	* tune/Makefile.am (libspeed_la_SOURCES): Added div_qr_1n_pi1_1.c,
+	div_qr_1n_pi1_2.c, and div_qr_1_tune.c.
+
+	* tune/speed.c (routine): Added mpn_div_qr_1n and mpn_div_qr_1u.
+	* tune/speed.h (SPEED_ROUTINE_MPN_DIV_QR_1): New macro.
+	(speed_mpn_div_qr_1n, speed_mpn_div_qr_1u): Declare.
+	* tune/common.c (speed_mpn_div_qr_1n, speed_mpn_div_qr_1u): New
+	functions.
+
+	* gmp-impl.h (mpn_div_qr_1n_pi1): Declare function.
+	* gmp-h.in (mpn_div_qr_1): Declare function.
+	* configure.ac (gmp_mpn_functions): Added div_qr_1 and
+	div_qr_1n_pi1.
+	* mpn/generic/div_qr_1.c (mpn_div_qr_1): New file and function.
+	* mpn/generic/div_qr_1n_pi1.c (mpn_div_qr_1n_pi1): New file and
+	function.
+	* tests/mpn/t-div.c (main): Test mpn_div_qr_1.
+
 2013-10-17  Torbjorn Granlund  <tege at gmplib.org>
 
 	* configure.ac (alpha): Pass -mieee via gcc_cflags_maybe.
diff -r e90e979ce2f7 -r 066e1b19ff4f configure.ac
--- a/configure.ac	Thu Oct 17 17:19:19 2013 +0200
+++ b/configure.ac	Sun Oct 20 16:34:09 2013 +0200
@@ -2821,6 +2821,7 @@
   toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts	   \
   toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts	   \
   invertappr invert binvert mulmod_bnm1 sqrmod_bnm1			   \
+  div_qr_1 div_qr_1n_pi1						   \
   div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1					   \
   sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q				   \
   dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q				   \
diff -r e90e979ce2f7 -r 066e1b19ff4f gmp-h.in
--- a/gmp-h.in	Thu Oct 17 17:19:19 2013 +0200
+++ b/gmp-h.in	Sun Oct 20 16:34:09 2013 +0200
@@ -1491,6 +1491,9 @@
 #define mpn_divrem_2 __MPN(divrem_2)
 __GMP_DECLSPEC mp_limb_t mpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
 
+#define mpn_div_qr_1 __MPN(div_qr_1)
+__GMP_DECLSPEC mp_limb_t mpn_div_qr_1 (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
+
 #define mpn_div_qr_2 __MPN(div_qr_2)
 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
 
diff -r e90e979ce2f7 -r 066e1b19ff4f gmp-impl.h
--- a/gmp-impl.h	Thu Oct 17 17:19:19 2013 +0200
+++ b/gmp-impl.h	Sun Oct 20 16:34:09 2013 +0200
@@ -1416,6 +1416,9 @@
 #define   mpn_fft_next_size __MPN(fft_next_size)
 __GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
 
+#define   mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
 #define   mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
   __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
 
@@ -4904,6 +4907,18 @@
 #define GCDEXT_DC_THRESHOLD		gcdext_dc_threshold
 extern mp_size_t			gcdext_dc_threshold;
 
+#undef  DIV_QR_1N_PI1_METHOD
+#define DIV_QR_1N_PI1_METHOD		div_qr_1n_pi1_method
+extern int				div_qr_1n_pi1_method;
+
+#undef  DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD		div_qr_1_norm_threshold
+extern mp_size_t			div_qr_1_norm_threshold;
+
+#undef  DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD	div_qr_1_unnorm_threshold
+extern mp_size_t			div_qr_1_unnorm_threshold;
+
 #undef  DIVREM_1_NORM_THRESHOLD
 #define DIVREM_1_NORM_THRESHOLD		divrem_1_norm_threshold
 extern mp_size_t			divrem_1_norm_threshold;
diff -r e90e979ce2f7 -r 066e1b19ff4f mpn/generic/div_qr_1.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_1.c	Sun Oct 20 16:34:09 2013 +0200
@@ -0,0 +1,116 @@
+/* mpn_div_qr_1 -- mpn by limb division.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund
+
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003, 2013 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* FIXME: Add proper tuning */
+#ifndef DIV_QR_1_NORM_THRESHOLD
+#define DIV_QR_1_NORM_THRESHOLD 3
+#endif
+#ifndef DIV_QR_1_UNNORM_THRESHOLD
+#define DIV_QR_1_UNNORM_THRESHOLD 3
+#endif
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+/* Divides {up, n} by d. Writes the n-1 low quotient limbs at {qp,
+ * n-1}, and the high quotient limb at *qh. Returns the remainder. */
+mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
+	      mp_limb_t d)
+{
+  unsigned cnt;		/* left shift applied to d; remainder is shifted back by it */
+  mp_limb_t uh;		/* running high limb / partial remainder */
+
+  ASSERT (n > 0);
+  ASSERT (d > 0);
+
+  if (d & GMP_NUMB_HIGHBIT)
+    {
+      /* Normalized case: the high bit of d is already set, no shift needed. */
+      mp_limb_t dinv, q;
+
+      uh = up[--n];	/* n now counts the remaining low limbs */
+
+      q = (uh >= d);	/* high quotient limb is 0 or 1 */
+      *qh = q;
+      uh -= (-q) & d;	/* subtract d only when q == 1 (-q is then all ones) */
+
+      if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
+	{
+	  cnt = 0;
+	plain:	/* limb-by-limb udiv_qrnnd loop; also entered from both unnormalized paths */
+	  while (n > 0)
+	    {
+	      mp_limb_t ul = up[--n];
+	      udiv_qrnnd (qp[n], uh, uh, ul, d);
+	    }
+	  return uh >> cnt;	/* cnt is 0 unless d was shifted before jumping here */
+	}
+      invert_limb (dinv, d);
+      return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
+    }
+  else
+    {
+      /* Unnormalized case: the high bit of d is clear. */
+      mp_limb_t dinv, ul;
+
+      if (! UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+	{
+	  uh = up[--n];
+	  udiv_qrnnd (*qh, uh, 0, uh, d);	/* divide the top limb on its own */
+	  cnt = 0;	/* nothing was shifted on this path */
+	  goto plain;
+	}
+
+      count_leading_zeros (cnt, d);
+      d <<= cnt;	/* d is now normalized; dividend must be shifted to match */
+
+#if HAVE_NATIVE_div_qr_1u_pi1
+      /* FIXME: Call loop doing on-the-fly normalization */
+#endif
+
+      /* Shift up front, use qp area for shifted copy. A bit messy,
+	 since we have only n-1 limbs available, and shift the high
+	 limb manually. NOTE(review): if the caller passed n == 1, n is 0 at the mpn_lshift call below; confirm a zero size is allowed there. */
+      uh = up[--n];
+      ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
+      uh >>= (GMP_LIMB_BITS - cnt);	/* bits shifted out of the top limb */
+
+      if (UDIV_NEEDS_NORMALIZATION
+	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
+	{
+	  udiv_qrnnd (*qh, uh, uh, ul, d);
+	  up = qp;	/* plain loop must read the shifted copy stored in qp */
+	  goto plain;
+	}
+      invert_limb (dinv, d);
+
+      udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
+      return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;	/* shift remainder back down */
+    }
+}
diff -r e90e979ce2f7 -r 066e1b19ff4f mpn/generic/div_qr_1n_pi1.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_1n_pi1.c	Sun Oct 20 16:34:09 2013 +0200
@@ -0,0 +1,266 @@
+/* mpn_div_qr_1n_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nail bits not supported
+#endif
+
+#ifndef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 2
+#endif
+
+/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
+
+#if defined (__GNUC__)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %k2\n\t"					\
+	     "adc	%4, %k1\n\t"					\
+	     "sbb	%k0, %k0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
+	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ (  "add	%6, %q2\n\t"					\
+	     "adc	%4, %q1\n\t"					\
+	     "sbb	%q0, %q0"					\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
+	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
+	     "addxcc	%r3, %4, %1\n\t"				\
+	     "subx	%%g0, %%g0, %0"					\
+	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
+	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
+	 __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\


More information about the gmp-commit mailing list