[Gmp-commit] /var/hg/gmp: For hgcd2, add a div1 function handling q <= 7 spec...

mercurial at gmplib.org mercurial at gmplib.org
Thu Sep 5 19:10:29 UTC 2019


details:   /var/hg/gmp/rev/beaa1373af16
changeset: 17865:beaa1373af16
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Thu Sep 05 21:10:05 2019 +0200
description:
For hgcd2, add a div1 function handling q <= 7 specially.

* mpn/generic/hgcd2.c (div1): Return both r and q as a
mp_double_limb_t, replacing the DIV1 macro.
(div1) [HGCD2_METHOD == 3]: New implementation handling q <= 7
specially and without branches. Based on Torbj?rn's mail to the
gmp-devel list.
* tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add
corresponding speed support.
* tune/hgcd2-3.c: New file.
* tune/tuneup.c (print_define_with_speedup): New function, to
output a comment with speedup compared to next-best method.
(tune_hgcd2): Update tuning.

diffstat:

 ChangeLog           |  14 ++++++++
 mpn/generic/hgcd2.c |  91 ++++++++++++++++++++++++++++++++++++++++------------
 tune/Makefile.am    |   2 +-
 tune/common.c       |   5 ++
 tune/hgcd2-3.c      |  39 ++++++++++++++++++++++
 tune/speed.c        |   1 +
 tune/speed.h        |   3 +
 tune/tuneup.c       |  50 +++++++++++++++++++++++-----
 8 files changed, 172 insertions(+), 33 deletions(-)

diffs (truncated from 337 to 300 lines):

diff -r e1d151705ddf -r beaa1373af16 ChangeLog
--- a/ChangeLog	Wed Sep 04 21:46:41 2019 +0200
+++ b/ChangeLog	Thu Sep 05 21:10:05 2019 +0200
@@ -1,3 +1,17 @@
+2019-09-05  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/hgcd2.c (div1): Return both r and q as a
+	mp_double_limb_t, replacing the DIV1 macro.
+	(div1) [HGCD2_METHOD == 3]: New implementation handling q <= 7
+	specially and without branches. Based on Torbjörn's mail to the
+	gmp-devel list.
+	* tune/speed.c, tune/speed.h, tune/common.c, tune/Makefile.am: Add
+	corresponding speed support.
+	* tune/hgcd2-3.c: New file.
+	* tune/tuneup.c (print_define_with_speedup): New function, to
+	output a comment with speedup compared to next-best method.
+	(tune_hgcd2): Update tuning.
+
 2019-09-04  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/hgcd2.c (HGCD2_METHOD): New parameter.
diff -r e1d151705ddf -r beaa1373af16 mpn/generic/hgcd2.c
--- a/mpn/generic/hgcd2.c	Wed Sep 04 21:46:41 2019 +0200
+++ b/mpn/generic/hgcd2.c	Thu Sep 05 21:10:05 2019 +0200
@@ -36,32 +36,40 @@
 #include "longlong.h"
 
 #ifndef HGCD2_METHOD
-#define HGCD2_METHOD 2
+#define HGCD2_METHOD 3
 #endif
 
 #if GMP_NAIL_BITS != 0
 #error Nails not implemented
 #endif
 
+/* Single-limb division optimized for small quotients. Returned value
+   holds d0 = r, d1 = q */
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0);
+
 #if HGCD2_METHOD == 1
 
-#define DIV1(q, r, a, b) do { \
-    (q) = (a)/(b);	      \
-    (r) = (a) - (q)*(b);      \
-  } while (0)
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  res.d1 = n0 / d0;
+  res.d0 = n0 - q * d0;
+
+  return res;
+}
 
 #elif HGCD2_METHOD == 2
 
 /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
    the remainder. */
 
-/* Single-limb division optimized for small quotients. */
-static inline mp_limb_t
-div1 (mp_ptr rp,
-      mp_limb_t n0,
-      mp_limb_t d0)
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
 {
-  mp_limb_t q = 0;
+  mp_double_limb_t res;
+  mp_limb_t q;
 
   if ((mp_limb_signed_t) n0 < 0)
     {
@@ -105,14 +113,49 @@
 	  cnt--;
 	}
     }
-  *rp = n0;
-  return q;
+  res.d0 = n0;
+  res.d1 = q;
+  return res;
 }
-#define DIV1(q, r, a, b) do {			\
-    mp_limb_t __div1_r;				\
-    (q) = div1 (&__div1_r, a, b);		\
-    (r) = __div1_r;				\
-  } while (0)
+
+#elif HGCD2_METHOD == 3
+
+static inline mp_double_limb_t
+div1 (mp_limb_t n0, mp_limb_t d0)
+{
+  mp_double_limb_t res;
+  mp_limb_t q;
+  if (UNLIKELY ((d0 >> (GMP_LIMB_BITS - 3)) != 0)
+      || UNLIKELY (n0 >= (d0 << 3)))
+    {
+      res.d1 = n0 / d0;
+      res.d0 = n0 - res.d1 * d0;
+    }
+  else
+    {
+      mp_limb_t q, mask;
+
+      d0 <<= 2;
+
+      mask = -(mp_limb_t) (n0 >= d0);
+      q = 4 & mask;
+      n0 -= d0 & mask;
+
+      d0 >>= 1;
+      mask = -(mp_limb_t) (n0 >= d0);
+      q += 2 & mask;
+      n0 -= d0 & mask;
+
+      d0 >>= 1;
+      mask = -(mp_limb_t) (n0 >= d0);
+      q += 1 & mask;
+      n0 -= d0 & mask;
+
+      res.d0 = n0;
+      res.d1 = q;
+    }
+  return res;
+}
 #else
 #error Unknown HGCD2_METHOD
 #endif
@@ -374,8 +417,10 @@
 	}
       else
 	{
-	  mp_limb_t q;
-	  DIV1 (q, ah, ah, bh);
+	  mp_double_limb_t rq = div1 (ah, bh);
+	  mp_limb_t q = rq.d1;
+	  ah = rq.d0;
+
 	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
 	    {
 	      /* A is too small, but q is correct. */
@@ -402,8 +447,10 @@
 	}
       else
 	{
-	  mp_limb_t q;
-	  DIV1 (q, bh, bh, ah);
+	  mp_double_limb_t rq = div1 (bh, ah);
+	  mp_limb_t q = rq.d1;
+	  bh = rq.d0;
+
 	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
 	    {
 	      /* B is too small, but q is correct. */
diff -r e1d151705ddf -r beaa1373af16 tune/Makefile.am
--- a/tune/Makefile.am	Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/Makefile.am	Thu Sep 05 21:10:05 2019 +0200
@@ -58,7 +58,7 @@
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
   hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c	\
   jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
-  hgcd2-1.c hgcd2-2.c							\
+  hgcd2-1.c hgcd2-2.c hgcd2-3.c						\
   mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
   set_strb.c set_strs.c set_strp.c time.c
diff -r e1d151705ddf -r beaa1373af16 tune/common.c
--- a/tune/common.c	Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/common.c	Thu Sep 05 21:10:05 2019 +0200
@@ -1648,6 +1648,11 @@
 {
   SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2);
 }
+double
+speed_mpn_hgcd2_3 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3);
+}
 
 double
 speed_mpn_hgcd (struct speed_params *s)
diff -r e1d151705ddf -r beaa1373af16 tune/hgcd2-3.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd2-3.c	Thu Sep 05 21:10:05 2019 +0200
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 3.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_METHOD
+#define HGCD2_METHOD 3
+#define __gmpn_hgcd2 mpn_hgcd2_3
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3
+
+#include "mpn/generic/hgcd2.c"
diff -r e1d151705ddf -r beaa1373af16 tune/speed.c
--- a/tune/speed.c	Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/speed.c	Thu Sep 05 21:10:05 2019 +0200
@@ -288,6 +288,7 @@
   { "mpn_hgcd2",         speed_mpn_hgcd2, FLAG_NODATA },
   { "mpn_hgcd2_1",       speed_mpn_hgcd2_1, FLAG_NODATA },
   { "mpn_hgcd2_2",       speed_mpn_hgcd2_2, FLAG_NODATA },
+  { "mpn_hgcd2_3",       speed_mpn_hgcd2_3, FLAG_NODATA },
   { "mpn_hgcd",          speed_mpn_hgcd             },
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
diff -r e1d151705ddf -r beaa1373af16 tune/speed.h
--- a/tune/speed.h	Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/speed.h	Thu Sep 05 21:10:05 2019 +0200
@@ -217,6 +217,7 @@
 double speed_mpn_hgcd2 (struct speed_params *);
 double speed_mpn_hgcd2_1 (struct speed_params *);
 double speed_mpn_hgcd2_2 (struct speed_params *);
+double speed_mpn_hgcd2_3 (struct speed_params *);
 double speed_mpn_hgcd (struct speed_params *);
 double speed_mpn_hgcd_lehmer (struct speed_params *);
 double speed_mpn_hgcd_appr (struct speed_params *);
@@ -487,6 +488,8 @@
 		 struct hgcd_matrix1 *M);
 int mpn_hgcd2_2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
 		 struct hgcd_matrix1 *M);
+int mpn_hgcd2_3 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+		 struct hgcd_matrix1 *M);
 
 mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
 mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
diff -r e1d151705ddf -r beaa1373af16 tune/tuneup.c
--- a/tune/tuneup.c	Wed Sep 04 21:46:41 2019 +0200
+++ b/tune/tuneup.c	Thu Sep 05 21:10:05 2019 +0200
@@ -518,6 +518,15 @@
   print_define_end_remark (name, value, remark);
 }
 
+void
+print_define_with_margin (const char *name, mp_size_t value,
+			  mp_size_t runner_up, double speedup)
+{
+  char buf[100];
+  snprintf (buf, sizeof(buf), "%.2f%% faster than %ld",
+	    100.0 * (speedup - 1), runner_up);
+  print_define_remark (name, value, buf);
+}
 
 void
 one (mp_size_t *threshold, struct param_t *param)
@@ -1902,26 +1911,47 @@
 tune_hgcd2 (void)
 {
   static struct param_t  param;
-  double   t1, t2;
+  double   t[3+1];
   int      method;
+  int      runner_up_method;
+  double   runner_up_ratio;
 
   s.size = 1;
-  t1 = tuneup_measure (speed_mpn_hgcd2_1, &param, &s);
+  t[1] = tuneup_measure (speed_mpn_hgcd2_1, &param, &s);
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t[1]);
+
+  t[2] = tuneup_measure (speed_mpn_hgcd2_2, &param, &s);
   if (option_trace >= 1)
-    printf ("size=%ld, mpn_hgcd2_1 %.9f\n", (long) s.size, t1);
-
-  t2 = tuneup_measure (speed_mpn_hgcd2_2, &param, &s);


More information about the gmp-commit mailing list