Tuning of hgcd2 div2

Niels Möller nisse at lysator.liu.se
Fri Jan 24 05:05:15 UTC 2020


Below is an attempt to add automatic tuning of HGCD2_DIV2_METHOD.

On my laptop, method 1 (using div1 in the common case) is 9% faster than
the current default method 2 (bitwise division).

I wonder if it's really useful to support all variants in speed, or if
that could be deleted, to simplify a bit?

Regards,
/Niels

diff -r a615debc4e93 tune/Makefile.am
--- a/tune/Makefile.am	Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/Makefile.am	Fri Jan 24 05:54:15 2020 +0100
@@ -58,7 +58,7 @@
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
   hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c	\
   jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
-  hgcd2-1.c hgcd2-2.c hgcd2-3.c hgcd2-4.c hgcd2-5.c			\
+  $(MPN_HGCD2_SRCS) \
   mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
   set_strb.c set_strs.c set_strp.c time.c
@@ -151,12 +151,26 @@
   mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c		\
   nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c	\
   toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+MPN_HGCD2_SRCS = \
+	hgcd2_1_1.c hgcd2_2_1.c hgcd2_3_1.c hgcd2_4_1.c hgcd2_5_1.c \
+	hgcd2_1_2.c hgcd2_2_2.c hgcd2_3_2.c hgcd2_4_2.c hgcd2_5_2.c
 
 $(TUNE_MPN_SRCS_BASIC):
 	for i in $(TUNE_MPN_SRCS_BASIC); do \
 	  echo "#define TUNE_PROGRAM_BUILD 1" >$$i; \
 	  echo "#include \"mpn/generic/$$i\"" >>$$i; \
 	done
+$(MPN_HGCD2_SRCS):
+	for i in 1 2 3 4 5; do for j in 1 2 ; do \
+	echo '#include "gmp-impl.h"' > hgcd2_$${i}_$${j}.c; \
+	echo "#undef HGCD2_DIV1_METHOD" >> hgcd2_$${i}_$${j}.c; \
+	echo "#undef HGCD2_DIV2_METHOD" >> hgcd2_$${i}_$${j}.c; \
+	echo "#define HGCD2_DIV1_METHOD $$i" >> hgcd2_$${i}_$${j}.c; \
+	echo "#define HGCD2_DIV2_METHOD $$j" >> hgcd2_$${i}_$${j}.c; \
+	echo "#define __gmpn_hgcd2 mpn_hgcd2_$${i}_$${j}" >> hgcd2_$${i}_$${j}.c; \
+	echo "#define __gmpn_hgcd_mul_matrix1_vector unused_$${i}_$${j}" >> hgcd2_$${i}_$${j}.c; \
+	echo '#include "mpn/generic/hgcd2.c"' >> hgcd2_$${i}_$${j}.c; \
+	done; done
 
 divrem_1.c:
 	echo "#define TUNE_PROGRAM_BUILD 1"                >divrem_1.c
diff -r a615debc4e93 tune/common.c
--- a/tune/common.c	Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/common.c	Fri Jan 24 05:54:15 2020 +0100
@@ -1639,29 +1639,54 @@
   SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2);
 }
 double
-speed_mpn_hgcd2_1 (struct speed_params *s)
+speed_mpn_hgcd2_1_1 (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1);
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1_1);
 }
 double
-speed_mpn_hgcd2_2 (struct speed_params *s)
+speed_mpn_hgcd2_2_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2_1);
+}
+double
+speed_mpn_hgcd2_3_1 (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2);
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3_1);
+}
+double
+speed_mpn_hgcd2_4_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4_1);
 }
 double
-speed_mpn_hgcd2_3 (struct speed_params *s)
+speed_mpn_hgcd2_5_1 (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3);
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5_1);
+}
+double
+speed_mpn_hgcd2_1_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1_2);
 }
 double
-speed_mpn_hgcd2_4 (struct speed_params *s)
+speed_mpn_hgcd2_2_2 (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4);
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2_2);
 }
 double
-speed_mpn_hgcd2_5 (struct speed_params *s)
+speed_mpn_hgcd2_3_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3_2);
+}
+double
+speed_mpn_hgcd2_4_2 (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5);
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4_2);
+}
+double
+speed_mpn_hgcd2_5_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5_2);
 }
 
 double
diff -r a615debc4e93 tune/hgcd2-1.c
--- a/tune/hgcd2-1.c	Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 1.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 1
-#define __gmpn_hgcd2 mpn_hgcd2_1
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_1
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-2.c
--- a/tune/hgcd2-2.c	Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 2.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 2
-#define __gmpn_hgcd2 mpn_hgcd2_2
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_2
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-3.c
--- a/tune/hgcd2-3.c	Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 3.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 3
-#define __gmpn_hgcd2 mpn_hgcd2_3
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-4.c
--- a/tune/hgcd2-4.c	Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 4.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 4
-#define __gmpn_hgcd2 mpn_hgcd2_4
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_4
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-5.c
--- a/tune/hgcd2-5.c	Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 5.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 5
-#define __gmpn_hgcd2 mpn_hgcd2_5
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_5
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/speed.c
--- a/tune/speed.c	Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/speed.c	Fri Jan 24 05:54:15 2020 +0100
@@ -286,11 +286,16 @@
   { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },
 
   { "mpn_hgcd2",         speed_mpn_hgcd2, FLAG_NODATA },
-  { "mpn_hgcd2_1",       speed_mpn_hgcd2_1, FLAG_NODATA },
-  { "mpn_hgcd2_2",       speed_mpn_hgcd2_2, FLAG_NODATA },
-  { "mpn_hgcd2_3",       speed_mpn_hgcd2_3, FLAG_NODATA },
-  { "mpn_hgcd2_4",       speed_mpn_hgcd2_4, FLAG_NODATA },
-  { "mpn_hgcd2_5",       speed_mpn_hgcd2_5, FLAG_NODATA },
+  { "mpn_hgcd2_1_1",     speed_mpn_hgcd2_1_1, FLAG_NODATA },
+  { "mpn_hgcd2_2_1",     speed_mpn_hgcd2_2_1, FLAG_NODATA },
+  { "mpn_hgcd2_3_1",     speed_mpn_hgcd2_3_1, FLAG_NODATA },
+  { "mpn_hgcd2_4_1",     speed_mpn_hgcd2_4_1, FLAG_NODATA },
+  { "mpn_hgcd2_5_1",     speed_mpn_hgcd2_5_1, FLAG_NODATA },
+  { "mpn_hgcd2_1_2",     speed_mpn_hgcd2_1_2, FLAG_NODATA },
+  { "mpn_hgcd2_2_2",     speed_mpn_hgcd2_2_2, FLAG_NODATA },
+  { "mpn_hgcd2_3_2",     speed_mpn_hgcd2_3_2, FLAG_NODATA },
+  { "mpn_hgcd2_4_2",     speed_mpn_hgcd2_4_2, FLAG_NODATA },
+  { "mpn_hgcd2_5_2",     speed_mpn_hgcd2_5_2, FLAG_NODATA },
   { "mpn_hgcd",          speed_mpn_hgcd             },
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
diff -r a615debc4e93 tune/speed.h
--- a/tune/speed.h	Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/speed.h	Fri Jan 24 05:54:15 2020 +0100
@@ -216,11 +216,16 @@
 double speed_mpn_fib2_ui (struct speed_params *);
 double speed_mpn_matrix22_mul (struct speed_params *);
 double speed_mpn_hgcd2 (struct speed_params *);
-double speed_mpn_hgcd2_1 (struct speed_params *);
-double speed_mpn_hgcd2_2 (struct speed_params *);
-double speed_mpn_hgcd2_3 (struct speed_params *);
-double speed_mpn_hgcd2_4 (struct speed_params *);
-double speed_mpn_hgcd2_5 (struct speed_params *);
+double speed_mpn_hgcd2_1_1 (struct speed_params *);
+double speed_mpn_hgcd2_2_1 (struct speed_params *);
+double speed_mpn_hgcd2_3_1 (struct speed_params *);
+double speed_mpn_hgcd2_4_1 (struct speed_params *);
+double speed_mpn_hgcd2_5_1 (struct speed_params *);
+double speed_mpn_hgcd2_1_2 (struct speed_params *);
+double speed_mpn_hgcd2_2_2 (struct speed_params *);
+double speed_mpn_hgcd2_3_2 (struct speed_params *);
+double speed_mpn_hgcd2_4_2 (struct speed_params *);
+double speed_mpn_hgcd2_5_2 (struct speed_params *);
 double speed_mpn_hgcd (struct speed_params *);
 double speed_mpn_hgcd_lehmer (struct speed_params *);
 double speed_mpn_hgcd_appr (struct speed_params *);
@@ -490,11 +495,16 @@
 int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
 int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
 
-int mpn_hgcd2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_3 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_4 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_5 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_1_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_3_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_4_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_5_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_1_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_3_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_4_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_5_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
 
 mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
 mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
diff -r a615debc4e93 tune/tuneup.c
--- a/tune/tuneup.c	Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/tuneup.c	Fri Jan 24 05:54:15 2020 +0100
@@ -1962,25 +1962,57 @@
 tune_hgcd2 (void)
 {
   static struct param_t  param;
-  hgcd2_func_t *f[5] =
-    { mpn_hgcd2_1,
-      mpn_hgcd2_2,
-      mpn_hgcd2_3,
-      mpn_hgcd2_4,
-      mpn_hgcd2_5 };
-  speed_function_t speed_f[5] =
-    { speed_mpn_hgcd2_1,
-      speed_mpn_hgcd2_2,
-      speed_mpn_hgcd2_3,
-      speed_mpn_hgcd2_4,
-      speed_mpn_hgcd2_5 };
-  int best;
-
+  hgcd2_func_t *f[2][5] =
+    {
+      {
+        mpn_hgcd2_1_1,
+	mpn_hgcd2_2_1,
+	mpn_hgcd2_3_1,
+	mpn_hgcd2_4_1,
+	mpn_hgcd2_5_1,
+      },
+      {
+        mpn_hgcd2_1_2,
+	mpn_hgcd2_2_2,
+	mpn_hgcd2_3_2,
+	mpn_hgcd2_4_2,
+	mpn_hgcd2_5_2,
+      },
+    };
+  speed_function_t speed_f[2][5] =
+    {
+      {
+	speed_mpn_hgcd2_1_1,
+	speed_mpn_hgcd2_2_1,
+	speed_mpn_hgcd2_3_1,
+	speed_mpn_hgcd2_4_1,
+	speed_mpn_hgcd2_5_1
+      },
+      {
+	speed_mpn_hgcd2_1_2,
+	speed_mpn_hgcd2_2_2,
+	speed_mpn_hgcd2_3_2,
+	speed_mpn_hgcd2_4_2,
+	speed_mpn_hgcd2_5_2
+      }
+    };
+  int best1, best2;
+  speed_function_t speed_method2[2];
+
+  /* Select div1 method, always using div2 method 2. */
   s.size = 1;
-  best = one_method (5, speed_f, "mpn_hgcd2", "HGCD2_DIV1_METHOD", &param);
+  best1 = one_method (5, speed_f[1], "mpn_hgcd2 div1", "HGCD2_DIV1_METHOD",
+		      &param);
+
+  /* Select div2 method */
+  speed_method2[0] = speed_f[0][best1];
+  speed_method2[1] = speed_f[1][best1];
+
+  best2 = one_method (2, speed_method2, "mpn_hgcd2 div2", "HGCD2_DIV2_METHOD",
+		      &param);
 
   /* Use selected function when tuning hgcd and gcd */
-  hgcd2_func = f[best];
+  hgcd2_func = f[best2][best1];
 }
 
 void

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.



More information about the gmp-devel mailing list