Tuning of hgcd2 div2
Niels Möller
nisse at lysator.liu.se
Fri Jan 24 05:05:15 UTC 2020
Below is an attempt to add automatic tuning of HGCD2_DIV2_METHOD.
On my laptop, method 1 (using div1 in the common case) is 9% faster than
the current default method 2 (bitwise division).
I wonder if it's really useful to support all variants in speed, or if
that could be deleted, to simplify a bit?
Regards,
/Niels
diff -r a615debc4e93 tune/Makefile.am
--- a/tune/Makefile.am Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/Makefile.am Fri Jan 24 05:54:15 2020 +0100
@@ -58,7 +58,7 @@
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
- hgcd2-1.c hgcd2-2.c hgcd2-3.c hgcd2-4.c hgcd2-5.c \
+ $(MPN_HGCD2_SRCS) \
mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
@@ -151,12 +151,26 @@
mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+MPN_HGCD2_SRCS = \
+ hgcd2_1_1.c hgcd2_2_1.c hgcd2_3_1.c hgcd2_4_1.c hgcd2_5_1.c \
+ hgcd2_1_2.c hgcd2_2_2.c hgcd2_3_2.c hgcd2_4_2.c hgcd2_5_2.c
$(TUNE_MPN_SRCS_BASIC):
for i in $(TUNE_MPN_SRCS_BASIC); do \
echo "#define TUNE_PROGRAM_BUILD 1" >$$i; \
echo "#include \"mpn/generic/$$i\"" >>$$i; \
done
+$(MPN_HGCD2_SRCS):
+ for i in 1 2 3 4 5; do for j in 1 2 ; do \
+ echo '#include "gmp-impl.h"' > hgcd2_$${i}_$${j}.c; \
+ echo "#undef HGCD2_DIV1_METHOD" >> hgcd2_$${i}_$${j}.c; \
+ echo "#undef HGCD2_DIV2_METHOD" >> hgcd2_$${i}_$${j}.c; \
+ echo "#define HGCD2_DIV1_METHOD $$i" >> hgcd2_$${i}_$${j}.c; \
+ echo "#define HGCD2_DIV2_METHOD $$j" >> hgcd2_$${i}_$${j}.c; \
+ echo "#define __gmpn_hgcd2 mpn_hgcd2_$${i}_$${j}" >> hgcd2_$${i}_$${j}.c; \
+ echo "#define __gmpn_hgcd_mul_matrix1_vector unused_$${i}_$${j}" >> hgcd2_$${i}_$${j}.c; \
+ echo '#include "mpn/generic/hgcd2.c"' >> hgcd2_$${i}_$${j}.c; \
+ done; done
divrem_1.c:
echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c
diff -r a615debc4e93 tune/common.c
--- a/tune/common.c Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/common.c Fri Jan 24 05:54:15 2020 +0100
@@ -1639,29 +1639,54 @@
SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2);
}
double
-speed_mpn_hgcd2_1 (struct speed_params *s)
+speed_mpn_hgcd2_1_1 (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1);
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1_1);
}
double
-speed_mpn_hgcd2_2 (struct speed_params *s)
+speed_mpn_hgcd2_2_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2_1);
+}
+double
+speed_mpn_hgcd2_3_1 (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2);
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3_1);
+}
+double
+speed_mpn_hgcd2_4_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4_1);
}
double
-speed_mpn_hgcd2_3 (struct speed_params *s)
+speed_mpn_hgcd2_5_1 (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3);
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5_1);
+}
+double
+speed_mpn_hgcd2_1_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1_2);
}
double
-speed_mpn_hgcd2_4 (struct speed_params *s)
+speed_mpn_hgcd2_2_2 (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4);
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2_2);
}
double
-speed_mpn_hgcd2_5 (struct speed_params *s)
+speed_mpn_hgcd2_3_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3_2);
+}
+double
+speed_mpn_hgcd2_4_2 (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5);
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4_2);
+}
+double
+speed_mpn_hgcd2_5_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5_2);
}
double
diff -r a615debc4e93 tune/hgcd2-1.c
--- a/tune/hgcd2-1.c Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 1.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 1
-#define __gmpn_hgcd2 mpn_hgcd2_1
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_1
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-2.c
--- a/tune/hgcd2-2.c Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 2.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 2
-#define __gmpn_hgcd2 mpn_hgcd2_2
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_2
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-3.c
--- a/tune/hgcd2-3.c Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 3.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 3
-#define __gmpn_hgcd2 mpn_hgcd2_3
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-4.c
--- a/tune/hgcd2-4.c Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 4.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 4
-#define __gmpn_hgcd2 mpn_hgcd2_4
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_4
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/hgcd2-5.c
--- a/tune/hgcd2-5.c Fri Jan 24 04:38:57 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/* mpn/generic/hgcd2.c method 5.
-
-Copyright 2019 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp-impl.h"
-
-#undef HGCD2_DIV1_METHOD
-#define HGCD2_DIV1_METHOD 5
-#define __gmpn_hgcd2 mpn_hgcd2_5
-/* Not used, but renamed to not get duplicate definitions */
-#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_5
-
-#include "mpn/generic/hgcd2.c"
diff -r a615debc4e93 tune/speed.c
--- a/tune/speed.c Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/speed.c Fri Jan 24 05:54:15 2020 +0100
@@ -286,11 +286,16 @@
{ "mpn_matrix22_mul", speed_mpn_matrix22_mul },
{ "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA },
- { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA },
- { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA },
- { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA },
- { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA },
- { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA },
+ { "mpn_hgcd2_1_1", speed_mpn_hgcd2_1_1, FLAG_NODATA },
+ { "mpn_hgcd2_2_1", speed_mpn_hgcd2_2_1, FLAG_NODATA },
+ { "mpn_hgcd2_3_1", speed_mpn_hgcd2_3_1, FLAG_NODATA },
+ { "mpn_hgcd2_4_1", speed_mpn_hgcd2_4_1, FLAG_NODATA },
+ { "mpn_hgcd2_5_1", speed_mpn_hgcd2_5_1, FLAG_NODATA },
+ { "mpn_hgcd2_1_2", speed_mpn_hgcd2_1_2, FLAG_NODATA },
+ { "mpn_hgcd2_2_2", speed_mpn_hgcd2_2_2, FLAG_NODATA },
+ { "mpn_hgcd2_3_2", speed_mpn_hgcd2_3_2, FLAG_NODATA },
+ { "mpn_hgcd2_4_2", speed_mpn_hgcd2_4_2, FLAG_NODATA },
+ { "mpn_hgcd2_5_2", speed_mpn_hgcd2_5_2, FLAG_NODATA },
{ "mpn_hgcd", speed_mpn_hgcd },
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
{ "mpn_hgcd_appr", speed_mpn_hgcd_appr },
diff -r a615debc4e93 tune/speed.h
--- a/tune/speed.h Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/speed.h Fri Jan 24 05:54:15 2020 +0100
@@ -216,11 +216,16 @@
double speed_mpn_fib2_ui (struct speed_params *);
double speed_mpn_matrix22_mul (struct speed_params *);
double speed_mpn_hgcd2 (struct speed_params *);
-double speed_mpn_hgcd2_1 (struct speed_params *);
-double speed_mpn_hgcd2_2 (struct speed_params *);
-double speed_mpn_hgcd2_3 (struct speed_params *);
-double speed_mpn_hgcd2_4 (struct speed_params *);
-double speed_mpn_hgcd2_5 (struct speed_params *);
+double speed_mpn_hgcd2_1_1 (struct speed_params *);
+double speed_mpn_hgcd2_2_1 (struct speed_params *);
+double speed_mpn_hgcd2_3_1 (struct speed_params *);
+double speed_mpn_hgcd2_4_1 (struct speed_params *);
+double speed_mpn_hgcd2_5_1 (struct speed_params *);
+double speed_mpn_hgcd2_1_2 (struct speed_params *);
+double speed_mpn_hgcd2_2_2 (struct speed_params *);
+double speed_mpn_hgcd2_3_2 (struct speed_params *);
+double speed_mpn_hgcd2_4_2 (struct speed_params *);
+double speed_mpn_hgcd2_5_2 (struct speed_params *);
double speed_mpn_hgcd (struct speed_params *);
double speed_mpn_hgcd_lehmer (struct speed_params *);
double speed_mpn_hgcd_appr (struct speed_params *);
@@ -490,11 +495,16 @@
int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
-int mpn_hgcd2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_3 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_4 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
-int mpn_hgcd2_5 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_1_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_3_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_4_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_5_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_1_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_3_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_4_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_5_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
diff -r a615debc4e93 tune/tuneup.c
--- a/tune/tuneup.c Fri Jan 24 04:38:57 2020 +0100
+++ b/tune/tuneup.c Fri Jan 24 05:54:15 2020 +0100
@@ -1962,25 +1962,57 @@
tune_hgcd2 (void)
{
static struct param_t param;
- hgcd2_func_t *f[5] =
- { mpn_hgcd2_1,
- mpn_hgcd2_2,
- mpn_hgcd2_3,
- mpn_hgcd2_4,
- mpn_hgcd2_5 };
- speed_function_t speed_f[5] =
- { speed_mpn_hgcd2_1,
- speed_mpn_hgcd2_2,
- speed_mpn_hgcd2_3,
- speed_mpn_hgcd2_4,
- speed_mpn_hgcd2_5 };
- int best;
-
+ hgcd2_func_t *f[2][5] =
+ {
+ {
+ mpn_hgcd2_1_1,
+ mpn_hgcd2_2_1,
+ mpn_hgcd2_3_1,
+ mpn_hgcd2_4_1,
+ mpn_hgcd2_5_1,
+ },
+ {
+ mpn_hgcd2_1_2,
+ mpn_hgcd2_2_2,
+ mpn_hgcd2_3_2,
+ mpn_hgcd2_4_2,
+ mpn_hgcd2_5_2,
+ },
+ };
+ speed_function_t speed_f[2][5] =
+ {
+ {
+ speed_mpn_hgcd2_1_1,
+ speed_mpn_hgcd2_2_1,
+ speed_mpn_hgcd2_3_1,
+ speed_mpn_hgcd2_4_1,
+ speed_mpn_hgcd2_5_1
+ },
+ {
+ speed_mpn_hgcd2_1_2,
+ speed_mpn_hgcd2_2_2,
+ speed_mpn_hgcd2_3_2,
+ speed_mpn_hgcd2_4_2,
+ speed_mpn_hgcd2_5_2
+ }
+ };
+ int best1, best2;
+ speed_function_t speed_method2[2];
+
+ /* Select div1 method, always using div2 method 2. */
s.size = 1;
- best = one_method (5, speed_f, "mpn_hgcd2", "HGCD2_DIV1_METHOD", ¶m);
+ best1 = one_method (5, speed_f[1], "mpn_hgcd2 div1", "HGCD2_DIV1_METHOD",
+ ¶m);
+
+ /* Select div2 method */
+ speed_method2[0] = speed_f[0][best1];
+ speed_method2[1] = speed_f[1][best1];
+
+ best2 = one_method (2, speed_method2, "mpn_hgcd2 div2", "HGCD2_DIV2_METHOD",
+ ¶m);
/* Use selected function when tuning hgcd and gcd */
- hgcd2_func = f[best];
+ hgcd2_func = f[best2][best1];
}
void
--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list