[Gmp-commit] /home/hgfiles/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Jan 8 16:03:09 CET 2011
details: /home/hgfiles/gmp/rev/2b9b4b02d612
changeset: 13734:2b9b4b02d612
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 08 14:11:23 2011 +0100
description:
(gmp_mpn_functions_optional): Add mul_5 and mul_6.
details: /home/hgfiles/gmp/rev/67495936414a
changeset: 13735:67495936414a
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 08 14:28:22 2011 +0100
description:
Declare more addmul_N and mul_N primitives.
details: /home/hgfiles/gmp/rev/dd03aab308ac
changeset: 13736:dd03aab308ac
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 08 14:46:48 2011 +0100
description:
(refmpn_mul_5, refmpn_mul_6): New functions.
details: /home/hgfiles/gmp/rev/871742722134
changeset: 13737:871742722134
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 08 15:23:23 2011 +0100
description:
Support testing of mpn_mul_5 and mpn_mul_6.
details: /home/hgfiles/gmp/rev/60bb4796a088
changeset: 13738:60bb4796a088
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 08 16:02:45 2011 +0100
description:
Add a comment.
diffstat:
ChangeLog | 15 ++++++++++++++
configure.in | 4 ++-
gmp-impl.h | 6 +++++
mpn/asm-defs.m4 | 6 +++++
mpn/x86_64/sqr_basecase.asm | 11 +++++++--
tests/devel/try.c | 20 ++++++++++++++++++-
tests/refmpn.c | 10 +++++++++
tests/tests.h | 47 ++++++++++++++++----------------------------
8 files changed, 84 insertions(+), 35 deletions(-)
diffs (253 lines):
diff -r 60ee2df20fea -r 60bb4796a088 ChangeLog
--- a/ChangeLog Sat Jan 08 13:05:28 2011 +0100
+++ b/ChangeLog Sat Jan 08 16:02:45 2011 +0100
@@ -1,5 +1,20 @@
2011-01-08 Torbjorn Granlund <tege at gmplib.org>
+ * tests/devel/try.c (types enum): Add TYPE_MUL_5 and TYPE_MUL_6.
+ (param_init): Support new types.
+ (choice_array): Support testing of mpn_mul_5 and mpn_mul_6.
+ (call): Support new routines.
+
+ * tests/refmpn.c (refmpn_mul_5, refmpn_mul_6): New functions.
+ * tests/tests.h (refmpn_mul_5, refmpn_mul_6): Declare.
+ Remove parameter names from some other functions.
+
+ * gmp-impl.h (mpn_mul_5, mpn_mul_6): Declare.
+ * mpn/asm-defs.m4: Likewise, also declare mpn_addmul_5, mpn_addmul_6,
+ mpn_addmul_7, and mpn_addmul_8.
+
+ * configure.in (gmp_mpn_functions_optional): Add mul_5 and mul_6.
+
* tune/speed.c (routine): Add measuring of mpn_mul_5 and mpn_mul_6.
* tune/common.c (speed_mpn_mul_5, speed_mpn_mul_6): New functions.
* tune/speed.h: Declare new functions.
diff -r 60ee2df20fea -r 60bb4796a088 configure.in
--- a/configure.in Sat Jan 08 13:05:28 2011 +0100
+++ b/configure.in Sat Jan 08 16:02:45 2011 +0100
@@ -2500,7 +2500,7 @@
gmp_mpn_functions_optional="umul udiv \
invert_limb sqr_diagonal \
- mul_2 mul_3 mul_4 \
+ mul_2 mul_3 mul_4 mul_5 mul_6 \
addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8 \
addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n \
addlsh2_n sublsh2_n rsblsh2_n \
@@ -3044,6 +3044,8 @@
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_5
+#undef HAVE_NATIVE_mpn_mul_6
#undef HAVE_NATIVE_mpn_mul_basecase
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
diff -r 60ee2df20fea -r 60bb4796a088 gmp-impl.h
--- a/gmp-impl.h Sat Jan 08 13:05:28 2011 +0100
+++ b/gmp-impl.h Sat Jan 08 16:02:45 2011 +0100
@@ -894,6 +894,12 @@
#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+#define mpn_mul_5 __MPN(mul_5)
+__GMP_DECLSPEC mp_limb_t mpn_mul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
+#define mpn_mul_6 __MPN(mul_6)
+__GMP_DECLSPEC mp_limb_t mpn_mul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
#ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
diff -r 60ee2df20fea -r 60bb4796a088 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Sat Jan 08 13:05:28 2011 +0100
+++ b/mpn/asm-defs.m4 Sat Jan 08 16:02:45 2011 +0100
@@ -1316,6 +1316,10 @@
define_mpn(addmul_2)
define_mpn(addmul_3)
define_mpn(addmul_4)
+define_mpn(addmul_5)
+define_mpn(addmul_6)
+define_mpn(addmul_7)
+define_mpn(addmul_8)
define_mpn(add_n_sub_n)
define_mpn(add_n_sub_nc)
define_mpn(addaddmul_1msb0)
@@ -1368,6 +1372,8 @@
define_mpn(mul_2)
define_mpn(mul_3)
define_mpn(mul_4)
+define_mpn(mul_5)
+define_mpn(mul_6)
define_mpn(mul_basecase)
define_mpn(mul_n)
define_mpn(mullo_basecase)
diff -r 60ee2df20fea -r 60bb4796a088 mpn/x86_64/sqr_basecase.asm
--- a/mpn/x86_64/sqr_basecase.asm Sat Jan 08 13:05:28 2011 +0100
+++ b/mpn/x86_64/sqr_basecase.asm Sat Jan 08 16:02:45 2011 +0100
@@ -2,7 +2,7 @@
dnl Contributed to the GNU project by Torbjorn Granlund.
-dnl Copyright 2008, 2009 Free Software Foundation, Inc.
+dnl Copyright 2008, 2009, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -25,14 +25,19 @@
C optimization tool suite written by David Harvey and Torbjorn Granlund.
C NOTES
+C * There is a major stupidity in that we call mpn_mul_1 initially, for a
+C large trip count. Instead, we should follow the generic/sqr_basecase.c
+C code which uses addmul_2s from the start, conditionally leaving a 1x1
+C multiply to the end. (In assembly code, one would stop invoking
+C addmul_2s loops when perhaps 3x2s respectively a 2x2s remains.)
C * This code only handles operands up to SQR_TOOM2_THRESHOLD_MAX. That
C means we can safely use 32-bit operations for all sizes, unlike in e.g.,
C mpn_addmul_1.
C * The jump table could probably be optimized, at least for non-pic.
C * The special code for n=1,2,3 was quickly written. It is probably too
C large and unnecessarily slow.
-C * Consider combining small cases code so that the n=k-1 code jumps into
-C the middle of the n=k code.
+C * Consider combining small cases code so that the n=k-1 code jumps into the
+C middle of the n=k code.
C * Avoid saving registers for small cases code.
C * Needed variables:
C n r11 input size
diff -r 60ee2df20fea -r 60bb4796a088 tests/devel/try.c
--- a/tests/devel/try.c Sat Jan 08 13:05:28 2011 +0100
+++ b/tests/devel/try.c Sat Jan 08 16:02:45 2011 +0100
@@ -559,7 +559,7 @@
TYPE_MUL_1, TYPE_MUL_1C,
- TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4,
+ TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,
TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,
@@ -695,6 +695,16 @@
p->msize = 4;
REFERENCE (refmpn_mul_4);
+ p = &param[TYPE_MUL_5];
+ COPY (TYPE_MUL_2);
+ p->msize = 5;
+ REFERENCE (refmpn_mul_5);
+
+ p = &param[TYPE_MUL_6];
+ COPY (TYPE_MUL_2);
+ p->msize = 6;
+ REFERENCE (refmpn_mul_6);
+
p = &param[TYPE_ADDMUL_1];
p->retval = 1;
@@ -1521,6 +1531,12 @@
#if HAVE_NATIVE_mpn_mul_4
{ TRY(mpn_mul_4), TYPE_MUL_4, 4 },
#endif
+#if HAVE_NATIVE_mpn_mul_5
+ { TRY(mpn_mul_5), TYPE_MUL_5, 5 },
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+ { TRY(mpn_mul_6), TYPE_MUL_6, 6 },
+#endif
{ TRY(mpn_rshift), TYPE_RSHIFT },
{ TRY(mpn_lshift), TYPE_LSHIFT },
@@ -2058,6 +2074,8 @@
case TYPE_MUL_2:
case TYPE_MUL_3:
case TYPE_MUL_4:
+ case TYPE_MUL_5:
+ case TYPE_MUL_6:
if (size == 1)
abort ();
e->retval = CALLING_CONVENTIONS (function)
diff -r 60ee2df20fea -r 60bb4796a088 tests/refmpn.c
--- a/tests/refmpn.c Sat Jan 08 13:05:28 2011 +0100
+++ b/tests/refmpn.c Sat Jan 08 16:02:45 2011 +0100
@@ -873,6 +873,16 @@
{
return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
}
+mp_limb_t
+refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+ return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
+}
+mp_limb_t
+refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+ return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
+}
#define AORSMUL_1C(operation_n) \
{ \
diff -r 60ee2df20fea -r 60bb4796a088 tests/tests.h
--- a/tests/tests.h Sat Jan 08 13:05:28 2011 +0100
+++ b/tests/tests.h Sat Jan 08 16:02:45 2011 +0100
@@ -185,26 +185,16 @@
mp_size_t size));
mp_limb_t refmpn_addlsh2_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
mp_size_t size));
-mp_limb_t refmpn_addlsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size, unsigned int));
-mp_limb_t refmpn_addmul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier));
-mp_limb_t refmpn_addmul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier, mp_limb_t carry));
-mp_limb_t refmpn_addmul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_5 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_6 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_7 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_8 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
+mp_limb_t refmpn_addlsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr, mp_srcptr, mp_size_t, unsigned));
+mp_limb_t refmpn_addmul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t refmpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+mp_limb_t refmpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
mp_limb_t refmpn_add_n_sub_n __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
mp_srcptr s1p, mp_srcptr s2p,
@@ -292,16 +282,13 @@
mp_limb_t divisor, mp_limb_t carry));
mp_limb_t refmpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr p, mp_size_t n));
-mp_limb_t refmpn_mul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier));
-mp_limb_t refmpn_mul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier, mp_limb_t carry));
-mp_limb_t refmpn_mul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
- mp_srcptr mult));
-mp_limb_t refmpn_mul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
- mp_srcptr mult));
-mp_limb_t refmpn_mul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
- mp_srcptr mult));
+mp_limb_t refmpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t refmpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+mp_limb_t refmpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_mul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+mp_limb_t refmpn_mul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
void refmpn_mul_basecase __GMP_PROTO ((mp_ptr prodp,
mp_srcptr up, mp_size_t usize,
More information about the gmp-commit
mailing list