[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Jan 22 18:10:06 CET 2011
details: /home/hgfiles/gmp/rev/ea1884c2e91d
changeset: 13757:ea1884c2e91d
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 22 14:19:31 2011 +0100
description:
Add new internal mpn routines addmul_2s and sqr_diag_addlsh1.
details: /home/hgfiles/gmp/rev/4b6555afd6a4
changeset: 13758:4b6555afd6a4
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 22 17:55:28 2011 +0100
description:
Rewrite mpn_sqr_basecase.
details: /home/hgfiles/gmp/rev/42cadec09a14
changeset: 13759:42cadec09a14
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 22 18:01:19 2011 +0100
description:
Define some shorter convenience mnemonics.
details: /home/hgfiles/gmp/rev/7ba9324af20a
changeset: 13760:7ba9324af20a
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 22 18:04:52 2011 +0100
description:
mpn/ia64/sqr_diag_addlsh1.asm: New file.
diffstat:
ChangeLog | 17 +++++
configure.in | 4 +-
gmp-impl.h | 7 ++
mpn/asm-defs.m4 | 2 +
mpn/generic/sqr_basecase.c | 55 ++++++++++------
mpn/ia64/ia64-defs.m4 | 12 +++
mpn/ia64/sqr_diag_addlsh1.asm | 133 ++++++++++++++++++++++++++++++++++++++++++
7 files changed, 208 insertions(+), 22 deletions(-)
diffs (truncated from 360 to 300 lines):
diff -r 33ef11b9a199 -r 7ba9324af20a ChangeLog
--- a/ChangeLog Sat Jan 22 14:03:18 2011 +0100
+++ b/ChangeLog Sat Jan 22 18:04:52 2011 +0100
@@ -1,3 +1,20 @@
+2011-01-22 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/ia64/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/ia64/ia64-defs.m4: Define some shorter convenience mnemonics.
+
+ * mpn/generic/sqr_basecase.c (MPN_SQR_DIAG_ADDLSH1): New macro, using
+ new function mpn_sqr_diag_addlsh1 or defining its equivalent.
+
+ * gmp-impl.h (mpn_addmul_2s): Declare.
+ (mpn_sqr_diag_addlsh1): Declare.
+ * mpn/asm-defs.m4 (define_mpn): Add addmul_2s and sqr_diag_addlsh1.
+
+ * configure.in: Add HAVE_NATIVEs for mpn_sqr_diag_addlsh1 and
+ mpn_addmul_2s.
+ (gmp_mpn_functions_optional): Add sqr_diag_addlsh1.
+
2011-01-21 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tests/devel/try.c: Initial support for mpn_bdiv_q_1.
diff -r 33ef11b9a199 -r 7ba9324af20a configure.in
--- a/configure.in Sat Jan 22 14:03:18 2011 +0100
+++ b/configure.in Sat Jan 22 18:04:52 2011 +0100
@@ -2500,7 +2500,7 @@
# divrem_1 and pre_divrem_1.
gmp_mpn_functions_optional="umul udiv \
- invert_limb sqr_diagonal \
+ invert_limb sqr_diagonal sqr_diag_addlsh1 \
mul_2 mul_3 mul_4 mul_5 mul_6 \
addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8 \
addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n \
@@ -3011,6 +3011,7 @@
#undef HAVE_NATIVE_mpn_addmul_6
#undef HAVE_NATIVE_mpn_addmul_7
#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_addmul_2s
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdiv_dbm1c
@@ -3065,6 +3066,7 @@
#undef HAVE_NATIVE_mpn_rshift
#undef HAVE_NATIVE_mpn_sqr_basecase
#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sqr_diag_addlsh1
#undef HAVE_NATIVE_mpn_sub_n
#undef HAVE_NATIVE_mpn_sub_nc
#undef HAVE_NATIVE_mpn_sublsh1_n
diff -r 33ef11b9a199 -r 7ba9324af20a gmp-impl.h
--- a/gmp-impl.h Sat Jan 22 14:03:18 2011 +0100
+++ b/gmp-impl.h Sat Jan 22 18:04:52 2011 +0100
@@ -792,6 +792,10 @@
#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase. */
+#define mpn_addmul_2s __MPN(addmul_2s)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2s __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+
/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
returns the carry out (0, 1 or 2). */
#define mpn_addlsh1_n __MPN(addlsh1_n)
@@ -1076,6 +1080,9 @@
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
+__GMP_DECLSPEC void mpn_sqr_diag_addlsh1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));
diff -r 33ef11b9a199 -r 7ba9324af20a mpn/asm-defs.m4
--- a/mpn/asm-defs.m4 Sat Jan 22 14:03:18 2011 +0100
+++ b/mpn/asm-defs.m4 Sat Jan 22 18:04:52 2011 +0100
@@ -1320,6 +1320,7 @@
define_mpn(addmul_6)
define_mpn(addmul_7)
define_mpn(addmul_8)
+define_mpn(addmul_2s)
define_mpn(add_n_sub_n)
define_mpn(add_n_sub_nc)
define_mpn(addaddmul_1msb0)
@@ -1404,6 +1405,7 @@
define_mpn(set_str)
define_mpn(sqr_basecase)
define_mpn(sqr_diagonal)
+define_mpn(sqr_diag_addlsh1)
define_mpn(sub_n)
define_mpn(sublsh1_n)
define_mpn(sublsh2_n)
diff -r 33ef11b9a199 -r 7ba9324af20a mpn/generic/sqr_basecase.c
--- a/mpn/generic/sqr_basecase.c Sat Jan 22 14:03:18 2011 +0100
+++ b/mpn/generic/sqr_basecase.c Sat Jan 22 18:04:52 2011 +0100
@@ -6,7 +6,7 @@
Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
-2005, 2008 Free Software Foundation, Inc.
+2005, 2008, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -45,6 +45,29 @@
} while (0)
#endif
+/* MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n): run MPN_SQR_DIAGONAL (rp, up, n), then
+   add 2*{tp, 2n-2} into {rp+1, 2n-2} and add the carry out into rp[2n-1].
+   Uses the native mpn_sqr_diag_addlsh1 when available, else mpn_addlsh1_n,
+   else mpn_lshift + mpn_add_n (this last variant overwrites tp with 2*tp).  */
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ mpn_sqr_diag_addlsh1 (rp, tp, up, n)
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ mp_limb_t cy; \
+ MPN_SQR_DIAGONAL (rp, up, n); \
+ cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
+ rp[2 * n - 1] += cy; \
+ } while (0)
+#else
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ mp_limb_t cy; \
+ MPN_SQR_DIAGONAL (rp, up, n); \
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
+ cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
+ rp[2 * n - 1] += cy; \
+ } while (0)
+#endif
+#endif
+
#undef READY_WITH_mpn_sqr_basecase
@@ -84,9 +107,13 @@
{
if (n == 2)
{
+#if HAVE_NATIVE_mpn_mul_2
+ rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
rp[0] = 0;
rp[1] = 0;
rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
return;
}
@@ -101,15 +128,7 @@
tp[2 * n - 3] = cy;
}
- MPN_SQR_DIAGONAL (rp, up, n);
-
-#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
- cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
- cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
- rp[2 * n - 1] += cy;
+ MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
#define READY_WITH_mpn_sqr_basecase
#endif
@@ -194,9 +213,13 @@
if (n == 2)
{
+#if HAVE_NATIVE_mpn_mul_2
+ rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
rp[0] = 0;
rp[1] = 0;
rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
return;
}
@@ -283,18 +306,8 @@
cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
tp[n + i - 2] = cy;
}
- MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
- {
- mp_limb_t cy;
-#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
- cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
- cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
- rp[2 * n - 1] += cy;
- }
+ MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
}
#endif
diff -r 33ef11b9a199 -r 7ba9324af20a mpn/ia64/ia64-defs.m4
--- a/mpn/ia64/ia64-defs.m4 Sat Jan 22 14:03:18 2011 +0100
+++ b/mpn/ia64/ia64-defs.m4 Sat Jan 22 18:04:52 2011 +0100
@@ -120,5 +120,17 @@
')')
define(`ASSERT_label_counter',1)
+define(`getfsig', `getf.sig')
+define(`setfsig', `setf.sig')
+define(`cmpeq', `cmp.eq')
+define(`cmpne', `cmp.ne')
+define(`cmpltu', `cmp.ltu')
+define(`cmpleu', `cmp.leu')
+define(`cmpgtu', `cmp.gtu')
+define(`cmpgeu', `cmp.geu')
+define(`cmple', `cmp.le')
+define(`cmpgt', `cmp.gt')
+define(`cmpeqor', `cmp.eq.or')
+define(`cmpequc', `cmp.eq.unc')
divert
diff -r 33ef11b9a199 -r 7ba9324af20a mpn/ia64/sqr_diag_addlsh1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/ia64/sqr_diag_addlsh1.asm Sat Jan 22 18:04:52 2011 +0100
@@ -0,0 +1,133 @@
+dnl IA-64 mpn_sqr_diag_addlsh1
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 2 Unrolling could bring it to 1.5 + epsilon
+
+C Exact performance table. The 2nd line is this code, the 3rd line is ctop-
+C less code. In an assembly sqr_basecase, the ctop-full numbers will become a
+C few cycles better since we can mitigate the many I0 instructions.
+C
+C 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+C - 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 52 54 56 Needs updating
+C - 13 16 17 18 20 21 23 25 26 30 31 31 33 34 36 38 39 42 43
+
+C We should keep in mind that this code takes linear time in a O(n^2) context
+C and that it will only be used under SQR_TOOM2_THRESHOLD, which might become
+C around 60. Keeping overhead down for smallish operands (< 10) is more
+C important than optimal cycle counts.
+
+C TODO
+C * Make sure we don't depend on uninitialised r-registers, f-registers, or
+C   p-registers.
+C * Optimise by doing first two loop iterations in function header.
+
+C INPUT PARAMETERS
+define(`rp_param', `r32') define(`rp', `r14') C size: 2n
+define(`tp_param', `r33') define(`tp', `r15') C size: 2n - 2
+define(`up_param', `r34') define(`up', `r31') C size: n
+define(`n', `r35')
+
+
+ASM_START()
+PROLOGUE(mpn_sqr_diag_addlsh1)
+
+ .prologue
+ .save ar.pfs, r2
+ .save ar.lc, r3
+ .body
+
+.mmi; alloc r2 = ar.pfs, 4,24,0,24 C M
+ nop 4711
+ mov r3 = ar.lc C I0
+.mmi; mov tp = tp_param C M I
+ mov up = up_param C M I
+ mov rp = rp_param C M I
+ ;;
+.mmi; ld8 r36 = [tp], 8 C M
+ add r20 = -2, n C M I
+ mov r9 = ar.ec C I0
+ ;;
+.mmi; ld8 r32 = [tp], 8 C M
+ mov r16 = 0 C M I
More information about the gmp-commit
mailing list