[Gmp-commit] /home/hgfiles/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Dec 28 15:37:23 CET 2009
details: /home/hgfiles/gmp/rev/4cf30e0589b3
changeset: 13243:4cf30e0589b3
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 28 14:54:27 2009 +0100
description:
Optimise shifting for Core i.
details: /home/hgfiles/gmp/rev/4722549eec70
changeset: 13244:4722549eec70
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 28 15:32:16 2009 +0100
description:
Make mpn_lshiftc standard internal function.
details: /home/hgfiles/gmp/rev/4db5125f1d87
changeset: 13245:4db5125f1d87
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 28 15:33:06 2009 +0100
description:
Test mpn_lshiftc.
details: /home/hgfiles/gmp/rev/a05582cfd8e8
changeset: 13246:a05582cfd8e8
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 28 15:35:42 2009 +0100
description:
Support measuring mpn_lshiftc.
details: /home/hgfiles/gmp/rev/9f83d5200872
changeset: 13247:9f83d5200872
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 28 15:37:05 2009 +0100
description:
(tune_mu_div, tune_mu_bdiv): Set step_factor.
diffstat:
ChangeLog | 24 ++++++-
configure.in | 4 +-
mpn/generic/lshiftc.c | 63 ++++++++++++++++
mpn/generic/mul_fft.c | 38 ---------
mpn/x86_64/core2/lshift.asm | 6 +-
mpn/x86_64/core2/lshiftc.asm | 138 ++++++++++++++++++++++++++++++++++++
mpn/x86_64/core2/rshift.asm | 6 +-
mpn/x86_64/lshiftc.asm | 164 +++++++++++++++++++++++++++++++++++++++++++
tests/devel/try.c | 18 +++-
tests/refmpn.c | 23 ++++++
tests/tests.h | 13 +--
tune/common.c | 5 +
tune/speed.c | 1 +
tune/speed.h | 1 +
tune/tuneup.c | 6 +-
15 files changed, 449 insertions(+), 61 deletions(-)
diffs (truncated from 719 to 300 lines):
diff -r 052c56e76bcd -r 9f83d5200872 ChangeLog
--- a/ChangeLog Sun Dec 27 20:30:53 2009 +0100
+++ b/ChangeLog Mon Dec 28 15:37:05 2009 +0100
@@ -1,8 +1,30 @@
+2009-12-28 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/tuneup.c (tune_mu_div, tune_mu_bdiv): Set step_factor.
+
+ * tune/common.c, tune/speed.c, tune/speed.h: Support measuring
+ mpn_lshiftc.
+
+ * tests/devel/try.c: Test mpn_lshiftc.
+ * tests/refmpn.c (refmpn_com): New function.
+ (refmpn_lshiftc): Likewise.
+
+ * configure.in (gmp_mpn_functions_optional): Move lshiftc from here...
+ (gmp_mpn_functions): ...to here.
+ * mpn/generic/lshiftc.c: New file.
+ * mpn/x86_64/lshiftc.asm: New file.
+ * mpn/x86_64/core2/lshiftc.asm: New file.
+ * mpn/generic/mul_fft.c (mpn_lshiftc): Remove.
+
+ * mpn/x86_64/core2/lshift.asm: Tweak for better Core iN performance.
+ * mpn/x86_64/core2/rshift.asm: Likewise.
+
2009-12-27 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/mul.c: Use toom6h and toom8h for almost balanced.
- * mpn/generic/mullo_n.c (mpn_dc_mullo_n): New ratio, to be used in Toom-8 range.
+ * mpn/generic/mullo_n.c (mpn_dc_mullo_n): New ratio, to be used in
+ Toom-8 range.
2009-12-27 Torbjorn Granlund <tege at gmplib.org>
diff -r 052c56e76bcd -r 9f83d5200872 configure.in
--- a/configure.in Sun Dec 27 20:30:53 2009 +0100
+++ b/configure.in Mon Dec 28 15:37:05 2009 +0100
@@ -2488,13 +2488,13 @@
addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n \
addlsh2_n sublsh2_n rsblsh2_n \
addlsh_n sublsh_n rsblsh_n \
- add_n_sub_n addaddmul_1msb0 lshiftc"
+ add_n_sub_n addaddmul_1msb0"
gmp_mpn_functions="$extra_functions \
add add_1 add_n sub sub_1 sub_n neg_n mul_1 addmul_1 \
submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \
fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump \
- mod_1_1 mod_1_2 mod_1_3 mod_1_4 \
+ mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc \
mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul \
random random2 pow_1 \
rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp \
diff -r 052c56e76bcd -r 9f83d5200872 mpn/generic/lshiftc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/lshiftc.c Mon Dec 28 15:37:05 2009 +0100
@@ -0,0 +1,63 @@
+/* mpn_lshiftc -- Shift left low level with complement.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left and store
+ the complement of the n least significant limbs of the result at rp.
+ Return the bits shifted out from the most significant limb, uncomplemented.
+
+ Argument constraints:
+ 1. 0 < cnt < GMP_NUMB_BITS.
+ 2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+ mp_limb_t high_limb, low_limb;
+ unsigned int tnc;
+ mp_size_t i;
+ mp_limb_t retval;
+
+ ASSERT (n >= 1);
+ ASSERT (cnt >= 1);
+ ASSERT (cnt < GMP_NUMB_BITS);
+ ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+ up += n;
+ rp += n;
+
+ tnc = GMP_NUMB_BITS - cnt;
+ low_limb = *--up;
+ retval = low_limb >> tnc;
+ high_limb = (low_limb << cnt);
+
+ for (i = n - 1; i != 0; i--)
+ {
+ low_limb = *--up;
+ *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
+ high_limb = low_limb << cnt;
+ }
+ *--rp = (~high_limb) & GMP_NUMB_MASK;
+
+ return retval;
+}
diff -r 052c56e76bcd -r 9f83d5200872 mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c Sun Dec 27 20:30:53 2009 +0100
+++ b/mpn/generic/mul_fft.c Mon Dec 28 15:37:05 2009 +0100
@@ -180,44 +180,6 @@
}
}
-/* Shift {up, n} of cnt bits to the left, store the complemented result
- in {rp, n}, and output the shifted bits (not complemented).
- Same as:
- cc = mpn_lshift (rp, up, n, cnt);
- mpn_com_n (rp, rp, n);
- return cc;
-
- Assumes n >= 1, 1 < cnt < GMP_NUMB_BITS, rp >= up.
-*/
-#ifndef HAVE_NATIVE_mpn_lshiftc
-#undef mpn_lshiftc
-static mp_limb_t
-mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_size_t i;
- mp_limb_t retval;
-
- up += n;
- rp += n;
-
- tnc = GMP_NUMB_BITS - cnt;
- low_limb = *--up;
- retval = low_limb >> tnc;
- high_limb = (low_limb << cnt);
-
- for (i = n - 1; i != 0; i--)
- {
- low_limb = *--up;
- *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
- high_limb = low_limb << cnt;
- }
- *--rp = (~high_limb) & GMP_NUMB_MASK;
-
- return retval;
-}
-#endif
/* r <- a*2^e mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
Assumes a is semi-normalized, i.e. a[n] <= 1.
diff -r 052c56e76bcd -r 9f83d5200872 mpn/x86_64/core2/lshift.asm
--- a/mpn/x86_64/core2/lshift.asm Sun Dec 27 20:30:53 2009 +0100
+++ b/mpn/x86_64/core2/lshift.asm Mon Dec 28 15:37:05 2009 +0100
@@ -1,6 +1,6 @@
dnl x86-64 mpn_lshift optimized for "Core 2".
-dnl Copyright 2007 Free Software Foundation, Inc.
+dnl Copyright 2007, 2009 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -25,7 +25,7 @@
C K10: 4.25
C P4: 14.7
C P6 core2: 1.27
-C P6 corei7: 1.75
+C P6 corei7: 1.5
C INPUT PARAMETERS
@@ -112,8 +112,8 @@
mov %r9, -16(rp)
L(00): shld %cl, %r11, %r10
mov -24(up), %r9
- lea -32(up), up
mov %r10, -24(rp)
+ add $-32, up
lea -32(rp), rp
sub $4, n
jnc L(top)
diff -r 052c56e76bcd -r 9f83d5200872 mpn/x86_64/core2/lshiftc.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/core2/lshiftc.asm Mon Dec 28 15:37:05 2009 +0100
@@ -0,0 +1,138 @@
+dnl x86-64 mpn_lshiftc optimized for "Core 2".
+
+dnl Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C K8,K9: ?
+C K10: ?
+C P4: ?
+C P6 core2: 1.5
+C P6 corei7: 1.75
+
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%cl')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_lshiftc)
+ lea -8(rp,n,8), rp
+ lea -8(up,n,8), up
+
+ mov %edx, %eax
+ and $3, %eax
+ jne L(nb00)
+L(b00): C n = 4, 8, 12, ...
+ mov (up), %r10
+ mov -8(up), %r11
+ xor %eax, %eax
+ shld %cl, %r10, %rax
+ mov -16(up), %r8
+ lea 24(rp), rp
+ sub $4, n
+ jmp L(00)
+
+L(nb00):C n = 1, 5, 9, ...
+ cmp $2, %eax
+ jae L(nb01)
+L(b01): mov (up), %r9
+ xor %eax, %eax
+ shld %cl, %r9, %rax
+ sub $2, n
+ jb L(le1)
+ mov -8(up), %r10
+ mov -16(up), %r11
+ lea -8(up), up
+ lea 16(rp), rp
+ jmp L(01)
+L(le1): shl %cl, %r9
+ not %r9
+ mov %r9, (rp)
+ ret
+
+L(nb01):C n = 2, 6, 10, ...
+ jne L(b11)
+L(b10): mov (up), %r8
+ mov -8(up), %r9
+ xor %eax, %eax
+ shld %cl, %r8, %rax
+ sub $3, n
+ jb L(le2)
+ mov -16(up), %r10
+ lea -16(up), up
+ lea 8(rp), rp
+ jmp L(10)
+L(le2): shld %cl, %r9, %r8
+ not %r8
+ mov %r8, (rp)
+ shl %cl, %r9
+ not %r9
+ mov %r9, -8(rp)
+ ret
+
+ ALIGN(16) C performance critical!
+L(b11): C n = 3, 7, 11, ...
+ mov (up), %r11
More information about the gmp-commit
mailing list