[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Feb 11 07:52:12 CET 2011
details: /home/hgfiles/gmp/rev/d7088b155a80
changeset: 13836:d7088b155a80
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Feb 10 23:25:42 2011 +0100
description:
Fixed a FIXME comment.
details: /home/hgfiles/gmp/rev/baa61caf15ad
changeset: 13837:baa61caf15ad
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Feb 11 07:12:00 2011 +0100
description:
Faster inner loop (by tege).
details: /home/hgfiles/gmp/rev/5cbb42265900
changeset: 13838:5cbb42265900
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Feb 11 07:26:20 2011 +0100
description:
Tests for mpn_{add,sub,rsb}lsh{,1,2}.
details: /home/hgfiles/gmp/rev/4e3f13fe4c88
changeset: 13839:4e3f13fe4c88
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Fri Feb 11 07:52:05 2011 +0100
description:
aorrlshC_n.asm for k7 and atom.
diffstat:
ChangeLog | 13 ++++
configure.in | 9 +++
gmp-impl.h | 2 +-
mpn/x86/atom/aorrlsh2_n.asm | 42 ++++++++++++++
mpn/x86/atom/rsblsh1_n.asm | 42 ++++++++++++++
mpn/x86/k7/addlsh1_n.asm | 56 +++++++-----------
mpn/x86/k7/aorrlshC_n.asm | 129 ++++++++++++++++++++++++++++++++++++++++++++
tests/devel/try.c | 87 +++++++++++++++++++++++++++++
tests/refmpn.c | 69 +++++++++++++++++++++++
tests/tests.h | 11 +++
10 files changed, 425 insertions(+), 35 deletions(-)
diffs (truncated from 645 to 300 lines):
diff -r 5d639287e647 -r 4e3f13fe4c88 ChangeLog
--- a/ChangeLog Mon Feb 07 08:05:27 2011 +0100
+++ b/ChangeLog Fri Feb 11 07:52:05 2011 +0100
@@ -1,3 +1,16 @@
+2011-02-11 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/k7/addlsh1_n.asm: Faster core loop (Torbjorn's).
+
+ * configure.in: Add HAVE_NATIVE_{add,sub,rsb}lsh{,1,2}_nc.
+ * tests/tests.h: refmpn_{add,sub,rsb}lsh{,1,2}_nc prototypes.
+ * tests/refmpn.c: New refmpn_{add,sub,rsb}lsh{,1,2}_nc.
+ * tests/devel/try.c: Tests for mpn_{add,sub,rsb}lsh{,1,2}_nc.
+
+ * mpn/x86/k7/aorrlshC_n.asm: New file.
+ * mpn/x86/atom/aorrlsh2_n.asm: Grab k7/aorrlshC_n.asm.
+ * mpn/x86/atom/rsblsh1_n.asm: Grab k7/aorrlshC_n.asm.
+
2011-02-06 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/k7/addlsh1_n.asm: New file.
diff -r 5d639287e647 -r 4e3f13fe4c88 configure.in
--- a/configure.in Mon Feb 07 08:05:27 2011 +0100
+++ b/configure.in Fri Feb 11 07:52:05 2011 +0100
@@ -3006,6 +3006,9 @@
#undef HAVE_NATIVE_mpn_addlsh1_n
#undef HAVE_NATIVE_mpn_addlsh2_n
#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
@@ -3062,6 +3065,9 @@
#undef HAVE_NATIVE_mpn_rsblsh1_n
#undef HAVE_NATIVE_mpn_rsblsh2_n
#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
#undef HAVE_NATIVE_mpn_rsh1add_n
#undef HAVE_NATIVE_mpn_rsh1add_nc
#undef HAVE_NATIVE_mpn_rsh1sub_n
@@ -3075,6 +3081,9 @@
#undef HAVE_NATIVE_mpn_sublsh1_n
#undef HAVE_NATIVE_mpn_sublsh2_n
#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
#undef HAVE_NATIVE_mpn_submul_1c
#undef HAVE_NATIVE_mpn_udiv_qrnnd
#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
diff -r 5d639287e647 -r 4e3f13fe4c88 gmp-impl.h
--- a/gmp-impl.h Mon Feb 07 08:05:27 2011 +0100
+++ b/gmp-impl.h Fri Feb 11 07:52:05 2011 +0100
@@ -830,7 +830,7 @@
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
- returns the borrow out (FIXME 0, 1, 2 or 3). */
+ returns the borrow out (0, ..., 4). */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/atom/aorrlsh2_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorrlsh2_n.asm Fri Feb 11 07:52:05 2011 +0100
@@ -0,0 +1,42 @@
+dnl Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+ define(M4_inst, adcl)
+ define(M4_opp, subl)
+ define(M4_function, mpn_addlsh2_n)
+ define(M4_function_c, mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+ define(M4_inst, sbbl)
+ define(M4_opp, addl)
+ define(M4_function, mpn_rsblsh2_n)
+ define(M4_function_c, mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/atom/rsblsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/rsblsh1_n.asm Fri Feb 11 07:52:05 2011 +0100
@@ -0,0 +1,42 @@
+dnl Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(M4_inst, adcl)
+ define(M4_opp, subl)
+ define(M4_function, mpn_addlsh1_n)
+ define(M4_function_c, mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+ define(M4_inst, sbbl)
+ define(M4_opp, addl)
+ define(M4_function, mpn_rsblsh1_n)
+ define(M4_function_c, mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/k7/aorrlshC_n.asm')
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/k7/addlsh1_n.asm
--- a/mpn/x86/k7/addlsh1_n.asm Mon Feb 07 08:05:27 2011 +0100
+++ b/mpn/x86/k7/addlsh1_n.asm Fri Feb 11 07:52:05 2011 +0100
@@ -75,7 +75,7 @@
define(`up', `%esi')
define(`vp', `%ebp')
- mov $0x20000000, %eax
+ mov $0x2aaaaaab, %eax
push %ebx FRAME_pushl()
movl PARAM_SIZE, %ebx C size
@@ -94,9 +94,9 @@
push vp FRAME_pushl()
movl PARAM_DBLD, vp
- leal 4(,%edx,4), %ecx C count*4+4 = -(size\8)*4
+ leal 3(%edx,%edx,2), %ecx C count*3+3 = -(size\6)*3
xorl %edx, %edx
- leal (%ebx,%ecx,2), %ebx C size + (count*4+4)*2 = size % 8
+ leal (%ebx,%ecx,2), %ebx C size + (count*3+3)*2 = size % 6
orl %ebx, %ebx
jz L(exact)
@@ -106,15 +106,15 @@
mov (vp), %eax
adc %eax, %eax
rcr %edx C restore 1st saved carry bit
+ lea 4(vp), vp
adc (up), %eax
- mov %eax, (rp)
+ lea 4(up), up
adc %edx, %edx C save a carry bit in edx
- lea 4(rp), rp
- lea 4(up), up
- lea 4(vp), vp
ifdef(`CPU_P6',`
adc %edx, %edx ') C save another carry bit in edx
decl %ebx
+ mov %eax, (rp)
+ lea 4(rp), rp
jnz L(oop)
movl vp, VAR_TMP
L(exact):
@@ -131,8 +131,6 @@
adc %ebx, %ebx
mov 8(vp), %ecx
adc %ecx, %ecx
- mov 12(vp), vp
- adc vp, vp
rcr %edx C restore 1st saved carry bit
@@ -142,40 +140,30 @@
mov %ebx, 4(rp)
adc 8(up), %ecx
mov %ecx, 8(rp)
- adc 12(up), vp
- mov vp, 12(rp)
- movl VAR_TMP, vp
+ mov 12(vp), %eax
+ adc %eax, %eax
+ mov 16(vp), %ebx
+ adc %ebx, %ebx
+ mov 20(vp), %ecx
+ adc %ecx, %ecx
- mov 16(vp), %eax
- adc %eax, %eax
- mov 20(vp), %ebx
- adc %ebx, %ebx
- mov 24(vp), %ecx
- adc %ecx, %ecx
- mov 28(vp), vp
- adc vp, vp
-
+ lea 24(vp), vp
adc %edx, %edx C save a carry bit in edx
- adc 16(up), %eax
- mov %eax, 16(rp)
- adc 20(up), %ebx
- mov %ebx, 20(rp)
- adc 24(up), %ecx
- mov %ecx, 24(rp)
- adc 28(up), vp
- mov vp, 28(rp)
+ adc 12(up), %eax
+ mov %eax, 12(rp)
+ adc 16(up), %ebx
+ mov %ebx, 16(rp)
+ adc 20(up), %ecx
- movl VAR_TMP, vp
- lea 32(rp), rp
- lea 32(up), up
- lea 32(vp), vp
+ lea 24(up), up
ifdef(`CPU_P6',`
adc %edx, %edx ') C save another carry bit in edx
+ mov %ecx, 20(rp)
incl VAR_COUNT
- movl vp, VAR_TMP
+ lea 24(rp), rp
jne L(top)
L(end):
diff -r 5d639287e647 -r 4e3f13fe4c88 mpn/x86/k7/aorrlshC_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/k7/aorrlshC_n.asm Fri Feb 11 07:52:05 2011 +0100
@@ -0,0 +1,129 @@
+dnl AMD K7 mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
More information about the gmp-commit
mailing list