[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Feb 21 22:18:05 CET 2011
details: /var/hg/gmp/rev/f273f3687988
changeset: 13874:f273f3687988
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Sun Feb 20 11:15:32 2011 +0100
description:
sublsh2 for Atom/32.
details: /var/hg/gmp/rev/8da5a004ff8f
changeset: 13875:8da5a004ff8f
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Sun Feb 20 11:31:54 2011 +0100
description:
lshift for atom/32.
details: /var/hg/gmp/rev/d82d695896cd
changeset: 13876:d82d695896cd
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Feb 21 22:06:51 2011 +0100
description:
Special lshift.1 for atom/32.
details: /var/hg/gmp/rev/c5b80d4df0ed
changeset: 13877:c5b80d4df0ed
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Feb 21 22:17:00 2011 +0100
description:
Special lshift1 for atom/32
details: /var/hg/gmp/rev/a8c2fdcab134
changeset: 13878:a8c2fdcab134
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Feb 21 22:17:58 2011 +0100
description:
Merge
diffstat:
ChangeLog | 28 ++++
gmp-impl.h | 2 +-
mpn/x86/aors_n.asm | 10 +-
mpn/x86/aorsmul_1.asm | 20 +-
mpn/x86/atom/addlsh1_n.asm | 23 ---
mpn/x86/atom/aorrlsh1_n.asm | 42 ++++++
mpn/x86/atom/aorslshC_n.asm | 219 ++++++++++++++++++++++++++++++++++++
mpn/x86/atom/aorsmul_1.asm | 14 +-
mpn/x86/atom/lshift.asm | 170 +++++++++++++++++++++++++++-
mpn/x86/atom/rsblsh1_n.asm | 42 ------
mpn/x86/atom/rshift.asm | 26 ++--
mpn/x86/atom/sublsh2_n.asm | 46 +++++++
mpn/x86/bdiv_dbm1c.asm | 40 +++--
mpn/x86/copyd.asm | 10 +-
mpn/x86/copyi.asm | 10 +-
mpn/x86/k6/aorsmul_1.asm | 14 +-
mpn/x86/k6/mul_1.asm | 14 +-
mpn/x86/k7/addlsh1_n.asm | 10 +-
mpn/x86/k7/aorsmul_1.asm | 12 +-
mpn/x86/k7/invert_limb.asm | 10 +-
mpn/x86/k7/mod_1_1.asm | 10 +-
mpn/x86/k7/mod_1_4.asm | 10 +-
mpn/x86/k7/mul_1.asm | 10 +-
mpn/x86/k7/sublsh1_n.asm | 10 +-
mpn/x86/lshift.asm | 12 +-
mpn/x86/mod_34lsub1.asm | 10 +-
mpn/x86/mul_1.asm | 20 +-
mpn/x86/mul_basecase.asm | 10 +-
mpn/x86/p6/aors_n.asm | 8 +-
mpn/x86/p6/aorsmul_1.asm | 14 +-
mpn/x86/pentium4/sse2/add_n.asm | 8 +-
mpn/x86/pentium4/sse2/addlsh1_n.asm | 10 +-
mpn/x86/pentium4/sse2/addmul_1.asm | 16 +-
mpn/x86/pentium4/sse2/mod_1_1.asm | 12 +-
mpn/x86/pentium4/sse2/mod_1_4.asm | 14 +-
mpn/x86/pentium4/sse2/mul_1.asm | 10 +-
mpn/x86/pentium4/sse2/popcount.asm | 34 ++--
mpn/x86/pentium4/sse2/sub_n.asm | 8 +-
mpn/x86/pentium4/sse2/submul_1.asm | 14 +-
mpn/x86/rshift.asm | 12 +-
mpn/x86/sqr_basecase.asm | 10 +-
mpq/aors.c | 12 +-
mpz/divegcd.c | 55 ++++++++-
43 files changed, 789 insertions(+), 302 deletions(-)
diffs (truncated from 1699 to 300 lines):
diff -r 153ddf846890 -r a8c2fdcab134 ChangeLog
--- a/ChangeLog Sun Feb 20 10:59:12 2011 +0100
+++ b/ChangeLog Mon Feb 21 22:17:58 2011 +0100
@@ -1,7 +1,35 @@
+2011-02-21 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/atom/aorrlsh1_n.asm: New file (code from rsblsh_1, slightly
+ slower for addlsh_1 for large operands, but much faster for small).
+ * mpn/x86/atom/addlsh1_n.asm: Remove.
+ * mpn/x86/atom/rsblsh1_n.asm: Remove.
+
+2011-02-20 Marc Glisse <marc.glisse at inria.fr>
+
+ * mpq/aors.c: Rewrite to remove redundant division.
+
+2011-02-20 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/atom/lshift.asm: New file.
+ * mpn/x86/atom/rshift.asm: Normalise mnemonic usage.
+
+ * gmp-impl.h (mpn_divexact_by7): Relax inclusion condition.
+
+ * mpz/divegcd.c (mpz_divexact_by5): New conditionally enabled function.
+ (mpz_divexact_by3): Wrap inside appropriate conditions.
+ (mpz_divexact_gcd): Rewrite.
+
+ * mpn/x86/bdiv_dbm1c.asm: Save a jump.
+
2011-02-20 Marco Bodrato <bodrato at mail.dm.unipi.it>
+ * mpn/x86/atom/aorslshC_n.asm: New file.
+ * mpn/x86/atom/sublsh2_n.asm: New file.
+
* mpn/x86/atom/aors_n.asm: New code.
* mpn/x86/atom/rshift.asm: Atom64 code adapted to 32-bit.
+ * mpn/x86/atom/lshift.asm: Likewise.
2011-02-19 Torbjorn Granlund <tege at gmplib.org>
diff -r 153ddf846890 -r a8c2fdcab134 gmp-impl.h
--- a/gmp-impl.h Sun Feb 20 10:59:12 2011 +0100
+++ b/gmp-impl.h Mon Feb 21 22:17:58 2011 +0100
@@ -1371,7 +1371,7 @@
(7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
#endif
-#if GMP_NUMB_BITS % 6 == 0
+#if GMP_NUMB_BITS % 3 == 0
#define mpn_divexact_by7(dst,src,size) \
(7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
#endif
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/aors_n.asm
--- a/mpn/x86/aors_n.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aors_n.asm Mon Feb 21 22:17:58 2011 +0100
@@ -22,11 +22,11 @@
C cycles/limb
-C P5: 3.375
-C P6: 3.125
-C K6: 3.5
-C K7: 2.25
-C P4: 8.75
+C P5 3.375
+C P6 3.125
+C K6 3.5
+C K7 2.25
+C P4 8.75
ifdef(`OPERATION_add_n',`
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/aorsmul_1.asm
--- a/mpn/x86/aorsmul_1.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aorsmul_1.asm Mon Feb 21 22:17:58 2011 +0100
@@ -22,19 +22,19 @@
include(`../config.m4')
-C cycles/limb
-C P5: 14.75
-C P6 model 0-8,10-12) 7.5
+C cycles/limb
+C P5 14.75
+C P6 model 0-8,10-12 7.5
C P6 model 9 (Banias) 6.7
-C P6 model 13 (Dothan) 6.75
-C P4 model 0 (Willamette) 24.0
-C P4 model 1 (?) 24.0
-C P4 model 2 (Northwood) 24.0
+C P6 model 13 (Dothan) 6.75
+C P4 model 0 (Willamette) 24.0
+C P4 model 1 (?) 24.0
+C P4 model 2 (Northwood) 24.0
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 12.5
-C K7: 5.25
-C K8:
+C AMD K6 12.5
+C AMD K7 5.25
+C AMD K8
ifdef(`OPERATION_addmul_1',`
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/atom/addlsh1_n.asm
--- a/mpn/x86/atom/addlsh1_n.asm Sun Feb 20 10:59:12 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-dnl Intel Atom mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n)
-include_mpn(`x86/k7/addlsh1_n.asm')
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/atom/aorrlsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorrlsh1_n.asm Mon Feb 21 22:17:58 2011 +0100
@@ -0,0 +1,42 @@
+dnl Intel Atom mpn_addlsh1_n/mpn_rsblsh1_n -- rp[] = (vp[] << 1) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(M4_inst, adc)
+ define(M4_opp, sub)
+ define(M4_function, mpn_addlsh1_n)
+ define(M4_function_c, mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+ define(M4_inst, sbb)
+ define(M4_opp, add)
+ define(M4_function, mpn_rsblsh1_n)
+ define(M4_function_c, mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/atom/aorrlshC_n.asm')
diff -r 153ddf846890 -r a8c2fdcab134 mpn/x86/atom/aorslshC_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/aorslshC_n.asm Mon Feb 21 22:17:58 2011 +0100
@@ -0,0 +1,219 @@
+dnl Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_ip1_addlshC_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_ip1_addlshC_nc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+C mp_limb_t mpn_ip1_sublshC_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_ip1_sublshC_nc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_signed_limb_t borrow);
+
+defframe(PARAM_CORB, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t borrow);
+
+C if src1 == dst, _ip1 is used
+
+defframe(GPARAM_CORB, 20)
+defframe(GPARAM_SIZE, 16)
+defframe(GPARAM_SRC2, 12)
+
+dnl re-use parameter space
+define(SAVE_EBP,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp', `%edi')
+define(`up', `%esi')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+
+PROLOGUE(M4_ip_function_c)
+deflit(`FRAME',0)
+ movl PARAM_CORB, %ecx
+ movl %ecx, %edx
+ shr $LSH, %edx
+ andl $1, %edx
+ M4_opp %edx, %ecx
+ jmp L(start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_ip_function)
+deflit(`FRAME',0)
+
+ xor %ecx, %ecx
+ xor %edx, %edx
+L(start_nc):
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ mov %ebx, SAVE_EBX
+ mov PARAM_SIZE, %ebx C size
+L(inplace):
+ incl %ebx C size + 1
+ shr %ebx C (size+1)\2
+ mov %ebp, SAVE_EBP
+ jnc L(entry) C size odd
+
+ add %edx, %edx C size even
+ mov %ecx, %ebp
+ mov (up), %ecx
+ lea -4(rp), rp
+ lea (%ebp,%ecx,M), %eax
+ lea 4(up), up
+ jmp L(enteven)
+
+ ALIGN(16)
+L(oop):
+ lea (%ecx,%eax,M), %ebp
+ shr $RSH, %eax
+ mov 4(up), %ecx
+ add %edx, %edx
+ lea 8(up), up
+ M4_inst %ebp, (rp)
+ lea (%eax,%ecx,M), %eax
+
+L(enteven):
+ M4_inst %eax, 4(rp)
+ lea 8(rp), rp
+
+ sbb %edx, %edx
+ shr $RSH, %ecx
+
+L(entry):
+ mov (up), %eax
+ decl %ebx
More information about the gmp-commit
mailing list