[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Feb 22 17:38:16 CET 2011
details: /var/hg/gmp/rev/3ead136b2f93
changeset: 13881:3ead136b2f93
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Feb 22 17:21:11 2011 +0100
description:
New lshiftc for atom/32.
details: /var/hg/gmp/rev/dfa98055ca84
changeset: 13882:dfa98055ca84
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Feb 22 17:35:57 2011 +0100
description:
More x86 c/l tables.
details: /var/hg/gmp/rev/60c5f9ff0189
changeset: 13883:60c5f9ff0189
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue Feb 22 17:38:11 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 7 ++
mpn/x86/atom/aorrlshC_n.asm | 20 +++++-
mpn/x86/atom/aors_n.asm | 56 ++++++++++-----
mpn/x86/atom/aorslshC_n.asm | 17 +++++
mpn/x86/atom/aorsmul_1.asm | 3 +-
mpn/x86/atom/lshift.asm | 20 +++++
mpn/x86/atom/lshiftc.asm | 148 ++++++++++++++++++++++++++++++++++++++++++++
mpn/x86/atom/rshift.asm | 19 +++++
8 files changed, 267 insertions(+), 23 deletions(-)
diffs (truncated from 406 to 300 lines):
diff -r a8c2fdcab134 -r 60c5f9ff0189 ChangeLog
--- a/ChangeLog Mon Feb 21 22:17:58 2011 +0100
+++ b/ChangeLog Tue Feb 22 17:38:11 2011 +0100
@@ -1,5 +1,12 @@
+2011-02-22 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/atom/lshiftc.asm: New file (a copy of lshift.asm with a handful of not instructions added).
+
2011-02-21 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86/atom/aors_n.asm: Move _nc entry to after main code. Align loop
+ and _n entry for claimed performance. Normalise mnemonic usage.
+
* mpn/x86/atom/aorrlsh1_n.asm: New file (code from rsblsh_1, slightly
slower for addlsh_1 for large operands, but much faster for small).
* mpn/x86/atom/addlsh1_n.asm: Remove.
diff -r a8c2fdcab134 -r 60c5f9ff0189 mpn/x86/atom/aorrlshC_n.asm
--- a/mpn/x86/atom/aorrlshC_n.asm Mon Feb 21 22:17:58 2011 +0100
+++ b/mpn/x86/atom/aorrlshC_n.asm Tue Feb 22 17:38:11 2011 +0100
@@ -22,14 +22,30 @@
include(`../config.m4')
C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size,);
+C mp_size_t size);
C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C mp_size_t size, mp_limb_t carry);
C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size,);
+C mp_size_t size);
C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C mp_size_t size, mp_signed_limb_t carry);
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
defframe(PARAM_CORB, 20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_DBLD, 12)
diff -r a8c2fdcab134 -r 60c5f9ff0189 mpn/x86/atom/aors_n.asm
--- a/mpn/x86/atom/aors_n.asm Mon Feb 21 22:17:58 2011 +0100
+++ b/mpn/x86/atom/aors_n.asm Tue Feb 22 17:38:11 2011 +0100
@@ -21,6 +21,22 @@
include(`../config.m4')
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 3
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
ifdef(`OPERATION_add_n', `
define(M4_inst, adcl)
define(M4_function_n, mpn_add_n)
@@ -69,25 +85,20 @@
ASM_START()
TEXT
- ALIGN(8)
+ ALIGN(16)
deflit(`FRAME',0)
-PROLOGUE(M4_function_nc)
- movl PARAM_CARRY, cy C carry
- jmp L(start)
-EPILOGUE()
-
PROLOGUE(M4_function_n)
- xorl cy, cy C carry
+ xor cy, cy C carry
L(start):
- movl PARAM_SIZE, %eax C size
- movl rp, SAVE_RP
- movl PARAM_DST, rp
- movl up, SAVE_UP
+ mov PARAM_SIZE, %eax C size
+ mov rp, SAVE_RP
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
shr %eax C size >> 1
- movl PARAM_SRC1, up
- movl vp, SAVE_VP
- movl PARAM_SRC2, vp
+ mov PARAM_SRC1, up
+ mov vp, SAVE_VP
+ mov PARAM_SRC2, vp
jz L(one) C size == 1
jc L(three) C size % 2 == 1
@@ -105,7 +116,7 @@
shr cy
mov (up), r1
- ALIGN(8)
+ ALIGN(16)
L(oop):
M4_inst (vp), r1
lea 8(up), up
@@ -114,19 +125,24 @@
mov r1, (rp)
L(entry):
M4_inst -4(vp), r2
- decl %eax
+ dec %eax
lea 8(rp), rp
mov (up), r1
mov r2, -4(rp)
jnz L(oop)
-L(end): C %eax is zero here
- movl SAVE_UP, up
+L(end): C %eax is zero here
+ mov SAVE_UP, up
M4_inst (vp), r1
- movl SAVE_VP, vp
+ mov SAVE_VP, vp
mov r1, (rp)
adc %eax, %eax
- movl SAVE_RP, rp
+ mov SAVE_RP, rp
ret
EPILOGUE()
+
+PROLOGUE(M4_function_nc)
+ mov PARAM_CARRY, cy C carry
+ jmp L(start)
+EPILOGUE()
ASM_END()
diff -r a8c2fdcab134 -r 60c5f9ff0189 mpn/x86/atom/aorslshC_n.asm
--- a/mpn/x86/atom/aorslshC_n.asm Mon Feb 21 22:17:58 2011 +0100
+++ b/mpn/x86/atom/aorslshC_n.asm Tue Feb 22 17:38:11 2011 +0100
@@ -44,6 +44,23 @@
C if src1 == dst, _ip1 is used
+C cycles/limb
+C dst!=src1,src2 dst==src1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 7 6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
defframe(GPARAM_CORB, 20)
defframe(GPARAM_SIZE, 16)
defframe(GPARAM_SRC2, 12)
diff -r a8c2fdcab134 -r 60c5f9ff0189 mpn/x86/atom/aorsmul_1.asm
--- a/mpn/x86/atom/aorsmul_1.asm Mon Feb 21 22:17:58 2011 +0100
+++ b/mpn/x86/atom/aorsmul_1.asm Tue Feb 22 17:38:11 2011 +0100
@@ -30,10 +30,11 @@
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
+C Intel Atom 11
C AMD K6
C AMD K7 3.9
C AMD K8
-
+C AMD K10
dnl K7: UNROLL_COUNT cycles/limb
dnl 4 4.42
diff -r a8c2fdcab134 -r 60c5f9ff0189 mpn/x86/atom/lshift.asm
--- a/mpn/x86/atom/lshift.asm Mon Feb 21 22:17:58 2011 +0100
+++ b/mpn/x86/atom/lshift.asm Tue Feb 22 17:38:11 2011 +0100
@@ -21,6 +21,26 @@
include(`../config.m4')
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned cnt);
+
+C cycles/limb
+C cnt!=1 cnt==1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 5 2.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
defframe(PARAM_CNT, 16)
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
diff -r a8c2fdcab134 -r 60c5f9ff0189 mpn/x86/atom/lshiftc.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/lshiftc.asm Tue Feb 22 17:38:11 2011 +0100
@@ -0,0 +1,148 @@
+dnl Intel Atom mpn_lshiftc -- mpn left shift with complement.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned cnt);
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 5.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`cnt', `%ecx')
+
+ASM_START()
+ TEXT
+
+PROLOGUE(mpn_lshiftc)
+deflit(`FRAME',0)
+ mov PARAM_CNT, cnt
+ mov PARAM_SIZE, %edx
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+
+ lea -4(up,%edx,4), up
+ mov %ebx, SAVE_EBX
More information about the gmp-commit
mailing list