[Gmp-commit] /var/hg/gmp: Small x86/atom adjustment.
mercurial at gmplib.org
mercurial at gmplib.org
Thu Feb 17 11:09:25 CET 2011
details: /var/hg/gmp/rev/22231ea25f79
changeset: 13860:22231ea25f79
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Feb 17 11:09:18 2011 +0100
description:
Small x86/atom adjustment.
diffstat:
ChangeLog | 5 +++++
mpn/x86/atom/aorrlshC_n.asm | 28 ++++++++++++++--------------
mpn/x86/atom/aorsmul_1.asm | 30 +++++++++++++++---------------
3 files changed, 34 insertions(+), 29 deletions(-)
diffs (200 lines):
diff -r df3505e3da32 -r 22231ea25f79 ChangeLog
--- a/ChangeLog Wed Feb 16 21:55:05 2011 +0100
+++ b/ChangeLog Thu Feb 17 11:09:18 2011 +0100
@@ -1,3 +1,8 @@
+2011-02-17 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/atom/aorsmul_1.asm: Small improvements for small sizes.
+ * mpn/x86/atom/aorrlshC_n.asm: Tiny size improvements.
+
2011-02-16 Torbjorn Granlund <tege at gmplib.org>
* configure.in: Fix k8/k10 32-bit path setup problem.
diff -r df3505e3da32 -r 22231ea25f79 mpn/x86/atom/aorrlshC_n.asm
--- a/mpn/x86/atom/aorrlshC_n.asm Wed Feb 16 21:55:05 2011 +0100
+++ b/mpn/x86/atom/aorrlshC_n.asm Thu Feb 17 11:09:18 2011 +0100
@@ -38,14 +38,14 @@
dnl re-use parameter space
define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_DBLD')
+define(SAVE_EBP,`PARAM_DBLD')
define(SAVE_VP,`PARAM_SRC')
define(SAVE_UP,`PARAM_DST')
define(M, eval(m4_lshift(1,LSH)))
define(`rp', `%edi')
define(`up', `%esi')
-define(`vp', `%ebp')
+define(`vp', `%ebx')
ASM_START()
TEXT
@@ -75,13 +75,13 @@
incl %ecx C size + 1
mov PARAM_SRC, up
mov vp, SAVE_VP
- shr $1, %ecx C (size+1)\2
+ shr %ecx C (size+1)\2
mov PARAM_DBLD, vp
- mov %ebx, SAVE_EBX
+ mov %ebp, SAVE_EBP
mov %ecx, VAR_COUNT
jnc L(entry) C size odd
- shr $1, %edx C size even
+ shr %edx C size even
mov (vp), %ecx
lea 4(vp), vp
lea (%eax,%ecx,M), %edx
@@ -92,14 +92,14 @@
ALIGN(16)
L(oop):
- lea (%eax,%ecx,M), %ebx
+ lea (%eax,%ecx,M), %ebp
shr $RSH, %ecx
mov 4(vp), %eax
+ shr %edx
lea 8(vp), vp
- shr $1, %edx
- M4_inst (up), %ebx
+ M4_inst (up), %ebp
lea (%ecx,%eax,M), %edx
- mov %ebx, (rp)
+ mov %ebp, (rp)
L(enteven):
M4_inst 4(up), %edx
lea 8(up), up
@@ -112,16 +112,16 @@
decl VAR_COUNT
jnz L(oop)
- lea (%eax,%ecx,M), %ebx
+ lea (%eax,%ecx,M), %ebp
shr $RSH, %ecx
- shr $1, %edx
+ shr %edx
mov SAVE_VP, vp
- M4_inst (up), %ebx
+ M4_inst (up), %ebp
mov %ecx, %eax
mov SAVE_UP, up
M4_inst $0, %eax
- mov %ebx, (rp)
- mov SAVE_EBX, %ebx
+ mov %ebp, (rp)
+ mov SAVE_EBP, %ebp
pop rp FRAME_popl()
ret
EPILOGUE()
diff -r df3505e3da32 -r 22231ea25f79 mpn/x86/atom/aorsmul_1.asm
--- a/mpn/x86/atom/aorsmul_1.asm Wed Feb 16 21:55:05 2011 +0100
+++ b/mpn/x86/atom/aorsmul_1.asm Thu Feb 17 11:09:18 2011 +0100
@@ -1,6 +1,6 @@
-dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl Intel Atom mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -91,14 +91,15 @@
defframe(SAVE_EBP, -16)
deflit(SAVE_SIZE, 16)
+ASM_START()
TEXT
ALIGN(32)
PROLOGUE(M4_function_1)
movl PARAM_SIZE, %edx
- movl PARAM_SRC, %eax
xorl %ecx, %ecx
decl %edx
+ movl PARAM_SRC, %eax
jnz L(start_1)
movl (%eax), %eax
@@ -107,8 +108,8 @@
mull PARAM_MULTIPLIER
M4_inst %eax, (%ecx)
- adcl $0, %edx
movl %edx, %eax
+ adcl $0, %eax
ret
EPILOGUE()
@@ -116,9 +117,9 @@
ALIGN(16)
PROLOGUE(M4_function_1c)
movl PARAM_SIZE, %edx
- movl PARAM_SRC, %eax
decl %edx
+ movl PARAM_SRC, %eax
jnz L(more_than_one_limb)
movl (%eax), %eax
@@ -131,8 +132,8 @@
adcl $0, %edx
M4_inst %eax, (%ecx)
- adcl $0, %edx
movl %edx, %eax
+ adcl $0, %eax
ret
@@ -148,10 +149,10 @@
deflit(`FRAME',16)
movl %ebx, SAVE_EBX
+ movl %edx, %ebx C size-1
movl %esi, SAVE_ESI
- movl %edx, %ebx C size-1
- movl PARAM_SRC, %esi
+ movl %eax, %esi
movl %ebp, SAVE_EBP
cmpl $UNROLL_THRESHOLD, %edx
@@ -165,10 +166,6 @@
C simple loop
- leal 4(%esi,%ebx,4), %esi C point one limb past last
- leal (%edi,%ebx,4), %edi C point at last limb
- negl %ebx
-
C The movl to load the next source limb is done well ahead of the
C mul. This is necessary for full speed, and leads to one limb
C handled separately at the end.
@@ -183,15 +180,17 @@
C ebp multiplier
mull %ebp
+ leal 4(%esi), %esi
addl %eax, %ecx
adcl $0, %edx
+ movl (%esi), %eax
- M4_inst %ecx, (%edi,%ebx,4)
- movl (%esi,%ebx,4), %eax
+ M4_inst %ecx, (%edi)
+ leal 4(%edi), %edi
adcl $0, %edx
- incl %ebx
+ decl %ebx
movl %edx, %ecx
jnz L(simple)
@@ -368,3 +367,4 @@
ret
EPILOGUE()
+ASM_END()
More information about the gmp-commit
mailing list