[Gmp-commit] /var/hg/gmp: Small x86/atom adjustment.

mercurial at gmplib.org mercurial at gmplib.org
Thu Feb 17 11:09:25 CET 2011


details:   /var/hg/gmp/rev/22231ea25f79
changeset: 13860:22231ea25f79
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Feb 17 11:09:18 2011 +0100
description:
Small x86/atom adjustment.

diffstat:

 ChangeLog                   |   5 +++++
 mpn/x86/atom/aorrlshC_n.asm |  28 ++++++++++++++--------------
 mpn/x86/atom/aorsmul_1.asm  |  30 +++++++++++++++---------------
 3 files changed, 34 insertions(+), 29 deletions(-)

diffs (200 lines):

diff -r df3505e3da32 -r 22231ea25f79 ChangeLog
--- a/ChangeLog	Wed Feb 16 21:55:05 2011 +0100
+++ b/ChangeLog	Thu Feb 17 11:09:18 2011 +0100
@@ -1,3 +1,8 @@
+2011-02-17 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86/atom/aorsmul_1.asm: Small improvements for small sizes.
+	* mpn/x86/atom/aorrlshC_n.asm: Tiny size improvements.
+
 2011-02-16  Torbjorn Granlund  <tege at gmplib.org>
 
 	* configure.in: Fix k8/k10 32-bit path setup problem.
diff -r df3505e3da32 -r 22231ea25f79 mpn/x86/atom/aorrlshC_n.asm
--- a/mpn/x86/atom/aorrlshC_n.asm	Wed Feb 16 21:55:05 2011 +0100
+++ b/mpn/x86/atom/aorrlshC_n.asm	Thu Feb 17 11:09:18 2011 +0100
@@ -38,14 +38,14 @@
 
 dnl  re-use parameter space
 define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_DBLD')
+define(SAVE_EBP,`PARAM_DBLD')
 define(SAVE_VP,`PARAM_SRC')
 define(SAVE_UP,`PARAM_DST')
 
 define(M, eval(m4_lshift(1,LSH)))
 define(`rp',  `%edi')
 define(`up',  `%esi')
-define(`vp',  `%ebp')
+define(`vp',  `%ebx')
 
 ASM_START()
 	TEXT
@@ -75,13 +75,13 @@
 	incl	%ecx			C size + 1
 	mov	PARAM_SRC, up
 	mov	vp, SAVE_VP
-	shr	$1, %ecx		C (size+1)\2
+	shr	%ecx			C (size+1)\2
 	mov	PARAM_DBLD, vp
-	mov	%ebx, SAVE_EBX
+	mov	%ebp, SAVE_EBP
 	mov	%ecx, VAR_COUNT
 	jnc	L(entry)		C size odd
 
-	shr	$1, %edx		C size even
+	shr	%edx			C size even
 	mov	(vp), %ecx
 	lea	4(vp), vp
 	lea	(%eax,%ecx,M), %edx
@@ -92,14 +92,14 @@
 
 	ALIGN(16)
 L(oop):
-	lea	(%eax,%ecx,M), %ebx
+	lea	(%eax,%ecx,M), %ebp
 	shr	$RSH, %ecx
 	mov	4(vp), %eax
+	shr	%edx
 	lea 	8(vp), vp
-	shr	$1, %edx
-	M4_inst	(up), %ebx
+	M4_inst	(up), %ebp
 	lea	(%ecx,%eax,M), %edx
-	mov	%ebx, (rp)
+	mov	%ebp, (rp)
 L(enteven):
 	M4_inst	4(up), %edx
 	lea	8(up), up
@@ -112,16 +112,16 @@
 	decl	VAR_COUNT
 	jnz	L(oop)
 
-	lea	(%eax,%ecx,M), %ebx
+	lea	(%eax,%ecx,M), %ebp
 	shr	$RSH, %ecx
-	shr	$1, %edx
+	shr	%edx
 	mov	SAVE_VP, vp
-	M4_inst	(up), %ebx
+	M4_inst	(up), %ebp
 	mov	%ecx, %eax
 	mov	SAVE_UP, up
 	M4_inst	$0, %eax
-	mov	%ebx, (rp)
-	mov	SAVE_EBX, %ebx
+	mov	%ebp, (rp)
+	mov	SAVE_EBP, %ebp
 	pop	rp			FRAME_popl()
 	ret
 EPILOGUE()
diff -r df3505e3da32 -r 22231ea25f79 mpn/x86/atom/aorsmul_1.asm
--- a/mpn/x86/atom/aorsmul_1.asm	Wed Feb 16 21:55:05 2011 +0100
+++ b/mpn/x86/atom/aorsmul_1.asm	Thu Feb 17 11:09:18 2011 +0100
@@ -1,6 +1,6 @@
-dnl  AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl  Intel Atom mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
 
-dnl  Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -91,14 +91,15 @@
 defframe(SAVE_EBP, -16)
 deflit(SAVE_SIZE, 16)
 
+ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(M4_function_1)
 	movl	PARAM_SIZE, %edx
-	movl	PARAM_SRC, %eax
 	xorl	%ecx, %ecx
 
 	decl	%edx
+	movl	PARAM_SRC, %eax
 	jnz	L(start_1)
 
 	movl	(%eax), %eax
@@ -107,8 +108,8 @@
 	mull	PARAM_MULTIPLIER
 
 	M4_inst	%eax, (%ecx)
-	adcl	$0, %edx
 	movl	%edx, %eax
+	adcl	$0, %eax
 
 	ret
 EPILOGUE()
@@ -116,9 +117,9 @@
 	ALIGN(16)
 PROLOGUE(M4_function_1c)
 	movl	PARAM_SIZE, %edx
-	movl	PARAM_SRC, %eax
 
 	decl	%edx
+	movl	PARAM_SRC, %eax
 	jnz	L(more_than_one_limb)
 
 	movl	(%eax), %eax
@@ -131,8 +132,8 @@
 	adcl	$0, %edx
 	M4_inst	%eax, (%ecx)
 
-	adcl	$0, %edx
 	movl	%edx, %eax
+	adcl	$0, %eax
 
 	ret
 
@@ -148,10 +149,10 @@
 deflit(`FRAME',16)
 
 	movl	%ebx, SAVE_EBX
+	movl	%edx, %ebx	C size-1
 	movl	%esi, SAVE_ESI
-	movl	%edx, %ebx	C size-1
 
-	movl	PARAM_SRC, %esi
+	movl	%eax, %esi
 	movl	%ebp, SAVE_EBP
 	cmpl	$UNROLL_THRESHOLD, %edx
 
@@ -165,10 +166,6 @@
 
 	C simple loop
 
-	leal	4(%esi,%ebx,4), %esi	C point one limb past last
-	leal	(%edi,%ebx,4), %edi	C point at last limb
-	negl	%ebx
-
 	C The movl to load the next source limb is done well ahead of the
 	C mul.  This is necessary for full speed, and leads to one limb
 	C handled separately at the end.
@@ -183,15 +180,17 @@
 	C ebp	multiplier
 
 	mull	%ebp
+	leal	4(%esi), %esi
 
 	addl	%eax, %ecx
 	adcl	$0, %edx
+	movl	(%esi), %eax
 
-	M4_inst	%ecx, (%edi,%ebx,4)
-	movl	(%esi,%ebx,4), %eax
+	M4_inst	%ecx, (%edi)
+	leal	4(%edi), %edi
 	adcl	$0, %edx
 
-	incl	%ebx
+	decl	%ebx
 	movl	%edx, %ecx
 	jnz	L(simple)
 
@@ -368,3 +367,4 @@
 	ret
 
 EPILOGUE()
+ASM_END()


More information about the gmp-commit mailing list