[Gmp-commit] /home/hgfiles/gmp-5.0: Rewrite not to rely on ZF after 'bt' insn.

mercurial at gmplib.org mercurial at gmplib.org
Sat Nov 6 19:36:23 CET 2010


details:   /home/hgfiles/gmp-5.0/rev/d38bf82a975e
changeset: 13435:d38bf82a975e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Nov 06 19:36:19 2010 +0100
description:
Rewrite not to rely on ZF after 'bt' insn.

diffstat:

 ChangeLog             |   4 ++
 mpn/x86_64/aors_n.asm |  90 +++++++++++++++++++++++++++-----------------------
 2 files changed, 52 insertions(+), 42 deletions(-)

diffs (144 lines):

diff -r 1878a7b7e66e -r d38bf82a975e ChangeLog
--- a/ChangeLog	Fri May 14 11:40:09 2010 +0200
+++ b/ChangeLog	Sat Nov 06 19:36:19 2010 +0100
@@ -1,3 +1,7 @@
+2010-11-06  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
+
 2010-05-14  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/redc_2.c: Use asm code just for GNU C.
diff -r 1878a7b7e66e -r d38bf82a975e mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm	Fri May 14 11:40:09 2010 +0200
+++ b/mpn/x86_64/aors_n.asm	Sat Nov 06 19:36:19 2010 +0100
@@ -1,6 +1,7 @@
 dnl  AMD64 mpn_add_n, mpn_sub_n
 
-dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,12 +21,13 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C K8,K9:	 1.5
-C K10:		 1.5
-C P4:		 ?
-C P6 core2: 	 4.9
-C P6 corei7:
-C P6 atom:	 4
+C AMD K8,K9	 1.5
+C AMD K10	 1.5
+C Intel P4	 ?
+C Intel core2 	 4.9
+C Intel corei	 ?
+C Intel atom	 4
+C VIA nano	 3.25
 
 C The inner loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
@@ -53,24 +55,57 @@
 	ALIGN(16)
 PROLOGUE(func_nc)
 	mov	R32(n), R32(%rax)
+	shr	$2, n
 	and	$3, R32(%rax)
-	shr	$2, n
 	bt	$0, %r8			C cy flag <- carry parameter
-	jz	L(1)
-	jmp	L(ent)
+	jrcxz	L(lt4)
+
+	mov	(up), %r8
+	mov	8(up), %r9
+	dec	n
+	jmp	L(mid)
+
 EPILOGUE()
 	ALIGN(16)
 PROLOGUE(func)
 	mov	R32(n), R32(%rax)
 	shr	$2, n
-	jz	L(0)
 	and	$3, R32(%rax)
+	jrcxz	L(lt4)
 
-L(ent):	mov	(up), %r8
+	mov	(up), %r8
 	mov	8(up), %r9
 	dec	n
 	jmp	L(mid)
 
+L(lt4):	dec	R32(%rax)
+	mov	(up), %r8
+	jnz	L(2)
+	ADCSBB	(vp), %r8
+	mov	%r8, (rp)
+	adc	%eax, %eax
+	ret
+
+L(2):	dec	R32(%rax)
+	mov	8(up), %r9
+	jnz	L(3)
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	adc	%eax, %eax
+	ret
+
+L(3):	mov	16(up), %r10
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	setc	R8(%rax)
+	ret
+
 	ALIGN(16)
 L(top):	ADCSBB	(vp), %r8
 	ADCSBB	8(vp), %r9
@@ -104,36 +139,7 @@
 
 	inc	R32(%rax)
 	dec	R32(%rax)
-	jnz	L(1)
+	jnz	L(lt4)
 	adc	%eax, %eax
 	ret
-
-L(0):	test	R32(%rax), R32(%rax)
-L(1):	dec	R32(%rax)
-	mov	(up), %r8
-	jnz	L(2)
-	ADCSBB	(vp), %r8
-	mov	%r8, (rp)
-	adc	%eax, %eax
-	ret
-
-L(2):	dec	R32(%rax)
-	mov	8(up), %r9
-	jnz	L(3)
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	adc	%eax, %eax
-	ret
-
-L(3):	mov	16(up), %r10
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	ADCSBB	16(vp), %r10
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	setc	%al
-	ret
 EPILOGUE()


More information about the gmp-commit mailing list