[Gmp-commit] /home/hgfiles/gmp-4.3: Don't rely on ZF after 'bt' insn.

mercurial at gmplib.org mercurial at gmplib.org
Thu Nov 11 14:10:03 CET 2010


details:   /home/hgfiles/gmp-4.3/rev/1a2742d53add
changeset: 12553:1a2742d53add
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Nov 11 14:09:58 2010 +0100
description:
Don't rely on ZF after 'bt' insn.

diffstat:

 ChangeLog                  |   7 ++++
 mpn/x86_64/aors_n.asm      |  77 ++++++++++++++++++++++++---------------------
 mpn/x86_64/atom/aors_n.asm |  23 +++++++-----
 3 files changed, 61 insertions(+), 46 deletions(-)

diffs (197 lines):

diff -r 23bc00ec2a06 -r 1a2742d53add ChangeLog
--- a/ChangeLog	Thu Nov 11 12:02:20 2010 +0100
+++ b/ChangeLog	Thu Nov 11 14:09:58 2010 +0100
@@ -1,8 +1,15 @@
 2010-11-11  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/x86_64/atom/aors_n.asm: Don't rely on ZF after 'bt' insn.
+	Use 64-bit 'test' to support operands of 2^32 limbs and more.
+
 	* doc/gmp.texi (Low-level Functions): Remove documentation for
 	non-existing function mpn_sqr_n.
 
+2010-11-06  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
+
 2010-02-08  Marco Bodrato  <bodrato at mail.dm.unipi.it>
 
 	* mpn/x86/atom/gmp-mparam.h: New file.
diff -r 23bc00ec2a06 -r 1a2742d53add mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm	Thu Nov 11 12:02:20 2010 +0100
+++ b/mpn/x86_64/aors_n.asm	Thu Nov 11 14:09:58 2010 +0100
@@ -1,6 +1,7 @@
 dnl  AMD64 mpn_add_n, mpn_sub_n
 
-dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -52,24 +53,57 @@
 	ALIGN(16)
 PROLOGUE(func_nc)
 	mov	R32(n), R32(%rax)
+	shr	$2, n
 	and	$3, R32(%rax)
-	shr	$2, n
 	bt	$0, %r8			C cy flag <- carry parameter
-	jz	L(1)
-	jmp	L(ent)
+	jrcxz	L(lt4)
+
+	mov	(up), %r8
+	mov	8(up), %r9
+	dec	n
+	jmp	L(mid)
+
 EPILOGUE()
 	ALIGN(16)
 PROLOGUE(func)
 	mov	R32(n), R32(%rax)
 	shr	$2, n
-	jz	L(0)
 	and	$3, R32(%rax)
+	jrcxz	L(lt4)
 
-L(ent):	mov	(up), %r8
+	mov	(up), %r8
 	mov	8(up), %r9
 	dec	n
 	jmp	L(mid)
 
+L(lt4):	dec	R32(%rax)
+	mov	(up), %r8
+	jnz	L(2)
+	ADCSBB	(vp), %r8
+	mov	%r8, (rp)
+	adc	%eax, %eax
+	ret
+
+L(2):	dec	R32(%rax)
+	mov	8(up), %r9
+	jnz	L(3)
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	adc	%eax, %eax
+	ret
+
+L(3):	mov	16(up), %r10
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	setc	R8(%rax)
+	ret
+
 	ALIGN(16)
 L(top):	ADCSBB	(vp), %r8
 	ADCSBB	8(vp), %r9
@@ -103,36 +137,7 @@
 
 	inc	R32(%rax)
 	dec	R32(%rax)
-	jnz	L(1)
+	jnz	L(lt4)
 	adc	%eax, %eax
 	ret
-
-L(0):	test	R32(%rax), R32(%rax)
-L(1):	dec	R32(%rax)
-	mov	(up), %r8
-	jnz	L(2)
-	ADCSBB	(vp), %r8
-	mov	%r8, (rp)
-	adc	%eax, %eax
-	ret
-
-L(2):	dec	R32(%rax)
-	mov	8(up), %r9
-	jnz	L(3)
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	adc	%eax, %eax
-	ret
-
-L(3):	mov	16(up), %r10
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	ADCSBB	16(vp), %r10
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	setc	%al
-	ret
 EPILOGUE()
diff -r 23bc00ec2a06 -r 1a2742d53add mpn/x86_64/atom/aors_n.asm
--- a/mpn/x86_64/atom/aors_n.asm	Thu Nov 11 12:02:20 2010 +0100
+++ b/mpn/x86_64/atom/aors_n.asm	Thu Nov 11 14:09:58 2010 +0100
@@ -1,6 +1,6 @@
 dnl  X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
 
-dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -63,15 +63,16 @@
 	jg	L(b3)
 
 L(b1):	mov	(%rsi), %r10
-	test	R32(%rcx), R32(%rcx)
-	bt	$0, R32(%r8)
+	test	%rcx, %rcx
 	jnz	L(gt1)
+	shr	R32(%r8)			C Set CF from argument
 	ADCSBB	(%rdx), %r10
 	mov	%r10, (%rdi)
 	mov	R32(%rcx), R32(%rax)		C zero rax
 	adc	R32(%rax), R32(%rax)
 	ret
-L(gt1):	ADCSBB	(%rdx), %r10
+L(gt1):	shr	R32(%r8)
+	ADCSBB	(%rdx), %r10
 	mov	8(%rsi), %r11
 	lea	16(%rsi), %rsi
 	lea	-16(%rdx), %rdx
@@ -81,12 +82,13 @@
 L(b2):	mov	(%rsi), %r9
 	mov	8(%rsi), %r10
 	lea	-8(%rdx), %rdx
-	test	R32(%rcx), R32(%rcx)
-	bt	$0, R32(%r8)
+	test	%rcx, %rcx
 	jnz	L(gt2)
+	shr	R32(%r8)
 	lea	-40(%rdi), %rdi
 	jmp	L(e2)
-L(gt2):	ADCSBB	8(%rdx), %r9
+L(gt2):	shr	R32(%r8)
+	ADCSBB	8(%rdx), %r9
 	mov	16(%rsi), %r11
 	lea	-8(%rsi), %rsi
 	lea	-8(%rdi), %rdi
@@ -95,12 +97,13 @@
 L(b3):	mov	(%rsi), %rax
 	mov	8(%rsi), %r9
 	mov	16(%rsi), %r10
-	test	R32(%rcx), R32(%rcx)
-	bt	$0, %r8
+	test	%rcx, %rcx
 	jnz	L(gt3)
+	shr	R32(%r8)
 	lea	-32(%rdi), %rdi
 	jmp	L(e3)
-L(gt3):	ADCSBB	(%rdx), %rax
+L(gt3):	shr	R32(%r8)
+	ADCSBB	(%rdx), %rax
 	jmp	L(m3)
 
 L(b0):	mov	(%rsi), %r11


More information about the gmp-commit mailing list