[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Feb 26 17:05:53 CET 2011


details:   /var/hg/gmp/rev/ef472f66a4e0
changeset: 13909:ef472f66a4e0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 26 17:04:48 2011 +0100
description:
From Marco: Optimise non-loop code.

details:   /var/hg/gmp/rev/4a38cf2fd7c6
changeset: 13910:4a38cf2fd7c6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 26 17:04:55 2011 +0100
description:
*** empty log message ***

details:   /var/hg/gmp/rev/3588847580e7
changeset: 13911:3588847580e7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Feb 26 17:05:50 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                       |   9 +++++
 mpn/generic/mod_1_1.c           |   5 +++
 mpn/x86/atom/sse2/aorsmul_1.asm |  68 ++++++++++++++++++----------------------
 3 files changed, 45 insertions(+), 37 deletions(-)

diffs (148 lines):

diff -r 3ea54f250ae5 -r 3588847580e7 ChangeLog
--- a/ChangeLog	Fri Feb 25 21:42:43 2011 +0100
+++ b/ChangeLog	Sat Feb 26 17:05:50 2011 +0100
@@ -1,3 +1,12 @@
+2011-02-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86/atom/sse2/aorsmul_1.asm: Optimise non-loop code.
+
+2011-02-26  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/mode64/aorsmul_1.asm: Add missing MULFUNC_PROLOGUE.
+	* mpn/m68k/mc68020/aorsmul_1.asm: Likewise.
+
 2011-02-25  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86/atom/sse2/aorsmul_1.asm: New file.
diff -r 3ea54f250ae5 -r 3588847580e7 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c	Fri Feb 25 21:42:43 2011 +0100
+++ b/mpn/generic/mod_1_1.c	Sat Feb 26 17:05:50 2011 +0100
@@ -50,6 +50,11 @@
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
   cps[2] = B1modb >> cnt;
 
+  /* In the normalized case, this can be simplified to
+   *
+   *   B2modb = - b * bi;
+   *   ASSERT (B2modb <= b);    // NB: equality iff b = B/2
+   */
   udiv_rnd_preinv (B2modb, B1modb, b, bi);
   cps[3] = B2modb >> cnt;
 }
diff -r 3ea54f250ae5 -r 3588847580e7 mpn/x86/atom/sse2/aorsmul_1.asm
--- a/mpn/x86/atom/sse2/aorsmul_1.asm	Fri Feb 25 21:42:43 2011 +0100
+++ b/mpn/x86/atom/sse2/aorsmul_1.asm	Sat Feb 26 17:05:50 2011 +0100
@@ -1,6 +1,6 @@
 dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
 dnl
 dnl  Copyright 2011 Free Software Foundation, Inc.
 dnl
@@ -62,51 +62,46 @@
 	push	%ebx
 	mov	16(%esp), rp
 	mov	20(%esp), up
-	mov	24(%esp), n
+	mov	24(%esp), %eax
 	movd	28(%esp), %mm7
-	mov	n, %eax
+	mov	%eax, n
+	and	$1, %eax
+	jz	L(fi0or2)
+	movd	(up), %mm1
+	pmuludq	%mm7, %mm1
 	shr	$2, n
-	and	$3, %eax
-	jz	L(fi0)
-	cmp	$2, %eax
-	jc	L(fi1)
-	jz	L(fi2)
+	jnc	L(fi1)
 
-L(fi3):	lea	-12(rp), rp
-	movd	(up), %mm1
-	lea	4(up), up
-	pmuludq	%mm7, %mm1
+L(fi3):	lea	4(up), up
+	lea	-12(rp), rp
 	movd	%mm1, %ebx
+	add	$1, n			C increment and clear carry
 	movd	(up), %mm0
-	inc	n
 	jmp	L(lo3)
 
-L(fi0):	lea	-8(rp), rp
+L(fi1):	lea	-4(rp), rp
+	movd	%mm1, %ebx
+	jz	L(wd1)
+	movd	4(up), %mm0
+	lea	-4(up), up
+	pmuludq	%mm7, %mm0
+	jmp	L(lo1)
+
+L(fi0or2):
 	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	shr	$2, n
+	movd	4(up), %mm1
+	jc	L(fi2)
 	lea	-8(up), up
-	pmuludq	%mm7, %mm0
+	lea	-8(rp), rp
 	movd	%mm0, %eax
-	movd	12(up), %mm1
 	pmuludq	%mm7, %mm1
 	jmp	L(lo0)
 
-L(fi1):	lea	-4(rp), rp
-	movd	(up), %mm1
-	lea	-4(up), up
-	pmuludq	%mm7, %mm1
-	movd	%mm1, %ebx
-	test	n, n
-	jz	L(wd1)
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	jmp	L(lo1)
-
-L(fi2):	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	4(up), %mm1
+L(fi2):	test	n, n			C clear carry
 	movd	%mm0, %eax
 	pmuludq	%mm7, %mm1
-	test	n, n
 	jnz	L(lo2)
 	jmp	L(wd2)
 
@@ -148,7 +143,7 @@
 	movd	4(up), %mm1
 	jnz	L(top)
 
-L(end):	adc	$0, %edx
+L(end):	adc	n, %edx			C n is zero here
 	ADDSUB	%ebx, 12(rp)
 	movd	%mm0, %eax
 	pmuludq	%mm7, %mm1
@@ -157,15 +152,14 @@
 	adc	%edx, %eax
 	movd	%mm0, %edx
 	movd	%mm1, %ebx
-	adc	$0, %edx
+	adc	n, %edx
 	ADDSUB	%eax, (rp)
 L(wd1):	psrlq	$32, %mm1
 	adc	%edx, %ebx
-	movd	%mm1, %edx
-	adc	$0, %edx
+	movd	%mm1, %eax
+	adc	n, %eax
 	ADDSUB	%ebx, 4(rp)
-	mov	%edx, %eax
-	adc	$0, %eax
+	adc	n, %eax
 	emms
 	pop	%ebx
 	pop	%esi


More information about the gmp-commit mailing list