[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Mar 1 19:27:53 CET 2011


details:   /var/hg/gmp/rev/29211e0f75a1
changeset: 13959:29211e0f75a1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Mar 01 19:23:20 2011 +0100
description:
Swap entry insns to share more code between entry points.

details:   /var/hg/gmp/rev/511149603f44
changeset: 13960:511149603f44
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Mar 01 19:24:40 2011 +0100
description:
Add contributors.

details:   /var/hg/gmp/rev/c1d59877007a
changeset: 13961:c1d59877007a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Mar 01 19:25:28 2011 +0100
description:
Correct cycle counts.

details:   /var/hg/gmp/rev/ea8947c4bf1d
changeset: 13962:ea8947c4bf1d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Mar 01 19:26:39 2011 +0100
description:
Add contributors.

details:   /var/hg/gmp/rev/e0c2725df0f7
changeset: 13963:e0c2725df0f7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Mar 01 19:27:49 2011 +0100
description:
Fix a comment typo.

diffstat:

 ChangeLog                          |   4 ++++
 mpn/x86/pentium4/sse2/addmul_1.asm |  23 +++++++++--------------
 mpn/x86/pentium4/sse2/mul_1.asm    |  31 +++++++++++++------------------
 mpn/x86_64/atom/aorrlsh1_n.asm     |   2 ++
 mpn/x86_64/atom/aorrlsh2_n.asm     |   2 ++
 mpn/x86_64/atom/lshift.asm         |   2 ++
 mpn/x86_64/atom/lshiftc.asm        |   2 ++
 mpn/x86_64/atom/rshift.asm         |   2 ++
 mpn/x86_64/atom/sublsh1_n.asm      |   2 ++
 mpn/x86_64/logops_n.asm            |   4 ++--
 mpn/x86_64/mod_1_1.asm             |   2 +-
 11 files changed, 41 insertions(+), 35 deletions(-)

diffs (221 lines):

diff -r d8d1be62260c -r e0c2725df0f7 ChangeLog
--- a/ChangeLog	Tue Mar 01 19:14:53 2011 +0100
+++ b/ChangeLog	Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,9 @@
 2011-03-01  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/x86/pentium4/sse2/mul_1.asm: Swap entry insns to share more code
+	between entry points.
+	* mpn/x86/pentium4/sse2/addmul_1.asm: Likewise.
+
 	* mpz/divegcd.c: Rewrite, as per Marc Glisse's suggestion.  Also fix
 	problem with passing a longlong limb to a _ui function.
 
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86/pentium4/sse2/addmul_1.asm
--- a/mpn/x86/pentium4/sse2/addmul_1.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86/pentium4/sse2/addmul_1.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -1,6 +1,6 @@
 dnl  mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
 
-dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -40,22 +40,13 @@
 
 	TEXT
 	ALIGN(16)
-PROLOGUE(mpn_addmul_1c)
-	mov	4(%esp), %edx
+PROLOGUE(mpn_addmul_1)
+	pxor	%mm6, %mm6
+L(ent):	mov	4(%esp), %edx
 	mov	8(%esp), %eax
 	mov	12(%esp), %ecx
 	movd	16(%esp), %mm7
-	movd	20(%esp), %mm6
-	jmp	L(ent)
-EPILOGUE()
-	ALIGN(16)
-PROLOGUE(mpn_addmul_1)
-	mov	4(%esp), %edx
-	mov	8(%esp), %eax
-	mov	12(%esp), %ecx
-	movd	16(%esp), %mm7
-	pxor	%mm6, %mm6
-L(ent):	cmp	$4, %ecx
+	cmp	$4, %ecx
 	jnc	L(big)
 
 L(lp0):	movd	(%eax), %mm0
@@ -181,3 +172,7 @@
 	emms
 	ret
 EPILOGUE()
+PROLOGUE(mpn_addmul_1c)
+	movd	20(%esp), %mm6
+	jmp	L(ent)
+EPILOGUE()
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86/pentium4/sse2/mul_1.asm
--- a/mpn/x86/pentium4/sse2/mul_1.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86/pentium4/sse2/mul_1.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -1,6 +1,6 @@
 dnl  mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
 
-dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -20,10 +20,6 @@
 
 include(`../config.m4')
 
-C TODO:
-C  * Tweak eax/edx offsets in loop as to save some lea's
-C  * Perhaps software pipeline small-case code
-
 C                           cycles/limb
 C P6 model 0-8,10-12		-
 C P6 model 9   (Banias)		4.17
@@ -32,6 +28,10 @@
 C P4 model 2   (Northwood)	4
 C P4 model 3-4 (Prescott)	4.55
 
+C TODO:
+C  * Tweak eax/edx offsets in loop as to save some lea's
+C  * Perhaps software pipeline small-case code
+
 C INPUT PARAMETERS
 C rp		sp + 4
 C up		sp + 8
@@ -40,22 +40,13 @@
 
 	TEXT
 	ALIGN(16)
-PROLOGUE(mpn_mul_1c)
-	mov	4(%esp), %edx
+PROLOGUE(mpn_mul_1)
+	pxor	%mm6, %mm6
+L(ent):	mov	4(%esp), %edx
 	mov	8(%esp), %eax
 	mov	12(%esp), %ecx
 	movd	16(%esp), %mm7
-	movd	20(%esp), %mm6
-	jmp	L(ent)
-EPILOGUE()
-	ALIGN(16)
-PROLOGUE(mpn_mul_1)
-	mov	4(%esp), %edx
-	mov	8(%esp), %eax
-	mov	12(%esp), %ecx
-	movd	16(%esp), %mm7
-	pxor	%mm6, %mm6
-L(ent):	cmp	$4, %ecx
+	cmp	$4, %ecx
 	jnc	L(big)
 
 L(lp0):	movd	(%eax), %mm0
@@ -156,3 +147,7 @@
 	emms
 	ret
 EPILOGUE()
+PROLOGUE(mpn_mul_1c)
+	movd	20(%esp), %mm6
+	jmp	L(ent)
+EPILOGUE()
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/aorrlsh1_n.asm
--- a/mpn/x86_64/atom/aorrlsh1_n.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/aorrlsh1_n.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -2,6 +2,8 @@
 dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
 dnl  Optimised for Intel Atom.
 
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
 dnl  Copyright 2011 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/aorrlsh2_n.asm
--- a/mpn/x86_64/atom/aorrlsh2_n.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/aorrlsh2_n.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -2,6 +2,8 @@
 dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
 dnl  Optimised for Intel Atom.
 
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
 dnl  Copyright 2011 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/lshift.asm
--- a/mpn/x86_64/atom/lshift.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/lshift.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
 dnl  AMD64 mpn_lshift -- mpn left shift, optimised for Atom.
 
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
 dnl  Copyright 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/lshiftc.asm
--- a/mpn/x86_64/atom/lshiftc.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/lshiftc.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
 dnl  AMD64 mpn_lshiftc -- mpn left shift with complement, optimised for Atom.
 
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
 dnl  Copyright 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/rshift.asm
--- a/mpn/x86_64/atom/rshift.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/rshift.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
 dnl  AMD64 mpn_rshift -- mpn right shift, optimised for Atom.
 
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
 dnl  Copyright 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/sublsh1_n.asm
--- a/mpn/x86_64/atom/sublsh1_n.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/sublsh1_n.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
 dnl  AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) optimised for Intel Atom.
 
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
 dnl  Copyright 2011 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/logops_n.asm
--- a/mpn/x86_64/logops_n.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/logops_n.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -21,8 +21,8 @@
 
 
 C	     cycles/limb
-C AMD K8,K9	 1.5
-C AMD K10	 1.75-2 (fluctuating)
+C AMD K8,K9	 1.5	with fluctuations for variant 2 and 3
+C AMD K10	 1.5	with fluctuations for all variants
 C Intel P4	 2.8/3.35/3.60 (variant1/variant2/variant3)
 C Intel core2	 2
 C Intel NHM	 2
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/mod_1_1.asm
--- a/mpn/x86_64/mod_1_1.asm	Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/mod_1_1.asm	Tue Mar 01 19:27:49 2011 +0100
@@ -46,7 +46,7 @@
 C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
 C                       %rdi         %rsi         %rdx                %rcx
 C The pre array contains bi, cnt, B1modb, B2modb
-C Note: This implementaion needs B1modb only when cnt > 0
+C Note: This implementation needs B1modb only when cnt > 0
 
 C The iteration is almost as follows,
 C


More information about the gmp-commit mailing list