[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Mar 1 19:27:53 CET 2011
details: /var/hg/gmp/rev/29211e0f75a1
changeset: 13959:29211e0f75a1
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 01 19:23:20 2011 +0100
description:
Swap entry insns to share more code between entry points.
details: /var/hg/gmp/rev/511149603f44
changeset: 13960:511149603f44
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 01 19:24:40 2011 +0100
description:
Add contributors.
details: /var/hg/gmp/rev/c1d59877007a
changeset: 13961:c1d59877007a
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 01 19:25:28 2011 +0100
description:
Correct cycle counts.
details: /var/hg/gmp/rev/ea8947c4bf1d
changeset: 13962:ea8947c4bf1d
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 01 19:26:39 2011 +0100
description:
Add contributors.
details: /var/hg/gmp/rev/e0c2725df0f7
changeset: 13963:e0c2725df0f7
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 01 19:27:49 2011 +0100
description:
Fix a comment typo.
diffstat:
ChangeLog | 4 ++++
mpn/x86/pentium4/sse2/addmul_1.asm | 23 +++++++++--------------
mpn/x86/pentium4/sse2/mul_1.asm | 31 +++++++++++++------------------
mpn/x86_64/atom/aorrlsh1_n.asm | 2 ++
mpn/x86_64/atom/aorrlsh2_n.asm | 2 ++
mpn/x86_64/atom/lshift.asm | 2 ++
mpn/x86_64/atom/lshiftc.asm | 2 ++
mpn/x86_64/atom/rshift.asm | 2 ++
mpn/x86_64/atom/sublsh1_n.asm | 2 ++
mpn/x86_64/logops_n.asm | 4 ++--
mpn/x86_64/mod_1_1.asm | 2 +-
11 files changed, 41 insertions(+), 35 deletions(-)
diffs (221 lines):
diff -r d8d1be62260c -r e0c2725df0f7 ChangeLog
--- a/ChangeLog Tue Mar 01 19:14:53 2011 +0100
+++ b/ChangeLog Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,9 @@
2011-03-01 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86/pentium4/sse2/mul_1.asm: Swap entry insns to share more code
+ between entry points.
+ * mpn/x86/pentium4/sse2/addmul_1.asm: Likewise.
+
* mpz/divegcd.c: Rewrite, as per Marc Glisse's suggestion. Also fix
problem with passing a longlong limb to a _ui function.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86/pentium4/sse2/addmul_1.asm
--- a/mpn/x86/pentium4/sse2/addmul_1.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86/pentium4/sse2/addmul_1.asm Tue Mar 01 19:27:49 2011 +0100
@@ -1,6 +1,6 @@
dnl mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-dnl Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -40,22 +40,13 @@
TEXT
ALIGN(16)
-PROLOGUE(mpn_addmul_1c)
- mov 4(%esp), %edx
+PROLOGUE(mpn_addmul_1)
+ pxor %mm6, %mm6
+L(ent): mov 4(%esp), %edx
mov 8(%esp), %eax
mov 12(%esp), %ecx
movd 16(%esp), %mm7
- movd 20(%esp), %mm6
- jmp L(ent)
-EPILOGUE()
- ALIGN(16)
-PROLOGUE(mpn_addmul_1)
- mov 4(%esp), %edx
- mov 8(%esp), %eax
- mov 12(%esp), %ecx
- movd 16(%esp), %mm7
- pxor %mm6, %mm6
-L(ent): cmp $4, %ecx
+ cmp $4, %ecx
jnc L(big)
L(lp0): movd (%eax), %mm0
@@ -181,3 +172,7 @@
emms
ret
EPILOGUE()
+PROLOGUE(mpn_addmul_1c)
+ movd 20(%esp), %mm6
+ jmp L(ent)
+EPILOGUE()
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86/pentium4/sse2/mul_1.asm
--- a/mpn/x86/pentium4/sse2/mul_1.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86/pentium4/sse2/mul_1.asm Tue Mar 01 19:27:49 2011 +0100
@@ -1,6 +1,6 @@
dnl mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-dnl Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -20,10 +20,6 @@
include(`../config.m4')
-C TODO:
-C * Tweak eax/edx offsets in loop as to save some lea's
-C * Perhaps software pipeline small-case code
-
C cycles/limb
C P6 model 0-8,10-12 -
C P6 model 9 (Banias) 4.17
@@ -32,6 +28,10 @@
C P4 model 2 (Northwood) 4
C P4 model 3-4 (Prescott) 4.55
+C TODO:
+C * Tweak eax/edx offsets in loop as to save some lea's
+C * Perhaps software pipeline small-case code
+
C INPUT PARAMETERS
C rp sp + 4
C up sp + 8
@@ -40,22 +40,13 @@
TEXT
ALIGN(16)
-PROLOGUE(mpn_mul_1c)
- mov 4(%esp), %edx
+PROLOGUE(mpn_mul_1)
+ pxor %mm6, %mm6
+L(ent): mov 4(%esp), %edx
mov 8(%esp), %eax
mov 12(%esp), %ecx
movd 16(%esp), %mm7
- movd 20(%esp), %mm6
- jmp L(ent)
-EPILOGUE()
- ALIGN(16)
-PROLOGUE(mpn_mul_1)
- mov 4(%esp), %edx
- mov 8(%esp), %eax
- mov 12(%esp), %ecx
- movd 16(%esp), %mm7
- pxor %mm6, %mm6
-L(ent): cmp $4, %ecx
+ cmp $4, %ecx
jnc L(big)
L(lp0): movd (%eax), %mm0
@@ -156,3 +147,7 @@
emms
ret
EPILOGUE()
+PROLOGUE(mpn_mul_1c)
+ movd 20(%esp), %mm6
+ jmp L(ent)
+EPILOGUE()
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/aorrlsh1_n.asm
--- a/mpn/x86_64/atom/aorrlsh1_n.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/aorrlsh1_n.asm Tue Mar 01 19:27:49 2011 +0100
@@ -2,6 +2,8 @@
dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
dnl Optimised for Intel Atom.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/aorrlsh2_n.asm
--- a/mpn/x86_64/atom/aorrlsh2_n.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/aorrlsh2_n.asm Tue Mar 01 19:27:49 2011 +0100
@@ -2,6 +2,8 @@
dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
dnl Optimised for Intel Atom.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/lshift.asm
--- a/mpn/x86_64/atom/lshift.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/lshift.asm Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
dnl AMD64 mpn_lshift -- mpn left shift, optimised for Atom.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/lshiftc.asm
--- a/mpn/x86_64/atom/lshiftc.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/lshiftc.asm Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
dnl AMD64 mpn_lshiftc -- mpn left shift with complement, optimised for Atom.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/rshift.asm
--- a/mpn/x86_64/atom/rshift.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/rshift.asm Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
dnl AMD64 mpn_rshift -- mpn right shift, optimised for Atom.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/atom/sublsh1_n.asm
--- a/mpn/x86_64/atom/sublsh1_n.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/atom/sublsh1_n.asm Tue Mar 01 19:27:49 2011 +0100
@@ -1,5 +1,7 @@
dnl AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) optimised for Intel Atom.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/logops_n.asm
--- a/mpn/x86_64/logops_n.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/logops_n.asm Tue Mar 01 19:27:49 2011 +0100
@@ -21,8 +21,8 @@
C cycles/limb
-C AMD K8,K9 1.5
-C AMD K10 1.75-2 (fluctuating)
+C AMD K8,K9 1.5 with fluctuations for variant 2 and 3
+C AMD K10 1.5 with fluctuations for all variants
C Intel P4 2.8/3.35/3.60 (variant1/variant2/variant3)
C Intel core2 2
C Intel NHM 2
diff -r d8d1be62260c -r e0c2725df0f7 mpn/x86_64/mod_1_1.asm
--- a/mpn/x86_64/mod_1_1.asm Tue Mar 01 19:14:53 2011 +0100
+++ b/mpn/x86_64/mod_1_1.asm Tue Mar 01 19:27:49 2011 +0100
@@ -46,7 +46,7 @@
C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
C %rdi %rsi %rdx %rcx
C The pre array contains bi, cnt, B1modb, B2modb
-C Note: This implementaion needs B1modb only when cnt > 0
+C Note: This implementation needs B1modb only when cnt > 0
C The iteration is almost as follows,
C
More information about the gmp-commit mailing list