[Gmp-commit] /var/hg/gmp: Shorten critical path.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Mar 13 20:24:43 CET 2012
details: /var/hg/gmp/rev/d16912b36656
changeset: 14756:d16912b36656
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 13 20:24:40 2012 +0100
description:
Shorten critical path.
diffstat:
ChangeLog | 2 ++
mpn/x86_64/core2/gcd_1.asm | 41 +++++++++++++++++++++--------------------
2 files changed, 23 insertions(+), 20 deletions(-)
diffs (81 lines):
diff -r 090afe7a7fcd -r d16912b36656 ChangeLog
--- a/ChangeLog Tue Mar 13 15:57:10 2012 +0100
+++ b/ChangeLog Tue Mar 13 20:24:40 2012 +0100
@@ -2,6 +2,8 @@
* mpn/x86_64/core2/gcd_1.asm: Add hack to support fat builds.
+ * mpn/x86_64/core2/gcd_1.asm: Shorten critical path.
+
2012-03-12 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/core2/gcd_1.asm: New file.
diff -r 090afe7a7fcd -r d16912b36656 mpn/x86_64/core2/gcd_1.asm
--- a/mpn/x86_64/core2/gcd_1.asm Tue Mar 13 15:57:10 2012 +0100
+++ b/mpn/x86_64/core2/gcd_1.asm Tue Mar 13 20:24:40 2012 +0100
@@ -28,16 +28,18 @@
C AMD K8,K9 8.5
C AMD K10 5
C AMD bd1 5
-C AMD bobcat 11
-C Intel P4 24
-C Intel core2 5.5
-C Intel NHM 6
-C Intel SBR 6
+C AMD bobcat 10
+C Intel P4 18
+C Intel core2 4.3
+C Intel NHM 5
+C Intel SBR 5
C Intel atom 17
-C VIA nano 6.5
-
+C VIA nano 5.3
C Numbers measured with: speed -CD -s1-64 mpn_gcd_1
+C TODO
+C * Optimise inner-loop for specific CPUs. The code relies too much on OoO
+C execution.
C INPUT PARAMETERS
define(`up', `%rdi')
@@ -93,27 +95,26 @@
pop %rdx
pop %r8
+ bsf %rax, %rcx
+
test %rax, %rax
-
- mov %rax, %rcx
jnz L(mid)
- mov %rdx, %rax
jmp L(done)
- ALIGN(16) C K10 C2 NHM SBR
-L(top): cmovc %r10, %rax C if x-y carried 0,7 0,6 0,7 0
- cmovc %rcx, %rdx C use x,y-x 0 1 1 1
-L(mid): bsf %rax, %rcx C 1 2 2 2
- mov %rdx, %r10 C 1 3 3 3
- shr R8(%rcx), %rax C 5 4 5 5
- mov %rax, %rcx C 6 5 6 7
- sub %rax, %r10 C 6 5 6 7
- sub %rdx, %rax C 6 5 6 7
+ ALIGN(16) C K10 BD C2 NHM SBR
+L(top): cmovc %r10, %rax C if x-y < 0 0,6 0,5 0,6 0,5 0,6
+ cmovc %r9, %rdx C use x,y-x 0 0 2 1 1
+ bsf %r10, %rcx C 0 0 0 0 0
+L(mid): mov %rdx, %r10 C 1 1 4 3 3
+ shr R8(%rcx), %rax C 4 3 2 3 3
+ mov %rax, %r9 C 5 4 3 4 5
+ sub %rax, %r10 C 5 4 5 4 5
+ sub %rdx, %rax C 5 4 4 4 5
jnz L(top) C
- mov %rcx, %rax
L(done):
+ mov %rdx, %rax
mov %r8, %rcx
shl R8(%rcx), %rax
DOS64_EXIT()
More information about the gmp-commit mailing list