[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Aug 22 13:24:34 UTC 2019
details: /var/hg/gmp/rev/eecab8f8f3a3
changeset: 17835:eecab8f8f3a3
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 22 15:09:51 2019 +0200
description:
Make sure rdx is zero on return to benefit gcd_22's private calls.
Make the gcd_11 files more similar in register use.
details: /var/hg/gmp/rev/643c931da9bd
changeset: 17836:643c931da9bd
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 22 15:14:09 2019 +0200
description:
Remove [mpn/x86_64/coreihwl/gcd_11.asm] as it was never beneficial.
diffstat:
mpn/x86_64/bd2/gcd_11.asm | 39 +++++++++--------
mpn/x86_64/bd4/gcd_11.asm | 38 ++++++++--------
mpn/x86_64/core2/gcd_11.asm | 27 ++++++-----
mpn/x86_64/coreihwl/gcd_11.asm | 94 ------------------------------------------
mpn/x86_64/gcd_11.asm | 35 +++++++--------
5 files changed, 72 insertions(+), 161 deletions(-)
diffs (truncated from 305 to 300 lines):
diff -r bec010f10fff -r 643c931da9bd mpn/x86_64/bd2/gcd_11.asm
--- a/mpn/x86_64/bd2/gcd_11.asm Thu Aug 22 06:37:49 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_11.asm Thu Aug 22 15:14:09 2019 +0200
@@ -39,13 +39,13 @@
C AMD K8,K9 -
C AMD K10 -
C AMD bd1 -
-C AMD bd2 3.27 *
+C AMD bd2 3.27 *
C AMD bd3 ?
C AMD bd4 3.79
C AMD bt1 -
-C AMD bt2 3.64 *
-C AMD zn1 3.25
-C AMD zn2 3.50
+C AMD bt2 3.64 *
+C AMD zn1 3.25 *
+C AMD zn2 3.25 *
C Intel P4 -
C Intel CNR -
C Intel PNR -
@@ -73,21 +73,24 @@
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
- mov v0, %rax C
- sub u0, v0 C
- jz L(end) C
+ mov v0, %rdx
+ sub u0, %rdx
+ jz L(end)
- ALIGN(16) C
-L(top): rep;bsf v0, %rcx C tzcnt!
- mov u0, %r9 C
- sub %rax, u0 C u - v
- cmovc v0, u0 C u = |u - v|
- cmovc %r9, %rax C v = min(u,v)
- shr R8(%rcx), u0 C
- mov %rax, v0 C
- sub u0, v0 C v - u
- jnz L(top) C
+ ALIGN(16)
+L(top): rep;bsf %rdx, %rcx C tzcnt!
+ mov u0, %rax
+ sub v0, u0 C u - v
+ cmovc %rdx, u0 C u = |u - v|
+ cmovc %rax, v0 C v = min(u,v)
+ shr R8(%rcx), u0
+ mov v0, %rdx
+ sub u0, %rdx C v - u
+ jnz L(top)
-L(end): FUNC_EXIT()
+L(end): mov v0, %rax
+ C rax = result
+ C rdx = 0 for the benefit of internal gcd_22 call
+ FUNC_EXIT()
ret
EPILOGUE()
diff -r bec010f10fff -r 643c931da9bd mpn/x86_64/bd4/gcd_11.asm
--- a/mpn/x86_64/bd4/gcd_11.asm Thu Aug 22 06:37:49 2019 +0200
+++ b/mpn/x86_64/bd4/gcd_11.asm Thu Aug 22 15:14:09 2019 +0200
@@ -41,11 +41,11 @@
C AMD bd1 -
C AMD bd2 -
C AMD bd3 -
-C AMD bd4 2.86 *
+C AMD bd4 4.0 *
C AMD bt1 -
C AMD bt2 -
-C AMD zn1 2.66 *
-C AMD zn2 3.48
+C AMD zn1 3.25 *
+C AMD zn2 3.50
C Intel P4 -
C Intel CNR -
C Intel PNR -
@@ -73,22 +73,24 @@
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
- mov v0, %rax C
- sub u0, v0 C
- jz L(end) C
- mov u0, %r9
+ mov u0, %rax
+ mov v0, %rdx
+ sub u0, %rdx C v - u
+ jz L(end)
- ALIGN(16) C
-L(top): rep;bsf v0, %rcx C
- sub %rax, u0 C u - v
- cmovc v0, u0 C u = |u - v|
- cmovc %r9, %rax C v = min(u,v)
- shrx( %rcx, u0, %r9) C
- shrx( %rcx, u0, u0) C
- mov %rax, v0 C
- sub u0, v0 C v - u
- jnz L(top) C
+ ALIGN(16)
+L(top): rep;bsf %rdx, %rcx C tzcnt!
+ sub v0, u0 C u - v
+ cmovc %rdx, u0 C u = |u - v|
+ cmovc %rax, v0 C v = min(u,v)
+ shrx( %rcx, u0, %rax)
+ shrx( %rcx, u0, u0)
+ mov v0, %rdx
+ sub %rax, %rdx C v - u
+ jnz L(top)
-L(end): FUNC_EXIT()
+L(end): C rax = result
+ C rdx = 0 for the benefit of internal gcd_22 call
+ FUNC_EXIT()
ret
EPILOGUE()
diff -r bec010f10fff -r 643c931da9bd mpn/x86_64/core2/gcd_11.asm
--- a/mpn/x86_64/core2/gcd_11.asm Thu Aug 22 06:37:49 2019 +0200
+++ b/mpn/x86_64/core2/gcd_11.asm Thu Aug 22 15:14:09 2019 +0200
@@ -73,20 +73,21 @@
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
- mov v0, %rax C
- jmp L(odd) C
+ jmp L(odd)
- ALIGN(16) C
-L(top): cmovc v0, u0 C u = |u - v|
- cmovc %r9, %rax C v = min(u,v)
- shr R8(%rcx), u0 C
- mov %rax, v0 C
-L(odd): sub u0, v0 C
- bsf v0, %rcx C
- mov u0, %r9 C
- sub %rax, u0 C
- jnz L(top) C
+ ALIGN(16)
+L(top): cmovc %rdx, u0 C u = |u - v|
+ cmovc %rax, v0 C v = min(u,v)
+ shr R8(%rcx), u0
+L(odd): mov v0, %rdx
+ sub u0, %rdx C v - u
+ bsf %rdx, %rcx
+ mov u0, %rax
+ sub v0, u0 C u - v
+ jnz L(top)
-L(end): FUNC_EXIT()
+L(end): C rax = result
+ C rdx = 0 for the benefit of internal gcd_22 call
+ FUNC_EXIT()
ret
EPILOGUE()
diff -r bec010f10fff -r 643c931da9bd mpn/x86_64/coreihwl/gcd_11.asm
--- a/mpn/x86_64/coreihwl/gcd_11.asm Thu Aug 22 06:37:49 2019 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-dnl AMD64 mpn_gcd_11 optimised for Intel HWL, BWL, SKL.
-
-dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn
-dnl Granlund.
-
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017, 2019 Free Software
-dnl Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/bit (approx)
-C AMD K8,K9 -
-C AMD K10 -
-C AMD bd1 -
-C AMD bd2 -
-C AMD bd3 -
-C AMD bd4 ?
-C AMD bt1 -
-C AMD bt2 -
-C AMD zn1 ?
-C AMD zn2 ?
-C Intel P4 -
-C Intel CNR -
-C Intel PNR -
-C Intel NHM -
-C Intel WSM -
-C Intel SBR -
-C Intel IBR -
-C Intel HWL 3.51 *
-C Intel BWL 3.29 *
-C Intel SKL 3.41 *
-C Intel atom -
-C Intel SLM -
-C Intel GLM -
-C Intel GLM+ -
-C VIA nano -
-
-define(`u0', `%rdi')
-define(`v0', `%rsi')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_gcd_11)
- FUNC_ENTRY(2)
- mov v0, %rax C
- sub u0, v0 C
- jz L(end) C
- mov u0, %r9 C
-
- ALIGN(16) C
-L(top): bsf v0, %rcx C
- sub %rax, u0 C u - v
- cmovc v0, u0 C u = |u - v|
- cmovc %r9, %rax C v = min(u,v)
- shrx( %rcx, u0, %r9) C
- shrx( %rcx, u0, u0) C
- mov %rax, v0 C
- sub u0, v0 C v - u
- jnz L(top) C
-
-L(end): FUNC_EXIT()
- ret
-EPILOGUE()
diff -r bec010f10fff -r 643c931da9bd mpn/x86_64/gcd_11.asm
--- a/mpn/x86_64/gcd_11.asm Thu Aug 22 06:37:49 2019 +0200
+++ b/mpn/x86_64/gcd_11.asm Thu Aug 22 15:14:09 2019 +0200
@@ -73,30 +73,29 @@
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
- mov u0, %rax
-
- LEA( ctz_table, %rdx)
+ LEA( ctz_table, %r8)
jmp L(ent)
- ALIGN(16) C K8
-L(top): cmovc %rcx, %rax C if x-y < 0 0
- cmovc %rdi, v0 C use x,y-x 0
-L(mid): and $MASK, R32(%rcx) C 0
- movzbl (%rdx,%rcx), R32(%rcx) C 1
- jz L(shift_alot) C 1
- shr R8(%rcx), %rax C 3
- mov %rax, %rdi C 4
-L(ent): mov v0, %rcx C 3
- sub %rax, %rcx C 4
- sub v0, %rax C 4
- jnz L(top) C
+ ALIGN(16)
+L(top): cmovc %rdx, u0 C u = |u - v|
+ cmovc %rax, v0 C v = min(u,v)
+L(mid): and $MASK, R32(%rdx)
+ movzbl (%r8,%rdx), R32(%rcx)
+ jz L(shift_alot)
+ shr R8(%rcx), u0
+L(ent): mov u0, %rax
+ mov v0, %rdx
+ sub u0, %rdx
+ sub v0, u0
+ jnz L(top)
-L(end): mov v0, %rax
+L(end): C rax = result
+ C rdx = 0 for the benefit of internal gcd_22 call
FUNC_EXIT()
ret
L(shift_alot):
- shr $MAXSHIFT, %rax
More information about the gmp-commit
mailing list