[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Aug 30 21:55:32 UTC 2019
details: /var/hg/gmp/rev/3642098e8981
changeset: 17851:3642098e8981
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Aug 30 23:29:44 2019 +0200
description:
Optimise, now runs well on more CPUs.
details: /var/hg/gmp/rev/53c92b317b4a
changeset: 17852:53c92b317b4a
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Aug 30 23:39:12 2019 +0200
description:
Fix typo in FUNC_ENTRY (currently unused). Avoid a register copy before return.
details: /var/hg/gmp/rev/a16b74afe7a5
changeset: 17853:a16b74afe7a5
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Aug 30 23:43:01 2019 +0200
description:
Use coreihwl instead of bd2 gcd_22.
details: /var/hg/gmp/rev/1473936be63e
changeset: 17854:1473936be63e
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Aug 30 23:47:22 2019 +0200
description:
New grabber file.
details: /var/hg/gmp/rev/a046d4e54870
changeset: 17855:a046d4e54870
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Aug 30 23:48:58 2019 +0200
description:
Fix typo in FUNC_ENTRY (currently unused). Avoid a register copy before return.
details: /var/hg/gmp/rev/01aceb2081ea
changeset: 17856:01aceb2081ea
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Aug 30 23:53:19 2019 +0200
description:
Amend c/l table.
diffstat:
mpn/x86_64/bd2/gcd_11.asm | 42 +++++++++++++++++++++---------------------
mpn/x86_64/bd2/gcd_22.asm | 4 ++--
mpn/x86_64/bd4/gcd_11.asm | 6 +++---
mpn/x86_64/bd4/gcd_22.asm | 37 +++++++++++++++++++++++++++++++++++++
mpn/x86_64/core2/gcd_22.asm | 4 ++--
mpn/x86_64/coreihwl/gcd_22.asm | 39 ++++++++++++++++++---------------------
mpn/x86_64/gcd_22.asm | 4 ++--
mpn/x86_64/k10/gcd_22.asm | 4 ++--
mpn/x86_64/zen/gcd_22.asm | 2 +-
9 files changed, 88 insertions(+), 54 deletions(-)
diffs (truncated from 316 to 300 lines):
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd2/gcd_11.asm
--- a/mpn/x86_64/bd2/gcd_11.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_11.asm Fri Aug 30 23:53:19 2019 +0200
@@ -36,31 +36,31 @@
C cycles/bit (approx)
-C AMD K8,K9 -
-C AMD K10 -
-C AMD bd1 -
-C AMD bd2 3.27 *
+C AMD K8,K9 ?
+C AMD K10 ?
+C AMD bd1 5.4
+C AMD bd2 3.72
C AMD bd3 ?
-C AMD bd4 3.79
-C AMD bt1 -
-C AMD bt2 3.64 *
-C AMD zn1 3.25 *
-C AMD zn2 3.25 *
-C Intel P4 -
-C Intel CNR -
-C Intel PNR -
-C Intel NHM -
-C Intel WSM -
-C Intel SBR -
-C Intel IBR -
+C AMD bd4 4.12
+C AMD bt1 9.0
+C AMD bt2 3.97
+C AMD zn1 3.36
+C AMD zn2 3.33
+C Intel P4 ?
+C Intel CNR ?
+C Intel PNR ?
+C Intel NHM ?
+C Intel WSM ?
+C Intel SBR ?
+C Intel IBR ?
C Intel HWL ?
C Intel BWL ?
C Intel SKL ?
-C Intel atom -
-C Intel SLM -
-C Intel GLM -
-C Intel GLM+ -
-C VIA nano -
+C Intel atom ?
+C Intel SLM ?
+C Intel GLM ?
+C Intel GLM+ ?
+C VIA nano ?
define(`u0', `%rdi')
define(`v0', `%rsi')
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd2/gcd_22.asm
--- a/mpn/x86_64/bd2/gcd_22.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_22.asm Fri Aug 30 23:53:19 2019 +0200
@@ -79,7 +79,7 @@
TEXT
ALIGN(64)
PROLOGUE(mpn_gcd_22)
- FUNC_ENTRY(2)
+ FUNC_ENTRY(4)
mov v0_param, v0
ALIGN(16)
@@ -116,7 +116,7 @@
L(gcd_11):
mov v0, %rdi
- mov u0, %rsi
+C mov u0, %rsi
TCALL( mpn_gcd_11)
L(lowz):C We come here when v0 - u0 = 0
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd4/gcd_11.asm
--- a/mpn/x86_64/bd4/gcd_11.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/bd4/gcd_11.asm Fri Aug 30 23:53:19 2019 +0200
@@ -41,11 +41,11 @@
C AMD bd1 -
C AMD bd2 -
C AMD bd3 -
-C AMD bd4 4.0 *
+C AMD bd4 3.73
C AMD bt1 -
C AMD bt2 -
-C AMD zn1 3.25 *
-C AMD zn2 3.50
+C AMD zn1 3.33
+C AMD zn2 3.48
C Intel P4 -
C Intel CNR -
C Intel PNR -
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd4/gcd_22.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bd4/gcd_22.asm Fri Aug 30 23:53:19 2019 +0200
@@ -0,0 +1,37 @@
+dnl AMD64 mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl ABI_SUPPORT(DOS64) C returns mp_double_limb_t in memory
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_22)
+include_mpn(`x86_64/coreihwl/gcd_22.asm')
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/core2/gcd_22.asm
--- a/mpn/x86_64/core2/gcd_22.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/core2/gcd_22.asm Fri Aug 30 23:53:19 2019 +0200
@@ -79,7 +79,7 @@
TEXT
ALIGN(64)
PROLOGUE(mpn_gcd_22)
- FUNC_ENTRY(2)
+ FUNC_ENTRY(4)
mov v0_param, v0
ALIGN(16)
@@ -111,7 +111,7 @@
L(gcd_11):
mov v0, %rdi
- mov u0, %rsi
+C mov u0, %rsi
TCALL( mpn_gcd_11)
L(lowz):C We come here when v0 - u0 = 0
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/coreihwl/gcd_22.asm
--- a/mpn/x86_64/coreihwl/gcd_22.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/coreihwl/gcd_22.asm Fri Aug 30 23:53:19 2019 +0200
@@ -37,11 +37,11 @@
C AMD bd1 -
C AMD bd2 -
C AMD bd3 -
-C AMD bd4 6.8
+C AMD bd4 6.7
C AMD bt1 -
C AMD bt2 -
-C AMD zn1 5.7
-C AMD zn2 5.9
+C AMD zn1 5.4
+C AMD zn2 5.5
C Intel P4 -
C Intel CNR -
C Intel PNR -
@@ -50,8 +50,8 @@
C Intel SBR -
C Intel IBR -
C Intel HWL 7.1
-C Intel BWL 6.0
-C Intel SKL 6.3
+C Intel BWL 5.5
+C Intel SKL 5.6
C Intel atom -
C Intel SLM -
C Intel GLM -
@@ -62,15 +62,13 @@
define(`u1', `%rdi')
define(`u0', `%rsi')
define(`v1', `%rdx')
-define(`v0_param', `%rcx')
-
-define(`v0', `%rax')
-define(`cnt', `%rcx')
+define(`v0', `%rcx')
define(`s0', `%r8')
define(`s1', `%r9')
define(`t0', `%r10')
define(`t1', `%r11')
+define(`cnt', `%rax')
dnl ABI_SUPPORT(DOS64) C returns mp_double_limb_t in memory
ABI_SUPPORT(STD64)
@@ -79,8 +77,7 @@
TEXT
ALIGN(64)
PROLOGUE(mpn_gcd_22)
- FUNC_ENTRY(2)
- mov v0_param, v0
+ FUNC_ENTRY(4)
ALIGN(16)
L(top): mov v0, t0
@@ -89,12 +86,11 @@
mov v1, t1
sbb u1, t1
- mov u0, s0
- mov u1, s1
-
rep;bsf t0, cnt C tzcnt!
+ mov u0, s0
sub v0, u0
+ mov u1, s1
sbb v1, u1
L(bck): cmovc t0, u0 C u = |u - v|
@@ -104,18 +100,19 @@
xor R32(t0), R32(t0)
sub cnt, t0
+ shlx( t0, u1, s1)
shrx( cnt, u0, u0)
- shlx( t0, u1, t0)
- or t0, u0
- shr R8(cnt), u1
- jnz L(top)
+ shrx( cnt, u1, u1)
+ or s1, u0
test v1, v1
jnz L(top)
+ test u1, u1
+ jnz L(top)
L(gcd_11):
mov v0, %rdi
- mov u0, %rsi
+C mov u0, %rsi
TCALL( mpn_gcd_11)
L(lowz):C We come here when v0 - u0 = 0
@@ -134,8 +131,8 @@
sub v1, u0
jmp L(bck)
-L(end): C mov v0, %rax
+L(end): mov v0, %rax
C mov v1, %rdx
- FUNC_EXIT()
+L(ret): FUNC_EXIT()
ret
EPILOGUE()
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/gcd_22.asm
--- a/mpn/x86_64/gcd_22.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/gcd_22.asm Fri Aug 30 23:53:19 2019 +0200
@@ -93,7 +93,7 @@
TEXT
ALIGN(64)
PROLOGUE(mpn_gcd_22)
- FUNC_ENTRY(2)
+ FUNC_ENTRY(4)
mov v0_param, v0
LEA( ctz_table, %r10)
@@ -134,7 +134,7 @@
L(gcd_11):
mov v0, %rdi
- mov u0, %rsi
+C mov u0, %rsi
TCALL( mpn_gcd_11)
L(count_better):
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/k10/gcd_22.asm
--- a/mpn/x86_64/k10/gcd_22.asm Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/k10/gcd_22.asm Fri Aug 30 23:53:19 2019 +0200
@@ -79,7 +79,7 @@
TEXT
ALIGN(64)
PROLOGUE(mpn_gcd_22)
- FUNC_ENTRY(2)
+ FUNC_ENTRY(4)
mov v0_param, v0
ALIGN(16)
@@ -116,7 +116,7 @@
More information about the gmp-commit mailing list