[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org
Fri Aug 30 21:55:32 UTC 2019


details:   /var/hg/gmp/rev/3642098e8981
changeset: 17851:3642098e8981
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Aug 30 23:29:44 2019 +0200
description:
Optimise; now runs well on more CPUs.

details:   /var/hg/gmp/rev/53c92b317b4a
changeset: 17852:53c92b317b4a
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Aug 30 23:39:12 2019 +0200
description:
Fix typo in FUNC_ENTRY (currently unused). Avoid a register copy before return.

details:   /var/hg/gmp/rev/a16b74afe7a5
changeset: 17853:a16b74afe7a5
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Aug 30 23:43:01 2019 +0200
description:
Use coreihwl instead of bd2 gcd_22.

details:   /var/hg/gmp/rev/1473936be63e
changeset: 17854:1473936be63e
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Aug 30 23:47:22 2019 +0200
description:
New grabber file.

details:   /var/hg/gmp/rev/a046d4e54870
changeset: 17855:a046d4e54870
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Aug 30 23:48:58 2019 +0200
description:
Fix typo in FUNC_ENTRY (currently unused). Avoid a register copy before return.

details:   /var/hg/gmp/rev/01aceb2081ea
changeset: 17856:01aceb2081ea
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Aug 30 23:53:19 2019 +0200
description:
Amend c/l table.

diffstat:

 mpn/x86_64/bd2/gcd_11.asm      |  42 +++++++++++++++++++++---------------------
 mpn/x86_64/bd2/gcd_22.asm      |   4 ++--
 mpn/x86_64/bd4/gcd_11.asm      |   6 +++---
 mpn/x86_64/bd4/gcd_22.asm      |  37 +++++++++++++++++++++++++++++++++++++
 mpn/x86_64/core2/gcd_22.asm    |   4 ++--
 mpn/x86_64/coreihwl/gcd_22.asm |  39 ++++++++++++++++++---------------------
 mpn/x86_64/gcd_22.asm          |   4 ++--
 mpn/x86_64/k10/gcd_22.asm      |   4 ++--
 mpn/x86_64/zen/gcd_22.asm      |   2 +-
 9 files changed, 88 insertions(+), 54 deletions(-)

diffs (truncated from 316 to 300 lines):

diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd2/gcd_11.asm
--- a/mpn/x86_64/bd2/gcd_11.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_11.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -36,31 +36,31 @@
 
 
 C	     cycles/bit (approx)
-C AMD K8,K9	 -
-C AMD K10	 -
-C AMD bd1	 -
-C AMD bd2	 3.27  *
+C AMD K8,K9	 ?
+C AMD K10	 ?
+C AMD bd1	 5.4
+C AMD bd2	 3.72
 C AMD bd3	 ?
-C AMD bd4	 3.79
-C AMD bt1	 -
-C AMD bt2	 3.64  *
-C AMD zn1	 3.25  *
-C AMD zn2	 3.25  *
-C Intel P4	 -
-C Intel CNR	 -
-C Intel PNR	 -
-C Intel NHM	 -
-C Intel WSM	 -
-C Intel SBR	 -
-C Intel IBR	 -
+C AMD bd4	 4.12
+C AMD bt1	 9.0
+C AMD bt2	 3.97
+C AMD zn1	 3.36
+C AMD zn2	 3.33
+C Intel P4	 ?
+C Intel CNR	 ?
+C Intel PNR	 ?
+C Intel NHM	 ?
+C Intel WSM	 ?
+C Intel SBR	 ?
+C Intel IBR	 ?
 C Intel HWL	 ?
 C Intel BWL	 ?
 C Intel SKL	 ?
-C Intel atom	 -
-C Intel SLM	 -
-C Intel GLM	 -
-C Intel GLM+	 -
-C VIA nano	 -
+C Intel atom	 ?
+C Intel SLM	 ?
+C Intel GLM	 ?
+C Intel GLM+	 ?
+C VIA nano	 ?
 
 define(`u0',    `%rdi')
 define(`v0',    `%rsi')
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd2/gcd_22.asm
--- a/mpn/x86_64/bd2/gcd_22.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_22.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -79,7 +79,7 @@
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_gcd_22)
-	FUNC_ENTRY(2)
+	FUNC_ENTRY(4)
 	mov	v0_param, v0
 
 	ALIGN(16)
@@ -116,7 +116,7 @@
 
 L(gcd_11):
 	mov	v0, %rdi
-	mov	u0, %rsi
+C	mov	u0, %rsi
 	TCALL(	mpn_gcd_11)
 
 L(lowz):C We come here when v0 - u0 = 0
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd4/gcd_11.asm
--- a/mpn/x86_64/bd4/gcd_11.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/bd4/gcd_11.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -41,11 +41,11 @@
 C AMD bd1	 -
 C AMD bd2	 -
 C AMD bd3	 -
-C AMD bd4	 4.0   *
+C AMD bd4	 3.73
 C AMD bt1	 -
 C AMD bt2	 -
-C AMD zn1	 3.25  *
-C AMD zn2	 3.50
+C AMD zn1	 3.33
+C AMD zn2	 3.48
 C Intel P4	 -
 C Intel CNR	 -
 C Intel PNR	 -
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/bd4/gcd_22.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bd4/gcd_22.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -0,0 +1,37 @@
+dnl  AMD64 mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_22)
+include_mpn(`x86_64/coreihwl/gcd_22.asm')
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/core2/gcd_22.asm
--- a/mpn/x86_64/core2/gcd_22.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/core2/gcd_22.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -79,7 +79,7 @@
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_gcd_22)
-	FUNC_ENTRY(2)
+	FUNC_ENTRY(4)
 	mov	v0_param, v0
 
 	ALIGN(16)
@@ -111,7 +111,7 @@
 
 L(gcd_11):
 	mov	v0, %rdi
-	mov	u0, %rsi
+C	mov	u0, %rsi
 	TCALL(	mpn_gcd_11)
 
 L(lowz):C We come here when v0 - u0 = 0
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/coreihwl/gcd_22.asm
--- a/mpn/x86_64/coreihwl/gcd_22.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/coreihwl/gcd_22.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -37,11 +37,11 @@
 C AMD bd1	 -
 C AMD bd2	 -
 C AMD bd3	 -
-C AMD bd4	 6.8
+C AMD bd4	 6.7
 C AMD bt1	 -
 C AMD bt2	 -
-C AMD zn1	 5.7
-C AMD zn2	 5.9
+C AMD zn1	 5.4
+C AMD zn2	 5.5
 C Intel P4	 -
 C Intel CNR	 -
 C Intel PNR	 -
@@ -50,8 +50,8 @@
 C Intel SBR	 -
 C Intel IBR	 -
 C Intel HWL	 7.1
-C Intel BWL	 6.0
-C Intel SKL	 6.3
+C Intel BWL	 5.5
+C Intel SKL	 5.6
 C Intel atom	 -
 C Intel SLM	 -
 C Intel GLM	 -
@@ -62,15 +62,13 @@
 define(`u1',    `%rdi')
 define(`u0',    `%rsi')
 define(`v1',    `%rdx')
-define(`v0_param', `%rcx')
-
-define(`v0',    `%rax')
-define(`cnt',   `%rcx')
+define(`v0',    `%rcx')
 
 define(`s0',    `%r8')
 define(`s1',    `%r9')
 define(`t0',    `%r10')
 define(`t1',    `%r11')
+define(`cnt',   `%rax')
 
 dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
 ABI_SUPPORT(STD64)
@@ -79,8 +77,7 @@
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_gcd_22)
-	FUNC_ENTRY(2)
-	mov	v0_param, v0
+	FUNC_ENTRY(4)
 
 	ALIGN(16)
 L(top):	mov	v0, t0
@@ -89,12 +86,11 @@
 	mov	v1, t1
 	sbb	u1, t1
 
-	mov	u0, s0
-	mov	u1, s1
-
 	rep;bsf	t0, cnt		C tzcnt!
 
+	mov	u0, s0
 	sub	v0, u0
+	mov	u1, s1
 	sbb	v1, u1
 
 L(bck):	cmovc	t0, u0		C u = |u - v|
@@ -104,18 +100,19 @@
 
 	xor	R32(t0), R32(t0)
 	sub	cnt, t0
+	shlx(	t0, u1, s1)
 	shrx(	cnt, u0, u0)
-	shlx(	t0, u1, t0)
-	or	t0, u0
-	shr	R8(cnt), u1
-	jnz	L(top)
+	shrx(	cnt, u1, u1)
+	or	s1, u0
 
 	test	v1, v1
 	jnz	L(top)
+	test	u1, u1
+	jnz	L(top)
 
 L(gcd_11):
 	mov	v0, %rdi
-	mov	u0, %rsi
+C	mov	u0, %rsi
 	TCALL(	mpn_gcd_11)
 
 L(lowz):C We come here when v0 - u0 = 0
@@ -134,8 +131,8 @@
 	sub	v1, u0
 	jmp	L(bck)
 
-L(end):	C mov	v0, %rax
+L(end):	mov	v0, %rax
 	C mov	v1, %rdx
-	FUNC_EXIT()
+L(ret):	FUNC_EXIT()
 	ret
 EPILOGUE()
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/gcd_22.asm
--- a/mpn/x86_64/gcd_22.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/gcd_22.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -93,7 +93,7 @@
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_gcd_22)
-	FUNC_ENTRY(2)
+	FUNC_ENTRY(4)
 	mov	v0_param, v0
 
 	LEA(	ctz_table, %r10)
@@ -134,7 +134,7 @@
 
 L(gcd_11):
 	mov	v0, %rdi
-	mov	u0, %rsi
+C	mov	u0, %rsi
 	TCALL(	mpn_gcd_11)
 
 L(count_better):
diff -r 647ce78081c3 -r 01aceb2081ea mpn/x86_64/k10/gcd_22.asm
--- a/mpn/x86_64/k10/gcd_22.asm	Thu Aug 29 16:59:41 2019 +0200
+++ b/mpn/x86_64/k10/gcd_22.asm	Fri Aug 30 23:53:19 2019 +0200
@@ -79,7 +79,7 @@
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_gcd_22)
-	FUNC_ENTRY(2)
+	FUNC_ENTRY(4)
 	mov	v0_param, v0
 
 	ALIGN(16)
@@ -116,7 +116,7 @@
 


More information about the gmp-commit mailing list