[Gmp-commit] /var/hg/gmp: 7 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Aug 3 18:36:59 UTC 2017


details:   /var/hg/gmp/rev/a5b182ee475a
changeset: 17483:a5b182ee475a
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:22:50 2017 +0200
description:
Update c/l table.

details:   /var/hg/gmp/rev/8e40f1a95f27
changeset: 17484:8e40f1a95f27
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:23:35 2017 +0200
description:
Purge unused m4 macro.

details:   /var/hg/gmp/rev/169af99bb181
changeset: 17485:169af99bb181
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:24:56 2017 +0200
description:
Suppress no-op insn.

details:   /var/hg/gmp/rev/f88cebbbddb7
changeset: 17486:f88cebbbddb7
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:25:43 2017 +0200
description:
Add a comment about performance.

details:   /var/hg/gmp/rev/cdeb747bdd5d
changeset: 17487:cdeb747bdd5d
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:26:13 2017 +0200
description:
Expand c/l table.

details:   /var/hg/gmp/rev/46d1df872004
changeset: 17488:46d1df872004
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:27:39 2017 +0200
description:
Purge unused m4 macro.

details:   /var/hg/gmp/rev/214bcc98bbab
changeset: 17489:214bcc98bbab
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Aug 03 19:28:18 2017 +0200
description:
Simplify post-loop addressing.

diffstat:

 mpn/x86_64/bd1/addmul_2.asm            |   5 ---
 mpn/x86_64/bd1/mul_2.asm               |  46 +++++++++++++++++-----------------
 mpn/x86_64/coreibwl/mul_basecase.asm   |  18 +++++++-----
 mpn/x86_64/coreihwl/mullo_basecase.asm |   4 --
 mpn/x86_64/coreisbr/aors_n.asm         |   2 +-
 mpn/x86_64/mulx/adx/addmul_1.asm       |  10 ++++++-
 mpn/x86_64/zen/mul_basecase.asm        |   1 +
 7 files changed, 44 insertions(+), 42 deletions(-)

diffs (182 lines):

diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/bd1/addmul_2.asm
--- a/mpn/x86_64/bd1/addmul_2.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/bd1/addmul_2.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -55,11 +55,6 @@
 C The loop of this code is the result of running a code generation and
 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
 
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-dowm code.
-define(`I',`$1')
-
-
 define(`rp',      `%rdi')   C rcx
 define(`up',      `%rsi')   C rdx
 define(`n_param', `%rdx')   C r8
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/bd1/mul_2.asm
--- a/mpn/x86_64/bd1/mul_2.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/bd1/mul_2.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -33,29 +33,29 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9      6.78
-C AMD K10        6.78
-C AMD bull       8.39    8.65
-C AMD pile       8.47
-C AMD steam
-C AMD excavator
-C AMD bobcat    12.1
-C AMD jaguar    11.5
-C Intel P4      24.0
-C Intel core2    8.14
-C Intel NHM      7.78
-C Intel SBR      6.34
-C Intel IBR      6.15
-C Intel HWL      6.04
-C Intel BWL      4.33
-C Intel SKL      4.41
-C Intel atom    39.5
-C Intel SLM     27.8
+C AMD K8,K9	 6.78
+C AMD K10	 6.78
+C AMD bd1	 8.39	 8.65
+C AMD bd2	 8.47
+C AMD bd3
+C AMD bd4
+C AMD zen
+C AMD bt1	12.1
+C AMD bt2	11.5
+C Intel P4	24.0
+C Intel PNR	 8.14
+C Intel NHM	 7.78
+C Intel SBR	 6.34
+C Intel IBR	 6.15
+C Intel HWL	 6.04
+C Intel BWL	 4.33
+C Intel SKL	 4.41
+C Intel atom	39.5
+C Intel SLM	27.8
 C VIA nano
 
 C The loop of this code is the result of running a code generation and
 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-C Scheme: genxmul --mul
 
 define(`rp',      `%rdi')   C rcx
 define(`up',      `%rsi')   C rdx
@@ -179,13 +179,13 @@
 	add	$4, n
 	jnc	L(top)
 
-L(end):	mov	-8(up,n,8), %rax
+L(end):	mov	-8(up), %rax
 	mul	v1
-	mov	w2, -16(rp,n,8)
+	mov	w2, -16(rp)
 	add	%rax, w0
-	mov	w3, -8(rp,n,8)
+	mov	w3, -8(rp)
 	adc	%rdx, w1
-	mov	w0, (rp,n,8)
+	mov	w0, (rp)
 	mov	w1, %rax
 
 	pop	%rbp
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/coreibwl/mul_basecase.asm
--- a/mpn/x86_64/coreibwl/mul_basecase.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/coreibwl/mul_basecase.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -33,19 +33,21 @@
 C cycles/limb	mul_1		addmul_1
 C AMD K8,K9	n/a		n/a
 C AMD K10	n/a		n/a
-C AMD bull	n/a		n/a
-C AMD pile	n/a		n/a
-C AMD steam	n/a		n/a
-C AMD excavator	 ?		 ?
-C AMD bobcat	n/a		n/a
-C AMD jaguar	n/a		n/a
+C AMD bd1	n/a		n/a
+C AMD bd2	n/a		n/a
+C AMD bd3	n/a		n/a
+C AMD bd4	 ?		 ?
+C AMD zen	 ?		 ?
+C AMD bt1	n/a		n/a
+C AMD bt2	n/a		n/a
 C Intel P4	n/a		n/a
-C Intel core2	n/a		n/a
+C Intel PNR	n/a		n/a
 C Intel NHM	n/a		n/a
 C Intel SBR	n/a		n/a
 C Intel IBR	n/a		n/a
 C Intel HWL	 1.68		n/a
-C Intel BWL	 1.69	      1.8-1.9
+C Intel BWL	 1.51	      1.67-1.74
+C Intel SKL	 1.52	      1.63-1.71
 C Intel atom	n/a		n/a
 C Intel SLM	n/a		n/a
 C VIA nano	n/a		n/a
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/coreihwl/mullo_basecase.asm
--- a/mpn/x86_64/coreihwl/mullo_basecase.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/coreihwl/mullo_basecase.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -57,10 +57,6 @@
 C   * Implement proper cor2, replacing current cor0.
 C   * Micro-optimise.
 
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
 define(`rp',       `%rdi')
 define(`up',       `%rsi')
 define(`vp_param', `%rdx')
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/coreisbr/aors_n.asm
--- a/mpn/x86_64/coreisbr/aors_n.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/coreisbr/aors_n.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -126,7 +126,7 @@
 	ADCSBB	16(vp), %r8
 	ADCSBB	24(vp), %r9
 	lea	16(vp), vp
-	lea	(rp), rp
+C	lea	(rp), rp
 	jmp	L(lo2)
 
 L(e2):	mov	%r10, (rp)
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/mulx/adx/addmul_1.asm
--- a/mpn/x86_64/mulx/adx/addmul_1.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/mulx/adx/addmul_1.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -36,14 +36,22 @@
 C AMD K8,K9	 -
 C AMD K10	 -
 C AMD bd1	 -
-C AMD bobcat	 -
+C AMD bd2	 -
+C AMD bd3	 -
+C AMD bd4	 -
+C AMD zen	 ?
+C AMD bt1	 -
+C AMD bt2	 -
 C Intel P4	 -
 C Intel PNR	 -
 C Intel NHM	 -
 C Intel SBR	 -
+C Intel IBR	 -
 C Intel HWL	 -
 C Intel BWL	 ?
+C Intel SKL	 ?
 C Intel atom	 -
+C Intel SLM	 -
 C VIA nano	 -
 
 define(`rp',      `%rdi')	dnl rcx
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/zen/mul_basecase.asm
--- a/mpn/x86_64/zen/mul_basecase.asm	Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/zen/mul_basecase.asm	Thu Aug 03 19:28:18 2017 +0200
@@ -41,6 +41,7 @@
 C    mul_1 could osp into addmul_1.
 C  * Let vn_param be vn to save a copy.
 C  * Re-allocate to benefit more from 32-bit encoding.
+C  * Poor performance for e.g. n = 12,16.
 
 define(`rp',       `%rdi')
 define(`up',       `%rsi')


More information about the gmp-commit mailing list