[Gmp-commit] /var/hg/gmp: 7 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Aug 3 18:36:59 UTC 2017
details: /var/hg/gmp/rev/a5b182ee475a
changeset: 17483:a5b182ee475a
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:22:50 2017 +0200
description:
Update c/l table.
details: /var/hg/gmp/rev/8e40f1a95f27
changeset: 17484:8e40f1a95f27
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:23:35 2017 +0200
description:
Purge unused m4 macro.
details: /var/hg/gmp/rev/169af99bb181
changeset: 17485:169af99bb181
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:24:56 2017 +0200
description:
Suppress no-op insn.
details: /var/hg/gmp/rev/f88cebbbddb7
changeset: 17486:f88cebbbddb7
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:25:43 2017 +0200
description:
Add a comment about performance.
details: /var/hg/gmp/rev/cdeb747bdd5d
changeset: 17487:cdeb747bdd5d
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:26:13 2017 +0200
description:
Expand c/l table.
details: /var/hg/gmp/rev/46d1df872004
changeset: 17488:46d1df872004
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:27:39 2017 +0200
description:
Purge unused m4 macro.
details: /var/hg/gmp/rev/214bcc98bbab
changeset: 17489:214bcc98bbab
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Aug 03 19:28:18 2017 +0200
description:
Simplify post-loop addressing.
diffstat:
mpn/x86_64/bd1/addmul_2.asm | 5 ---
mpn/x86_64/bd1/mul_2.asm | 46 +++++++++++++++++-----------------
mpn/x86_64/coreibwl/mul_basecase.asm | 18 +++++++-----
mpn/x86_64/coreihwl/mullo_basecase.asm | 4 --
mpn/x86_64/coreisbr/aors_n.asm | 2 +-
mpn/x86_64/mulx/adx/addmul_1.asm | 10 ++++++-
mpn/x86_64/zen/mul_basecase.asm | 1 +
7 files changed, 44 insertions(+), 42 deletions(-)
diffs (182 lines):
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/bd1/addmul_2.asm
--- a/mpn/x86_64/bd1/addmul_2.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/bd1/addmul_2.asm Thu Aug 03 19:28:18 2017 +0200
@@ -55,11 +55,6 @@
C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-dowm code.
-define(`I',`$1')
-
-
define(`rp', `%rdi') C rcx
define(`up', `%rsi') C rdx
define(`n_param', `%rdx') C r8
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/bd1/mul_2.asm
--- a/mpn/x86_64/bd1/mul_2.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/bd1/mul_2.asm Thu Aug 03 19:28:18 2017 +0200
@@ -33,29 +33,29 @@
include(`../config.m4')
C cycles/limb
-C AMD K8,K9 6.78
-C AMD K10 6.78
-C AMD bull 8.39 8.65
-C AMD pile 8.47
-C AMD steam
-C AMD excavator
-C AMD bobcat 12.1
-C AMD jaguar 11.5
-C Intel P4 24.0
-C Intel core2 8.14
-C Intel NHM 7.78
-C Intel SBR 6.34
-C Intel IBR 6.15
-C Intel HWL 6.04
-C Intel BWL 4.33
-C Intel SKL 4.41
-C Intel atom 39.5
-C Intel SLM 27.8
+C AMD K8,K9 6.78
+C AMD K10 6.78
+C AMD bd1 8.39 8.65
+C AMD bd2 8.47
+C AMD bd3
+C AMD bd4
+C AMD zen
+C AMD bt1 12.1
+C AMD bt2 11.5
+C Intel P4 24.0
+C Intel PNR 8.14
+C Intel NHM 7.78
+C Intel SBR 6.34
+C Intel IBR 6.15
+C Intel HWL 6.04
+C Intel BWL 4.33
+C Intel SKL 4.41
+C Intel atom 39.5
+C Intel SLM 27.8
C VIA nano
C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-C Scheme: genxmul --mul
define(`rp', `%rdi') C rcx
define(`up', `%rsi') C rdx
@@ -179,13 +179,13 @@
add $4, n
jnc L(top)
-L(end): mov -8(up,n,8), %rax
+L(end): mov -8(up), %rax
mul v1
- mov w2, -16(rp,n,8)
+ mov w2, -16(rp)
add %rax, w0
- mov w3, -8(rp,n,8)
+ mov w3, -8(rp)
adc %rdx, w1
- mov w0, (rp,n,8)
+ mov w0, (rp)
mov w1, %rax
pop %rbp
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/coreibwl/mul_basecase.asm
--- a/mpn/x86_64/coreibwl/mul_basecase.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/coreibwl/mul_basecase.asm Thu Aug 03 19:28:18 2017 +0200
@@ -33,19 +33,21 @@
C cycles/limb mul_1 addmul_1
C AMD K8,K9 n/a n/a
C AMD K10 n/a n/a
-C AMD bull n/a n/a
-C AMD pile n/a n/a
-C AMD steam n/a n/a
-C AMD excavator ? ?
-C AMD bobcat n/a n/a
-C AMD jaguar n/a n/a
+C AMD bd1 n/a n/a
+C AMD bd2 n/a n/a
+C AMD bd3 n/a n/a
+C AMD bd4 ? ?
+C AMD zen ? ?
+C AMD bt1 n/a n/a
+C AMD bt2 n/a n/a
C Intel P4 n/a n/a
-C Intel core2 n/a n/a
+C Intel PNR n/a n/a
C Intel NHM n/a n/a
C Intel SBR n/a n/a
C Intel IBR n/a n/a
C Intel HWL 1.68 n/a
-C Intel BWL 1.69 1.8-1.9
+C Intel BWL 1.51 1.67-1.74
+C Intel SKL 1.52 1.63-1.71
C Intel atom n/a n/a
C Intel SLM n/a n/a
C VIA nano n/a n/a
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/coreihwl/mullo_basecase.asm
--- a/mpn/x86_64/coreihwl/mullo_basecase.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/coreihwl/mullo_basecase.asm Thu Aug 03 19:28:18 2017 +0200
@@ -57,10 +57,6 @@
C * Implement proper cor2, replacing current cor0.
C * Micro-optimise.
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
define(`rp', `%rdi')
define(`up', `%rsi')
define(`vp_param', `%rdx')
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/coreisbr/aors_n.asm
--- a/mpn/x86_64/coreisbr/aors_n.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/coreisbr/aors_n.asm Thu Aug 03 19:28:18 2017 +0200
@@ -126,7 +126,7 @@
ADCSBB 16(vp), %r8
ADCSBB 24(vp), %r9
lea 16(vp), vp
- lea (rp), rp
+C lea (rp), rp
jmp L(lo2)
L(e2): mov %r10, (rp)
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/mulx/adx/addmul_1.asm
--- a/mpn/x86_64/mulx/adx/addmul_1.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/mulx/adx/addmul_1.asm Thu Aug 03 19:28:18 2017 +0200
@@ -36,14 +36,22 @@
C AMD K8,K9 -
C AMD K10 -
C AMD bd1 -
-C AMD bobcat -
+C AMD bd2 -
+C AMD bd3 -
+C AMD bd4 -
+C AMD zen ?
+C AMD bt1 -
+C AMD bt2 -
C Intel P4 -
C Intel PNR -
C Intel NHM -
C Intel SBR -
+C Intel IBR -
C Intel HWL -
C Intel BWL ?
+C Intel SKL ?
C Intel atom -
+C Intel SLM -
C VIA nano -
define(`rp', `%rdi') dnl rcx
diff -r 82a9fff89363 -r 214bcc98bbab mpn/x86_64/zen/mul_basecase.asm
--- a/mpn/x86_64/zen/mul_basecase.asm Mon Jul 24 22:20:16 2017 +0200
+++ b/mpn/x86_64/zen/mul_basecase.asm Thu Aug 03 19:28:18 2017 +0200
@@ -41,6 +41,7 @@
C mul_1 could osp into addmul_1.
C * Let vn_param be vn to save a copy.
C * Re-allocate to benefit more from 32-bit encoding.
+C * Poor performance for e.g. n = 12,16.
define(`rp', `%rdi')
define(`up', `%rsi')
More information about the gmp-commit mailing list