[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Sep 5 21:07:54 UTC 2019
details: /var/hg/gmp/rev/8f9104e94e86
changeset: 17868:8f9104e94e86
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Sep 05 22:47:48 2019 +0200
description:
Rewrite to make better use of Arm conditional execution.
details: /var/hg/gmp/rev/77ce32b8bbeb
changeset: 17869:77ce32b8bbeb
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Sep 05 22:53:04 2019 +0200
description:
Rewrite to make better use of Arm conditional execution.
details: /var/hg/gmp/rev/92765e025e35
changeset: 17870:92765e025e35
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Sep 05 22:56:58 2019 +0200
description:
ChangeLog
details: /var/hg/gmp/rev/46301888618d
changeset: 17871:46301888618d
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Sep 05 23:01:32 2019 +0200
description:
Align function start to 64-byte boundary.
diffstat:
ChangeLog | 66 +++++++++++++++++++++++++++++++++++++++++++++
mpn/arm/v6t2/gcd_22.asm | 64 +++++++++++++++++++------------------------
mpn/arm64/gcd_22.asm | 33 ++++++++++------------
mpn/x86_64/bd2/gcd_11.asm | 2 +-
mpn/x86_64/bd4/gcd_11.asm | 2 +-
mpn/x86_64/core2/gcd_11.asm | 2 +-
mpn/x86_64/gcd_11.asm | 41 +++++++++++++++++++--------
7 files changed, 140 insertions(+), 70 deletions(-)
diffs (truncated from 357 to 300 lines):
diff -r 8c0120dc67b0 -r 46301888618d ChangeLog
--- a/ChangeLog Thu Sep 05 21:25:39 2019 +0200
+++ b/ChangeLog Thu Sep 05 23:01:32 2019 +0200
@@ -1,3 +1,9 @@
+2019-09-05 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm64/gcd_22.asm: Rewrite to make better use of Arm conditional
+ execution.
+ * mpn/arm/v6t2/gcd_22.asm: Likewise.
+
2019-09-05 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/hgcd2.c (div1): Return both r and q as a
@@ -27,6 +33,66 @@
* tune/common.c (speed_mpn_hgcd2): New function.
* tune/speed.c (routine): Add mpn_hgcd2.
+2019-09-04 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm/v6t2/gcd_22.asm: New file.
+ * mpn/arm64/gcd_22.asm: New file.
+ * mpn/ia64/gcd_11.asm: New file.
+
+2019-09-01 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bt1/gcd_11.asm: Replace grabber with bt1 optimised code.
+
+2019-08-30 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bd4/gcd_22.asm: New grabber file.
+
+ * mpn/x86_64/zen/gcd_22.asm: Use coreihwl instead of bd2 gcd_22.
+
+ * mpn/x86_64/bd2/gcd_22.asm: Fix typo in FUNC_ENTRY (currently unused).
+ Avoid a register copy before return.
+ * mpn/x86_64/core2/gcd_22.asm: Likewise.
+ * mpn/x86_64/k10/gcd_22.asm: Likewise.
+ * mpn/x86_64/gcd_22.asm: Likewise.
+
+ * mpn/x86_64/coreihwl/gcd_22.asm: Optimise, now runs well on more CPUs.
+
+ * mpn/x86_64/gcd_11.asm: Remove PROTECT from symbols as they are
+ actually local.
+ * mpn/x86_64/gcd_22.asm: Likewise.
+
+2019-08-25 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bd2/gcd_22.asm: Repeat tzcnt for exceptional lowz case.
+ Remove dead code.
+
+ * mpn/powerpc64/mode64/p7/gcd_22.asm: Make logic for determining ABI
+ wrt struct return more robust.
+ * mpn/powerpc64/mode64/p9/gcd_22.asm: Likewise.
+
+2019-08-24 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bt1/gcd_11.asm: New grabber.
+ * mpn/x86_64/bt1/gcd_22.asm: New grabber.
+ * mpn/x86_64/bt2/gcd_22.asm: New grabber.
+
+ * mpn/x86_64/atom/gcd_22.asm: Remove stale grabber file.
+ * mpn/x86_64/zen/gcd_22.asm: Grab bd2 instead of hwl code.
+ * mpn/x86_64/bd2/gcd_22.asm: New file.
+ * mpn/x86_64/k8/gcd_22.asm: Remove, rely on top-level code instead.
+ * mpn/x86_64/bt1/gcd_22.asm: Remove.
+ * mpn/x86_64/gcd_22.asm: New file, improved version of removed bt1 code.
+
+2019-08-22 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/coreihwl/gcd_11.asm: Remove as it was never beneficial.
+
+ * mpn/x86_64/bd2/gcd_11.asm: Make sure rdx is zero on return to benefit
+ gcd_22's private calls. Make gcd_11 files more similar in register use.
+ * mpn/x86_64/bd4/gcd_11.asm: Likewise.
+ * mpn/x86_64/core2/gcd_11.asm: Likewise.
+ * mpn/x86_64/gcd_11.asm: Likewise.
+
2019-08-22 Niels Möller <nisse at lysator.liu.se>
From Hugh McMaster:
diff -r 8c0120dc67b0 -r 46301888618d mpn/arm/v6t2/gcd_22.asm
--- a/mpn/arm/v6t2/gcd_22.asm Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/arm/v6t2/gcd_22.asm Thu Sep 05 23:01:32 2019 +0200
@@ -34,14 +34,14 @@
C cycles/bit (approx)
C StrongARM -
C XScale -
-C Cortex-A5 13.2
-C Cortex-A7 10.3
-C Cortex-A8 8.4
+C Cortex-A5 10.1
+C Cortex-A7 9.1
+C Cortex-A8 6.3
C Cortex-A9 ?
-C Cortex-A12 9.1
-C Cortex-A15 8.0
+C Cortex-A12 7.7
+C Cortex-A15 5.7
C Cortex-A17 ?
-C Cortex-A53 8.5
+C Cortex-A53 7.0
define(`gp', `r0')
@@ -53,37 +53,31 @@
define(`t0', `r5')
define(`t1', `r6')
-define(`s0', `r7')
-define(`s1', `r8')
-define(`cnt', `r9')
+define(`cnt', `r7')
ASM_START()
PROLOGUE(mpn_gcd_22)
- push { r4-r9 }
+ push { r4-r7 }
- ldr v0, [sp,#24] C
+ ldr v0, [sp,#16] C
L(top): subs t0, u0, v0 C 0 7
beq L(lowz)
- sbc t1, u1, v1 C 1 8
+ sbcs t1, u1, v1 C 1 8
- mov s0, u0
- subs u0, v0, u0 C 0
- mov s1, u1
- sbcs u1, v1, u1 C 1
-
-L(bck): rbit cnt, t0 C 1
- clz cnt, cnt C 2
+ rbit cnt, t0 C 1
- movcc u0, t0 C 6
- movcc u1, t1 C 4
+ negcc t0, t0
+ mvncc t1, t1
+L(bck): movcc v0, u0
+ movcc v1, u1
+
+ clz cnt, cnt C 2
rsb r12, cnt, #32 C 3
- movcs v0, s0 C 6
- movcs v1, s1 C 4
- lsr u0, u0, cnt C 3
- lsl r12, u1, r12 C 4
- lsr u1, u1, cnt C 3
+ lsr u0, t0, cnt C 3
+ lsl r12, t1, r12 C 4
+ lsr u1, t1, cnt C 3
orr u0, u0, r12 C 5
orrs r12, u1, v1
@@ -92,14 +86,14 @@
str r12, [gp,#4] C high result limb <= 0
- mov r8, gp
+ mov r6, gp
mov r0, u0 C pass 1st argument
mov r1, v0 C pass 2nd argument
- mov r9, r14 C preserve link register
+ mov r7, r14 C preserve link register
bl mpn_gcd_11
- str r0, [r8,#0]
- mov r14, r9
- pop { r4-r9 }
+ str r0, [r6,#0]
+ mov r14, r7
+ pop { r4-r7 }
bx r14
L(lowz):C We come here when v0 - u0 = 0
@@ -108,14 +102,12 @@
subs t0, u1, v1
beq L(end)
mov t1, #0
- mov s0, u0
- mov s1, u1
- subs u0, v1, u1
- mov u1, #0
+ rbit cnt, t0 C 1
+ negcc t0, t0
b L(bck)
L(end): str v0, [gp,#0]
str v1, [gp,#4]
- pop { r4-r9 }
+ pop { r4-r7 }
bx r14
EPILOGUE()
diff -r 8c0120dc67b0 -r 46301888618d mpn/arm64/gcd_22.asm
--- a/mpn/arm64/gcd_22.asm Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/arm64/gcd_22.asm Thu Sep 05 23:01:32 2019 +0200
@@ -38,7 +38,7 @@
C Cortex-A55 ?
C Cortex-A57 ?
C Cortex-A72 5.72
-C Cortex-A73 7.12
+C Cortex-A73 6.43
C Cortex-A75 ?
C Cortex-A76 ?
C Cortex-A77 ?
@@ -51,10 +51,8 @@
define(`t0', `x5')
define(`t1', `x6')
-define(`s0', `x7')
-define(`s1', `x8')
-define(`cnt', `x9')
-define(`tnc', `x10')
+define(`cnt', `x7')
+define(`tnc', `x8')
ASM_START()
PROLOGUE(mpn_gcd_22)
@@ -62,22 +60,21 @@
ALIGN(16)
L(top): subs t0, u0, v0 C 0 6
cbz t0, L(lowz)
- sbc t1, u1, v1 C 1 7
+ sbcs t1, u1, v1 C 1 7
+
+ rbit cnt, t0 C 1
- subs s0, v0, u0 C 0
- sbcs s1, v1, u1 C 1 s = v - u, cs = (u < v)
+ cneg t0, t0, cc C 2
+ cinv t1, t1, cc C 2 u = |u - v|
+L(bck): csel v0, v0, u0, cs C 2
+ csel v1, v1, u1, cs C 2 v = min(u,v)
-L(bck): rbit cnt, t0 C 1
clz cnt, cnt C 2
- csel s0, t0, s0, cc C 2
- csel s1, t1, s1, cc C 2 u = |u - v|
sub tnc, xzr, cnt C 3
- csel v0, v0, u0, cc C 2
- csel v1, v1, u1, cc C 2 v = min(u,v)
- lsr u0, s0, cnt C 3
- lsl x14, s1, tnc C 4
- lsr u1, s1, cnt C 3
+ lsr u0, t0, cnt C 3
+ lsl x14, t1, tnc C 4
+ lsr u1, t1, cnt C 3
orr u0, u0, x14 C 5
orr x11, u1, v1
@@ -105,8 +102,8 @@
subs t0, u1, v1
beq L(end)
mov t1, #0
- subs s0, v1, u1
- mov s1, #0
+ rbit cnt, t0 C 1
+ cneg t0, t0, cc C 2
b L(bck) C FIXME: make conditional
L(end): mov x0, v0
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/bd2/gcd_11.asm
--- a/mpn/x86_64/bd2/gcd_11.asm Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_11.asm Thu Sep 05 23:01:32 2019 +0200
@@ -70,7 +70,7 @@
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
mov v0, %rdx
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/bd4/gcd_11.asm
--- a/mpn/x86_64/bd4/gcd_11.asm Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/x86_64/bd4/gcd_11.asm Thu Sep 05 23:01:32 2019 +0200
@@ -70,7 +70,7 @@
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
mov u0, %rax
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/core2/gcd_11.asm
--- a/mpn/x86_64/core2/gcd_11.asm Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/x86_64/core2/gcd_11.asm Thu Sep 05 23:01:32 2019 +0200
@@ -70,7 +70,7 @@
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
jmp L(odd)
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/gcd_11.asm
More information about the gmp-commit mailing list