[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org
Thu Sep 5 21:07:54 UTC 2019


details:   /var/hg/gmp/rev/8f9104e94e86
changeset: 17868:8f9104e94e86
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Sep 05 22:47:48 2019 +0200
description:
Rewrite to make better use of Arm conditional execution.

details:   /var/hg/gmp/rev/77ce32b8bbeb
changeset: 17869:77ce32b8bbeb
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Sep 05 22:53:04 2019 +0200
description:
Rewrite to make better use of Arm conditional execution.
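
Both of these changesets replace branch-and-copy sequences with Arm's conditional instructions (movcc/mvncc/negcc on 32-bit Arm, csel/cneg/cinv on arm64), letting the carry flag from the subtraction select |u - v| and min(u, v) directly, as the gcd_22 diffs below show. As a rough illustration only, not GMP code, the same branch-free idea for single 64-bit limbs can be written in C as follows (assuming a GCC/Clang-style __builtin_ctzll; the function name is made up for the example):

    #include <stdint.h>

    /* One-limb sketch of the branch-free step: form |u - v| and min(u, v)
       with masks instead of branches, then shift out the trailing zeros,
       mirroring what the conditional-execution rewrite does per limb pair. */
    static uint64_t gcd_odd_sketch(uint64_t u, uint64_t v)   /* u, v odd */
    {
        while (u != v)
        {
            uint64_t t   = u - v;                  /* wraps when u < v         */
            uint64_t msk = 0 - (uint64_t)(u < v);  /* all-ones iff a borrow    */
            uint64_t d   = (t ^ msk) - msk;        /* |u - v|                  */
            v += t & msk;                          /* min(u, v)                */
            u = d >> __builtin_ctzll(d);           /* strip trailing zero bits */
        }
        return u;
    }

In the asm, the mask/select work is done by the predicated moves, so the loop body carries no taken branch and needs fewer scratch registers (the s0/s1 spares disappear from the register lists in the diffs).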

details:   /var/hg/gmp/rev/92765e025e35
changeset: 17870:92765e025e35
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Sep 05 22:56:58 2019 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/46301888618d
changeset: 17871:46301888618d
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Sep 05 23:01:32 2019 +0200
description:
Align function start to 64-byte boundary.
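
The x86_64 gcd_11 diffs below change ALIGN(16) to ALIGN(64), so the function entry starts on a 64-byte boundary, the size of a typical x86-64 cache line. As a loose C-level analogue only (not how GMP does it; the asm uses the ALIGN macro), a GCC/Clang function attribute can request the same alignment for a hypothetical hot function:

    /* Illustration only: ask the compiler for 64-byte alignment of the
       function's first instruction, comparable to ALIGN(64) in the asm. */
    __attribute__((aligned(64)))
    unsigned long hot_entry(unsigned long x)   /* hypothetical function */
    {
        return x ^ (x >> 7);                   /* arbitrary body */
    }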

diffstat:

 ChangeLog                   |  66 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm/v6t2/gcd_22.asm     |  64 +++++++++++++++++++------------------------
 mpn/arm64/gcd_22.asm        |  33 ++++++++++------------
 mpn/x86_64/bd2/gcd_11.asm   |   2 +-
 mpn/x86_64/bd4/gcd_11.asm   |   2 +-
 mpn/x86_64/core2/gcd_11.asm |   2 +-
 mpn/x86_64/gcd_11.asm       |  41 +++++++++++++++++++--------
 7 files changed, 140 insertions(+), 70 deletions(-)

diffs (truncated from 357 to 300 lines):

diff -r 8c0120dc67b0 -r 46301888618d ChangeLog
--- a/ChangeLog	Thu Sep 05 21:25:39 2019 +0200
+++ b/ChangeLog	Thu Sep 05 23:01:32 2019 +0200
@@ -1,3 +1,9 @@
+2019-09-05  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm64/gcd_22.asm: Rewrite to make better use of Arm conditional
+	execution.
+	* mpn/arm/v6t2/gcd_22.asm: Likewise.
+
 2019-09-05  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/hgcd2.c (div1): Return both r and q as a
@@ -27,6 +33,66 @@
 	* tune/common.c (speed_mpn_hgcd2): New function.
 	* tune/speed.c (routine): Add mpn_hgcd2.
 
+2019-09-04  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm/v6t2/gcd_22.asm: New file.
+	* mpn/arm64/gcd_22.asm: New file.
+	* mpn/ia64/gcd_11.asm: New file.
+
+2019-09-01  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bt1/gcd_11.asm: Replace grabber with bt1 optimised code.
+
+2019-08-30  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bd4/gcd_22.asm: New grabber file.
+
+	* mpn/x86_64/zen/gcd_22.asm: Use coreihwl instead of bd2 gcd_22.
+
+	* mpn/x86_64/bd2/gcd_22.asm: Fix typo in FUNC_ENTRY (currently unused).
+	Avoid a register copy before return.
+	* mpn/x86_64/core2/gcd_22.asm: Likewise.
+	* mpn/x86_64/k10/gcd_22.asm: Likewise.
+	* mpn/x86_64/gcd_22.asm: Likewise.
+
+	* mpn/x86_64/coreihwl/gcd_22.asm: Optimise, now runs well on more CPUs.
+
+	* mpn/x86_64/gcd_11.asm: Remove PROTECT from symbols as they are
+	actually local.
+	* mpn/x86_64/gcd_22.asm: Likewise.
+
+2019-08-25  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bd2/gcd_22.asm: Repeat tzcnt for exceptional lowz case.
+	Remove dead code.
+
+	* mpn/powerpc64/mode64/p7/gcd_22.asm: Make logic for determining ABI
+	wrt struct return more robust.
+	* mpn/powerpc64/mode64/p9/gcd_22.asm: Likewise.
+
+2019-08-24  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bt1/gcd_11.asm: New grabber.
+	* mpn/x86_64/bt1/gcd_22.asm: New grabber.
+	* mpn/x86_64/bt2/gcd_22.asm: New grabber.
+
+	* mpn/x86_64/atom/gcd_22.asm: Remove stale grabber file.
+	* mpn/x86_64/zen/gcd_22.asm: Grab bd2 instead of hwl code.
+	* mpn/x86_64/bd2/gcd_22.asm: New file.
+	* mpn/x86_64/k8/gcd_22.asm: Remove, rely on top-level code instead.
+	* mpn/x86_64/bt1/gcd_22.asm: Remove.
+	* mpn/x86_64/gcd_22.asm: New file, improved version of removed bt1 code.
+
+2019-08-22  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/coreihwl/gcd_11.asm: Remove as it was never beneficial.
+
+	* mpn/x86_64/bd2/gcd_11.asm: Make sure rdx is zero on return to benefit
+	gcd_22's private calls. Make gcd_11 files more similar in register use.
+	* mpn/x86_64/bd4/gcd_11.asm: Likewise.
+	* mpn/x86_64/core2/gcd_11.asm: Likewise.
+	* mpn/x86_64/gcd_11.asm: Likewise.
+
 2019-08-22  Niels Möller  <nisse at lysator.liu.se>
 
 	From Hugh McMaster:
diff -r 8c0120dc67b0 -r 46301888618d mpn/arm/v6t2/gcd_22.asm
--- a/mpn/arm/v6t2/gcd_22.asm	Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/arm/v6t2/gcd_22.asm	Thu Sep 05 23:01:32 2019 +0200
@@ -34,14 +34,14 @@
 C	     cycles/bit (approx)
 C StrongARM	 -
 C XScale	 -
-C Cortex-A5	13.2
-C Cortex-A7	10.3
-C Cortex-A8	 8.4
+C Cortex-A5	10.1
+C Cortex-A7	 9.1
+C Cortex-A8	 6.3
 C Cortex-A9	 ?
-C Cortex-A12	 9.1
-C Cortex-A15	 8.0
+C Cortex-A12	 7.7
+C Cortex-A15	 5.7
 C Cortex-A17	 ?
-C Cortex-A53	 8.5
+C Cortex-A53	 7.0
 
 
 define(`gp',    `r0')
@@ -53,37 +53,31 @@
 
 define(`t0',    `r5')
 define(`t1',    `r6')
-define(`s0',    `r7')
-define(`s1',    `r8')
-define(`cnt',   `r9')
+define(`cnt',   `r7')
 
 ASM_START()
 PROLOGUE(mpn_gcd_22)
-	push	{ r4-r9 }
+	push	{ r4-r7 }
 
-	ldr	v0, [sp,#24]		C
+	ldr	v0, [sp,#16]		C
 
 L(top):	subs	t0, u0, v0		C 0 7
 	beq	L(lowz)
-	sbc	t1, u1, v1		C 1 8
+	sbcs	t1, u1, v1		C 1 8
 
-	mov	s0, u0
-	subs	u0, v0, u0		C 0
-	mov	s1, u1
-	sbcs	u1, v1, u1		C 1
-
-L(bck):	rbit	cnt, t0			C 1
-	clz	cnt, cnt		C 2
+	rbit	cnt, t0			C 1
 
-	movcc	u0, t0			C 6
-	movcc	u1, t1			C 4
+	negcc	t0, t0
+	mvncc	t1, t1
+L(bck):	movcc	v0, u0
+	movcc	v1, u1
+
+	clz	cnt, cnt		C 2
 	rsb	r12, cnt, #32		C 3
-	movcs	v0, s0			C 6
-	movcs	v1, s1			C 4
 
-	lsr	u0, u0, cnt		C 3
-	lsl	r12, u1, r12		C 4
-	lsr	u1, u1, cnt		C 3
+	lsr	u0, t0, cnt		C 3
+	lsl	r12, t1, r12		C 4
+	lsr	u1, t1, cnt		C 3
 	orr	u0, u0, r12		C 5
 
 	orrs	r12, u1, v1
@@ -92,14 +86,14 @@
 
 	str	r12, [gp,#4]		C high result limb <= 0
 
-	mov	r8, gp
+	mov	r6, gp
 	mov	r0, u0			C pass 1st argument
 	mov	r1, v0			C pass 2nd argument
-	mov	r9, r14			C preserve link register
+	mov	r7, r14			C preserve link register
 	bl	mpn_gcd_11
-	str	r0, [r8,#0]
-	mov	r14, r9
-	pop	{ r4-r9 }
+	str	r0, [r6,#0]
+	mov	r14, r7
+	pop	{ r4-r7 }
 	bx	r14
 
 L(lowz):C We come here when v0 - u0 = 0
@@ -108,14 +102,12 @@
 	subs	t0, u1, v1
 	beq	L(end)
 	mov	t1, #0
-	mov	s0, u0
-	mov	s1, u1
-	subs	u0, v1, u1
-	mov	u1, #0
+	rbit	cnt, t0			C 1
+	negcc	t0, t0
 	b	L(bck)
 
 L(end):	str	v0, [gp,#0]
 	str	v1, [gp,#4]
-	pop	{ r4-r9 }
+	pop	{ r4-r7 }
 	bx	r14
 EPILOGUE()
diff -r 8c0120dc67b0 -r 46301888618d mpn/arm64/gcd_22.asm
--- a/mpn/arm64/gcd_22.asm	Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/arm64/gcd_22.asm	Thu Sep 05 23:01:32 2019 +0200
@@ -38,7 +38,7 @@
 C Cortex-A55	 ?
 C Cortex-A57	 ?
 C Cortex-A72	 5.72
-C Cortex-A73	 7.12
+C Cortex-A73	 6.43
 C Cortex-A75	 ?
 C Cortex-A76	 ?
 C Cortex-A77	 ?
@@ -51,10 +51,8 @@
 
 define(`t0',    `x5')
 define(`t1',    `x6')
-define(`s0',    `x7')
-define(`s1',    `x8')
-define(`cnt',   `x9')
-define(`tnc',   `x10')
+define(`cnt',   `x7')
+define(`tnc',   `x8')
 
 ASM_START()
 PROLOGUE(mpn_gcd_22)
@@ -62,22 +60,21 @@
 	ALIGN(16)
 L(top):	subs	t0, u0, v0		C 0 6
 	cbz	t0, L(lowz)
-	sbc	t1, u1, v1		C 1 7
+	sbcs	t1, u1, v1		C 1 7
+
+	rbit	cnt, t0			C 1
 
-	subs	s0, v0, u0		C 0
-	sbcs	s1, v1, u1		C 1 s = v - u, cs = (u < v)
+	cneg	t0, t0, cc		C 2
+	cinv	t1, t1, cc		C 2 u = |u - v|
+L(bck):	csel	v0, v0, u0, cs		C 2
+	csel	v1, v1, u1, cs		C 2 v = min(u,v)
 
-L(bck):	rbit	cnt, t0			C 1
 	clz	cnt, cnt		C 2
-	csel	s0, t0, s0, cc		C 2
-	csel	s1, t1, s1, cc		C 2 u = |u - v|
 	sub	tnc, xzr, cnt		C 3
-	csel	v0, v0, u0, cc		C 2
-	csel	v1, v1, u1, cc		C 2 v = min(u,v)
 
-	lsr	u0, s0, cnt		C 3
-	lsl	x14, s1, tnc		C 4
-	lsr	u1, s1, cnt		C 3
+	lsr	u0, t0, cnt		C 3
+	lsl	x14, t1, tnc		C 4
+	lsr	u1, t1, cnt		C 3
 	orr	u0, u0, x14		C 5
 
 	orr	x11, u1, v1
@@ -105,8 +102,8 @@
 	subs	t0, u1, v1
 	beq	L(end)
 	mov	t1, #0
-	subs	s0, v1, u1
-	mov	s1, #0
+	rbit	cnt, t0			C 1
+	cneg	t0, t0, cc		C 2
 	b	L(bck)			C FIXME: make conditional
 
 L(end):	mov	x0, v0
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/bd2/gcd_11.asm
--- a/mpn/x86_64/bd2/gcd_11.asm	Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/x86_64/bd2/gcd_11.asm	Thu Sep 05 23:01:32 2019 +0200
@@ -70,7 +70,7 @@
 
 ASM_START()
 	TEXT
-	ALIGN(16)
+	ALIGN(64)
 PROLOGUE(mpn_gcd_11)
 	FUNC_ENTRY(2)
 	mov	v0, %rdx
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/bd4/gcd_11.asm
--- a/mpn/x86_64/bd4/gcd_11.asm	Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/x86_64/bd4/gcd_11.asm	Thu Sep 05 23:01:32 2019 +0200
@@ -70,7 +70,7 @@
 
 ASM_START()
 	TEXT
-	ALIGN(16)
+	ALIGN(64)
 PROLOGUE(mpn_gcd_11)
 	FUNC_ENTRY(2)
 	mov	u0, %rax
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/core2/gcd_11.asm
--- a/mpn/x86_64/core2/gcd_11.asm	Thu Sep 05 21:25:39 2019 +0200
+++ b/mpn/x86_64/core2/gcd_11.asm	Thu Sep 05 23:01:32 2019 +0200
@@ -70,7 +70,7 @@
 
 ASM_START()
 	TEXT
-	ALIGN(16)
+	ALIGN(64)
 PROLOGUE(mpn_gcd_11)
 	FUNC_ENTRY(2)
 	jmp	L(odd)
diff -r 8c0120dc67b0 -r 46301888618d mpn/x86_64/gcd_11.asm

