[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Aug 19 19:26:30 CEST 2023


details:   /var/hg/gmp/rev/8c07c1f27b2c
changeset: 18431:8c07c1f27b2c
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Aug 19 18:25:02 2023 +0200
description:
Unroll top-level s390_64 mul_1, addmul_1, submul_1.

details:   /var/hg/gmp/rev/d7265d0824d3
changeset: 18432:d7265d0824d3
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Aug 19 19:26:25 2023 +0200
description:
Provide z15-specific copyd.

diffstat:

 mpn/s390_64/addmul_1.asm  |  118 ++++++++++++++++++++++++++++++++++--------
 mpn/s390_64/mul_1.asm     |   92 ++++++++++++++++++++++++++-------
 mpn/s390_64/submul_1.asm  |  127 +++++++++++++++++++++++++++++++++++++--------
 mpn/s390_64/z15/copyd.asm |   77 +++++++++++++++++++++++++++
 4 files changed, 348 insertions(+), 66 deletions(-)

diffs (truncated from 506 to 300 lines):

diff -r bd0bd2059652 -r d7265d0824d3 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm	Sun Aug 13 22:08:05 2023 +0200
+++ b/mpn/s390_64/addmul_1.asm	Sat Aug 19 19:26:25 2023 +0200
@@ -1,6 +1,6 @@
 dnl  S/390-64 mpn_addmul_1
 
-dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl  Copyright 2023 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -30,43 +30,115 @@
 
 include(`../config.m4')
 
+C TODO
+C  * Delay saving of registers to handle n < 2 faster.
+
 C            cycles/limb
-C z900		34
-C z990		23
+C z900		 ?
+C z990		 ?
 C z9		 ?
-C z10		28
+C z10		 ?
 C z196		 ?
+C z12		 ?
+C z13		 ?
+C z14		 ?
+C z15		 4.5
 
-C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')
 define(`n',	`%r4')
 define(`v0',	`%r5')
 
-define(`z',	`%r9')
+define(`idx',	`%r10')
+define(`cy',	`%r11')
 
 ASM_START()
+PROLOGUE(mpn_addmul_1c)
+	stmg	%r6, %r13, 48(%r15)
+	lgr	cy, %r6
+	j	L(ent)
+EPILOGUE()
 PROLOGUE(mpn_addmul_1)
-	stmg	%r9, %r12, 72(%r15)
-	lghi	%r12, 0			C zero index register
-	aghi	%r12, 0			C clear carry flag
-	lghi	%r11, 0			C clear carry limb
-	lghi	z, 0			C keep register zero
+	stmg	%r6, %r13, 48(%r15)
+	lghi	cy, 0
+L(ent):	tmll	n, 1
+	la	n, 3(n)
+	je	L(bx0)
+L(bx1):	tmll	n, 2
+	srlg	n, n, 2
+	je	L(b01)
+L(b11):	lg	%r7, 0(up)
+	mlgr	%r6, v0
+	lg	%r9, 8(up)
+	lg	%r13, 16(up)
+	mlgr	%r8, v0
+	mlgr	%r12, v0
+	algr	%r7, cy
+	alcgr	%r9, %r6
+	lghi	cy, 0
+	alcgr	%r8, %r13
+	alcgr	cy, %r12
+	alg	%r7, 0(rp)
+	stg	%r7, 0(rp)
+	lghi	idx, -8
+	j	L(m3)
+
+L(b01):	lg	%r9, 0(up)
+	mlgr	%r8, v0
+	algr	%r9, cy
+	lghi	cy, 0
+	alcgr	cy, %r8
+	alg	%r9, 0(rp)
+	stg	%r9, 0(rp)
+	lghi	idx, 8
+	brctg	n, L(top)
+	j	L(end)
 
-L(top):	lg	%r1, 0(%r12,up)
-	lg	%r10, 0(%r12,rp)
+L(bx0):	tmll	n, 2
+	lghi	idx, 0
+	srlg	n, n, 2
+	jne	L(b00)
+L(b10):	lg	%r9, 0(up)
+	lg	%r13, 8(up)
+	mlgr	%r8, v0
+	mlgr	%r12, v0
+	algr	%r9, cy
+	lghi	cy, 0
+	alcgr	%r8, %r13
+	alcgr	cy, %r12
+	alg	%r9, 0(rp)
+	lghi	idx, -16
+	j	L(m2)
+L(b00):	aghi	cy, 0			C clear CF
+
+L(top):	lg	%r1, 0(idx,up)
+	lg	%r7, 8(idx,up)
 	mlgr	%r0, v0
-	alcgr	%r1, %r10
-	alcgr	%r0, z
-	algr	%r1, %r11
-	lgr	%r11, %r0
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
+	mlgr	%r6, v0
+	lg	%r9, 16(idx,up)
+	lg	%r13, 24(idx,up)
+	mlgr	%r8, v0
+	mlgr	%r12, v0
+	alcgr	%r1, cy
+	alcgr	%r0, %r7
+	alcgr	%r9, %r6
+	lghi	cy, 0
+	alcgr	%r8, %r13
+	alcgr	cy, %r12
+	alg	%r1, 0(idx,rp)
+	alcg	%r0, 8(idx,rp)
+	stg	%r1, 0(idx,rp)
+	stg	%r0, 8(idx,rp)
+L(m3):	alcg	%r9, 16(idx,rp)
+L(m2):	alcg	%r8, 24(idx,rp)
+	stg	%r9, 16(idx,rp)
+	stg	%r8, 24(idx,rp)
+	la	idx, 32(idx)
 	brctg	n, L(top)
 
-	lghi	%r2, 0
-	alcgr	%r2, %r11
-
-	lmg	%r9, %r12, 72(%r15)
+L(end):	lghi	%r2, 0
+	alcgr	%r2, cy
+	lmg	%r6, %r13, 48(%r15)
 	br	%r14
 EPILOGUE()
+	.section .note.GNU-stack
diff -r bd0bd2059652 -r d7265d0824d3 mpn/s390_64/mul_1.asm
--- a/mpn/s390_64/mul_1.asm	Sun Aug 13 22:08:05 2023 +0200
+++ b/mpn/s390_64/mul_1.asm	Sat Aug 19 19:26:25 2023 +0200
@@ -1,6 +1,6 @@
 dnl  S/390-64 mpn_mul_1
 
-dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl  Copyright 2023 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -30,37 +30,89 @@
 
 include(`../config.m4')
 
+C TODO
+C  * Delay saving of registers to handle n < 2 faster.
+
 C            cycles/limb
-C z900		29
-C z990		22
+C z900		 ?
+C z990		 ?
 C z9		 ?
-C z10		20
+C z10		 ?
 C z196		 ?
+C z12		 ?
+C z13		 ?
+C z14		 ?
+C z15		 3.5
 
-C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')
 define(`n',	`%r4')
 define(`v0',	`%r5')
 
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	stmg	%r11, %r12, 88(%r15)
-	lghi	%r12, 0			C zero index register
-	aghi	%r12, 0			C clear carry flag
-	lghi	%r11, 0			C clear carry limb
+define(`idx',	`%r10')
 
-L(top):	lg	%r1, 0(%r12,up)
+ASM_START()
+PROLOGUE(mpn_mul_1c)
+	stmg	%r6, %r10, 48(%r15)
+	lgr	%r8, %r6
+	j	L(ent)
+EPILOGUE()
+PROLOGUE(mpn_mul_1)
+	stmg	%r6, %r10, 48(%r15)
+	lghi	%r8, 0			C clear carry limb
+	lghi	%r6, 0			C clear carry limb
+L(ent):	tmll	n, 1
+	la	n, 3(n)
+	je	L(bx0)
+L(bx1):	tmll	n, 2
+	srlg	n, n, 2
+	je	L(b01)
+L(b11):	lg	%r7, 0(up)
+	mlgr	%r6, v0
+	algr	%r7, %r8
+	stg	%r7, 0(rp)
+	lghi	idx, -8
+	j	L(mid)
+L(b01):	lg	%r9, 0(up)
+	mlgr	%r8, v0
+	algr	%r9, %r6
+	lghi	%r6, 0
+	alcgr	%r8, %r6
+	stg	%r9, 0(rp)
+	lghi	idx, 8
+	brctg	n, L(top)
+	j	L(end)
+L(bx0):	tmll	n, 2
+	srlg	n, n, 2
+	jne	L(b00)
+L(b10):	lghi	idx, -16
+C	aghi	%r8, 0			C clear CF
+	j	L(mid)
+L(b00):	aghi	%r8, 0			C clear CF
+	lghi	idx, 0
+
+L(top):	lg	%r1, 0(idx,up)
+	lg	%r7, 8(idx,up)
 	mlgr	%r0, v0
-	alcgr	%r1, %r11
-	lgr	%r11, %r0		C copy high part to carry limb
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
+	mlgr	%r6, v0
+	alcgr	%r1, %r8
+	alcgr	%r0, %r7
+	stg	%r1, 0(idx,rp)
+	stg	%r0, 8(idx,rp)
+L(mid):	lg	%r1, 16(idx,up)
+	lg	%r9, 24(idx,up)
+	mlgr	%r0, v0
+	mlgr	%r8, v0
+	alcgr	%r1, %r6
+	alcgr	%r0, %r9
+	stg	%r1, 16(idx,rp)
+	stg	%r0, 24(idx,rp)
+	la	idx, 32(idx)
 	brctg	n, L(top)
 
-	lghi	%r2, 0
-	alcgr	%r2, %r11
-
-	lmg	%r11, %r12, 88(%r15)
+L(end):	lghi	%r2, 0
+	alcgr	%r2, %r8
+	lmg	%r6, %r10, 48(%r15)
 	br	%r14
 EPILOGUE()
+	.section .note.GNU-stack
diff -r bd0bd2059652 -r d7265d0824d3 mpn/s390_64/submul_1.asm
--- a/mpn/s390_64/submul_1.asm	Sun Aug 13 22:08:05 2023 +0200
+++ b/mpn/s390_64/submul_1.asm	Sat Aug 19 19:26:25 2023 +0200
@@ -1,6 +1,6 @@
 dnl  S/390-64 mpn_submul_1
 
-dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl  Copyright 2023 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -30,41 +30,122 @@
 
 include(`../config.m4')
 
+C TODO
+C  * Delay saving of registers to handle n < 2 faster.
+C  * Clean up register usage in feed-in code (b11 is worst)
+
 C            cycles/limb
-C z900		35
-C z990		24
+C z900		 ?
+C z990		 ?
 C z9		 ?
-C z10		28
+C z10		 ?
 C z196		 ?
+C z12		 ?
+C z13		 ?
+C z14		 ?


More information about the gmp-commit mailing list