[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Aug 5 16:59:38 CEST 2023


details:   /var/hg/gmp/rev/f31b8c135ee3
changeset: 18422:f31b8c135ee3
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Aug 05 16:56:43 2023 +0200
description:
Improve z13 asm support.

details:   /var/hg/gmp/rev/98c2e3541dfb
changeset: 18423:98c2e3541dfb
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Aug 05 16:59:34 2023 +0200
description:
Remove z13 sqr_basecase.c, replaced by asm.

diffstat:

 mpn/s390_64/z13/addmul_1.asm     |   56 ++--
 mpn/s390_64/z13/lshift.asm       |  124 +++++++++++++
 mpn/s390_64/z13/lshiftc.asm      |  128 +++++++++++++
 mpn/s390_64/z13/mul_1.asm        |   55 ++--
 mpn/s390_64/z13/mul_basecase.asm |  179 +++++++++++-------
 mpn/s390_64/z13/rshift.asm       |  120 ++++++++++++
 mpn/s390_64/z13/sqr_basecase.asm |  369 +++++++++++++++++++++++++++++++++++++++
 mpn/s390_64/z13/sqr_basecase.c   |   82 --------
 mpn/s390_64/z13/submul_1.asm     |   52 ++--
 9 files changed, 931 insertions(+), 234 deletions(-)

diffs (truncated from 1543 to 300 lines):

diff -r 372acfd0c33e -r 98c2e3541dfb mpn/s390_64/z13/addmul_1.asm
--- a/mpn/s390_64/z13/addmul_1.asm	Thu Aug 03 16:18:17 2023 +0200
+++ b/mpn/s390_64/z13/addmul_1.asm	Sat Aug 05 16:59:34 2023 +0200
@@ -44,7 +44,7 @@
 C z12		 ?
 C z13		 ?
 C z14		 ?
-C z15		 2.55
+C z15		 2.5
 
 
 define(`rp',	`%r2')
@@ -67,16 +67,16 @@
 ASM_START()
 
 PROLOGUE(mpn_addmul_1c)
-	stmg	%r6, %r13, 48(%r15)
+	stmg	%r6, %r10, 48(%r15)
 	j	L(ent)
 EPILOGUE()
 
 PROLOGUE(mpn_addmul_1)
-	stmg	%r6, %r13, 48(%r15)
+	stmg	%r6, %r10, 48(%r15)
 	lghi	%r6, 0
 L(ent):	vzero	%v0
 	vzero	%v2
-	srlg	%r11, an, 2
+	srlg	%r10, an, 2
 
 	tmll	an, 1
 	je	L(bx0)
@@ -86,16 +86,17 @@
 	jne	L(b11)
 
 L(b01):	lghi	idx, -24
-	lg	%r13, 0(ap)
-	mlgr	%r12, b0
-	algr	%r13, %r6
-	lghi	%r6, 0
-	alcgr	%r12, %r6
-	vlvgg	%v4, %r13, 1
+	lgr	%r0, %r6
+	lg	%r7, 0(ap)
+	mlgr	%r6, b0
+	algr	%r7, %r0
+	lghi	%r0, 0
+	alcgr	%r6, %r0
+	vlvgg	%v4, %r7, 1
 	vaq	%v2, %v2, %v4
 	vsteg	%v2, 0(rp), 1
 	vmrhg	%v2, %v2, %v2
-	cgije	%r11, 0, L(1)
+	cgije	%r10, 0, L(1)
 	j	L(cj0)
 
 L(b11):	lghi	idx, -8
@@ -114,7 +115,6 @@
 	jne	L(b10)
 
 L(b00):	lghi	idx, -32
-	lgr	%r12, %r6
 L(cj0):	lg	%r1, 32(idx, ap)
 	lg	%r9, 40(idx, ap)
 	mlgr	%r0, b0
@@ -122,20 +122,20 @@
 	vler	%v1, 32(idx, rp), 3
 	vpdi	%v1, %v1, %v1, 4
 	vlvgp	%v6, %r0, %r1
-	vlvgp	%v7, %r9, %r12
+	vlvgp	%v7, %r9, %r6
 	j	L(mid)
 
 L(b10):	lghi	idx, -16
 	lgr	%r8, %r6
-L(cj1):	lg	%r7, 16(idx, ap)
-	lg	%r13, 24(idx, ap)
+L(cj1):	lg	%r1, 16(idx, ap)
+	lg	%r7, 24(idx, ap)
+	mlgr	%r0, b0
 	mlgr	%r6, b0
-	mlgr	%r12, b0
 	vler	%v1, 16(idx, rp), 3
 	vpdi	%v1, %v1, %v1, 4
-	vlvgp	%v6, %r6, %r7
-	vlvgp	%v7, %r13, %r8
-	cgije	%r11, 0, L(end)
+	vlvgp	%v6, %r0, %r1
+	vlvgp	%v7, %r7, %r8
+	cgije	%r10, 0, L(end)
 
 L(top):	lg	%r1, 32(idx, ap)
 	lg	%r9, 40(idx, ap)
@@ -150,11 +150,11 @@
 	vpdi	%v1, %v1, %v1, 4
 	vster	%v3, 16(idx, rp), 3
 	vlvgp	%v6, %r0, %r1
-	vlvgp	%v7, %r9, %r12
-L(mid):	lg	%r7, 48(idx, ap)
-	lg	%r13, 56(idx, ap)
+	vlvgp	%v7, %r9, %r6
+L(mid):	lg	%r1, 48(idx, ap)
+	lg	%r7, 56(idx, ap)
+	mlgr	%r0, b0
 	mlgr	%r6, b0
-	mlgr	%r12, b0
 	vacq	%v5, %v6, %v1, %v0
 	vacccq	%v0, %v6, %v1, %v0
 	vacq	%v3, %v5, %v7, %v2
@@ -163,10 +163,10 @@
 	vler	%v1, 48(idx, rp), 3
 	vpdi	%v1, %v1, %v1, 4
 	vster	%v3, 32(idx, rp), 3
-	vlvgp	%v6, %r6, %r7
-	vlvgp	%v7, %r13, %r8
+	vlvgp	%v6, %r0, %r1
+	vlvgp	%v7, %r7, %r8
 	la	idx, 32(idx)
-	brctg	%r11, L(top)
+	brctg	%r10, L(top)
 
 L(end):	vacq	%v5, %v6, %v1, %v0
 	vacccq	%v0, %v6, %v1, %v0
@@ -177,7 +177,7 @@
 
 	vag	%v2, %v0, %v2
 L(1):	vlgvg	%r2, %v2, 1
-	algr	%r2, %r12
-	lmg	%r6, %r13, 48(%r15)
+	algr	%r2, %r6
+	lmg	%r6, %r10, 48(%r15)
 	br	%r14
 EPILOGUE()
diff -r 372acfd0c33e -r 98c2e3541dfb mpn/s390_64/z13/lshift.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/lshift.asm	Sat Aug 05 16:59:34 2023 +0200
@@ -0,0 +1,124 @@
+dnl  S/390-64 mpn_lshift.
+
+dnl  Copyright 2023 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900		 7
+C z990           3
+C z9		 ?
+C z10		 6
+C z196		 ?
+
+C NOTES
+C  * This uses vector loads and stores (vl/vst) in a software pipeline.  Using
+C    lmg and stmg is not faster.
+C  * One could assume more pipelining could approach 2.5 c/l, but we have not
+C    found any 8-way loop that runs better than the current 4-way loop.
+C  * Consider using the same feed-in code for 1 <= n <= 3 as for n mod 4,
+C    similarly to the x86_64 sqr_basecase feed-in.
+
+define(`rp',	`%r2')		C destination pointer; %r2 is overwritten with the result at exit
+define(`ap',	`%r3')		C source pointer
+define(`n',	`%r4')		C limb count, later reduced to the loop count
+define(`cnt',	`%r5')		C left shift count
+
+define(`tnc',	`%r1')		C set to 64 - cnt below, the complementary right shift
+C mpn_lshift: shift the n limbs at ap left by cnt bits into rp, top limb first.
+ASM_START()
+PROLOGUE(mpn_lshift)
+	sllg	%r1, n, 3		C %r1 = 8*n, operand size in bytes
+	lay	ap, -40(%r1, ap)	C bias ap so that 32(ap) is the top source limb
+	lay	rp, -32(%r1, rp)	C bias rp so that 24(rp) is the top result limb
+
+	lghi	tnc, 64
+	slgr	tnc, cnt		C tnc = 64 - cnt
+
+	lg	%r0, 32(ap)		C %r0 = top source limb, kept for the final srlg
+
+	tmll	n, 1			C test the low bit of n
+	je	L(bx0)			C even n: go directly to the vector code
+L(bx1):
+	clgijne	n, 1, L(gt1)		C odd n other than 1
+
+L(1):	sllg	%r5, %r0, 0(cnt)	C n = 1: shifted limb goes to %r5, replacing cnt
+	stg	%r5, 24(rp)
+	srlg	%r2, %r0, 0(tnc)	C %r2 = bits shifted out of the top limb
+	br	%r14
+
+L(gt1):	stmg	%r6, %r7, 48(%r15)	C save %r6 and %r7, used as scratch below
+	lg	%r6, 24(ap)		C second highest source limb
+	srlg	%r6, %r6, 0(tnc)	C its high bits move into the top result limb
+	sllg	%r7, %r0, 0(cnt)	C top source limb shifted left
+	ogr	%r6, %r7		C combine into the top result limb
+	stg	%r6, 24(rp)
+	lay	ap, -8(ap)		C step both pointers down by one limb
+	lay	rp, -8(rp)
+	lmg	%r6, %r7, 48(%r15)	C restore %r6 and %r7
+
+L(bx0):	tmll	n, 2			C test bit 1 of n; the jne below branches on this
+	srlg	n, n, 2			C n = n / 4, the loop count
+	jne	L(bx10)
+L(bx00):vleg	%v0, 32(ap), 0		C bit 1 clear: top remaining limb to doubleword 0 of v0
+	la	ap, 16(ap)		C adjust both pointers for entry at mid
+	la	rp, 16(rp)
+	j	L(mid)
+
+L(bx10):vleg	%v1, 32(ap), 0		C bit 1 set: top remaining limb to doubleword 0 of v1
+	clgije	n, 0, L(end)		C loop count zero: only the final two limbs remain
+
+L(top):	vl	%v0, 16(ap), 3		C load the next lower pair of source limbs
+	vpdi	%v2, %v0, %v1, 4	C v2 = doubleword 1 of v0, doubleword 0 of v1
+	veslg	%v4, %v2, 0(cnt)	C shift both doublewords left by cnt
+	vesrlg	%v6, %v0, 0(tnc)	C bits carried up from the limb below each one
+	vo	%v6, %v4, %v6		C merge the two parts
+	vst	%v6, 16(rp), 3		C store two result limbs
+L(mid):	vl	%v1, 0(ap), 3		C second half: same steps with v0 and v1 swapped
+	vpdi	%v3, %v1, %v0, 4	C v3 = doubleword 1 of v1, doubleword 0 of v0
+	veslg	%v5, %v3, 0(cnt)
+	vesrlg	%v7, %v1, 0(tnc)
+	vo	%v7, %v5, %v7
+	vst	%v7, 0(rp), 3		C store two result limbs
+	lay	ap, -32(ap)		C move down four limbs
+	lay	rp, -32(rp)
+	brctg	n, L(top)		C decrement n and loop while nonzero
+
+L(end):	vzero	%v0			C doubleword 0 stays zero for the bottom limb
+	vleg	%v0, 24(ap), 1		C lowest source limb to doubleword 1 of v0
+	vpdi	%v2, %v0, %v1, 4	C pair it with the limb above, held in v1
+	veslg	%v4, %v2, 0(cnt)
+	vesrlg	%v6, %v0, 0(tnc)	C the zero doubleword adds nothing to the lowest limb
+	vo	%v6, %v4, %v6
+	vst	%v6, 16(rp), 3		C store the two lowest result limbs
+
+	srlg	%r2, %r0, 0(tnc)	C %r2 = bits shifted out of the original top limb
+	br	%r14
+EPILOGUE()
+	.section	.note.GNU-stack
diff -r 372acfd0c33e -r 98c2e3541dfb mpn/s390_64/z13/lshiftc.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/lshiftc.asm	Sat Aug 05 16:59:34 2023 +0200
@@ -0,0 +1,128 @@
+dnl  S/390-64 mpn_lshiftc.
+
+dnl  Copyright 2023 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900		 7
+C z990           3


More information about the gmp-commit mailing list