[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Aug 27 20:47:08 CEST 2023


details:   /var/hg/gmp/rev/bc631043e8b0
changeset: 18446:bc631043e8b0
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 27 19:39:21 2023 +0200
description:
Unroll z14 hamdist.

details:   /var/hg/gmp/rev/1c94d4863498
changeset: 18447:1c94d4863498
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 27 20:24:41 2023 +0200
description:
Properly initialize register used for carry.

details:   /var/hg/gmp/rev/22ee4735f29d
changeset: 18448:22ee4735f29d
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 27 20:27:13 2023 +0200
description:
Use registers differently to give further software pipelining more wiggle-room.

details:   /var/hg/gmp/rev/83c95a896c4c
changeset: 18449:83c95a896c4c
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 27 20:40:51 2023 +0200
description:
Fix typo.

details:   /var/hg/gmp/rev/1d3a34646256
changeset: 18450:1d3a34646256
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 27 20:43:54 2023 +0200
description:
Provide z13 bdiv_dbm1c.

details:   /var/hg/gmp/rev/e3cc6f9e9753
changeset: 18451:e3cc6f9e9753
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 27 20:47:01 2023 +0200
description:
Provide z15 mul_2 and addmul_2.  These could be ported to z13 by replacing VLER by VL+VPDI and VSTER by VPDI+VST.

diffstat:

 mpn/s390_64/addmul_1.asm       |    9 +-
 mpn/s390_64/mul_1.asm          |    8 +-
 mpn/s390_64/submul_1.asm       |    1 +
 mpn/s390_64/z13/bdiv_dbm1c.asm |   92 +++++++++++++
 mpn/s390_64/z14/hamdist.asm    |   73 ++++++++--
 mpn/s390_64/z15/addmul_2.asm   |  280 +++++++++++++++++++++++++++++++++++++++++
 mpn/s390_64/z15/mul_2.asm      |  251 ++++++++++++++++++++++++++++++++++++
 7 files changed, 688 insertions(+), 26 deletions(-)

diffs (truncated from 809 to 300 lines):

diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm	Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/addmul_1.asm	Sun Aug 27 20:47:01 2023 +0200
@@ -74,7 +74,7 @@
 	mlgr	%r8, v0
 	mlgr	%r12, v0
 	algr	%r7, cy
-	alcgr	%r9, %r6
+	alcgr	%r6, %r9
 	lghi	cy, 0
 	alcgr	%r8, %r13
 	alcgr	cy, %r12
@@ -107,6 +107,7 @@
 	alcgr	%r8, %r13
 	alcgr	cy, %r12
 	alg	%r9, 0(rp)
+	lgr	%r6, %r9
 	lghi	idx, -16
 	j	L(m2)
 L(b00):	clgr	%r0, %r0		C clear CF
@@ -121,7 +122,7 @@
 	mlgr	%r12, v0
 	alcgr	%r1, cy
 	alcgr	%r0, %r7
-	alcgr	%r9, %r6
+	alcgr	%r6, %r9
 	lghi	cy, 0
 	alcgr	%r8, %r13
 	alcgr	cy, %r12
@@ -129,9 +130,9 @@
 	alcg	%r0, 8(idx,rp)
 	stg	%r1, 0(idx,rp)
 	stg	%r0, 8(idx,rp)
-L(m3):	alcg	%r9, 16(idx,rp)
+L(m3):	alcg	%r6, 16(idx,rp)
 L(m2):	alcg	%r8, 24(idx,rp)
-	stg	%r9, 16(idx,rp)
+	stg	%r6, 16(idx,rp)
 	stg	%r8, 24(idx,rp)
 	la	idx, 32(idx)
 	brctg	n, L(top)
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/mul_1.asm
--- a/mpn/s390_64/mul_1.asm	Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/mul_1.asm	Sun Aug 27 20:47:01 2023 +0200
@@ -95,17 +95,17 @@
 	lg	%r7, 8(idx,up)
 	mlgr	%r0, v0
 	mlgr	%r6, v0
-	alcgr	%r1, %r8
+	alcgr	%r8, %r1
 	alcgr	%r0, %r7
-	stg	%r1, 0(idx,rp)
+	stg	%r8, 0(idx,rp)
 	stg	%r0, 8(idx,rp)
 L(mid):	lg	%r1, 16(idx,up)
 	lg	%r9, 24(idx,up)
 	mlgr	%r0, v0
 	mlgr	%r8, v0
-	alcgr	%r1, %r6
+	alcgr	%r6, %r1
 	alcgr	%r0, %r9
-	stg	%r1, 16(idx,rp)
+	stg	%r6, 16(idx,rp)
 	stg	%r0, 24(idx,rp)
 	la	idx, 32(idx)
 	brctg	n, L(top)
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/submul_1.asm
--- a/mpn/s390_64/submul_1.asm	Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/submul_1.asm	Sun Aug 27 20:47:01 2023 +0200
@@ -56,6 +56,7 @@
 ASM_START()
 PROLOGUE(mpn_submul_1)
 	stmg	%r6, %r14, 48(%r15)
+	lghi	%r14, 0
 	lghi	cy, 0
 	tmll	n, 1
 	la	n, 3(n)
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/z13/bdiv_dbm1c.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/bdiv_dbm1c.asm	Sun Aug 27 20:47:01 2023 +0200
@@ -0,0 +1,92 @@
+dnl  S/390-64 mpn_bdiv_dbm1c
+
+dnl  Copyright 2023 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900		 -
+C z990		 -
+C z9		 -
+C z10		 -
+C z196		 -
+C z12		 -
+C z13		 ?
+C z14		 ?
+C z15		 5.0
+
+C INPUT PARAMETERS
+define(`qp',	  `%r2')
+define(`up',	  `%r3')
+define(`n',	  `%r4')
+define(`bd',	  `%r5')
+define(`cy',	  `%r6')
+
+define(`idx',     `%r7')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_dbm1c)
+	stmg	%r6, %r9, 48(%r15)	C save r6-r9; reloaded by lmg at exit
+	vlvgp	%v2, %r6, %r6		C v2 = cy in both dwords (running carry)
+	lghi	idx, 0
+	tmll	n, 1			C test low bit of n
+	srlg	n, n, 1			C n = number of two-limb iterations
+	je	L(top)			C low bit clear: skip the single-limb step
+
+	lg	%r1, 0(up)
+	mlgr	%r0, bd			C r0:r1 = up[0] * bd (hi:lo)
+	agr	%r0, %r1		C r0 = hi + lo
+	vlvgp	%v0, %r0, %r1		C v0 = (hi + lo, lo)
+	vsq	%v2, %v2, %v0		C 128-bit subtract; right dword is the quotient limb
+	vsteg	%v2, 0(qp), 1		C store right dword to qp[0]
+	vpdi	%v2, %v2, %v2, 0	C copy left dword to both dwords
+	cgije	n, 0, L(end)		C no two-limb iterations left
+	lghi	idx, 8
+
+L(top):	lg	%r1, 0(idx,up)
+	lg	%r9, 8(idx,up)
+	mlgr	%r0, bd			C r0:r1 = first limb * bd
+	mlgr	%r8, bd			C r8:r9 = second limb * bd
+	agr	%r0, %r1
+	vlvgp	%v0, %r0, %r1		C v0 = (hi + lo, lo) for first limb
+	agr	%r8, %r9
+	vlvgp	%v1, %r8, %r9		C v1 = (hi + lo, lo) for second limb
+	vsq	%v3, %v2, %v0		C step 1: carry minus first product
+	vpdi	%v4, %v3, %v3, 0	C replicate new carry (left dword)
+	vsq	%v5, %v4, %v1		C step 2: carry minus second product
+	vpdi	%v2, %v5, %v5, 0	C replicate carry for next iteration
+	vsteg	%v3, 0(idx,qp), 1	C store right dwords as quotient limbs
+	vsteg	%v5, 8(idx,qp), 1
+	la	idx, 16(idx)
+	brctg	n, L(top)
+
+L(end):	vlgvg	%r2, %v2, 0		C result = left dword of carry vector v2 (v6 is never written)
+	lmg	%r6, %r9, 48(%r15)
+	br	%r14
+EPILOGUE()
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/z14/hamdist.asm
--- a/mpn/s390_64/z14/hamdist.asm	Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/z14/hamdist.asm	Sun Aug 27 20:47:01 2023 +0200
@@ -39,7 +39,7 @@
 C z12		 ?
 C z13		 ?
 C z14		 ?
-C z15		 ?
+C z15		 1.0
 
 define(`ap',	`%r2')
 define(`bp',	`%r3')
@@ -47,30 +47,67 @@
 
 ASM_START()
 PROLOGUE(mpn_hamdist)
-	vzero	%v30
-	tmll	n, 1
-	srlg	n, n, 1
-	je	L(top)
+	clgije	n, 1, L(1)		C n == 1: dedicated single-limb path
+	vzero	%v31			C second accumulator = 0
+	lay	%r0, -2(n)
+	srlg	%r0, %r0, 2		C r0 = (n - 2) / 4, loop counter for brctg
+
+	vl	%v16, 0(ap), 3
+	vl	%v17, 0(bp), 3
+	vx	%v18, %v16, %v17	C xor of the first 16 bytes of ap and bp
+	vpopctg	%v30, %v18		C first accumulator = per-dword popcount
+	tmll	n, 2			C test bit 1 of n
+	je	L(b0x)			C bit 1 clear
 
-L(odd):	vllezg	%v16, 0(ap)
-	vllezg	%v17, 0(bp)
-	vx	%v16, %v16, %v17
-	vpopctg	%v30, %v16
-	la	ap, 8(ap)
-	la	bp, 8(bp)
-	clgije	n, 0, L(end)
+L(b1x):	la	ap, 16(ap)
+	la	bp, 16(bp)
+	clgijle	n, 3, L(end)		C n is 2 or 3: no more 16-byte pairs
+	vl	%v16, 0(ap), 3
+	vl	%v17, 0(bp), 3
+	vx	%v18, %v16, %v17
+	vpopctg	%v31, %v18		C seed second accumulator
+	j	L(mid)			C enter loop at its second half
+
+L(b0x):	vl	%v16, 16(ap), 3
+	vl	%v17, 16(bp), 3
+	vx	%v18, %v16, %v17
+	la	ap, 32(ap)
+	la	bp, 32(bp)
+	vpopctg	%v31, %v18		C seed second accumulator
+	clgijle	n, 5, L(end)		C n is 4 or 5: no more 16-byte pairs
 
 L(top):	vl	%v16, 0(ap), 3
 	vl	%v17, 0(bp), 3
-	vx	%v16, %v16, %v17
-	vpopctg	%v20, %v16
+	vx	%v18, %v16, %v17
+	vpopctg	%v20, %v18
 	vag	%v30, %v30, %v20
-	la	ap, 16(ap)
-	la	bp, 16(bp)
-	brctg	n, L(top)
+L(mid):	vl	%v16, 16(ap), 3
+	vl	%v17, 16(bp), 3
+	vx	%v18, %v16, %v17
+	vpopctg	%v20, %v18
+	vag	%v31, %v31, %v20	C second half accumulates into v31
+	la	ap, 32(ap)
+	la	bp, 32(bp)
+	brctg	%r0, L(top)
 
-L(end):	vzero	%v29
+L(end):	tmll	n, 1			C test low bit of n
+	je	L(evn)			C low bit clear: no trailing limb
+	vllezg	%v16, 0(ap)		C trailing limb into one dword, rest zeroed
+	vllezg	%v17, 0(bp)
+	vx	%v18, %v16, %v17
+	vpopctg	%v20, %v18
+	vag	%v30, %v30, %v20
+
+L(evn):	vag	%v30, %v30, %v31	C merge the two accumulators
+	vzero	%v29
 	vsumqg	%v30, %v30, %v29
 	vlgvg	%r2, %v30, 1(%r0)
 	br	%r14
+
+L(1):	vllezg	%v16, 0(ap)
+	vllezg	%v17, 0(bp)
+	vx	%v18, %v16, %v17
+	vpopctg	%v30, %v18
+	vlgvg	%r2, %v30, 0		C result = dword 0 of the popcount vector
+	br	%r14
 EPILOGUE()
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/z15/addmul_2.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z15/addmul_2.asm	Sun Aug 27 20:47:01 2023 +0200
@@ -0,0 +1,280 @@
+dnl  S/390-64 mpn_addmul_2
+
+dnl  Copyright 2023 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl


More information about the gmp-commit mailing list