[Gmp-commit] /var/hg/gmp: Provide basic set of z13 assembly loops as well as ...

mercurial at gmplib.org mercurial at gmplib.org
Tue Jul 25 17:47:39 CEST 2023


details:   /var/hg/gmp/rev/55fa199cf6c6
changeset: 18399:55fa199cf6c6
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jul 25 17:47:35 2023 +0200
description:
Provide basic set of z13 assembly loops as well as a sqr_basecase in C.

diffstat:

 ChangeLog                        |   14 ++
 mpn/s390_64/z13/addmul_1.asm     |  128 +++++++++++++++++++
 mpn/s390_64/z13/addmul_2.asm     |  132 +++++++++++++++++++
 mpn/s390_64/z13/mul_1.asm        |  117 +++++++++++++++++
 mpn/s390_64/z13/mul_2.asm        |  121 ++++++++++++++++++
 mpn/s390_64/z13/mul_basecase.asm |  258 +++++++++++++++++++++++++++++++++++++++
 mpn/s390_64/z13/sqr_basecase.c   |   82 ++++++++++++
 mpn/x86/p6/aors_n.asm            |   17 +-
 8 files changed, 864 insertions(+), 5 deletions(-)

diffs (truncated from 930 to 300 lines):

diff -r f45eae89762d -r 55fa199cf6c6 ChangeLog
--- a/ChangeLog	Thu Jul 20 19:57:28 2023 +0200
+++ b/ChangeLog	Tue Jul 25 17:47:35 2023 +0200
@@ -1,3 +1,17 @@
+2023-07-20  Torbjörn Granlund  <tg at gmplib.org>
+
+	* config.guess: Generalise sparc patterns.
+
+2023-07-19  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/generic/div_qr_1n_pi2.c, mpn/generic/div_qr_1n_pi1.c,
+	mpn/generic/mod_1_1.c, mpn/generic/div_qr_1u_pi2.c,
+	mpn/generic/div_qr_2.c: Get arm64 asm constraints right for adcs.
+
+2023-07-18  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Suppress any -flto flags for running GMP_ASM_*.
+
 2023-07-15  Torbjörn Granlund  <tg at gmplib.org>
 
 	* configure.ac: Pass -mcpu= instead of -mtune= consistently for ibm power.
diff -r f45eae89762d -r 55fa199cf6c6 mpn/s390_64/z13/addmul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/addmul_1.asm	Tue Jul 25 17:47:35 2023 +0200
@@ -0,0 +1,128 @@
+dnl  S/390-64 mpn_addmul_1
+
+dnl  Copyright 2021 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl TODO
+dnl * Schedule vlvgp away from mlgr; that saves 20% of the run time.
+dnl * Perhaps use vp[0]/vp[1] in inner loop instead of preloading v0/v1.
+
+C            cycles/limb
+C z900		 -
+C z990		 -
+C z9		 -
+C z10		 -
+C z196		 -
+C z12		 ?
+C z13		 ?
+C z14		 ?
+C z15		 3.9
+
+
+define(`rp',	`%r2')		C pointer to the limbs that are read, added to and stored back
+define(`up',	`%r3')		C pointer to the source limbs that get multiplied
+define(`un',	`%r4')		C limb count; halved on entry and used as loop counter
+define(`v0',	`%r5')		C multiplier limb
+define(`cy',	`%r6')		C carry-in limb of mpn_addmul_1c; the code below names %r6 directly
+
+define(`idx',	`%r8')		C byte offset into up and rp
+
+ASM_START()
+
+PROLOGUE(mpn_addmul_1c)		C add up[] * v0 into rp[] starting from the carry-in limb in %r6; loop and exits are shared with mpn_addmul_1
+	stmg	%r6, %r9, 48(%r15)	C save %r6-%r9 at 48(%r15)
+	tmll	un, 1			C test the low bit of un
+	srlg	un, un, 1		C un = un / 2, the trip count of the two-limb loop
+	je	L(cev)			C low bit was clear: even limb count
+
+L(cod):	lg	%r9, 0(up)	C odd count: handle the limb at 0(up) on its own
+	mlgr	%r8, v0			C W1 W0: %r8:%r9 = limb * v0
+	alg	%r6, 0(rp)		C W0: carry-in plus the limb at 0(rp)
+	lghi	%r7, 0			C zero operand for the carry propagation below
+	alcgr	%r8, %r7		C W1: fold carry into the high limb
+	algr	%r6, %r9		C W0: add the low product limb
+	alcgr	%r8, %r7		C W1: fold carry into the high limb
+	stg	%r6, 0(rp)		C store the finished low limb
+	lgr	%r6, %r8		C high limb becomes the carry-in for what follows
+	clgije	un, 0, L(1)		C no limb pairs left: return through L(1) in mpn_addmul_1
+	lghi	idx, 8			C continue at byte offset 8; idx is %r8, free now that it was copied to %r6
+	j	L(lst)			C enter the shared loop setup in mpn_addmul_1
+L(cev):	lghi	idx, 0		C even count: start at byte offset 0 with %r6 still holding the carry-in
+	j	L(lst)			C enter the shared loop setup in mpn_addmul_1
+EPILOGUE()
+
+PROLOGUE(mpn_addmul_1)			C add up[] * v0 into rp[], two limbs per loop iteration; carry limb is left in %r2
+	stmg	%r6, %r9, 48(%r15)	C save %r6-%r9 at 48(%r15)
+	tmll	un, 1			C test the low bit of un
+	srlg	un, un, 1		C un = un / 2, the trip count of the two-limb loop
+	je	L(evn)			C low bit was clear: even limb count
+
+L(odd):	lg	%r7, 0(up)	C odd count: handle the limb at 0(up) on its own
+	mlgr	%r6, v0			C W1 W0: %r6:%r7 = limb * v0
+	lghi	%r9, 0			C zero operand for the carry propagation below
+	alg	%r7, 0(rp)		C W0: add the limb at 0(rp)
+	alcgr	%r6, %r9		C W1: fold carry into the high limb
+	stg	%r7, 0(rp)		C store the finished low limb
+	clgije	un, 0, L(1)		C no limb pairs left: return the high limb through L(1)
+	lghi	idx, 8			C continue at byte offset 8
+	j	L(lst)			C skip the even-count setup
+L(evn):	lghi	%r6, 0		C even count: no pending high limb
+	lghi	idx, 0			C start at byte offset 0
+
+L(lst):	vzero	%v29		C clear the deferred carry of the product sum; mpn_addmul_1c also enters here
+	vzero	%v30			C clear the deferred carry of the rp sum
+L(top):	lgr	%r9, %r6	C %r9 = pending high limb from the previous multiply or from entry
+	lg	%r1, 0(idx, up)		C first source limb of this pair
+	lg	%r7, 8(idx, up)		C second source limb of this pair
+	mlgr	%r0, v0			C W1 W0: %r0:%r1 = first limb * v0
+	mlgr	%r6, v0			C W2 W1: %r6:%r7 = second limb * v0
+	vlvgp	%v23, %r0, %r1		C W1 W0: first product as one 128-bit value
+	vlvgp	%v21, %r7, %r9		C W1 W0: low limb of second product above the pending high limb
+	vacq	%v24, %v23, %v21, %v29	C 128-bit sum of the two values plus the carry kept in %v29
+	vacccq	%v29, %v23, %v21, %v29	C	carry critical path 3: carry out of that sum, kept for the next iteration
+	vl	%v16, 0(idx, rp)	C load two limbs from rp
+	vpdi	%v16, %v16, %v16, 4	C swap the two doublewords so the limb at the higher address is the high half
+	vacq	%v20, %v24, %v16, %v30	C add the rp limbs plus the carry kept in %v30
+	vacccq	%v30, %v24, %v16, %v30	C	carry critical path 4: carry out of that sum, kept for the next iteration
+	vpdi	%v20, %v20, %v20, 4	C swap the doublewords back into memory order
+	vst	%v20, 0(idx, rp)	C store the two result limbs
+	la	idx, 16(idx)		C advance the byte offset by two limbs
+	brctg	un, L(top)		C decrement un and loop while it is nonzero
+
+L(end):	vag	%v29, %v29, %v30	C add the two deferred carries
+	vlgvg	%r2, %v29, 1		C %r2 = doubleword 1 of that sum
+	algr	%r2, %r6		C add the high limb of the last product
+
+	lmg	%r6, %r9, 48(%r15)	C restore %r6-%r9
+	br	%r14			C return with the carry limb in %r2
+L(1):	lgr	%r2, %r6		C single-limb exit: the carry limb is the high limb in %r6
+	lmg	%r6, %r9, 48(%r15)	C restore %r6-%r9
+	br	%r14			C return with the carry limb in %r2
+EPILOGUE()
diff -r f45eae89762d -r 55fa199cf6c6 mpn/s390_64/z13/addmul_2.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/addmul_2.asm	Tue Jul 25 17:47:35 2023 +0200
@@ -0,0 +1,132 @@
+dnl  S/390-64 mpn_addmul_2
+
+dnl  Copyright 2021 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl TODO
+dnl * Schedule vlvgp away from mlgr; that saves 20% of the run time.
+dnl * Perhaps use vp[0]/vp[1] in inner loop instead of preloading v0/v1.
+
+C            cycles/limb
+C z900		 -
+C z990		 -
+C z9		 -
+C z10		 -
+C z196		 -
+C z12		 ?
+C z13		 ?
+C z14		 ?
+C z15		 3.1
+
+
+define(`rp',	`%r2')		C pointer to the limbs that are read, added to and stored back
+define(`up',	`%r3')		C pointer to the source limbs that get multiplied
+define(`un',	`%r4')		C limb count; halved on entry and used as loop counter
+define(`vp',	`%r5')		C pointer to the two multiplier limbs
+
+define(`idx',	`%r12')		C byte offset into up and rp
+define(`v0',	`%r11')		C multiplier limb loaded from 0(vp)
+define(`v1',	`%r5')		C multiplier limb loaded from 8(vp); shares %r5 with vp
+
+ASM_START()
+PROLOGUE(mpn_addmul_2)			C add up[] times the two limbs at vp into rp[]; the top limb is left in %r2
+	stmg	%r6, %r12, 48(%r15)	C save %r6-%r12 at 48(%r15)
+
+	vzero	%v27			C clear deferred carry of path 1
+	vzero	%v28			C clear deferred carry of path 2
+	vzero	%v29			C clear deferred carry of path 3
+	vzero	%v30			C clear deferred carry of path 4
+	lghi	%r10, 0			C no pending high limb of a v1 product yet
+	lg	v0, 0(vp)		C v0 = first multiplier limb
+	lg	v1, 8(vp)		C v1 = second multiplier limb; overwrites vp, which is the same register
+	tmll	un, 1			C test the low bit of un
+	srlg	un, un, 1		C un = un / 2, the trip count of the two-limb loop
+	je	L(evn)			C low bit was clear: even limb count
+
+L(odd):	lg	%r7, 0(up)	C odd count: handle the limb at 0(up) times v0 on its own
+	mlgr	%r6, v0			C W2 W1: %r6:%r7 = limb * v0
+	alg	%r7, 0(rp)		C add the limb at 0(rp)
+	alcgr	%r6, %r10		C fold carry into the high limb; %r10 is zero here
+	stg	%r7, 0(rp)		C store the finished low limb
+	lghi	idx, 8			C continue at byte offset 8
+dnl	clgije	un, 0, L(end)
+	j	L(top)			C enter the loop at the load of the previous source limb
+
+L(evn):	lghi	%r6, 0		C even count: no pending high limb of a v0 product
+	lghi	idx, 0			C start at byte offset 0
+	lghi	%r1, 0			C no previous source limb, so the first v1 product is zero
+	j	L(ent)			C enter the loop past the load at -8(idx, up)
+
+L(top):	lg	%r1, -8(idx, up)	C previous source limb, still to be multiplied by v1
+L(ent):	lg	%r9, 0(idx, up)	C first source limb of this pair
+	mlgr	%r0, v1			C W2 W1: %r0:%r1 = previous limb * v1
+	mlgr	%r8, v1			C W3 W2: %r8:%r9 = first limb * v1
+	vlvgp	%v22, %r0, %r1		C W2 W1: product of the previous limb and v1 as one 128-bit value
+	vlvgp	%v23, %r9, %r6		C W2 W1: low limb of the first-limb v1 product above the pending v0 high limb
+	lg	%r1, 0(idx, up)		C first source limb again, now for v0
+	lg	%r7, 8(idx, up)		C second source limb of this pair
+	mlgr	%r0, v0			C W2 W1: %r0:%r1 = first limb * v0
+	mlgr	%r6, v0			C W3 W2: %r6:%r7 = second limb * v0
+	vlvgp	%v20, %r0, %r1		C W2 W1: product of the first limb and v0 as one 128-bit value
+	vlvgp	%v21, %r7, %r10		C W2 W1: low limb of the second-limb v0 product above the pending v1 high limb
+	vacq	%v24, %v22, %v23, %v27	C sum %v22 + %v23 plus the carry kept in %v27
+	vacccq	%v27, %v22, %v23, %v27	C	carry critical path 1
+	vacq	%v23, %v24, %v20, %v28	C add %v20 plus the carry kept in %v28
+	vacccq	%v28, %v24, %v20, %v28	C	carry critical path 2
+	vacq	%v24, %v23, %v21, %v29	C add %v21 plus the carry kept in %v29
+	vacccq	%v29, %v23, %v21, %v29	C	carry critical path 3
+	vl	%v16, 0(idx, rp)	C load two limbs from rp
+	vpdi	%v16, %v16, %v16, 4	C swap the two doublewords so the limb at the higher address is the high half
+	vacq	%v20, %v24, %v16, %v30	C add the rp limbs plus the carry kept in %v30
+	vacccq	%v30, %v24, %v16, %v30	C	carry critical path 4
+	vpdi	%v20, %v20, %v20, 4	C swap the doublewords back into memory order
+	vst	%v20, 0(idx, rp)	C store the two result limbs
+	lgr	%r10, %r8		C keep the high limb of the first-limb v1 product for the next iteration
+	la	idx, 16(idx)		C advance the byte offset by two limbs
+	brctg	un, L(top)		C decrement un and loop while it is nonzero
+
+L(end):	lg	%r1, -8(idx, up)	C last source limb, still to be multiplied by v1
+	mlgr	%r0, v1			C %r0:%r1 = last limb * v1
+	algr	%r1, %r6		C add the pending high limb of the last v0 product
+	alcgr	%r0, un			C fold carry into the high limb; un is zero after the loop
+	algr	%r1, %r8		C add the pending high limb of the last v1 product from the loop
+	alcgr	%r0, un			C fold carry into the high limb
+	vag	%v27, %v27, %v28	C add deferred carries of paths 1 and 2
+	vag	%v29, %v29, %v30	C add deferred carries of paths 3 and 4
+	vag	%v27, %v27, %v29	C total of the four deferred carries
+	vlgvg	%r10, %v27, 1		C %r10 = doubleword 1 of that total
+	algr	%r1, %r10		C add the deferred carries to the low limb
+	stg	%r1, 0(idx, rp)		C store the limb just past the ones written by the loop
+	alcgr	%r0, un			C fold the carry of the algr above into the high limb
+	lgr	%r2, %r0		C %r2 = top limb
+
+	lmg	%r6, %r12, 48(%r15)	C restore %r6-%r12
+	br	%r14			C return with the top limb in %r2
+EPILOGUE()
diff -r f45eae89762d -r 55fa199cf6c6 mpn/s390_64/z13/mul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/mul_1.asm	Tue Jul 25 17:47:35 2023 +0200
@@ -0,0 +1,117 @@
+dnl  S/390-64 mpn_mul_1
+
+dnl  Copyright 2021 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify


More information about the gmp-commit mailing list