[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Aug 27 20:47:08 CEST 2023
details: /var/hg/gmp/rev/bc631043e8b0
changeset: 18446:bc631043e8b0
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 27 19:39:21 2023 +0200
description:
Unroll z14 hamdist.
details: /var/hg/gmp/rev/1c94d4863498
changeset: 18447:1c94d4863498
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 27 20:24:41 2023 +0200
description:
Properly initiate register used for carry.
details: /var/hg/gmp/rev/22ee4735f29d
changeset: 18448:22ee4735f29d
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 27 20:27:13 2023 +0200
description:
Use registers differently to give further software pipelining more wiggle-room.
details: /var/hg/gmp/rev/83c95a896c4c
changeset: 18449:83c95a896c4c
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 27 20:40:51 2023 +0200
description:
Fix typo.
details: /var/hg/gmp/rev/1d3a34646256
changeset: 18450:1d3a34646256
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 27 20:43:54 2023 +0200
description:
Provide z13 bdiv_dbm1c.
details: /var/hg/gmp/rev/e3cc6f9e9753
changeset: 18451:e3cc6f9e9753
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 27 20:47:01 2023 +0200
description:
Provide z15 mul_2 and addmul_2. These could be ported to z13 by replacing VLER by VL+VPDI and VSTER by VPDI+VST.
diffstat:
mpn/s390_64/addmul_1.asm | 9 +-
mpn/s390_64/mul_1.asm | 8 +-
mpn/s390_64/submul_1.asm | 1 +
mpn/s390_64/z13/bdiv_dbm1c.asm | 92 +++++++++++++
mpn/s390_64/z14/hamdist.asm | 73 ++++++++--
mpn/s390_64/z15/addmul_2.asm | 280 +++++++++++++++++++++++++++++++++++++++++
mpn/s390_64/z15/mul_2.asm | 251 ++++++++++++++++++++++++++++++++++++
7 files changed, 688 insertions(+), 26 deletions(-)
diffs (truncated from 809 to 300 lines):
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/addmul_1.asm Sun Aug 27 20:47:01 2023 +0200
@@ -74,7 +74,7 @@
mlgr %r8, v0
mlgr %r12, v0
algr %r7, cy
- alcgr %r9, %r6
+ alcgr %r6, %r9
lghi cy, 0
alcgr %r8, %r13
alcgr cy, %r12
@@ -107,6 +107,7 @@
alcgr %r8, %r13
alcgr cy, %r12
alg %r9, 0(rp)
+ lgr %r6, %r9
lghi idx, -16
j L(m2)
L(b00): clgr %r0, %r0 C clear CF
@@ -121,7 +122,7 @@
mlgr %r12, v0
alcgr %r1, cy
alcgr %r0, %r7
- alcgr %r9, %r6
+ alcgr %r6, %r9
lghi cy, 0
alcgr %r8, %r13
alcgr cy, %r12
@@ -129,9 +130,9 @@
alcg %r0, 8(idx,rp)
stg %r1, 0(idx,rp)
stg %r0, 8(idx,rp)
-L(m3): alcg %r9, 16(idx,rp)
+L(m3): alcg %r6, 16(idx,rp)
L(m2): alcg %r8, 24(idx,rp)
- stg %r9, 16(idx,rp)
+ stg %r6, 16(idx,rp)
stg %r8, 24(idx,rp)
la idx, 32(idx)
brctg n, L(top)
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/mul_1.asm
--- a/mpn/s390_64/mul_1.asm Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/mul_1.asm Sun Aug 27 20:47:01 2023 +0200
@@ -95,17 +95,17 @@
lg %r7, 8(idx,up)
mlgr %r0, v0
mlgr %r6, v0
- alcgr %r1, %r8
+ alcgr %r8, %r1
alcgr %r0, %r7
- stg %r1, 0(idx,rp)
+ stg %r8, 0(idx,rp)
stg %r0, 8(idx,rp)
L(mid): lg %r1, 16(idx,up)
lg %r9, 24(idx,up)
mlgr %r0, v0
mlgr %r8, v0
- alcgr %r1, %r6
+ alcgr %r6, %r1
alcgr %r0, %r9
- stg %r1, 16(idx,rp)
+ stg %r6, 16(idx,rp)
stg %r0, 24(idx,rp)
la idx, 32(idx)
brctg n, L(top)
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/submul_1.asm
--- a/mpn/s390_64/submul_1.asm Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/submul_1.asm Sun Aug 27 20:47:01 2023 +0200
@@ -56,6 +56,7 @@
ASM_START()
PROLOGUE(mpn_submul_1)
stmg %r6, %r14, 48(%r15)
+ lghi %r14, 0
lghi cy, 0
tmll n, 1
la n, 3(n)
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/z13/bdiv_dbm1c.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z13/bdiv_dbm1c.asm Sun Aug 27 20:47:01 2023 +0200
@@ -0,0 +1,92 @@
+dnl S/390-64 mpn_bdiv_dbm1c
+
+dnl Copyright 2023 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 -
+C z990 -
+C z9 -
+C z10 -
+C z196 -
+C z12 -
+C z13 ?
+C z14 ?
+C z15 5.0
+
+C INPUT PARAMETERS
+define(`qp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`bd', `%r5')
+define(`cy', `%r6')
+
+define(`idx', `%r7')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_dbm1c)
+ stmg %r6, %r9, 48(%r15) C save r6-r9, restored before return
+ vlvgp %v2, %r6, %r6 C v2 = incoming cy in both dwords
+ lghi idx, 0
+ tmll n, 1 C is the limb count odd?
+ srlg n, n, 1 C n = number of two-limb iterations
+ je L(top) C even count: enter the loop directly
+
+ lg %r1, 0(up)
+ mlgr %r0, bd C r0:r1 = up[0] * bd
+ agr %r0, %r1 C r0 = high product word + low product word
+ vlvgp %v0, %r0, %r1 C v0 = r0 in dword 0 and r1 in dword 1
+ vsq %v2, %v2, %v0 C 128-bit subtract, dword 1 = result limb, dword 0 = next carry
+ vsteg %v2, 0(qp), 1 C store result limb
+ vpdi %v2, %v2, %v2, 0 C copy left dword to both dwords
+ cgije n, 0, L(end) C single limb only: done
+ lghi idx, 8 C skip the limb just handled
+
+L(top): lg %r1, 0(idx,up)
+ lg %r9, 8(idx,up)
+ mlgr %r0, bd C r0:r1 = first limb * bd
+ mlgr %r8, bd C r8:r9 = second limb * bd
+ agr %r0, %r1
+ vlvgp %v0, %r0, %r1
+ agr %r8, %r9
+ vlvgp %v1, %r8, %r9
+ vsq %v3, %v2, %v0 C first limb: carry minus product
+ vpdi %v4, %v3, %v3, 0 C broadcast intermediate carry
+ vsq %v5, %v4, %v1 C second limb: carry minus product
+ vpdi %v2, %v5, %v5, 0 C v2 = carry for the next iteration
+ vsteg %v3, 0(idx,qp), 1
+ vsteg %v5, 8(idx,qp), 1
+ la idx, 16(idx)
+ brctg n, L(top)
+
+L(end): vlgvg %r2, %v2, 0 C return the running carry, which is kept in v2
+ lmg %r6, %r9, 48(%r15)
+ br %r14
+EPILOGUE()
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/z14/hamdist.asm
--- a/mpn/s390_64/z14/hamdist.asm Tue Aug 22 10:20:40 2023 +0200
+++ b/mpn/s390_64/z14/hamdist.asm Sun Aug 27 20:47:01 2023 +0200
@@ -39,7 +39,7 @@
C z12 ?
C z13 ?
C z14 ?
-C z15 ?
+C z15 1.0
define(`ap', `%r2')
define(`bp', `%r3')
@@ -47,30 +47,67 @@
ASM_START()
PROLOGUE(mpn_hamdist)
- vzero %v30
- tmll n, 1
- srlg n, n, 1
- je L(top)
+ clgije n, 1, L(1) C n = 1 is handled separately at the end of the function
+ vzero %v31 C clear the second accumulator
+ lay %r0, -2(n)
+ srlg %r0, %r0, 2 C r0 = loop count, n-2 divided by 4 rounded down
+
+ vl %v16, 0(ap), 3
+ vl %v17, 0(bp), 3
+ vx %v18, %v16, %v17 C differing bits of the first 16 bytes
+ vpopctg %v30, %v18 C first accumulator starts with their dword popcounts
+ tmll n, 2 C test the bit of n with value 2
+ je L(b0x) C bit clear
-L(odd): vllezg %v16, 0(ap)
- vllezg %v17, 0(bp)
- vx %v16, %v16, %v17
- vpopctg %v30, %v16
- la ap, 8(ap)
- la bp, 8(bp)
- clgije n, 0, L(end)
+L(b1x): la ap, 16(ap)
+ la bp, 16(bp)
+ clgijle n, 3, L(end) C n is 2 or 3: no further 16-byte blocks
+ vl %v16, 0(ap), 3
+ vl %v17, 0(bp), 3
+ vx %v18, %v16, %v17
+ vpopctg %v31, %v18 C second accumulator starts here
+ j L(mid) C enter the unrolled loop at its second half
+
+L(b0x): vl %v16, 16(ap), 3
+ vl %v17, 16(bp), 3
+ vx %v18, %v16, %v17
+ la ap, 32(ap)
+ la bp, 32(bp)
+ vpopctg %v31, %v18 C second accumulator starts here
+ clgijle n, 5, L(end) C n is 4 or 5: no further 16-byte blocks
L(top): vl %v16, 0(ap), 3
vl %v17, 0(bp), 3
- vx %v16, %v16, %v17
- vpopctg %v20, %v16
+ vx %v18, %v16, %v17
+ vpopctg %v20, %v18
vag %v30, %v30, %v20
- la ap, 16(ap)
- la bp, 16(bp)
- brctg n, L(top)
+L(mid): vl %v16, 16(ap), 3
+ vl %v17, 16(bp), 3
+ vx %v18, %v16, %v17
+ vpopctg %v20, %v18
+ vag %v31, %v31, %v20 C second half of the loop feeds the second accumulator
+ la ap, 32(ap)
+ la bp, 32(bp)
+ brctg %r0, L(top)
-L(end): vzero %v29
+L(end): tmll n, 1 C odd n leaves one trailing limb
+ je L(evn)
+ vllezg %v16, 0(ap) C load the last limb, rest of the register zeroed
+ vllezg %v17, 0(bp)
+ vx %v18, %v16, %v17
+ vpopctg %v20, %v18
+ vag %v30, %v30, %v20
+
+L(evn): vag %v30, %v30, %v31 C combine the two accumulators
+ vzero %v29
vsumqg %v30, %v30, %v29
vlgvg %r2, %v30, 1(%r0)
br %r14
+
+L(1): vllezg %v16, 0(ap) C single-limb case
+ vllezg %v17, 0(bp)
+ vx %v18, %v16, %v17
+ vpopctg %v30, %v18
+ vlgvg %r2, %v30, 0 C return the popcount from dword 0
+ br %r14
EPILOGUE()
diff -r 510152c4ca97 -r e3cc6f9e9753 mpn/s390_64/z15/addmul_2.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_64/z15/addmul_2.asm Sun Aug 27 20:47:01 2023 +0200
@@ -0,0 +1,280 @@
+dnl S/390-64 mpn_addmul_2
+
+dnl Copyright 2023 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
More information about the gmp-commit
mailing list