[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Jul 27 11:30:14 CEST 2023
details: /var/hg/gmp/rev/295f47ee8032
changeset: 18406:295f47ee8032
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Jul 27 10:43:28 2023 +0200
description:
Move recently added s390_64 popcount and hamdist code to new z14 subdir.
details: /var/hg/gmp/rev/a261cb0a2f65
changeset: 18407:a261cb0a2f65
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Jul 27 11:30:11 2023 +0200
description:
Misc minor z13 improvements.
diffstat:
mpn/s390_64/z13/addmul_1.asm | 4 +-
mpn/s390_64/z13/addmul_2.asm | 4 +-
mpn/s390_64/z13/hamdist.asm | 76 ----------------------------------------
mpn/s390_64/z13/mul_1.asm | 2 +-
mpn/s390_64/z13/mul_2.asm | 2 +-
mpn/s390_64/z13/mul_basecase.asm | 23 +++++------
mpn/s390_64/z13/popcount.asm | 69 ------------------------------------
mpn/s390_64/z13/submul_1.asm | 4 +-
mpn/s390_64/z14/hamdist.asm | 76 ++++++++++++++++++++++++++++++++++++++++
mpn/s390_64/z14/popcount.asm | 69 ++++++++++++++++++++++++++++++++++++
10 files changed, 163 insertions(+), 166 deletions(-)
diffs (truncated from 483 to 300 lines):
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/addmul_1.asm
--- a/mpn/s390_64/z13/addmul_1.asm Thu Jul 27 10:05:50 2023 +0200
+++ b/mpn/s390_64/z13/addmul_1.asm Thu Jul 27 11:30:11 2023 +0200
@@ -107,12 +107,12 @@
vlvgp %v21, %r7, %r9 C W1 W0
vacq %v24, %v23, %v21, %v29 C
vacccq %v29, %v23, %v21, %v29 C carry critical path 1
- vl %v16, 0(idx, rp)
+ vl %v16, 0(idx, rp), 3
vpdi %v16, %v16, %v16, 4
vacq %v20, %v24, %v16, %v30 C
vacccq %v30, %v24, %v16, %v30 C carry critical path 2
vpdi %v20, %v20, %v20, 4
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
la idx, 16(idx)
brctg un, L(top)
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/addmul_2.asm
--- a/mpn/s390_64/z13/addmul_2.asm Thu Jul 27 10:05:50 2023 +0200
+++ b/mpn/s390_64/z13/addmul_2.asm Thu Jul 27 11:30:11 2023 +0200
@@ -102,12 +102,12 @@
vacccq %v28, %v24, %v20, %v28 C carry critical path 2
vacq %v24, %v23, %v21, %v29 C
vacccq %v29, %v23, %v21, %v29 C carry critical path 3
- vl %v16, 0(idx, rp)
+ vl %v16, 0(idx, rp), 3
vpdi %v16, %v16, %v16, 4
vacq %v20, %v24, %v16, %v30 C
vacccq %v30, %v24, %v16, %v30 C carry critical path 4
vpdi %v20, %v20, %v20, 4
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
lgr %r10, %r8
la idx, 16(idx)
brctg un, L(top)
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/hamdist.asm
--- a/mpn/s390_64/z13/hamdist.asm Thu Jul 27 10:05:50 2023 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-dnl S/390-64 mpn_hamdist
-
-dnl Copyright 2023 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C z900 -
-C z990 -
-C z9 -
-C z10 -
-C z196 -
-C z12 ?
-C z13 ?
-C z14 ?
-C z15 ?
-
-define(`ap', `%r2')
-define(`bp', `%r3')
-define(`n', `%r4')
-
-ASM_START()
-PROLOGUE(mpn_hamdist)
- vzero %v30
- tmll n, 1
- srlg n, n, 1
- je L(top)
-
-L(odd): vllezg %v16, 0(ap)
- vllezg %v17, 0(bp)
- vx %v16, %v16, %v17
- vpopctg %v30, %v16
- la ap, 8(ap)
- la bp, 8(bp)
- clgije n, 0, L(end)
-
-L(top): vl %v16, 0(ap), 3
- vl %v17, 0(bp), 3
- vx %v16, %v16, %v17
- vpopctg %v20, %v16
- vag %v30, %v30, %v20
- la ap, 16(ap)
- la bp, 16(bp)
- brctg n, L(top)
-
-L(end): vzero %v29
- vsumqg %v30, %v30, %v29
- vlgvg %r2, %v30, 1(%r0)
- br %r14
-EPILOGUE()
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/mul_1.asm
--- a/mpn/s390_64/z13/mul_1.asm Thu Jul 27 10:05:50 2023 +0200
+++ b/mpn/s390_64/z13/mul_1.asm Thu Jul 27 11:30:11 2023 +0200
@@ -103,7 +103,7 @@
vacq %v20, %v23, %v21, %v29 C
vacccq %v29, %v23, %v21, %v29 C carry critical path
vpdi %v20, %v20, %v20, 4
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
la idx, 16(idx)
brctg un, L(top)
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/mul_2.asm
--- a/mpn/s390_64/z13/mul_2.asm Thu Jul 27 10:05:50 2023 +0200
+++ b/mpn/s390_64/z13/mul_2.asm Thu Jul 27 11:30:11 2023 +0200
@@ -97,7 +97,7 @@
vacccq %v29, %v23, %v21, %v29 C carry critical path 3
vpdi %v20, %v20, %v20, 4
lg %r1, 8(idx, up)
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
lgr %r10, %r8
la idx, 16(idx)
brctg un, L(top)
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/mul_basecase.asm
--- a/mpn/s390_64/z13/mul_basecase.asm Thu Jul 27 10:05:50 2023 +0200
+++ b/mpn/s390_64/z13/mul_basecase.asm Thu Jul 27 11:30:11 2023 +0200
@@ -72,12 +72,13 @@
vacq %v20, %v23, %v21, %v29 C
vacccq %v29, %v23, %v21, %v29 C carry critical path 3
vpdi %v20, %v20, %v20, 4
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
la idx, 16(idx)
brctg un, L(top)
L(end): vlgvg %r7, %v29, 1
algr %r6, %r7
+ stg %r6, 0(idx, rp)
popdef(`L')
')
@@ -126,7 +127,7 @@
vacccq %v29, %v23, %v21, %v29 C carry critical path 3
vpdi %v20, %v20, %v20, 4
lg %r1, 8(idx, up)
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
lgr %r10, %r8
la idx, 16(idx)
brctg un, L(top)
@@ -143,6 +144,7 @@
algr %r1, %r10
stg %r1, 0(idx, rp)
alcgr %r0, un
+ stg %r0, 8(idx, rp)
popdef(`L')
')
@@ -192,12 +194,12 @@
vacccq %v28, %v24, %v20, %v28 C carry critical path 2
vacq %v24, %v23, %v21, %v29 C
vacccq %v29, %v23, %v21, %v29 C carry critical path 3
- vl %v16, 0(idx, rp)
+ vl %v16, 0(idx, rp), 3
vpdi %v16, %v16, %v16, 4
vacq %v20, %v24, %v16, %v30 C
vacccq %v30, %v24, %v16, %v30 C carry critical path 4
vpdi %v20, %v20, %v20, 4
- vst %v20, 0(idx, rp)
+ vst %v20, 0(idx, rp), 3
lgr %r10, %r8
la idx, 16(idx)
brctg un, L(top)
@@ -215,6 +217,7 @@
algr %r1, %r10
stg %r1, 0(idx, rp)
alcgr %r0, un
+ stg %r0, 8(idx, rp)
popdef(`L')
')
@@ -222,10 +225,7 @@
ASM_START()
PROLOGUE(mpn_mul_basecase)
- stmg %r6, %r15, 48(%r15)
- lay %r15, -240(%r15)
- stg un, 216(%r15)
-
+ stmg %r4, %r14, 32(%r15)
lgr vn, vn_arg
tmll vn, 1
@@ -233,26 +233,23 @@
L(vn_odd):
lg v0, 0(vp)
MUL_1()
- stg %r6, 0(idx, rp)
lay vp, -8(vp)
lay rp, -8(rp)
j L(join)
L(vn_evn):
MUL_2()
- stg %r0, 8(idx, rp)
lay vn, -2(vn)
L(join):
srlg vn, vn, 1
cgije vn, 0, L(oend)
L(otop):
- lg un, 216(%r15)
+ lg un, 32(%r15)
la rp, 16(rp)
la vp, 16(vp)
ADDMUL_2()
- stg %r0, 8(idx, rp)
brctg vn, L(otop)
L(oend):
- lmg %r6, %r15, 288(%r15)
+ lmg %r6, %r14, 48(%r15)
br %r14
EPILOGUE()
diff -r f60317772ef9 -r a261cb0a2f65 mpn/s390_64/z13/popcount.asm
--- a/mpn/s390_64/z13/popcount.asm Thu Jul 27 10:05:50 2023 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-dnl S/390-64 mpn_popcount
-
-dnl Copyright 2023 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C z900 -
-C z990 -
-C z9 -
-C z10 -
-C z196 -
-C z12 ?
-C z13 ?
-C z14 ?
-C z15 ?
-
-define(`ap', `%r2')
-define(`n', `%r3')
-
-ASM_START()
-PROLOGUE(mpn_popcount)
- vzero %v30
- tmll n, 1
- srlg n, n, 1
- je L(top)
-
-L(odd): vllezg %v16, 0(ap)
- vpopctg %v30, %v16
- la ap, 8(ap)
- clgije n, 0, L(end)
More information about the gmp-commit mailing list