[Gmp-commit] /var/hg/gmp: Unroll z14 popcount for 2x speedup.
mercurial at gmplib.org
mercurial at gmplib.org
Sun Aug 20 01:48:53 CEST 2023
details: /var/hg/gmp/rev/4e849884bdd8
changeset: 18437:4e849884bdd8
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Aug 20 01:38:19 2023 +0200
description:
Unroll z14 popcount for 2x speedup.
diffstat:
mpn/s390_64/z14/popcount.asm | 50 +++++++++++++++++++++++++++++++++----------
1 files changed, 38 insertions(+), 12 deletions(-)
diffs (73 lines):
diff -r 787855d61266 -r 4e849884bdd8 mpn/s390_64/z14/popcount.asm
--- a/mpn/s390_64/z14/popcount.asm Sat Aug 19 23:16:07 2023 +0200
+++ b/mpn/s390_64/z14/popcount.asm Sun Aug 20 01:38:19 2023 +0200
@@ -39,31 +39,57 @@
C z12 ?
C z13 ?
C z14 ?
-C z15 ?
+C z15 0.66 (@4.2)
define(`ap', `%r2')
define(`n', `%r3')
ASM_START()
PROLOGUE(mpn_popcount)
- vzero %v30
- tmll n, 1
- srlg n, n, 1
- je L(top)
+ clgije n, 1, L(1)
+ vzero %v31
+ lay %r0, -2(n)
+ srlg %r0, %r0, 2
+
+ vl %v16, 0(ap), 3
+ vpopctg %v30, %v16
+ tmll n, 2
+ je L(b0x)
-L(odd): vllezg %v16, 0(ap)
- vpopctg %v30, %v16
- la ap, 8(ap)
- clgije n, 0, L(end)
+L(b1x): la ap, 16(ap)
+ clgijle n, 3, L(end)
+ vl %v16, 0(ap), 3
+ vpopctg %v31, %v16
+ j L(mid)
+
+L(b0x): vl %v16, 16(ap), 3
+ la ap, 32(ap)
+ vpopctg %v31, %v16
+ clgijle n, 5, L(end)
L(top): vl %v16, 0(ap), 3
vpopctg %v20, %v16
vag %v30, %v30, %v20
- la ap, 16(ap)
- brctg n, L(top)
+L(mid): vl %v16, 16(ap), 3
+ vpopctg %v20, %v16
+ vag %v31, %v31, %v20
+ la ap, 32(ap)
+ brctg %r0, L(top)
-L(end): vzero %v29
+L(end): tmll n, 1
+ je L(evn)
+ vllezg %v16, 0(ap)
+ vpopctg %v20, %v16
+ vag %v30, %v30, %v20
+
+L(evn): vag %v30, %v30, %v31
+ vzero %v29
vsumqg %v30, %v30, %v29
vlgvg %r2, %v30, 1(%r0)
br %r14
+
+L(1): vllezg %v16, 0(ap)
+ vpopctg %v30, %v16
+ vlgvg %r2, %v30, 0
+ br %r14
EPILOGUE()
More information about the gmp-commit mailing list