[Gmp-commit] /var/hg/gmp: Unroll z14 popcount for 2x speedup.

mercurial at gmplib.org mercurial at gmplib.org
Sun Aug 20 01:48:53 CEST 2023


details:   /var/hg/gmp/rev/4e849884bdd8
changeset: 18437:4e849884bdd8
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Aug 20 01:38:19 2023 +0200
description:
Unroll z14 popcount for 2x speedup.

diffstat:

 mpn/s390_64/z14/popcount.asm |  50 +++++++++++++++++++++++++++++++++----------
 1 files changed, 38 insertions(+), 12 deletions(-)

diffs (73 lines):

diff -r 787855d61266 -r 4e849884bdd8 mpn/s390_64/z14/popcount.asm
--- a/mpn/s390_64/z14/popcount.asm	Sat Aug 19 23:16:07 2023 +0200
+++ b/mpn/s390_64/z14/popcount.asm	Sun Aug 20 01:38:19 2023 +0200
@@ -39,31 +39,57 @@
 C z12		 ?
 C z13		 ?
 C z14		 ?
-C z15		 ?
+C z15		 0.66	(@4.2)
 
 define(`ap',	`%r2')
 define(`n',	`%r3')
 
 ASM_START()
 PROLOGUE(mpn_popcount)
-	vzero	%v30
-	tmll	n, 1
-	srlg	n, n, 1
-	je	L(top)
+	clgije	n, 1, L(1)
+	vzero	%v31
+	lay	%r0, -2(n)
+	srlg	%r0, %r0, 2
+
+	vl	%v16, 0(ap), 3
+	vpopctg	%v30, %v16
+	tmll	n, 2
+	je	L(b0x)
 
-L(odd):	vllezg	%v16, 0(ap)
-	vpopctg	%v30, %v16
-	la	ap, 8(ap)
-	clgije	n, 0, L(end)
+L(b1x):	la	ap, 16(ap)
+	clgijle	n, 3, L(end)
+	vl	%v16, 0(ap), 3
+	vpopctg	%v31, %v16
+	j	L(mid)
+
+L(b0x):	vl	%v16, 16(ap), 3
+	la	ap, 32(ap)
+	vpopctg	%v31, %v16
+	clgijle	n, 5, L(end)
 
 L(top):	vl	%v16, 0(ap), 3
 	vpopctg	%v20, %v16
 	vag	%v30, %v30, %v20
-	la	ap, 16(ap)
-	brctg	n, L(top)
+L(mid):	vl	%v16, 16(ap), 3
+	vpopctg	%v20, %v16
+	vag	%v31, %v31, %v20
+	la	ap, 32(ap)
+	brctg	%r0, L(top)
 
-L(end):	vzero	%v29
+L(end):	tmll	n, 1
+	je	L(evn)
+	vllezg	%v16, 0(ap)
+	vpopctg	%v20, %v16
+	vag	%v30, %v30, %v20
+
+L(evn):	vag	%v30, %v30, %v31
+	vzero	%v29
 	vsumqg	%v30, %v30, %v29
 	vlgvg	%r2, %v30, 1(%r0)
 	br	%r14
+
+L(1):	vllezg	%v16, 0(ap)
+	vpopctg	%v30, %v16
+	vlgvg	%r2, %v30, 0
+	br	%r14
 EPILOGUE()


More information about the gmp-commit mailing list