[PATCH] 64-bit Popcount/Hweight for T3 and later

David Miller davem at davemloft.net
Fri Mar 22 20:42:23 CET 2013


To get my feet wet I wrote some population count routines for 64-bit
sparc.   Even though popc was specified in v9, cpus didn't implement
it in hardware until recently.

Technically we could use this on some chips we don't distinguish on
a fine enough granularity yet.  For example we can assume popc is
available on T2 as well as UltraSPARC-IV.

But for now, just T3 and later.

I think that popc runs in the multiplier unit on T4, and thus has
similar characteristics.  It fully pipelines but has a latency of
12 cycles.

2013-03-22  David S. Miller  <davem at davemloft.net>

	* mpn/sparc64/ultrasparct3/hamdist.asm: New file.
	* mpn/sparc64/ultrasparct3/popcount.asm: New file.

diff --git a/mpn/sparc64/ultrasparct3/hamdist.asm b/mpn/sparc64/ultrasparct3/hamdist.asm
new file mode 100644
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/hamdist.asm
@@ -0,0 +1,67 @@
+dnl  SPARC v9 mpn_hamdist for T3/T4.
+
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC T3:	 ?
+C UltraSPARC T4:	 3.5
+
+C INPUT PARAMETERS
+define(`up',   `%o0')
+define(`vp',   `%o1')
+define(`n',    `%o2')
+define(`pcnt', `%o5')
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_hamdist)
+	subcc	n, 1, n
+	be	L(final_one)
+	 clr	pcnt
+L(top):
+	ldx	[up + 0], %g1
+	ldx	[vp + 0], %g2
+	ldx	[up + 8], %o4
+	ldx	[vp + 8], %g3
+	sub	n, 2, n
+	xor	%g1, %g2, %g1
+	add	up, 16, up
+	popc	%g1, %g2
+	add	vp, 16, vp
+	xor	%o4, %g3, %o4
+	add	pcnt, %g2, pcnt
+	popc	%o4, %g3
+	brgz	n, L(top)
+	 add	pcnt, %g3, pcnt
+	brlz,pt	n, L(done)
+	 nop
+L(final_one):
+	ldx	[up + 0], %g1
+	ldx	[vp + 0], %g2
+	xor	%g1,%g2, %g1
+	popc	%g1, %g2
+	add	pcnt, %g2, pcnt
+L(done):
+	retl
+	 mov	pcnt, %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct3/popcount.asm b/mpn/sparc64/ultrasparct3/popcount.asm
new file mode 100644
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/popcount.asm
@@ -0,0 +1,59 @@
+dnl  SPARC v9 mpn_popcount for T3/T4.
+
+dnl  Contributed to the GNU project by David Miller.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC T3:	 ?
+C UltraSPARC T4:	 2.5
+
+C INPUT PARAMETERS
+define(`up',   `%o0')
+define(`n',    `%o1')
+define(`pcnt', `%o5')
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_popcount)
+	subcc	n, 1, n
+	be	L(final_one)
+	 clr	pcnt
+L(top):
+	ldx	[up + 0], %g1
+	sub	n, 2, n
+	ldx	[up + 8], %o4
+	add	up, 16, up
+	popc	%g1, %g2
+	popc	%o4, %g3
+	add	pcnt, %g2, pcnt
+	brgz	n, L(top)
+	 add	pcnt, %g3, pcnt
+	brlz,pt	n, L(done)
+	 nop
+L(final_one):
+	ldx	[up + 0], %g1
+	popc	%g1, %g2
+	add	pcnt, %g2, pcnt
+L(done):
+	retl
+	 mov	pcnt, %o0
+EPILOGUE()


More information about the gmp-devel mailing list