[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Dec 12 14:28:25 CET 2010


details:   /home/hgfiles/gmp/rev/a94aeff9d18a
changeset: 13703:a94aeff9d18a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Dec 12 09:46:45 2010 +0100
description:
Add a k10 popcount.asm.

details:   /home/hgfiles/gmp/rev/8c470347a967
changeset: 13704:8c470347a967
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Dec 12 09:51:04 2010 +0100
description:
Setup special path for k10 and later AMD CPUs.

diffstat:

 ChangeLog                   |    6 ++
 configure.in                |    6 +-
 mpn/x86_64/k10/popcount.asm |  122 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 132 insertions(+), 2 deletions(-)

diffs (155 lines):

diff -r 6fc76fbd5d94 -r 8c470347a967 ChangeLog
--- a/ChangeLog	Sat Dec 11 11:42:24 2010 +0100
+++ b/ChangeLog	Sun Dec 12 09:51:04 2010 +0100
@@ -1,3 +1,9 @@
+2010-12-12  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/k10/popcount.asm: New file.
+	* configure.in: Setup special path for k10 and later AMD CPUs.
+	Remove special x86_64/k8 path, since the directory is non-existent.
+
 2010-12-11  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/sparc32/ultrasparct1: New directory.
diff -r 6fc76fbd5d94 -r 8c470347a967 configure.in
--- a/configure.in	Sat Dec 11 11:42:24 2010 +0100
+++ b/configure.in	Sun Dec 12 09:51:04 2010 +0100
@@ -1502,8 +1502,10 @@
 	case $host_cpu in
 	  x86_64)
 	    ;;
-	  athlon64 | k8 | k10 | bobcat | bulldozer)
-	    path_64="x86_64/k8 $path_64"
+	  k10 | bobcat | bulldozer)
+	    path_64="x86_64/k10 $path_64"
+	    ;;
+	  athlon64 | k8)
 	    ;;
 	  pentium4)
 	    path_64="x86_64/pentium4 $path_64"
diff -r 6fc76fbd5d94 -r 8c470347a967 mpn/x86_64/k10/popcount.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k10/popcount.asm	Sun Dec 12 09:51:04 2010 +0100
@@ -0,0 +1,122 @@
+dnl  AMD64 mpn_popcount -- population count.
+
+dnl  Copyright 2008, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		     popcount
+C		    cycles/limb
+C AMD K8,K9		 n/a
+C AMD K10		 1.15
+C Intel P4		 n/a
+C Intel core2	 	 n/a
+C Intel corei		 1.25
+C Intel atom		 n/a
+C VIA nano		 n/a
+
+C * The zero-offset of popcount is misassembled to the offset-less form, which
+C   is one byte shorter and therefore will mess up the switching code.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn.
+
+C TODO
+C  * Improve switching code, the current code sucks.
+
+define(`up',		`%rdi')
+define(`n',		`%rsi')
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_popcount)
+
+ifelse(1,1,`
+	lea	(up,n,8), up		C up += 8*n: point just past the last limb
+
+C	mov	R32(n), R32(%rcx)
+C	neg	R32(%rcx)
+	imul	$-1, R32(n), R32(%rcx)	C ecx = -n (low 32 bits), replaces the mov+neg above
+	and	$8-1, R32(%rcx)		C ecx = (-n) mod 8 = number of loop slots to skip
+
+	neg	n			C n = -n: negative index that counts up towards zero
+
+	mov	R32(%rcx), R32(%rax)	C rax = skip (zero-extended)
+	neg	%rax			C rax = -skip
+	lea	(up,%rax,8),up		C up -= 8*skip so the entry slot reads the first limb
+
+	xor	R32(%rax), R32(%rax)	C rax = 0: running bit count
+
+	lea	(%rcx,%rcx,4), %rcx	C rcx = 5*skip
+
+	lea	L(top)(%rip), %rdx	C rdx = address of slot 0
+	lea	(%rdx,%rcx,2), %rdx	C rdx += 10*skip, each popcnt+add slot is 10 bytes
+	jmp	*%rdx			C enter the unrolled loop at slot number skip
+',`
+	lea	(up,n,8), up		C alternative entry code, unused while the ifelse above compares 1 with 1
+
+	mov	R32(n), R32(%rcx)
+	neg	R32(%rcx)		C ecx = -n (low 32 bits)
+	and	$8-1, R32(%rcx)		C ecx = (-n) mod 8 = number of loop slots to skip
+
+	neg	n			C n = -n: negative index that counts up towards zero
+
+	mov	R32(%rcx), R32(%rax)
+	shl	$3, R32(%rax)		C eax = 8*skip (byte offset)
+	sub	%rax, up		C up -= 8*skip
+
+	xor	R32(%rax), R32(%rax)	C rax = 0: running bit count
+
+C	add	R32(%rcx), R32(%rcx)	C 2x
+C	lea	(%rcx,%rcx,4), %rcx	C 10x
+	imul	$10, R32(%rcx)		C ecx = 10*skip = byte offset of the entry slot
+
+	lea	L(top)(%rip), %rdx	C rdx = address of slot 0
+	add	%rcx, %rdx		C rdx = address of the entry slot
+	jmp	*%rdx			C enter the unrolled loop at slot number skip
+')
+
+	ALIGN(32)
+L(top):
+C 0 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x00	C popcnt 0(up,n,8), %r8
+	add	%r8, %rax		C accumulate, one 10-byte slot = 7-byte popcnt + 3-byte add
+C 7 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x08	C popcnt 8(up,n,8), %r9
+	add	%r9, %rax
+C 6 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x10	C popcnt 16(up,n,8), %r8
+	add	%r8, %rax
+C 5 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x18	C popcnt 24(up,n,8), %r9
+	add	%r9, %rax
+C 4 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x20	C popcnt 32(up,n,8), %r8
+	add	%r8, %rax
+C 3 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x28	C popcnt 40(up,n,8), %r9
+	add	%r9, %rax
+C 2 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x30	C popcnt 48(up,n,8), %r8
+	add	%r8, %rax
+C 1 = n mod 8
+	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x38	C popcnt 56(up,n,8), %r9
+	add	%r9, %rax
+
+	add	$8, n			C advance the negative index by one full pass of 8 slots
+	js	L(top)			C repeat while the index is still negative
+	ret				C rax holds the accumulated bit count
+EPILOGUE()


More information about the gmp-commit mailing list