[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Dec 13 20:08:13 CET 2010


details:   /home/hgfiles/gmp/rev/25f5b5a0a5dd
changeset: 13707:25f5b5a0a5dd
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 13 19:56:15 2010 +0100
description:
Fix comment typos.

details:   /home/hgfiles/gmp/rev/9a97c2aa6515
changeset: 13708:9a97c2aa6515
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 13 20:08:08 2010 +0100
description:
Add a k10 hamdist.asm.

diffstat:

 ChangeLog                   |   2 +
 mpn/x86_64/k10/hamdist.asm  |  85 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/k10/popcount.asm |  10 ++--
 3 files changed, 92 insertions(+), 5 deletions(-)

diffs (128 lines):

diff -r df2acbfb109e -r 9a97c2aa6515 ChangeLog
--- a/ChangeLog	Mon Dec 13 11:25:51 2010 +0100
+++ b/ChangeLog	Mon Dec 13 20:08:08 2010 +0100
@@ -1,5 +1,7 @@
 2010-12-13  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/x86_64/k10/hamdist.asm: New file.
+
 	* configure.in: Amend last change for lame /bin/sh.
 
 2010-12-12  Torbjorn Granlund  <tege at gmplib.org>
diff -r df2acbfb109e -r 9a97c2aa6515 mpn/x86_64/k10/hamdist.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k10/hamdist.asm	Mon Dec 13 20:08:08 2010 +0100
@@ -0,0 +1,85 @@
+dnl  AMD64 mpn_hamdist -- hamming distance.
+
+dnl  Copyright 2008, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		    cycles/limb
+C AMD K8,K9		 n/a
+C AMD K10		 2
+C Intel P4		 n/a
+C Intel core2		 n/a
+C Intel corei		 2.05
+C Intel atom		 n/a
+C VIA nano		 n/a
+
+C This is very straightforward 2-way unrolled code.
+
+C TODO
+C  * Write something less basic.  It should not be hard to reach 1.5 c/l with
+C    4-way unrolling.
+
+define(`ap',		`%rdi')			C pointer to the limbs of operand A
+define(`bp',		`%rsi')			C pointer to the limbs of operand B
+define(`n',		`%rdx')			C limb count; limbs are indexed with scale 8
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_hamdist)
+	mov	(ap), %r8			C NOTE(review): read before any test of n, so n >= 1 appears to be assumed
+	xor	(bp), %r8			C r8 = first A limb xor first B limb
+
+	lea	(ap,n,8), ap			C point at A operand end
+	lea	(bp,n,8), bp			C point at B operand end
+	neg	n				C n = -n: negative limb index that counts up towards 0
+
+	bt	$0, R32(n)			C CF = bit 0 of n, set when the limb count is odd
+	jnc	L(2)				C even count: start by handling two limbs
+
+L(1):	.byte	0xf3,0x49,0x0f,0xb8,0xc0	C popcnt %r8, %rax
+	xor	R32(%r10), R32(%r10)		C no pending count; the loop begins by adding r10 to rax
+	add	$1, n				C one limb consumed
+	js	L(top)				C index still negative: limbs remain, enter the loop
+	ret					C single limb: its bit count is already in rax
+
+	ALIGN(16)
+L(2):	mov	8(ap,n,8), %r9			C fetch the second A limb
+	.byte	0xf3,0x49,0x0f,0xb8,0xc0	C popcnt %r8, %rax
+	xor	8(bp,n,8), %r9			C r9 = second A limb xor second B limb
+	.byte	0xf3,0x4d,0x0f,0xb8,0xd1	C popcnt %r9, %r10
+	add	$2, n				C two limbs consumed
+	js	L(top)				C index still negative: limbs remain, enter the loop
+	lea	(%r10, %rax), %rax		C exactly two limbs: rax = sum of both counts
+	ret
+
+	ALIGN(16)
+L(top):	mov	(ap,n,8), %r8			C fetch the next A limb
+	lea	(%r10, %rax), %rax		C rax += r10, the count left pending before this pass
+	mov	8(ap,n,8), %r9			C fetch the A limb after it
+	xor	(bp,n,8), %r8			C r8 = differing bits of the first limb pair
+	xor	8(bp,n,8), %r9			C r9 = differing bits of the second limb pair
+	.byte	0xf3,0x49,0x0f,0xb8,0xc8	C popcnt %r8, %rcx
+	lea	(%rcx, %rax), %rax		C rax += bit count of r8
+	.byte	0xf3,0x4d,0x0f,0xb8,0xd1	C popcnt %r9, %r10
+	add	$2, n				C advance the index by two limbs
+	js	L(top)				C repeat while the index is negative
+
+	lea	(%r10, %rax), %rax		C add the count still pending in r10
+	ret					C rax holds the accumulated total
+EPILOGUE()
diff -r df2acbfb109e -r 9a97c2aa6515 mpn/x86_64/k10/popcount.asm
--- a/mpn/x86_64/k10/popcount.asm	Mon Dec 13 11:25:51 2010 +0100
+++ b/mpn/x86_64/k10/popcount.asm	Mon Dec 13 20:08:08 2010 +0100
@@ -19,19 +19,19 @@
 
 include(`../config.m4')
 
-C		     popcount
 C		    cycles/limb
 C AMD K8,K9		 n/a
-C AMD K10		 1.15
+C AMD K10		 1.125
 C Intel P4		 n/a
-C Intel core2	 	 n/a
+C Intel core2		 n/a
 C Intel corei		 1.25
 C Intel atom		 n/a
 C VIA nano		 n/a
 
 C * The zero-offset of popcount is misassembled to the offset-less form, which
-C   is one byte shorted and therefore will mess up the switching code.
-C * The outdated gas used in FreeBSD and NetbSD cannot handle the POPCNT insn
+C   is one byte shorter and therefore will mess up the switching code.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn,
+C   which is the main reason for our usage of '.byte'.
 
 C TODO
 C  * Improve switching code, the current code sucks.


More information about the gmp-commit mailing list