[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Dec 13 20:08:13 CET 2010
details: /home/hgfiles/gmp/rev/25f5b5a0a5dd
changeset: 13707:25f5b5a0a5dd
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 13 19:56:15 2010 +0100
description:
Fix comment typos.
details: /home/hgfiles/gmp/rev/9a97c2aa6515
changeset: 13708:9a97c2aa6515
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Dec 13 20:08:08 2010 +0100
description:
Add a k10 hamdist.asm.
diffstat:
ChangeLog | 2 +
mpn/x86_64/k10/hamdist.asm | 85 +++++++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/k10/popcount.asm | 10 ++--
3 files changed, 92 insertions(+), 5 deletions(-)
diffs (128 lines):
diff -r df2acbfb109e -r 9a97c2aa6515 ChangeLog
--- a/ChangeLog Mon Dec 13 11:25:51 2010 +0100
+++ b/ChangeLog Mon Dec 13 20:08:08 2010 +0100
@@ -1,5 +1,7 @@
2010-12-13 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86_64/k10/hamdist.asm: New file.
+
* configure.in: Amend last change for lame /bin/sh.
2010-12-12 Torbjorn Granlund <tege at gmplib.org>
diff -r df2acbfb109e -r 9a97c2aa6515 mpn/x86_64/k10/hamdist.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k10/hamdist.asm Mon Dec 13 20:08:08 2010 +0100
@@ -0,0 +1,85 @@
+dnl AMD64 mpn_hamdist -- hamming distance.
+
+dnl Copyright 2008, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 n/a
+C AMD K10 2
+C Intel P4 n/a
+C Intel core2 n/a
+C Intel corei 2.05
+C Intel atom n/a
+C VIA nano n/a
+
+C This is very straightforward 2-way unrolled code.
+
+C TODO
+C * Write something less basic. It should not be hard to reach 1.5 c/l with
+C 4-way unrolling.
+
+define(`ap', `%rdi')
+define(`bp', `%rsi')
+define(`n', `%rdx')
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_hamdist)
+ mov (ap), %r8 C r8 = first limb of A
+ xor (bp), %r8 C r8 = first limb of A xor first limb of B
+
+ lea (ap,n,8), ap C point at A operand end
+ lea (bp,n,8), bp C point at B operand end
+ neg n C n = -n, a negative index counting up to zero
+
+ bt $0, R32(n) C CF = bit 0 of n, set when limb count is odd
+ jnc L(2) C even count: start with two limbs
+
+L(1): .byte 0xf3,0x49,0x0f,0xb8,0xc0 C popcnt %r8, %rax
+ xor R32(%r10), R32(%r10) C r10 = 0, nothing pending for L(top) to add
+ add $1, n C one limb consumed
+ js L(top) C n still negative: more limbs remain
+ ret C single limb: rax holds the result
+
+ ALIGN(16)
+L(2): mov 8(ap,n,8), %r9 C r9 = second limb of A
+ .byte 0xf3,0x49,0x0f,0xb8,0xc0 C popcnt %r8, %rax
+ xor 8(bp,n,8), %r9 C r9 = second limb of A xor second limb of B
+ .byte 0xf3,0x4d,0x0f,0xb8,0xd1 C popcnt %r9, %r10
+ add $2, n C two limbs consumed
+ js L(top) C n still negative: more limbs remain
+ lea (%r10, %rax), %rax C rax += r10
+ ret C two limbs: rax holds the result
+
+ ALIGN(16)
+L(top): mov (ap,n,8), %r8 C r8 = next limb of A
+ lea (%r10, %rax), %rax C rax += r10, the count left pending before this pass
+ mov 8(ap,n,8), %r9 C r9 = following limb of A
+ xor (bp,n,8), %r8 C r8 ^= matching limb of B
+ xor 8(bp,n,8), %r9 C r9 ^= matching limb of B
+ .byte 0xf3,0x49,0x0f,0xb8,0xc8 C popcnt %r8, %rcx
+ lea (%rcx, %rax), %rax C rax += rcx
+ .byte 0xf3,0x4d,0x0f,0xb8,0xd1 C popcnt %r9, %r10
+ add $2, n C two limbs consumed per pass
+ js L(top) C loop while n is negative
+
+ lea (%r10, %rax), %rax C rax += last pending r10
+ ret C rax holds the result
+EPILOGUE()
diff -r df2acbfb109e -r 9a97c2aa6515 mpn/x86_64/k10/popcount.asm
--- a/mpn/x86_64/k10/popcount.asm Mon Dec 13 11:25:51 2010 +0100
+++ b/mpn/x86_64/k10/popcount.asm Mon Dec 13 20:08:08 2010 +0100
@@ -19,19 +19,19 @@
include(`../config.m4')
-C popcount
C cycles/limb
C AMD K8,K9 n/a
-C AMD K10 1.15
+C AMD K10 1.125
C Intel P4 n/a
-C Intel core2 n/a
+C Intel core2 n/a
C Intel corei 1.25
C Intel atom n/a
C VIA nano n/a
C * The zero-offset of popcount is misassembled to the offset-less form, which
-C is one byte shorted and therefore will mess up the switching code.
-C * The outdated gas used in FreeBSD and NetbSD cannot handle the POPCNT insn
+C is one byte shorter and therefore will mess up the switching code.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn,
+C which is the main reason for our usage of '.byte'.
C TODO
C * Improve switching code, the current code sucks.
More information about the gmp-commit
mailing list