[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Dec 12 14:28:25 CET 2010
details: /home/hgfiles/gmp/rev/a94aeff9d18a
changeset: 13703:a94aeff9d18a
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 12 09:46:45 2010 +0100
description:
Add a k10 popcount.asm.
details: /home/hgfiles/gmp/rev/8c470347a967
changeset: 13704:8c470347a967
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Dec 12 09:51:04 2010 +0100
description:
Setup special path for k10 and later AMD CPUs.
diffstat:
ChangeLog | 6 ++
configure.in | 6 +-
mpn/x86_64/k10/popcount.asm | 122 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+), 2 deletions(-)
diffs (155 lines):
diff -r 6fc76fbd5d94 -r 8c470347a967 ChangeLog
--- a/ChangeLog Sat Dec 11 11:42:24 2010 +0100
+++ b/ChangeLog Sun Dec 12 09:51:04 2010 +0100
@@ -1,3 +1,9 @@
+2010-12-12 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/k10/popcount.asm: New file.
+ * configure.in: Setup special path for k10 and later AMD CPUs.
+ Remove special x86_64'k8' path, since directory is non-existent.
+
2010-12-11 Torbjorn Granlund <tege at gmplib.org>
* mpn/sparc32/ultrasparct1: New directory.
diff -r 6fc76fbd5d94 -r 8c470347a967 configure.in
--- a/configure.in Sat Dec 11 11:42:24 2010 +0100
+++ b/configure.in Sun Dec 12 09:51:04 2010 +0100
@@ -1502,8 +1502,10 @@
case $host_cpu in
x86_64)
;;
- athlon64 | k8 | k10 | bobcat | bulldozer)
- path_64="x86_64/k8 $path_64"
+ k10 | bobcat | bulldozer)
+ path_64="x86_64/k10 $path_64"
+ ;;
+ athlon64 | k8)
;;
pentium4)
path_64="x86_64/pentium4 $path_64"
diff -r 6fc76fbd5d94 -r 8c470347a967 mpn/x86_64/k10/popcount.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k10/popcount.asm Sun Dec 12 09:51:04 2010 +0100
@@ -0,0 +1,122 @@
+dnl AMD64 mpn_popcount -- population count.
+
+dnl Copyright 2008, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C popcount
+C cycles/limb
+C AMD K8,K9 n/a
+C AMD K10 1.15
+C Intel P4 n/a
+C Intel core2 n/a
+C Intel corei 1.25
+C Intel atom n/a
+C VIA nano n/a
+
+C * The zero-offset of popcount is misassembled to the offset-less form, which
+C is one byte shorter and therefore will mess up the switching code.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn
+
+C TODO
+C * Improve switching code, the current code sucks.
+
+define(`up', `%rdi') C base pointer of the limb array
+define(`n', `%rsi') C limb count on entry, negated loop index afterwards
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_popcount)
+C Sum POPCNT over the n 64-bit limbs at up and return the total in rax.
+ifelse(1,1,`
+ lea (up,n,8), up C up = address just past the last limb
+
+C mov R32(n), R32(%rcx)
+C neg R32(%rcx)
+ imul $-1, R32(n), R32(%rcx) C ecx = -n, only the low 3 bits are used below
+ and $8-1, R32(%rcx) C rcx = k = -n mod 8, the unrolled slot to enter at
+
+ neg n C n = -n, negative index that counts up towards zero
+
+ mov R32(%rcx), R32(%rax) C rax = k, zero-extended
+ neg %rax C rax = -k
+ lea (up,%rax,8),up C up -= 8*k, cancels the 8*k displacement used by slot k
+
+ xor R32(%rax), R32(%rax) C rax = 0, running bit count
+
+ lea (%rcx,%rcx,4), %rcx C rcx = 5*k
+
+ lea L(top)(%rip), %rdx C rdx = address of slot 0
+ lea (%rdx,%rcx,2), %rdx C rdx = L(top) + 10*k, every slot is 10 bytes of code
+ jmp *%rdx C enter the unrolled loop at slot k
+',`
+ lea (up,n,8), up C alternative setup, not selected while the ifelse test is 1,1
+
+ mov R32(n), R32(%rcx)
+ neg R32(%rcx)
+ and $8-1, R32(%rcx) C rcx = k = -n mod 8
+
+ neg n
+
+ mov R32(%rcx), R32(%rax)
+ shl $3, R32(%rax) C rax = 8*k
+ sub %rax, up C up -= 8*k
+
+ xor R32(%rax), R32(%rax) C rax = 0, running bit count
+
+C add R32(%rcx), R32(%rcx) C 2x
+C lea (%rcx,%rcx,4), %rcx C 10x
+ imul $10, R32(%rcx) C rcx = 10*k, byte offset of slot k
+
+ lea L(top)(%rip), %rdx
+ add %rcx, %rdx C rdx = L(top) + 10*k
+ jmp *%rdx
+')
+
+ ALIGN(32)
+L(top): C 8 slots, each a 7-byte hand-encoded popcnt plus a 3-byte add
+C 0 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x00 C popcnt 0(up,n,8), %r8
+ add %r8, %rax
+C 7 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x08 C popcnt 8(up,n,8), %r9
+ add %r9, %rax
+C 6 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x10 C popcnt 16(up,n,8), %r8
+ add %r8, %rax
+C 5 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x18 C popcnt 24(up,n,8), %r9
+ add %r9, %rax
+C 4 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x20 C popcnt 32(up,n,8), %r8
+ add %r8, %rax
+C 3 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x28 C popcnt 40(up,n,8), %r9
+ add %r9, %rax
+C 2 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x30 C popcnt 48(up,n,8), %r8
+ add %r8, %rax
+C 1 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x38 C popcnt 56(up,n,8), %r9
+ add %r9, %rax
+
+ add $8, n C step the negative index forward by 8 limbs
+ js L(top) C another full pass while the index is still negative
+ ret C total bit count is in rax
+EPILOGUE()
More information about the gmp-commit
mailing list