[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Jun 1 13:30:21 UTC 2017
details: /var/hg/gmp/rev/26450f6ece46
changeset: 17413:26450f6ece46
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Jun 01 15:27:19 2017 +0200
description:
(GMP_AVX_NOT_REALLY_AVAILABLE): New m4 define.
details: /var/hg/gmp/rev/e592a6525a17
changeset: 17414:e592a6525a17
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Jun 01 15:27:51 2017 +0200
description:
Use GMP_AVX_NOT_REALLY_AVAILABLE.
diffstat:
configure.ac | 5 ++++-
mpn/x86_64/bd1/hamdist.asm | 9 +++++++++
mpn/x86_64/bd1/popcount.asm | 14 ++++++++++++++
3 files changed, 27 insertions(+), 1 deletions(-)
diffs (73 lines):
diff -r f90e2b744030 -r e592a6525a17 configure.ac
--- a/configure.ac Thu Jun 01 00:52:20 2017 +0200
+++ b/configure.ac Thu Jun 01 15:27:51 2017 +0200
@@ -47,6 +47,7 @@
m4_pattern_allow(GMP_NUMB_BITS)
m4_pattern_allow(GMP_NONSTD_ABI)
m4_pattern_allow(GMP_CPU_TYPE)
+m4_pattern_allow(GMP_AVX_NOT_REALLY_AVAILABLE)
# If --target is not used then $target_alias is empty, but if say
# "./configure athlon-pc-freebsd3.5" is used, then all three of
@@ -1905,7 +1906,9 @@
# unconditionally for NetBSD where they don't work but OSXSAVE is set
# to claim the contrary.
*noavx-*-* | *-*-netbsd*)
- gcc_cflags_noavx="-mno-avx";;
+ gcc_cflags_noavx="-mno-avx"
+ GMP_DEFINE_RAW(["define(<GMP_AVX_NOT_REALLY_AVAILABLE>,1)"])
+ ;;
esac
case $host in
diff -r f90e2b744030 -r e592a6525a17 mpn/x86_64/bd1/hamdist.asm
--- a/mpn/x86_64/bd1/hamdist.asm Thu Jun 01 00:52:20 2017 +0200
+++ b/mpn/x86_64/bd1/hamdist.asm Thu Jun 01 15:27:51 2017 +0200
@@ -53,6 +53,14 @@
C Intel SLM n/a
C VIA nano n/a
+C We use vpshlb and vpperm below, which are XOP extensions to AVX. Some
+C systems, e.g., NetBSD, set OSXSAVE but nevertheless trigger SIGILL for AVX.
+C We fall back to the core2 code.
+ifdef(`GMP_AVX_NOT_REALLY_AVAILABLE',`
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86_64/core2/hamdist.asm')
+',`
+
define(`up', `%rdi')
define(`vp', `%rsi')
define(`n', `%rdx')
@@ -198,3 +206,4 @@
.byte 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
.byte 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
END_OBJECT(L(cnsts))
+')
diff -r f90e2b744030 -r e592a6525a17 mpn/x86_64/bd1/popcount.asm
--- a/mpn/x86_64/bd1/popcount.asm Thu Jun 01 00:52:20 2017 +0200
+++ b/mpn/x86_64/bd1/popcount.asm Thu Jun 01 15:27:51 2017 +0200
@@ -53,6 +53,19 @@
C Intel SLM n/a
C VIA nano n/a
+C TODO
+C * Perform some load-use scheduling for a small speedup.
+C * The innerloop takes around 13 cycles. That means that we could do 3 plain
+C popcnt instructions in parallel and thereby approach 1.17 c/l.
+
+C We use vpshlb and vpperm below, which are XOP extensions to AVX. Some
+C systems, e.g., NetBSD, set OSXSAVE but nevertheless trigger SIGILL for AVX.
+C We fall back to the core2 code.
+ifdef(`GMP_AVX_NOT_REALLY_AVAILABLE',`
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86_64/core2/popcount.asm')
+',`
+
define(`up', `%rdi')
define(`n', `%rsi')
@@ -173,3 +186,4 @@
.byte 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
.byte 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
END_OBJECT(L(cnsts))
+')
More information about the gmp-commit mailing list