[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Jun 1 13:30:21 UTC 2017


details:   /var/hg/gmp/rev/26450f6ece46
changeset: 17413:26450f6ece46
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Jun 01 15:27:19 2017 +0200
description:
(GMP_AVX_NOT_REALLY_AVAILABLE): New m4 define.

details:   /var/hg/gmp/rev/e592a6525a17
changeset: 17414:e592a6525a17
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Jun 01 15:27:51 2017 +0200
description:
Use GMP_AVX_NOT_REALLY_AVAILABLE.

diffstat:

 configure.ac                |   5 ++++-
 mpn/x86_64/bd1/hamdist.asm  |   9 +++++++++
 mpn/x86_64/bd1/popcount.asm |  14 ++++++++++++++
 3 files changed, 27 insertions(+), 1 deletions(-)

diffs (73 lines):

diff -r f90e2b744030 -r e592a6525a17 configure.ac
--- a/configure.ac	Thu Jun 01 00:52:20 2017 +0200
+++ b/configure.ac	Thu Jun 01 15:27:51 2017 +0200
@@ -47,6 +47,7 @@
 m4_pattern_allow(GMP_NUMB_BITS)
 m4_pattern_allow(GMP_NONSTD_ABI)
 m4_pattern_allow(GMP_CPU_TYPE)
+m4_pattern_allow(GMP_AVX_NOT_REALLY_AVAILABLE)
 
 # If --target is not used then $target_alias is empty, but if say
 # "./configure athlon-pc-freebsd3.5" is used, then all three of
@@ -1905,7 +1906,9 @@
       # unconditionally for NetBSD where they don't work but OSXSAVE is set
       # to claim the contrary.
       *noavx-*-* | *-*-netbsd*)
-	gcc_cflags_noavx="-mno-avx";;
+	gcc_cflags_noavx="-mno-avx"
+	GMP_DEFINE_RAW(["define(<GMP_AVX_NOT_REALLY_AVAILABLE>,1)"])
+	;;
     esac
 
     case $host in
diff -r f90e2b744030 -r e592a6525a17 mpn/x86_64/bd1/hamdist.asm
--- a/mpn/x86_64/bd1/hamdist.asm	Thu Jun 01 00:52:20 2017 +0200
+++ b/mpn/x86_64/bd1/hamdist.asm	Thu Jun 01 15:27:51 2017 +0200
@@ -53,6 +53,14 @@
 C Intel SLM		n/a
 C VIA nano		n/a
 
+C We use vpshlb and vpperm below, which are XOP extensions to AVX.  Some
+C systems, e.g., NetBSD, set OSXSAVE but nevertheless trigger SIGILL for AVX.
+C When GMP_AVX_NOT_REALLY_AVAILABLE is defined, we fall back to the core2 code.
+ifdef(`GMP_AVX_NOT_REALLY_AVAILABLE',`
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86_64/core2/hamdist.asm')
+',`
+
 define(`up',		`%rdi')
 define(`vp',		`%rsi')
 define(`n',		`%rdx')
@@ -198,3 +206,4 @@
 	.byte	0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
 	.byte	0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
 END_OBJECT(L(cnsts))
+')
diff -r f90e2b744030 -r e592a6525a17 mpn/x86_64/bd1/popcount.asm
--- a/mpn/x86_64/bd1/popcount.asm	Thu Jun 01 00:52:20 2017 +0200
+++ b/mpn/x86_64/bd1/popcount.asm	Thu Jun 01 15:27:51 2017 +0200
@@ -53,6 +53,19 @@
 C Intel SLM		n/a
 C VIA nano		n/a
 
+C TODO
+C  * Perform some load-use scheduling for a small speedup.
+C  * The inner loop takes around 13 cycles.  This means we could do 3 plain
+C    popcnt instructions in parallel and thereby approach 1.17 c/l.
+
+C We use vpshlb and vpperm below, which are XOP extensions to AVX.  Some
+C systems, e.g., NetBSD, set OSXSAVE but nevertheless trigger SIGILL for AVX.
+C When GMP_AVX_NOT_REALLY_AVAILABLE is defined, we fall back to the core2 code.
+ifdef(`GMP_AVX_NOT_REALLY_AVAILABLE',`
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86_64/core2/popcount.asm')
+',`
+
 define(`up',		`%rdi')
 define(`n',		`%rsi')
 
@@ -173,3 +186,4 @@
 	.byte	0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
 	.byte	0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f
 END_OBJECT(L(cnsts))
+')


More information about the gmp-commit mailing list