[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Feb 13 13:58:25 CET 2013
details: /var/hg/gmp/rev/d85fc5a1d4bf
changeset: 15411:d85fc5a1d4bf
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Feb 13 13:53:15 2013 +0100
description:
(GMP_ASM_X86_MULX, GMP_ASM_X86_ADOX): New feature tests.
details: /var/hg/gmp/rev/bdc8ad4b0b11
changeset: 15412:bdc8ad4b0b11
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Feb 13 13:55:53 2013 +0100
description:
File moved to cope with older assemblers
details: /var/hg/gmp/rev/6e3c1a982157
changeset: 15413:6e3c1a982157
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Feb 13 13:58:21 2013 +0100
description:
Use new feature tests.
Update haswell path to include "mulx".
diffstat:
acinclude.m4 | 77 ++++++++++++++++++++++++++++++++++-
configure.ac | 9 +++-
mpn/x86_64/haswell/mul_1.asm | 86 ---------------------------------------
mpn/x86_64/haswell/mulx/mul_1.asm | 86 +++++++++++++++++++++++++++++++++++++++
4 files changed, 170 insertions(+), 88 deletions(-)
diffs (300 lines):
diff -r a4a4dd24ccab -r 6e3c1a982157 acinclude.m4
--- a/acinclude.m4 Tue Feb 12 21:03:50 2013 +0100
+++ b/acinclude.m4 Wed Feb 13 13:58:21 2013 +0100
@@ -1,7 +1,7 @@
dnl GMP specific autoconf macros
-dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, 2011 Free
+dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, 2011, 2013 Free
dnl Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -2606,6 +2606,81 @@
])
+dnl GMP_ASM_X86_MULX([ACTION-IF-YES][,ACTION-IF-NO])
+dnl ------------------------------------------------
+dnl Determine whether the assembler supports the mulx instruction, which
+dnl debuted with Haswell.
+dnl
+dnl This macro is wanted before GMP_ASM_TEXT, so ".text" is hard coded
+dnl here. ".text" is believed to be correct on all x86 systems, certainly
+dnl it's all GMP_ASM_TEXT gives currently. Actually ".text" probably isn't
+dnl needed at all, at least for just checking instruction syntax.
+
+AC_DEFUN([GMP_ASM_X86_MULX],
+[AC_CACHE_CHECK([if the assembler knows about the mulx instruction],
+ gmp_cv_asm_x86_mulx,
+[GMP_TRY_ASSEMBLE(
+[ .text
+ mulx %r8, %r9, %r10],
+ [gmp_cv_asm_x86_mulx=yes],
+ [gmp_cv_asm_x86_mulx=no])
+])
+case $gmp_cv_asm_x86_mulx in
+yes)
+ ifelse([$1],,:,[$1])
+ ;;
+*)
+ AC_MSG_WARN([+----------------------------------------------------------])
+ AC_MSG_WARN([| WARNING WARNING WARNING])
+ AC_MSG_WARN([| Host CPU has the mulx instruction, but it can't be assembled by])
+ AC_MSG_WARN([| $CCAS $CFLAGS $CPPFLAGS])
+ AC_MSG_WARN([| Older x86 instructions will be used.])
+ AC_MSG_WARN([| This will be an inferior build.])
+ AC_MSG_WARN([+----------------------------------------------------------])
+ ifelse([$2],,:,[$2])
+ ;;
+esac
+])
+
+
+dnl GMP_ASM_X86_ADOX([ACTION-IF-YES][,ACTION-IF-NO])
+dnl ------------------------------------------------
+dnl Determine whether the assembler supports the adcx and adox instructions,
+dnl which debuted with Broadwell, the die shrink of Haswell.
+dnl
+dnl This macro is wanted before GMP_ASM_TEXT, so ".text" is hard coded
+dnl here. ".text" is believed to be correct on all x86 systems, certainly
+dnl it's all GMP_ASM_TEXT gives currently. Actually ".text" probably isn't
+dnl needed at all, at least for just checking instruction syntax.
+
+AC_DEFUN([GMP_ASM_X86_ADOX],
+[AC_CACHE_CHECK([if the assembler knows about the adox instruction],
+ gmp_cv_asm_x86_adox,
+[GMP_TRY_ASSEMBLE(
+[ .text
+ adox %r8, %r9
+ adcx %r8, %r9],
+ [gmp_cv_asm_x86_adox=yes],
+ [gmp_cv_asm_x86_adox=no])
+])
+case $gmp_cv_asm_x86_adox in
+yes)
+ ifelse([$1],,:,[$1])
+ ;;
+*)
+ AC_MSG_WARN([+----------------------------------------------------------])
+ AC_MSG_WARN([| WARNING WARNING WARNING])
+ AC_MSG_WARN([| Host CPU has the adcx and adox instructions, but they can't be assembled by])
+ AC_MSG_WARN([| $CCAS $CFLAGS $CPPFLAGS])
+ AC_MSG_WARN([| Older x86 instructions will be used.])
+ AC_MSG_WARN([| This will be an inferior build.])
+ AC_MSG_WARN([+----------------------------------------------------------])
+ ifelse([$2],,:,[$2])
+ ;;
+esac
+])
+
+
dnl GMP_ASM_X86_MCOUNT
dnl ------------------
dnl Find out how to call mcount for profiling on an x86 system.
diff -r a4a4dd24ccab -r 6e3c1a982157 configure.ac
--- a/configure.ac Tue Feb 12 21:03:50 2013 +0100
+++ b/configure.ac Wed Feb 13 13:58:21 2013 +0100
@@ -1644,7 +1644,7 @@
gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
path="x86/haswell x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- path_64="x86_64/haswell x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+ path_64="x86_64/haswell/mulx x86_64/haswell x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
;;
atom)
gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
@@ -2293,6 +2293,13 @@
*sse2*) GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;
esac
fi
+ case "$path $fat_path" in
+ *mulx*) GMP_ASM_X86_MULX( , [GMP_STRIP_PATH(mulx)]) ;;
+ esac
+ case "$path $fat_path" in
+ *adox*) GMP_ASM_X86_MULX( , [GMP_STRIP_PATH(adox)])
+ GMP_ASM_X86_ADOX( , [GMP_STRIP_PATH(adox)]) ;;
+ esac
;;
esac
diff -r a4a4dd24ccab -r 6e3c1a982157 mpn/x86_64/haswell/mul_1.asm
--- a/mpn/x86_64/haswell/mul_1.asm Tue Feb 12 21:03:50 2013 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-dnl AMD64 mpn_mul_1 for Intel Haswell.
-
-dnl Copyright 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bobcat
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel HWL
-C Intel atom
-C VIA nano
-
-define(`rp', `%rdi') C rcx
-define(`up', `%rsi') C rdx
-define(`n_param', `%rdx') C r8
-define(`v0_param',`%rcx') C r9
-
-define(`n', `%rcx')
-define(`v0', `%rdx')
-
-IFDOS(` define(`up', ``%rsi'') ') dnl
-IFDOS(` define(`rp', ``%rcx'') ') dnl
-IFDOS(` define(`v0', ``%r9'') ') dnl
-IFDOS(` define(`r9', ``rdi'') ') dnl
-IFDOS(` define(`n', ``%r8'') ') dnl
-IFDOS(` define(`r8', ``r11'') ') dnl
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_mul_1c)
- mov %r8, %rax
- jmp L(ent)
-EPILOGUE()
- ALIGN(16)
-PROLOGUE(mpn_mul_1)
- xor %rax, %rax
-L(ent): mov v0_param, %r8
- mov n_param, n
- mov %r8, v0
- lea (rp,n,8), rp
- lea (up,n,8), up
- neg n
- test $1, R8(n)
- jne L(odd)
-
-L(top): jrcxz L(end)
- mulx (up,n,8), %r9, %r8
- adc %rax, %r9
- mov %r9, (rp,n,8)
-L(mid): mulx 8(up,n,8), %r11, %rax
- adc %r8, %r11
- mov %r11, 8(rp,n,8)
- lea 2(n), n
- jmp L(top)
-
-L(end): adc $0, %rax
- ret
-
-L(odd): dec n
- mov %rax, %r8
- jmp L(mid)
-EPILOGUE()
-ASM_END()
diff -r a4a4dd24ccab -r 6e3c1a982157 mpn/x86_64/haswell/mulx/mul_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/haswell/mulx/mul_1.asm Wed Feb 13 13:58:21 2013 +0100
@@ -0,0 +1,86 @@
+dnl AMD64 mpn_mul_1 for Intel Haswell.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1
+C AMD bobcat
+C Intel P4
+C Intel PNR
+C Intel NHM
+C Intel SBR
+C Intel HWL
+C Intel atom
+C VIA nano
+
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`v0_param',`%rcx') C r9
+
+define(`n', `%rcx')
+define(`v0', `%rdx')
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`v0', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_mul_1c)
+ mov %r8, %rax
+ jmp L(ent)
+EPILOGUE()
+ ALIGN(16)
+PROLOGUE(mpn_mul_1)
+ xor %rax, %rax
+L(ent): mov v0_param, %r8
+ mov n_param, n
+ mov %r8, v0
+ lea (rp,n,8), rp
+ lea (up,n,8), up
+ neg n
+ test $1, R8(n)
+ jne L(odd)
+
+L(top): jrcxz L(end)
+ mulx (up,n,8), %r9, %r8
+ adc %rax, %r9
+ mov %r9, (rp,n,8)
+L(mid): mulx 8(up,n,8), %r11, %rax
+ adc %r8, %r11
+ mov %r11, 8(rp,n,8)
+ lea 2(n), n
+ jmp L(top)
+
+L(end): adc $0, %rax
+ ret
+
+L(odd): dec n
+ mov %rax, %r8
+ jmp L(mid)
+EPILOGUE()
+ASM_END()
More information about the gmp-commit
mailing list