[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Apr 15 00:08:28 UTC 2017
details: /var/hg/gmp/rev/50970e9d2cf3
changeset: 17342:50970e9d2cf3
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Apr 15 00:26:38 2017 +0200
description:
Move x86_64/mulx code to new x86_64/zen subdir.
details: /var/hg/gmp/rev/2ce5c60f5372
changeset: 17343:2ce5c60f5372
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Apr 15 02:08:10 2017 +0200
description:
Recognise AMD zen.
diffstat:
acinclude.m4 | 4 +-
config.guess | 3 +
config.sub | 2 +-
configure.ac | 14 ++-
mpn/x86_64/fat/fat.c | 7 +-
mpn/x86_64/mulx/aorsmul_1.asm | 161 ----------------------------------------
mpn/x86_64/mulx/mul_1.asm | 154 --------------------------------------
mpn/x86_64/zen/aorsmul_1.asm | 168 ++++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/zen/mul_1.asm | 161 ++++++++++++++++++++++++++++++++++++++++
9 files changed, 351 insertions(+), 323 deletions(-)
diffs (truncated from 768 to 300 lines):
diff -r c32d616089b4 -r 2ce5c60f5372 acinclude.m4
--- a/acinclude.m4 Sun Apr 02 06:14:40 2017 +0200
+++ b/acinclude.m4 Sat Apr 15 02:08:10 2017 +0200
@@ -63,7 +63,7 @@
[[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-* | geode*-*-* | atom-*-*]])
define(X86_64_PATTERN,
-[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
+[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-*]])
dnl GMP_FAT_SUFFIX(DSTVAR, DIRECTORY)
dnl ---------------------------------
@@ -741,7 +741,7 @@
long i;
for (i = 0; i < 88 + 1; i++)
a[i] = ~0L;
- r = malloc (10000 * sizeof (unsigned long));
+ r = calloc (10000, sizeof (unsigned long));
r2 = r;
for (i = 0; i < 528; i += 23)
{
diff -r c32d616089b4 -r 2ce5c60f5372 config.guess
--- a/config.guess Sun Apr 02 06:14:40 2017 +0200
+++ b/config.guess Sat Apr 15 02:08:10 2017 +0200
@@ -951,6 +951,9 @@
case 22: /* Jaguar, an improved bobcat */
cpu_64bit = 1, cpu_avx = 1, modelstr = "jaguar";
break;
+ case 23: /* Zen */
+ cpu_64bit = 1, cpu_avx = 1, modelstr = "zen";
+ break;
}
}
else if (strcmp (vendor_string, "CyrixInstead") == 0)
diff -r c32d616089b4 -r 2ce5c60f5372 config.sub
--- a/config.sub Sun Apr 02 06:14:40 2017 +0200
+++ b/config.sub Sat Apr 15 02:08:10 2017 +0200
@@ -102,7 +102,7 @@
test_cpu=ia64 ;;
pentium | pentiummmx | pentiumpro | pentium[234m] | k[567] | k6[23] | geode | athlon | viac3*)
test_cpu=i386 ;;
-athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
+athlon64 | atom | silvermont | goldmont | core2 | corei* | opteron | k[89] | k10 | bobcat | jaguar* | bulldozer* | piledriver* | steamroller* | excavator* | zen | nano | nehalem* | westmere* | sandybridge* | ivybridge* | haswell* | broadwell* | skylake* | kabylake* | knightslanding)
test_cpu=x86_64 ;;
power[2-9] | power2sc)
test_cpu=power ;;
diff -r c32d616089b4 -r 2ce5c60f5372 configure.ac
--- a/configure.ac Sun Apr 02 06:14:40 2017 +0200
+++ b/configure.ac Sat Apr 15 02:08:10 2017 +0200
@@ -1828,6 +1828,12 @@
path="x86/bd4 x86/bd3 x86/bd2 x86/bd1 x86/k7/mmx x86/k7 x86/mmx x86"
path_64="x86_64/bd4 x86_64/bd3 x86_64/bd2 x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
;;
+ zen)
+ gcc_cflags_cpu="-mtune=znver1 -mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=znver1 -march=amdfam10 -march=k8"
+ path="x86/k7/mmx x86/k7 x86/mmx x86"
+ path_64="x86_64/zen x86_64/k10 x86_64/k8 x86_64"
+ ;;
core2)
gcc_cflags_cpu="-mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
@@ -2225,10 +2231,10 @@
extra_functions_64="$extra_functions_64 fat fat_entry"
path_64="x86_64/fat x86_64"
fat_path="x86_64 x86_64/fat
- x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat x86_64/pentium4
- x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/coreihwl
- x86_64/coreibwl x86_64/skylake x86_64/atom x86_64/silvermont
- x86_64/nano"
+ x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat x86_64/zen
+ x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
+ x86_64/coreihwl x86_64/coreibwl x86_64/skylake x86_64/atom
+ x86_64/silvermont x86_64/nano"
fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
fi
diff -r c32d616089b4 -r 2ce5c60f5372 mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c Sun Apr 02 06:14:40 2017 +0200
+++ b/mpn/x86_64/fat/fat.c Sat Apr 15 02:08:10 2017 +0200
@@ -91,6 +91,7 @@
{ "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
{ "excavator", "AuthenticAMD", MAKE_FMS (21, 0x60) },
{ "jaguar", "AuthenticAMD", MAKE_FMS (22, 1) },
+ { "zen", "AuthenticAMD", MAKE_FMS (23, 1) },
{ "nano", "CentaurHauls", MAKE_FMS (6, 15) },
};
@@ -400,7 +401,6 @@
case 0x0f: /* k8 */
case 0x11: /* "fam 11h", mix of k8 and k10 */
case 0x13:
- case 0x17:
CPUVEC_SETUP_k8;
break;
@@ -421,6 +421,11 @@
CPUVEC_SETUP_k8;
CPUVEC_SETUP_k10;
CPUVEC_SETUP_bd1;
+
+ case 0x17: /* zen */
+ CPUVEC_SETUP_k8;
+ CPUVEC_SETUP_k10;
+ CPUVEC_SETUP_zen;
}
}
else if (strcmp (vendor_string, "CentaurHauls") == 0)
diff -r c32d616089b4 -r 2ce5c60f5372 mpn/x86_64/mulx/aorsmul_1.asm
--- a/mpn/x86_64/mulx/aorsmul_1.asm Sun Apr 02 06:14:40 2017 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-dnl AMD64 mpn_addmul_1 and mpn_submul_1 for CPUs with mulx.
-
-dnl Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9 -
-C AMD K10 -
-C AMD bd1 -
-C AMD bd2 ?
-C AMD bobcat -
-C AMD jaguar ?
-C Intel P4 -
-C Intel PNR -
-C Intel NHM -
-C Intel SBR -
-C Intel HWL ?
-C Intel BWL ?
-C Intel atom -
-C VIA nano -
-
-define(`rp', `%rdi') C rcx
-define(`up', `%rsi') C rdx
-define(`n_param', `%rdx') C r8
-define(`v0_param',`%rcx') C r9
-
-define(`n', `%rcx')
-define(`v0', `%rdx')
-
-ifdef(`OPERATION_addmul_1',`
- define(`ADDSUB', `add')
- define(`ADCSBB', `adc')
- define(`func', `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
- define(`ADDSUB', `sub')
- define(`ADCSBB', `sbb')
- define(`func', `mpn_submul_1')
-')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-IFDOS(` define(`up', ``%rsi'') ') dnl
-IFDOS(` define(`rp', ``%rcx'') ') dnl
-IFDOS(` define(`vl', ``%r9'') ') dnl
-IFDOS(` define(`r9', ``rdi'') ') dnl
-IFDOS(` define(`n', ``%r8'') ') dnl
-IFDOS(` define(`r8', ``r11'') ') dnl
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func)
- mov (up), %r8
-
- push %rbx
- push %r12
- push %r13
-
- lea (up,n_param,8), up
- lea -32(rp,n_param,8), rp
- mov R32(n_param), R32(%rax)
- xchg v0_param, v0 C FIXME: is this insn fast?
-
- neg n
-
- and $3, R8(%rax)
- jz L(b0)
- cmp $2, R8(%rax)
- jz L(b2)
- jg L(b3)
-
-L(b1): mulx %r8, %rbx, %rax
- sub $-1, n
- jz L(wd1)
- mulx (up,n,8), %r9, %r8
- mulx 8(up,n,8), %r11, %r10
- test R32(%rax), R32(%rax) C clear cy
- jmp L(lo1)
-
-L(b0): mulx %r8, %r9, %r8
- mulx 8(up,n,8), %r11, %r10
- mulx 16(up,n,8), %r13, %r12
- xor R32(%rax), R32(%rax)
- jmp L(lo0)
-
-L(b3): mulx %r8, %r11, %r10
- mulx 8(up,n,8), %r13, %r12
- mulx 16(up,n,8), %rbx, %rax
- add %r10, %r13
- adc %r12, %rbx
- adc $0, %rax
- sub $-3, n
- jz L(wd3)
- test R32(%rax), R32(%rax) C clear cy
- jmp L(lo3)
-
-L(b2): mulx %r8, %r13, %r12
- mulx 8(up,n,8), %rbx, %rax
- add %r12, %rbx
- adc $0, %rax
- sub $-2, n
- jz L(wd2)
- mulx (up,n,8), %r9, %r8
- test R32(%rax), R32(%rax) C clear cy
- jmp L(lo2)
-
-L(top): ADDSUB %r9, (rp,n,8)
-L(lo3): mulx (up,n,8), %r9, %r8
- ADCSBB %r11, 8(rp,n,8)
-L(lo2): mulx 8(up,n,8), %r11, %r10
- ADCSBB %r13, 16(rp,n,8)
-L(lo1): mulx 16(up,n,8), %r13, %r12
- ADCSBB %rbx, 24(rp,n,8)
- adc %rax, %r9
-L(lo0): mulx 24(up,n,8), %rbx, %rax
- adc %r8, %r11
- adc %r10, %r13
- adc %r12, %rbx
- adc $0, %rax C rax = carry limb
- add $4, n
- js L(top)
-
-L(end): ADDSUB %r9, (rp)
-L(wd3): ADCSBB %r11, 8(rp)
-L(wd2): ADCSBB %r13, 16(rp)
-L(wd1): ADCSBB %rbx, 24(rp)
- adc n, %rax
- pop %r13
- pop %r12
- pop %rbx
- ret
-EPILOGUE()
-ASM_END()
diff -r c32d616089b4 -r 2ce5c60f5372 mpn/x86_64/mulx/mul_1.asm
--- a/mpn/x86_64/mulx/mul_1.asm Sun Apr 02 06:14:40 2017 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-dnl AMD64 mpn_mul_1 for CPUs with mulx.
-
-dnl Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
More information about the gmp-commit mailing list