[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Aug 21 21:28:04 UTC 2019
details: /var/hg/gmp/rev/dfdcf95fc1ac
changeset: 17832:dfdcf95fc1ac
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Aug 21 23:03:14 2019 +0200
description:
Provide several x86-64 gcd_22 files.
details: /var/hg/gmp/rev/195b9608e5e4
changeset: 17833:195b9608e5e4
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Aug 21 23:03:40 2019 +0200
description:
ChangeLog
diffstat:
ChangeLog | 42 +++++++++-
mpn/x86_64/atom/gcd_22.asm | 37 ++++++++
mpn/x86_64/bd4/gcd_22.asm | 37 ++++++++
mpn/x86_64/bt1/gcd_22.asm | 171 +++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/core2/gcd_22.asm | 141 +++++++++++++++++++++++++++++++++
mpn/x86_64/coreihwl/gcd_22.asm | 145 ++++++++++++++++++++++++++++++++++
mpn/x86_64/k10/gcd_22.asm | 146 +++++++++++++++++++++++++++++++++++
mpn/x86_64/k8/gcd_22.asm | 169 ++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/zen/gcd_22.asm | 37 ++++++++
9 files changed, 924 insertions(+), 1 deletions(-)
diffs (truncated from 985 to 300 lines):
diff -r fdc9d35fb5c8 -r 195b9608e5e4 ChangeLog
--- a/ChangeLog Thu Aug 08 23:48:03 2019 +1000
+++ b/ChangeLog Wed Aug 21 23:03:40 2019 +0200
@@ -1,3 +1,38 @@
+2019-08-21 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/core2/gcd_22.asm: New file.
+ * mpn/x86_64/k8/gcd_22.asm: New file.
+ * mpn/x86_64/k10/gcd_22.asm: New file.
+ * mpn/x86_64/coreihwl/gcd_22.asm: New file.
+ * mpn/x86_64/bt1/gcd_22.asm: New file.
+ * mpn/x86_64/bd4/gcd_22.asm: New grabber.
+ * mpn/x86_64/zen/gcd_22.asm: New grabber.
+ * mpn/x86_64/atom/gcd_22.asm: New grabber.
+
+2019-08-19 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac: Check for ELFv1 ABI on PowerPC.
+
+2019-08-18 Torbjörn Granlund <tg at gmplib.org>
+
+ * longlong.h (arm32 sub_ddmmss): Define separately for thumb and
+ non-thumb as rsc instruction is missing for thumb.
+
+ * mpn/powerpc64/mode64/p7/gcd_22.asm: New file.
+ * mpn/powerpc64/mode64/p9/gcd_22.asm: New file.
+
+2019-08-17 Torbjörn Granlund <tg at gmplib.org>
+
+ * demos/expr/t-expr.c: #include gmp-impl.h as it includes tests.h.
+
+ * mpn/asm-defs.m4: Add gcd_22.
+
+ * tests/refmpn.c (refmpn_gcd_22): New function.
+ * tests/tests.h: Declare it.
+
+ * tests/t-constants.c: #include gmp-impl.h.
+ * tests/mpf/t-get_d.c: Likewise.
+
2019-08-17 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/gcd_22.c (mpn_gcd_22): New implementation with less
@@ -26,6 +61,10 @@
* tests/mpn/Makefile.am (check_PROGRAMS): Add t-gcd_22.
* tests/refmpz.c (refmpz_gcd): New function (plain binary gcd).
+2019-08-15 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/zen/gcd_11.asm: Use bd2 instead of bd4 code.
+
2019-08-13 Torbjörn Granlund <tg at gmplib.org>
* mpn/x86_64: Add more gcd_11 variants of of x86_64 gcd_11.asm and
@@ -70,6 +109,7 @@
* mpn/x86_64/bd2/gcd_11.asm: Likewise.
* mpn/x86_64/core2/gcd_11.asm: Likewise.
* mpn/x86_64/gcd_11.asm: Likewise.
+ * mpn/asm-defs.m4: Add gcd_11.
2019-08-06 Niels Möller <nisse at lysator.liu.se>
@@ -910,7 +950,7 @@
2017-02-24 Torbjörn Granlund <tg at gmplib.org>
* longlong.h (arm32/arm64 add_ssaaaa): Use "subs" for some immediates.
- * longlong.h (arm32/arm64 sub_ssaaaa): Use "adds" for some immediates.
+ * longlong.h (arm32/arm64 sub_ddmmss): Use "adds" for some immediates.
* mpn/arm64/copyi.asm: Avoid branching on flags.
* mpn/arm64/copyd.asm: Likewise.
diff -r fdc9d35fb5c8 -r 195b9608e5e4 mpn/x86_64/atom/gcd_22.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/atom/gcd_22.asm Wed Aug 21 23:03:40 2019 +0200
@@ -0,0 +1,37 @@
+dnl AMD64 mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_22)
+include_mpn(`x86_64/k8/gcd_22.asm')
diff -r fdc9d35fb5c8 -r 195b9608e5e4 mpn/x86_64/bd4/gcd_22.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bd4/gcd_22.asm Wed Aug 21 23:03:40 2019 +0200
@@ -0,0 +1,37 @@
+dnl AMD64 mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_22)
+include_mpn(`x86_64/coreihwl/gcd_22.asm')
diff -r fdc9d35fb5c8 -r 195b9608e5e4 mpn/x86_64/bt1/gcd_22.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bt1/gcd_22.asm Wed Aug 21 23:03:40 2019 +0200
@@ -0,0 +1,171 @@
+dnl AMD64 mpn_gcd_22. Assumes useless bsf, useless shrd, tzcnt, no shlx.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit
+C AMD K8,K9 ?
+C AMD K10 ?
+C AMD bd1 9.83
+C AMD bd2 7.81
+C AMD bd3 ?
+C AMD bd4 ?
+C AMD bt1 9.0
+C AMD bt2 9.2
+C AMD zn1 ?
+C AMD zn2 ?
+C Intel P4 ?
+C Intel CNR ?
+C Intel PNR ?
+C Intel NHM ?
+C Intel WSM ?
+C Intel SBR ?
+C Intel IBR ?
+C Intel HWL ?
+C Intel BWL ?
+C Intel SKL ?
+C Intel atom ?
+C Intel SLM ?
+C Intel GLM ?
+C Intel GLM+ ?
+C VIA nano ?
+
+
+C ctz_table[n] is the number of trailing zeros in n, or MAXSHIFT if n==0.
+C
+C The table has MASK+1 one-byte entries.  Entry 0 is written out explicitly
+C and entries 1 through MASK are generated.  The main loop indexes the table
+C with the low MAXSHIFT bits of the difference, and branches away to a full
+C bit scan when those bits are all zero, i.e. when entry 0 was fetched.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+PROTECT(`ctz_table')
+
+DEF_OBJECT(ctz_table,64)
+ .byte MAXSHIFT
+forloop(i,1,MASK,
+` .byte m4_count_trailing_zeros(i)
+')
+END_OBJECT(ctz_table)
+
+C Operand registers as the routine finds them on entry.  The low limb of v
+C arrives in the same register that is later used for the shift count, so
+C the routine copies it to its working register before the loop starts.
+define(`u1', `%rdi')
+define(`u0', `%rsi')
+define(`v1', `%rdx')
+define(`v0_param', `%rcx')
+
+C Working register for the low limb of v, and the shift count.  The count
+C is handed to the shr and shl instructions through its byte-sized name.
+define(`v0', `%rax')
+define(`cnt', `%rcx')
+
+C Scratch registers.  In the loop s1,,s0 holds a copy of u taken before the
+C subtraction, and t1,,t0 receives v - u.
+define(`s0', `%r8')
+define(`s1', `%r9')
+define(`t0', `%r10')
+define(`t1', `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_22)
+ FUNC_ENTRY(2)
+ push %r12
+ mov v0_param, v0
+
+ LEA( ctz_table, %r12)
+
+ ALIGN(16)
+L(top): mov v0, t0
+ sub u0, t0
+ jz L(lowz) C jump when low limb result = 0
+ mov v1, t1
+ sbb u1, t1
+
+ mov u0, s0
+ mov u1, s1
+
+ sub v0, u0
+ sbb v1, u1
+
+L(bck): cmovc t0, u0 C u = |u - v|
+ cmovnc u1, t1 C u = |u - v|
+ cmovc s0, v0 C v = min(u,v)
+ cmovc s1, v1 C v = min(u,v)
+
+ and $MASK, R32(t0)
+ movzbl (%r12,t0), R32(%rcx)
+ jz L(count_better)
+C Rightshift (t1,,u0) into (u1,,u0)
+L(shr): shr R8(cnt), u0
+ mov t1, u1
+ shr R8(cnt), u1
+ neg cnt
+ shl R8(cnt), t1
+ or t1, u0
+
+ test u1, u1
+ jnz L(top)
+ test v1, v1
+ jnz L(top)
+
+L(gcd_11):
+ mov v0, %rdi
+ mov u0, %rsi
+ xor R32(%rdx), R32(%rdx)
+ pop %r12
+ jmp mpn_gcd_11
+
+L(count_better):
+ rep;bsf u0, cnt C tzcnt!
More information about the gmp-commit mailing list