[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Jul 1 18:28:04 UTC 2018
details: /var/hg/gmp/rev/077fcc9f1153
changeset: 17644:077fcc9f1153
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Jul 01 20:27:35 2018 +0200
description:
Significantly improve 64-bit Pentium 4 performance with simple grabber files.
details: /var/hg/gmp/rev/a5f07b87662a
changeset: 17645:a5f07b87662a
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Jul 01 20:27:59 2018 +0200
description:
ChangeLog
diffstat:
ChangeLog | 19 ++++
mpn/x86_64/pentium4/addmul_2.asm | 37 ++++++++
mpn/x86_64/pentium4/aorsmul_1.asm | 37 ++++++++
mpn/x86_64/pentium4/lshift.asm | 137 +-----------------------------
mpn/x86_64/pentium4/lshiftc.asm | 150 +--------------------------------
mpn/x86_64/pentium4/mul_1.asm | 37 ++++++++
mpn/x86_64/pentium4/mul_2.asm | 37 ++++++++
mpn/x86_64/pentium4/mul_basecase.asm | 37 ++++++++
mpn/x86_64/pentium4/mullo_basecase.asm | 37 ++++++++
mpn/x86_64/pentium4/redc_1.asm | 37 ++++++++
mpn/x86_64/pentium4/sqr_basecase.asm | 37 ++++++++
11 files changed, 323 insertions(+), 279 deletions(-)
diffs (truncated from 678 to 300 lines):
diff -r cdc397f6fa03 -r a5f07b87662a ChangeLog
--- a/ChangeLog Wed Jun 13 23:04:39 2018 +0200
+++ b/ChangeLog Sun Jul 01 20:27:59 2018 +0200
@@ -1,3 +1,16 @@
+2018-07-01 Torbjörn Granlund <tg at gmplib.org>
+
+ * lshift.asm: Replace with grabber file.
+ * lshiftc.asm: Replace with grabber file.
+ * x86_64/pentium4/addmul_2.asm: New grabber file.
+ * x86_64/pentium4/aorsmul_1.asm: New grabber file.
+ * x86_64/pentium4/mul_1.asm: New grabber file.
+ * x86_64/pentium4/mul_2.asm: New grabber file.
+ * x86_64/pentium4/mul_basecase.asm: New grabber file.
+ * x86_64/pentium4/mullo_basecase.asm: New grabber file.
+ * x86_64/pentium4/redc_1.asm: New grabber file.
+ * x86_64/pentium4/sqr_basecase.asm: New grabber file.
+
2018-06-13 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/gcd_1.c (mpn_gcd_1): Delete unused code variant for
@@ -5,6 +18,12 @@
structure of the remaining code variant, without gotos to the
mid-loop strip_u_maybe label.
+2018-05-30 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac (x86): Provide goldmont specific path.
+
+ * mpn/x86_64/goldmont/gmp-mparam.h: New file.
+
2018-05-29 Torbjörn Granlund <tg at gmplib.org>
* configure.ac (x86): Pass more exact arch/tune options for nehalem.
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/addmul_2.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/pentium4/addmul_2.asm Sun Jul 01 20:27:59 2018 +0200
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_addmul_2 optimised for Intel Nocona.
+
+dnl Copyright 2018 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_2)
+include_mpn(`x86_64/bd1/addmul_2.asm')
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/aorsmul_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/pentium4/aorsmul_1.asm Sun Jul 01 20:27:59 2018 +0200
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Nocona.
+
+dnl Copyright 2018 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+include_mpn(`x86_64/bd1/aorsmul_1.asm')
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/lshift.asm
--- a/mpn/x86_64/pentium4/lshift.asm Wed Jun 13 23:04:39 2018 +0200
+++ b/mpn/x86_64/pentium4/lshift.asm Sun Jul 01 20:27:59 2018 +0200
@@ -1,6 +1,6 @@
-dnl x86-64 mpn_lshift optimized for Pentium 4.
+dnl X86-64 mpn_lshift optimised for Pentium 4.
-dnl Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+dnl Copyright 2018 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -30,137 +30,8 @@
include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9 2.5
-C AMD K10 ?
-C Intel P4 3.29
-C Intel core2 2.1 (fluctuates, presumably cache related)
-C Intel corei ?
-C Intel atom 14.3
-C VIA nano ?
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-define(`cnt',`%cl')
-
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)
-ASM_START()
- TEXT
- ALIGN(32)
-PROLOGUE(mpn_lshift)
- FUNC_ENTRY(4)
- mov -8(up,n,8), %rax
- movd R32(%rcx), %mm4
- neg R32(%rcx) C put rsh count in cl
- and $63, R32(%rcx)
- movd R32(%rcx), %mm5
-
- lea 1(n), R32(%r8)
-
- shr R8(%rcx), %rax C function return value
-
- and $3, R32(%r8)
- je L(rol) C jump for n = 3, 7, 11, ...
-
- dec R32(%r8)
- jne L(1)
-C n = 4, 8, 12, ...
- movq -8(up,n,8), %mm2
- psllq %mm4, %mm2
- movq -16(up,n,8), %mm0
- psrlq %mm5, %mm0
- por %mm0, %mm2
- movq %mm2, -8(rp,n,8)
- dec n
- jmp L(rol)
-
-L(1): dec R32(%r8)
- je L(1x) C jump for n = 1, 5, 9, 13, ...
-C n = 2, 6, 10, 14, ...
- movq -8(up,n,8), %mm2
- psllq %mm4, %mm2
- movq -16(up,n,8), %mm0
- psrlq %mm5, %mm0
- por %mm0, %mm2
- movq %mm2, -8(rp,n,8)
- dec n
-L(1x):
- cmp $1, n
- je L(ast)
- movq -8(up,n,8), %mm2
- psllq %mm4, %mm2
- movq -16(up,n,8), %mm3
- psllq %mm4, %mm3
- movq -16(up,n,8), %mm0
- movq -24(up,n,8), %mm1
- psrlq %mm5, %mm0
- por %mm0, %mm2
- psrlq %mm5, %mm1
- por %mm1, %mm3
- movq %mm2, -8(rp,n,8)
- movq %mm3, -16(rp,n,8)
- sub $2, n
-
-L(rol): movq -8(up,n,8), %mm2
- psllq %mm4, %mm2
- movq -16(up,n,8), %mm3
- psllq %mm4, %mm3
-
- sub $4, n C 4
- jb L(end) C 2
- ALIGN(32)
-L(top):
- C finish stuff from lsh block
- movq 16(up,n,8), %mm0
- movq 8(up,n,8), %mm1
- psrlq %mm5, %mm0
- por %mm0, %mm2
- psrlq %mm5, %mm1
- movq (up,n,8), %mm0
- por %mm1, %mm3
- movq -8(up,n,8), %mm1
- movq %mm2, 24(rp,n,8)
- movq %mm3, 16(rp,n,8)
- C start two new rsh
- psrlq %mm5, %mm0
- psrlq %mm5, %mm1
-
- C finish stuff from rsh block
- movq 8(up,n,8), %mm2
- movq (up,n,8), %mm3
- psllq %mm4, %mm2
- por %mm2, %mm0
- psllq %mm4, %mm3
- movq -8(up,n,8), %mm2
- por %mm3, %mm1
- movq -16(up,n,8), %mm3
- movq %mm0, 8(rp,n,8)
- movq %mm1, (rp,n,8)
- C start two new lsh
- sub $4, n
- psllq %mm4, %mm2
- psllq %mm4, %mm3
-
- jae L(top) C 2
-L(end):
- movq 8(up), %mm0
- psrlq %mm5, %mm0
- por %mm0, %mm2
- movq (up), %mm1
- psrlq %mm5, %mm1
- por %mm1, %mm3
- movq %mm2, 16(rp)
- movq %mm3, 8(rp)
-
-L(ast): movq (up), %mm2
- psllq %mm4, %mm2
- movq %mm2, (rp)
- emms
- FUNC_EXIT()
- ret
-EPILOGUE()
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift.asm')
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/lshiftc.asm
--- a/mpn/x86_64/pentium4/lshiftc.asm Wed Jun 13 23:04:39 2018 +0200
+++ b/mpn/x86_64/pentium4/lshiftc.asm Sun Jul 01 20:27:59 2018 +0200
@@ -1,7 +1,6 @@
-dnl x86-64 mpn_lshiftc optimized for Pentium 4.
+dnl X86-64 mpn_lshiftc optimised for Pentium 4.
-dnl Copyright 2003, 2005, 2007, 2008, 2010, 2012 Free Software Foundation,
-dnl Inc.
+dnl Copyright 2018 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -31,149 +30,8 @@
include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9 ?
-C AMD K10 ?
-C Intel P4 4.15
-C Intel core2 ?
-C Intel corei ?
-C Intel atom ?
-C VIA nano ?
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-define(`cnt',`%cl')
-
More information about the gmp-commit
mailing list