[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Jul 1 18:28:04 UTC 2018


details:   /var/hg/gmp/rev/077fcc9f1153
changeset: 17644:077fcc9f1153
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Jul 01 20:27:35 2018 +0200
description:
Significantly improve 64-bit Pentium 4 performance with simple grabber files.

details:   /var/hg/gmp/rev/a5f07b87662a
changeset: 17645:a5f07b87662a
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Jul 01 20:27:59 2018 +0200
description:
ChangeLog

diffstat:

 ChangeLog                              |   19 ++++
 mpn/x86_64/pentium4/addmul_2.asm       |   37 ++++++++
 mpn/x86_64/pentium4/aorsmul_1.asm      |   37 ++++++++
 mpn/x86_64/pentium4/lshift.asm         |  137 +-----------------------------
 mpn/x86_64/pentium4/lshiftc.asm        |  150 +--------------------------------
 mpn/x86_64/pentium4/mul_1.asm          |   37 ++++++++
 mpn/x86_64/pentium4/mul_2.asm          |   37 ++++++++
 mpn/x86_64/pentium4/mul_basecase.asm   |   37 ++++++++
 mpn/x86_64/pentium4/mullo_basecase.asm |   37 ++++++++
 mpn/x86_64/pentium4/redc_1.asm         |   37 ++++++++
 mpn/x86_64/pentium4/sqr_basecase.asm   |   37 ++++++++
 11 files changed, 323 insertions(+), 279 deletions(-)

diffs (truncated from 678 to 300 lines):

diff -r cdc397f6fa03 -r a5f07b87662a ChangeLog
--- a/ChangeLog	Wed Jun 13 23:04:39 2018 +0200
+++ b/ChangeLog	Sun Jul 01 20:27:59 2018 +0200
@@ -1,3 +1,16 @@
+2018-07-01  Torbjörn Granlund  <tg at gmplib.org>
+
+	* x86_64/pentium4/lshift.asm: Replace with grabber file.
+	* x86_64/pentium4/lshiftc.asm: Replace with grabber file.
+	* x86_64/pentium4/addmul_2.asm: New grabber file.
+	* x86_64/pentium4/aorsmul_1.asm: New grabber file.
+	* x86_64/pentium4/mul_1.asm: New grabber file.
+	* x86_64/pentium4/mul_2.asm: New grabber file.
+	* x86_64/pentium4/mul_basecase.asm: New grabber file.
+	* x86_64/pentium4/mullo_basecase.asm: New grabber file.
+	* x86_64/pentium4/redc_1.asm: New grabber file.
+	* x86_64/pentium4/sqr_basecase.asm: New grabber file.
+
 2018-06-13  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/gcd_1.c (mpn_gcd_1): Delete unused code variant for
@@ -5,6 +18,12 @@
 	structure of the remaining code variant, without gotos to the
 	mid-loop strip_u_maybe label.
 
+2018-05-30  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac (x86): Provide goldmont specific path.
+
+	* mpn/x86_64/goldmont/gmp-mparam.h: New file.
+
 2018-05-29  Torbjörn Granlund  <tg at gmplib.org>
 
 	* configure.ac (x86): Pass more exact arch/tune options for nehalem.
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/addmul_2.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/pentium4/addmul_2.asm	Sun Jul 01 20:27:59 2018 +0200
@@ -0,0 +1,37 @@
+dnl  X86-64 mpn_addmul_2 optimised for Intel Nocona.
+
+dnl  Copyright 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_2)
+include_mpn(`x86_64/bd1/addmul_2.asm')
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/aorsmul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/pentium4/aorsmul_1.asm	Sun Jul 01 20:27:59 2018 +0200
@@ -0,0 +1,37 @@
+dnl  X86-64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Nocona.
+
+dnl  Copyright 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+include_mpn(`x86_64/bd1/aorsmul_1.asm')
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/lshift.asm
--- a/mpn/x86_64/pentium4/lshift.asm	Wed Jun 13 23:04:39 2018 +0200
+++ b/mpn/x86_64/pentium4/lshift.asm	Sun Jul 01 20:27:59 2018 +0200
@@ -1,6 +1,6 @@
-dnl  x86-64 mpn_lshift optimized for Pentium 4.
+dnl  X86-64 mpn_lshift optimised for Pentium 4.
 
-dnl  Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2018 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -30,137 +30,8 @@
 
 include(`../config.m4')
 
-
-C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 ?
-C Intel P4	 3.29
-C Intel core2	 2.1 (fluctuates, presumably cache related)
-C Intel corei	 ?
-C Intel atom	14.3
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-define(`cnt',`%cl')
-
 ABI_SUPPORT(DOS64)
 ABI_SUPPORT(STD64)
 
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_lshift)
-	FUNC_ENTRY(4)
-	mov	-8(up,n,8), %rax
-	movd	R32(%rcx), %mm4
-	neg	R32(%rcx)		C put rsh count in cl
-	and	$63, R32(%rcx)
-	movd	R32(%rcx), %mm5
-
-	lea	1(n), R32(%r8)
-
-	shr	R8(%rcx), %rax		C function return value
-
-	and	$3, R32(%r8)
-	je	L(rol)			C jump for n = 3, 7, 11, ...
-
-	dec	R32(%r8)
-	jne	L(1)
-C	n = 4, 8, 12, ...
-	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm0
-	psrlq	%mm5, %mm0
-	por	%mm0, %mm2
-	movq	%mm2, -8(rp,n,8)
-	dec	n
-	jmp	L(rol)
-
-L(1):	dec	R32(%r8)
-	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
-C	n = 2, 6, 10, 14, ...
-	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm0
-	psrlq	%mm5, %mm0
-	por	%mm0, %mm2
-	movq	%mm2, -8(rp,n,8)
-	dec	n
-L(1x):
-	cmp	$1, n
-	je	L(ast)
-	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm3
-	psllq	%mm4, %mm3
-	movq	-16(up,n,8), %mm0
-	movq	-24(up,n,8), %mm1
-	psrlq	%mm5, %mm0
-	por	%mm0, %mm2
-	psrlq	%mm5, %mm1
-	por	%mm1, %mm3
-	movq	%mm2, -8(rp,n,8)
-	movq	%mm3, -16(rp,n,8)
-	sub	$2, n
-
-L(rol):	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm3
-	psllq	%mm4, %mm3
-
-	sub	$4, n			C				      4
-	jb	L(end)			C				      2
-	ALIGN(32)
-L(top):
-	C finish stuff from lsh block
-	movq	16(up,n,8), %mm0
-	movq	8(up,n,8), %mm1
-	psrlq	%mm5, %mm0
-	por	%mm0, %mm2
-	psrlq	%mm5, %mm1
-	movq	(up,n,8), %mm0
-	por	%mm1, %mm3
-	movq	-8(up,n,8), %mm1
-	movq	%mm2, 24(rp,n,8)
-	movq	%mm3, 16(rp,n,8)
-	C start two new rsh
-	psrlq	%mm5, %mm0
-	psrlq	%mm5, %mm1
-
-	C finish stuff from rsh block
-	movq	8(up,n,8), %mm2
-	movq	(up,n,8), %mm3
-	psllq	%mm4, %mm2
-	por	%mm2, %mm0
-	psllq	%mm4, %mm3
-	movq	-8(up,n,8), %mm2
-	por	%mm3, %mm1
-	movq	-16(up,n,8), %mm3
-	movq	%mm0, 8(rp,n,8)
-	movq	%mm1, (rp,n,8)
-	C start two new lsh
-	sub	$4, n
-	psllq	%mm4, %mm2
-	psllq	%mm4, %mm3
-
-	jae	L(top)			C				      2
-L(end):
-	movq	8(up), %mm0
-	psrlq	%mm5, %mm0
-	por	%mm0, %mm2
-	movq	(up), %mm1
-	psrlq	%mm5, %mm1
-	por	%mm1, %mm3
-	movq	%mm2, 16(rp)
-	movq	%mm3, 8(rp)
-
-L(ast):	movq	(up), %mm2
-	psllq	%mm4, %mm2
-	movq	%mm2, (rp)
-	emms
-	FUNC_EXIT()
-	ret
-EPILOGUE()
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift.asm')
diff -r cdc397f6fa03 -r a5f07b87662a mpn/x86_64/pentium4/lshiftc.asm
--- a/mpn/x86_64/pentium4/lshiftc.asm	Wed Jun 13 23:04:39 2018 +0200
+++ b/mpn/x86_64/pentium4/lshiftc.asm	Sun Jul 01 20:27:59 2018 +0200
@@ -1,7 +1,6 @@
-dnl  x86-64 mpn_lshiftc optimized for Pentium 4.
+dnl  X86-64 mpn_lshiftc optimised for Pentium 4.
 
-dnl  Copyright 2003, 2005, 2007, 2008, 2010, 2012 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 2018 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -31,149 +30,8 @@
 
 include(`../config.m4')
 
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 4.15
-C Intel core2	 ?
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-define(`cnt',`%cl')
-


More information about the gmp-commit mailing list