[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Feb 14 23:19:59 UTC 2017


details:   /var/hg/gmp/rev/aebf964cdd9e
changeset: 17266:aebf964cdd9e
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Feb 14 21:02:04 2017 +0100
description:
New file, grabbing coreisbr code.

details:   /var/hg/gmp/rev/e3c1b555c2ea
changeset: 17267:e3c1b555c2ea
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Feb 14 21:03:45 2017 +0100
description:
Replace coreisbr grabbing code with code based on Marco's x64/atom/aors_n.asm.

details:   /var/hg/gmp/rev/ea589c2c758b
changeset: 17268:ea589c2c758b
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Feb 14 21:07:23 2017 +0100
description:
Update c/l tables.

details:   /var/hg/gmp/rev/5d87c53eeade
changeset: 17269:5d87c53eeade
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Feb 15 00:19:54 2017 +0100
description:
Update c/l tables.

diffstat:

 mpn/x86_64/aors_n.asm             |  20 +++++--
 mpn/x86_64/aorsmul_1.asm          |  38 +++++++-------
 mpn/x86_64/atom/aors_n.asm        |  99 +++++++++++++++++++++++++++++++++++++-
 mpn/x86_64/atom/aorsmul_1.asm     |  36 +++++++------
 mpn/x86_64/atom/mul_1.asm         |  34 +++++++-----
 mpn/x86_64/atom/mul_2.asm         |  32 +++++++-----
 mpn/x86_64/bd1/aorsmul_1.asm      |  29 +++++++---
 mpn/x86_64/bd1/mul_1.asm          |  29 +++++++---
 mpn/x86_64/bd1/mul_2.asm          |  31 ++++++-----
 mpn/x86_64/bobcat/aors_n.asm      |  30 +++++++----
 mpn/x86_64/bobcat/aorsmul_1.asm   |  29 +++++++---
 mpn/x86_64/bobcat/mul_1.asm       |  29 +++++++---
 mpn/x86_64/core2/aors_n.asm       |  16 +++++-
 mpn/x86_64/core2/aorsmul_1.asm    |  29 +++++++---
 mpn/x86_64/coreibwl/addmul_1.asm  |  37 +++++++-------
 mpn/x86_64/coreibwl/mul_1.asm     |  36 +++++++------
 mpn/x86_64/coreihwl/aorsmul_1.asm |  33 +++++++-----
 mpn/x86_64/coreihwl/mul_1.asm     |  36 +++++++------
 mpn/x86_64/coreihwl/mul_2.asm     |  38 +++++++-------
 mpn/x86_64/coreinhm/aorsmul_1.asm |  35 +++++++------
 mpn/x86_64/coreisbr/aors_n.asm    |  34 +++++++-----
 mpn/x86_64/coreisbr/aorsmul_1.asm |  37 +++++++------
 mpn/x86_64/coreisbr/mul_1.asm     |  14 ++++-
 mpn/x86_64/coreisbr/mul_2.asm     |  32 +++++++-----
 mpn/x86_64/mul_1.asm              |  38 +++++++-------
 mpn/x86_64/mul_2.asm              |  38 +++++++-------
 mpn/x86_64/silvermont/aors_n.asm  |  37 ++++++++++++++
 27 files changed, 586 insertions(+), 340 deletions(-)

diffs (truncated from 1210 to 300 lines):

diff -r 2ab28c0b9c4a -r 5d87c53eeade mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm	Sun Feb 12 21:56:48 2017 +0100
+++ b/mpn/x86_64/aors_n.asm	Wed Feb 15 00:19:54 2017 +0100
@@ -33,14 +33,22 @@
 C	     cycles/limb
 C AMD K8,K9	 1.5
 C AMD K10	 1.5
-C AMD bd1	 1.8
-C AMD bobcat	 2.5
-C Intel P4
+C AMD bull	 1.8
+C AMD pile	 1.74
+C AMD steam
+C AMD excavator
+C AMD bobcat	 2.54
+C AMD jaguar	 2.15
+C Intel P4	11.5
 C Intel core2	 4.9
-C Intel NHM	 5.5
-C Intel SBR	 1.61
-C Intel IBR	 1.61
+C Intel NHM	 5.53
+C Intel SBR	 1.59
+C Intel IBR	 1.55
+C Intel HWL	 1.44
+C Intel BWL	 1.14
+C Intel SKL	 1.21
 C Intel atom	 4
+C Intel SLM	 3
 C VIA nano	 3.25
 
 C The loop of this code is the result of running a code generation and
diff -r 2ab28c0b9c4a -r 5d87c53eeade mpn/x86_64/aorsmul_1.asm
--- a/mpn/x86_64/aorsmul_1.asm	Sun Feb 12 21:56:48 2017 +0100
+++ b/mpn/x86_64/aorsmul_1.asm	Wed Feb 15 00:19:54 2017 +0100
@@ -31,25 +31,25 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 2.5
-C AMD bull	 4.6
-C AMD pile	 5.5
-C AMD steam	 ?
-C AMD excavator	 ?
-C AMD bobcat	 6.17
-C AMD jaguar	5.5\6.5
-C Intel P4	14.9
-C Intel core2	 5.1
-C Intel NHM	 4.9
-C Intel SBR	 3.9
-C Intel IBR	 3.75
-C Intel HWL	 3.62
-C Intel BWL	 2.53
-C Intel SKL	 2.53
-C Intel atom	21.3
-C Intel SLM	 9.0
-C VIA nano	 5.0
+C AMD K8,K9      2.52
+C AMD K10        2.51
+C AMD bull       4.43
+C AMD pile       5.03    5.63
+C AMD steam
+C AMD excavator
+C AMD bobcat     6.20
+C AMD jaguar     5.57    6.56
+C Intel P4      14.9    17.1
+C Intel core2    5.15
+C Intel NHM      4.93
+C Intel SBR      3.95
+C Intel IBR      3.75
+C Intel HWL      3.62
+C Intel BWL      2.53
+C Intel SKL      2.53
+C Intel atom    21.3
+C Intel SLM      9.0
+C VIA nano       5.0
 
 C The loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff -r 2ab28c0b9c4a -r 5d87c53eeade mpn/x86_64/atom/aors_n.asm
--- a/mpn/x86_64/atom/aors_n.asm	Sun Feb 12 21:56:48 2017 +0100
+++ b/mpn/x86_64/atom/aors_n.asm	Wed Feb 15 00:19:54 2017 +0100
@@ -1,6 +1,9 @@
-dnl  X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
+dnl  X86-64 mpn_add_n, mpn_sub_n, optimised for Intel Atom.
 
-dnl  Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2011, 2017 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Marco Bodrato.  Ported to 64-bit by
+dnl  Torbjörn Granlund.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -30,8 +33,96 @@
 
 include(`../config.m4')
 
+C	    cycles/limb
+C AMD K8,K9	 2
+C AMD K10	 2
+C AMD bull	 2.34\2.63
+C AMD pile	 2.27\2.52
+C AMD steam
+C AMD excavator
+C AMD bobcat	 2.79
+C AMD jaguar	 2.78
+C Intel P4	11
+C Intel core2	 7.5
+C Intel NHM	 8.5
+C Intel SBR	 2.11
+C Intel IBR	 2.07
+C Intel HWL	 1.75
+C Intel BWL	 1.51
+C Intel SKL	 1.52
+C Intel atom	 3
+C Intel SLM	 4
+C VIA nano
+
+define(`rp',	`%rdi')	C rcx
+define(`up',	`%rsi')	C rdx
+define(`vp',	`%rdx')	C r8
+define(`n',	`%rcx')	C r9
+define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
+
+ifdef(`OPERATION_add_n', `
+  define(ADCSBB,    adc)
+  define(func_n,    mpn_add_n)
+  define(func_nc,   mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(ADCSBB,    sbb)
+  define(func_n,    mpn_sub_n)
+  define(func_nc,   mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
 ABI_SUPPORT(DOS64)
 ABI_SUPPORT(STD64)
 
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-include_mpn(`x86_64/coreisbr/aors_n.asm')
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func_n)
+	FUNC_ENTRY(4)
+	xor	cy, cy			C carry
+
+L(com):	shr	n			C n >> 1
+	jz	L(1)			C n == 1
+	jc	L(1m2)			C n % 2 == 1
+
+L(0m2):	shr	cy
+	mov	(up), %r10
+	lea	8(up), up
+	lea	8(vp), vp
+	lea	-8(rp), rp
+	jmp	L(mid)
+
+L(1):	shr	cy
+	mov	(up), %r9
+	jmp	L(end)
+
+L(1m2):	shr	cy
+	mov	(up), %r9
+
+	ALIGN(16)
+L(top):	ADCSBB	(vp), %r9
+	lea	16(up), up
+	mov	-8(up), %r10
+	lea	16(vp), vp
+	mov	%r9, (rp)
+L(mid):	ADCSBB	-8(vp), %r10
+	lea	16(rp), rp
+	dec	n
+	mov	(up), %r9
+	mov	%r10, -8(rp)
+	jnz	L(top)
+
+L(end):	ADCSBB	(vp), %r9
+	mov	$0, R32(%rax)
+	mov	%r9, (rp)
+	adc	R32(%rax), R32(%rax)
+	FUNC_EXIT()
+	ret
+EPILOGUE()
+
+PROLOGUE(func_nc)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), cy	')
+	jmp	L(com)
+EPILOGUE()
+ASM_END()
diff -r 2ab28c0b9c4a -r 5d87c53eeade mpn/x86_64/atom/aorsmul_1.asm
--- a/mpn/x86_64/atom/aorsmul_1.asm	Sun Feb 12 21:56:48 2017 +0100
+++ b/mpn/x86_64/atom/aorsmul_1.asm	Wed Feb 15 00:19:54 2017 +0100
@@ -30,22 +30,26 @@
 
 include(`../config.m4')
 
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom	19.37		this
-C VIA nano
+C	     cycles/limb
+C AMD K8,K9	 4.5
+C AMD K10	 4.5
+C AMD bull	 4.73
+C AMD pile	 4.60	 4.80
+C AMD steam	
+C AMD excavator	
+C AMD bobcat	 5.48
+C AMD jaguar	 5.61
+C Intel P4	16.6
+C Intel core2	 5.09
+C Intel NHM	 4.79
+C Intel SBR	 3.88
+C Intel IBR	 3.65
+C Intel HWL	 3.53
+C Intel BWL	 2.75
+C Intel SKL	 2.76
+C Intel atom	19.4
+C Intel SLM	 8
+C VIA nano	
 
 C The loop of this code is the result of running a code generation and
 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
diff -r 2ab28c0b9c4a -r 5d87c53eeade mpn/x86_64/atom/mul_1.asm
--- a/mpn/x86_64/atom/mul_1.asm	Sun Feb 12 21:56:48 2017 +0100
+++ b/mpn/x86_64/atom/mul_1.asm	Wed Feb 15 00:19:54 2017 +0100
@@ -30,21 +30,25 @@
 
 include(`../config.m4')
 
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom	17.3		this
+C	     cycles/limb
+C AMD K8,K9      3.03
+C AMD K10        3.03
+C AMD bull       4.74
+C AMD pile       4.56
+C AMD steam
+C AMD excavator
+C AMD bobcat     5.56    6.04
+C AMD jaguar     5.55    5.84
+C Intel P4      13.05
+C Intel core2    4.03
+C Intel NHM      3.80
+C Intel SBR      2.75
+C Intel IBR      2.69
+C Intel HWL      2.50
+C Intel BWL      2.55
+C Intel SKL      2.57
+C Intel atom    17.3
+C Intel SLM     14.7
 C VIA nano
 
 C The loop of this code is the result of running a code generation and
diff -r 2ab28c0b9c4a -r 5d87c53eeade mpn/x86_64/atom/mul_2.asm
--- a/mpn/x86_64/atom/mul_2.asm	Sun Feb 12 21:56:48 2017 +0100
+++ b/mpn/x86_64/atom/mul_2.asm	Wed Feb 15 00:19:54 2017 +0100
@@ -31,20 +31,24 @@
 include(`../config.m4')
 
 C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM


More information about the gmp-commit mailing list