[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Mar 28 20:05:51 UTC 2016


details:   /var/hg/gmp/rev/a1040398b0ad
changeset: 17055:a1040398b0ad
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Mar 28 22:04:45 2016 +0200
description:
Remove workaround for slow addmul_2.

details:   /var/hg/gmp/rev/b07df7715600
changeset: 17056:b07df7715600
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Mar 28 22:05:11 2016 +0200
description:
Add a cycle number.

details:   /var/hg/gmp/rev/e949e2497c6a
changeset: 17057:e949e2497c6a
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Mon Mar 28 22:05:48 2016 +0200
description:
Add cycle numbers.

diffstat:

 mpn/x86_64/aorsmul_1.asm         |  15 ++++++++++++---
 mpn/x86_64/coreibwl/gmp-mparam.h |   1 -
 mpn/x86_64/coreisbr/mul_2.asm    |   2 +-
 mpn/x86_64/mul_1.asm             |  15 ++++++++++++---
 mpn/x86_64/mul_2.asm             |  26 +++++++++++++++++++-------
 mpn/x86_64/skylake/gmp-mparam.h  |   1 -
 6 files changed, 44 insertions(+), 16 deletions(-)

diffs (137 lines):

diff -r 7dc980e4dd5e -r e949e2497c6a mpn/x86_64/aorsmul_1.asm
--- a/mpn/x86_64/aorsmul_1.asm	Mon Mar 28 22:02:12 2016 +0200
+++ b/mpn/x86_64/aorsmul_1.asm	Mon Mar 28 22:05:48 2016 +0200
@@ -33,13 +33,22 @@
 C	     cycles/limb
 C AMD K8,K9	 2.5
 C AMD K10	 2.5
-C AMD bd1	 5.0
+C AMD bull	 4.6
+C AMD pile	 5.5
+C AMD steam	 ?
+C AMD excavator	 ?
 C AMD bobcat	 6.17
+C AMD jaguar	5.5\6.5
 C Intel P4	14.9
-C Intel core2	 5.09
+C Intel core2	 5.1
 C Intel NHM	 4.9
-C Intel SBR	 4.0
+C Intel SBR	 3.9
+C Intel IBR	 3.75
+C Intel HWL	 3.62
+C Intel BWL	 2.53
+C Intel SKL	 2.53
 C Intel atom	21.3
+C Intel SLM	 9.0
 C VIA nano	 5.0
 
 C The loop of this code is the result of running a code generation and
diff -r 7dc980e4dd5e -r e949e2497c6a mpn/x86_64/coreibwl/gmp-mparam.h
--- a/mpn/x86_64/coreibwl/gmp-mparam.h	Mon Mar 28 22:02:12 2016 +0200
+++ b/mpn/x86_64/coreibwl/gmp-mparam.h	Mon Mar 28 22:05:48 2016 +0200
@@ -33,7 +33,6 @@
 
 /* Disable use of slow functions.  FIXME: We should disable lib inclusion.  */
 #undef HAVE_NATIVE_mpn_mul_2
-#undef HAVE_NATIVE_mpn_addmul_2
 
 /* 2100 MHz Intel i3-5100 */
 /* FFT tuning limit = 200 M */
diff -r 7dc980e4dd5e -r e949e2497c6a mpn/x86_64/coreisbr/mul_2.asm
--- a/mpn/x86_64/coreisbr/mul_2.asm	Mon Mar 28 22:02:12 2016 +0200
+++ b/mpn/x86_64/coreisbr/mul_2.asm	Mon Mar 28 22:05:48 2016 +0200
@@ -43,7 +43,7 @@
 C Intel core
 C Intel NHM
 C Intel SBR	 2.57		 2.52 using 4-way code
-C Intel IBR	 2.35		 2.32 using 4-way code
+C Intel IBR	 2.29		
 C Intel HWL	 2.02		 1.86
 C Intel BWL
 C Intel atom
diff -r 7dc980e4dd5e -r e949e2497c6a mpn/x86_64/mul_1.asm
--- a/mpn/x86_64/mul_1.asm	Mon Mar 28 22:02:12 2016 +0200
+++ b/mpn/x86_64/mul_1.asm	Mon Mar 28 22:05:48 2016 +0200
@@ -33,13 +33,22 @@
 C	     cycles/limb
 C AMD K8,K9	 2.5
 C AMD K10	 2.5
-C AMD bd1	 5.0
+C AMD bull	 5.0
+C AMD pile	 4.8
+C AMD steam	 ?
+C AMD excavator	 ?
 C AMD bobcat	 5.5
+C AMD jaguar	 6.1
 C Intel P4	12.3
 C Intel core2	 4.0
-C Intel NHM	 3.75
-C Intel SBR	 2.95
+C Intel NHM	 4.0
+C Intel SBR	 2.9
+C Intel IBR	 2.72
+C Intel HWL	 2.45
+C Intel BWL	 2.39
+C Intel SKL	 2.45
 C Intel atom	19.8
+C Intel SLM	 9.0
 C VIA nano	 4.25
 
 C The loop of this code is the result of running a code generation and
diff -r 7dc980e4dd5e -r e949e2497c6a mpn/x86_64/mul_2.asm
--- a/mpn/x86_64/mul_2.asm	Mon Mar 28 22:02:12 2016 +0200
+++ b/mpn/x86_64/mul_2.asm	Mon Mar 28 22:05:48 2016 +0200
@@ -1,7 +1,7 @@
 dnl  AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
 dnl  store the result in a third limb vector.
 
-dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011, 2012, 2016 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -31,14 +31,26 @@
 
 include(`../config.m4')
 
-C	     cycles/limb
-C AMD K8,K9	 2.275
+C	     cycles/limb     cycles/limb cfg	cycles/limb m1+am1
+C AMD K8,K9	 2.275		
 C AMD K10	 2.275
+C AMD bull	 5		 4.3
+C AMD pile	 4.62		 4.2		4.62			same
+C AMD steam	 ?
+C AMD excavator	 ?
+C AMD bobcat	 5.62		<-		5.0			bad
+C AMD jaguar	 5.97		<-		5.2-5.6			bad
 C Intel P4	13.5
-C Intel core2	 4.0
-C Intel corei	 3.8
-C Intel atom	 ?
-C VIA nano	 ?
+C Intel core2	 4		<-		4.12-4.25		good
+C Intel NHM	 3.88		<-		4.28			good
+C Intel SBR	 3.16		 2.57		2.87			bad
+C Intel IBR	 3		 2.29		2.63			bad
+C Intel HWL	 3		 1.86		1.93			bad
+C Intel BWL	 2.22		 2.1		1.58			bad
+C Intel SKL	 2.27		 2.1		1.57			bad
+C Intel atom	19.5		17.7
+C Intel SLM	 8		 8.5
+C VIA nano	 
 
 C This code is the result of running a code generation and optimization tool
 C suite written by David Harvey and Torbjorn Granlund.
diff -r 7dc980e4dd5e -r e949e2497c6a mpn/x86_64/skylake/gmp-mparam.h
--- a/mpn/x86_64/skylake/gmp-mparam.h	Mon Mar 28 22:02:12 2016 +0200
+++ b/mpn/x86_64/skylake/gmp-mparam.h	Mon Mar 28 22:05:48 2016 +0200
@@ -33,7 +33,6 @@
 
 /* Disable use of slow functions.  FIXME: We should disable lib inclusion.  */
 #undef HAVE_NATIVE_mpn_mul_2
-#undef HAVE_NATIVE_mpn_addmul_2
 
 /* 3500 MHz i5-6600K Skylake */
 /* FFT tuning limit = 300 M */


More information about the gmp-commit mailing list