[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Feb 16 22:37:11 UTC 2017


details:   /var/hg/gmp/rev/ce0c017f5837
changeset: 17277:ce0c017f5837
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Feb 16 22:56:37 2017 +0100
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/48949787c2e2
changeset: 17278:48949787c2e2
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Feb 16 22:57:05 2017 +0100
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/197085d4297f
changeset: 17279:197085d4297f
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Feb 16 22:57:45 2017 +0100
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/6c3d56354689
changeset: 17280:6c3d56354689
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Feb 16 23:36:58 2017 +0100
description:
ChangeLog

diffstat:

 ChangeLog                                 |  24 ++++++++++++++++++++++++
 mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm |   2 +-
 mpn/generic/toom_interpolate_8pts.c       |   2 +-
 mpn/x86_64/atom/aorsmul_1.asm             |   6 +++---
 mpn/x86_64/bd1/aorsmul_1.asm              |   8 ++++----
 mpn/x86_64/bobcat/aorsmul_1.asm           |   4 ++--
 mpn/x86_64/core2/aorsmul_1.asm            |   4 ++--
 mpn/x86_64/coreinhm/aorsmul_1.asm         |   6 +++---
 mpn/x86_64/coreisbr/aorsmul_1.asm         |   6 +++---
 9 files changed, 43 insertions(+), 19 deletions(-)

diffs (182 lines):

diff -r e8dc1d2d9cff -r 6c3d56354689 ChangeLog
--- a/ChangeLog	Thu Feb 16 22:55:29 2017 +0100
+++ b/ChangeLog	Thu Feb 16 23:36:58 2017 +0100
@@ -1,3 +1,27 @@
+2017-02-16  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm64/aorsorrlshC_n.asm: New file.
+	* mpn/arm64/aorsorrlsh2_n.asm: New file.
+	* mpn/arm64/aorsorrlsh1_n.asm: New file.
+
+	* mpn/arm64/xgene1/aors_n.asm: Remove file since default code now
+	performs similarly.
+
+	* mpn/arm64/aors_n.asm: Rewrite to use 4x unrolling.
+
+2017-02-15  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/silvermont/hamdist.asm: New file, based on k10 code.
+
+	* mpn/x86_64/silvermont/popcount.asm: Grab coreisbr/popcount.asm.
+
+2017-02-14  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/silvermont/aors_n.asm: New file, grabbing coreisbr code.
+
+	* mpn/x86_64/atom/aors_n.asm: Replace coreisbr grabbing code with
+	code based on Marco's x64/atom/aors_n.asm.
+
 2017-02-12  Torbjörn Granlund  <tg at gmplib.org>
 
 	* mpn/powerpc64/aix.m4 (AIX): New define.
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
--- a/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -59,7 +59,7 @@
   define(`CLRCY',	`cmp	r13, #0')
   define(`RETVAL',	`sbc	$2, $2, $2
 			cmn	$2, #1
-			adc	 r0, $1, #0')
+			adc	r0, $1, #0')
   define(`func',	mpn_sublsh`'LSH`'_n)')
 ifdef(`DO_rsb', `
   define(`ADCSBCS',	`sbcs	$1, $3, $2')
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/generic/toom_interpolate_8pts.c
--- a/mpn/generic/toom_interpolate_8pts.c	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/generic/toom_interpolate_8pts.c	Thu Feb 16 23:36:58 2017 +0100
@@ -187,7 +187,7 @@
     MPN_INCR_U (r7 + n, 2*n + 1, 1);
     cy = 0;
   }
-    
+
   cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
   MPN_DECR_U (r7 + 2*n, n + 1, cy);
 
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/atom/aorsmul_1.asm
--- a/mpn/x86_64/atom/aorsmul_1.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/atom/aorsmul_1.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
 C AMD K10	 4.5
 C AMD bull	 4.73
 C AMD pile	 4.60	 4.80
-C AMD steam	
-C AMD excavator	
+C AMD steam
+C AMD excavator
 C AMD bobcat	 5.48
 C AMD jaguar	 5.61
 C Intel P4	16.6
@@ -49,7 +49,7 @@
 C Intel SKL	 2.76
 C Intel atom	19.4
 C Intel SLM	 8
-C VIA nano	
+C VIA nano
 
 C The loop of this code is the result of running a code generation and
 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/bd1/aorsmul_1.asm
--- a/mpn/x86_64/bd1/aorsmul_1.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/bd1/aorsmul_1.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
 C AMD K10        3.09
 C AMD bull       4.47    4.72
 C AMD pile       4.66
-C AMD steam     
-C AMD excavator 
+C AMD steam
+C AMD excavator
 C AMD bobcat     6.30
 C AMD jaguar     6.29
 C Intel P4      17.3    17.8
@@ -49,13 +49,13 @@
 C Intel SKL      2.53
 C Intel atom    20.3
 C Intel SLM      9
-C VIA nano      
+C VIA nano
 
 C The loop of this code is the result of running a code generation and
 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
 
 C TODO
-C  * Try to make loop run closer to 4 c/l.
+C  * Try to make loop run closer to 4 c/l in Bulldozer and Piledriver.
 
 define(`rp',      `%rdi')   C rcx
 define(`up',      `%rsi')   C rdx
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/bobcat/aorsmul_1.asm
--- a/mpn/x86_64/bobcat/aorsmul_1.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/bobcat/aorsmul_1.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
 C AMD K10        4.51
 C AMD bull       4.66
 C AMD pile       4.57
-C AMD steam     
-C AMD excavator 
+C AMD steam
+C AMD excavator
 C AMD bobcat     5.05
 C AMD jaguar     5.22
 C Intel P4      16.8    18.6
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/core2/aorsmul_1.asm
--- a/mpn/x86_64/core2/aorsmul_1.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/core2/aorsmul_1.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
 C AMD K10        4.01
 C AMD bull       4.98
 C AMD pile       4.83
-C AMD steam     
-C AMD excavator 
+C AMD steam
+C AMD excavator
 C AMD bobcat     5.56
 C AMD jaguar     5.54
 C Intel P4      16.3    17.3
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/coreinhm/aorsmul_1.asm
--- a/mpn/x86_64/coreinhm/aorsmul_1.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/coreinhm/aorsmul_1.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -37,8 +37,8 @@
 C AMD K10        4.0
 C AMD bull       5.0
 C AMD pile       4.84    5.39
-C AMD steam     
-C AMD excavator 
+C AMD steam
+C AMD excavator
 C AMD bobcat     5.56
 C AMD jaguar     5.30
 C Intel P4      15.7    17.2
@@ -50,7 +50,7 @@
 C Intel SKL      2.76
 C Intel atom    21
 C Intel SLM     11
-C VIA nano      
+C VIA nano
 
 C The loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/coreisbr/aorsmul_1.asm
--- a/mpn/x86_64/coreisbr/aorsmul_1.asm	Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/coreisbr/aorsmul_1.asm	Thu Feb 16 23:36:58 2017 +0100
@@ -37,8 +37,8 @@
 C AMD K10        4.27    4.54
 C AMD bull       4.76
 C AMD pile       4.55
-C AMD steam     
-C AMD excavator 
+C AMD steam
+C AMD excavator
 C AMD bobcat     5.30
 C AMD jaguar     5.28
 C Intel P4      16.2    17.1
@@ -51,7 +51,7 @@
 C Intel SKL      2.76
 C Intel atom    21.5
 C Intel SLM      9.5
-C VIA nano      
+C VIA nano
 
 C The loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjörn Granlund.


More information about the gmp-commit mailing list