[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Feb 16 22:37:11 UTC 2017
details: /var/hg/gmp/rev/ce0c017f5837
changeset: 17277:ce0c017f5837
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Feb 16 22:56:37 2017 +0100
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/48949787c2e2
changeset: 17278:48949787c2e2
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Feb 16 22:57:05 2017 +0100
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/197085d4297f
changeset: 17279:197085d4297f
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Feb 16 22:57:45 2017 +0100
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/6c3d56354689
changeset: 17280:6c3d56354689
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Feb 16 23:36:58 2017 +0100
description:
ChangeLog
diffstat:
ChangeLog | 24 ++++++++++++++++++++++++
mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm | 2 +-
mpn/generic/toom_interpolate_8pts.c | 2 +-
mpn/x86_64/atom/aorsmul_1.asm | 6 +++---
mpn/x86_64/bd1/aorsmul_1.asm | 8 ++++----
mpn/x86_64/bobcat/aorsmul_1.asm | 4 ++--
mpn/x86_64/core2/aorsmul_1.asm | 4 ++--
mpn/x86_64/coreinhm/aorsmul_1.asm | 6 +++---
mpn/x86_64/coreisbr/aorsmul_1.asm | 6 +++---
9 files changed, 43 insertions(+), 19 deletions(-)
diffs (182 lines):
diff -r e8dc1d2d9cff -r 6c3d56354689 ChangeLog
--- a/ChangeLog Thu Feb 16 22:55:29 2017 +0100
+++ b/ChangeLog Thu Feb 16 23:36:58 2017 +0100
@@ -1,3 +1,27 @@
+2017-02-16 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm64/aorsorrlshC_n.asm: New file.
+ * mpn/arm64/aorsorrlsh2_n.asm: New file.
+ * mpn/arm64/aorsorrlsh1_n.asm: New file.
+
+ * mpn/arm64/xgene1/aors_n.asm: Remove file since default code now
+ performs similarly.
+
+ * mpn/arm64/aors_n.asm: Rewrite to use 4x unrolling.
+
+2017-02-15 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/silvermont/hamdist.asm: New file, based on k10 code.
+
+ * mpn/x86_64/silvermont/popcount.asm: Grab coreisbr/popcount.asm.
+
+2017-02-14 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/silvermont/aors_n.asm: New file, grabbing coreisbr code.
+
+ * mpn/x86_64/atom/aors_n.asm: Replace coreisbr grabbing code with
+ code based on Marco's x64/atom/aors_n.asm.
+
2017-02-12 Torbjörn Granlund <tg at gmplib.org>
* mpn/powerpc64/aix.m4 (AIX): New define.
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
--- a/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm Thu Feb 16 23:36:58 2017 +0100
@@ -59,7 +59,7 @@
define(`CLRCY', `cmp r13, #0')
define(`RETVAL', `sbc $2, $2, $2
cmn $2, #1
- adc r0, $1, #0')
+ adc r0, $1, #0')
define(`func', mpn_sublsh`'LSH`'_n)')
ifdef(`DO_rsb', `
define(`ADCSBCS', `sbcs $1, $3, $2')
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/generic/toom_interpolate_8pts.c
--- a/mpn/generic/toom_interpolate_8pts.c Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/generic/toom_interpolate_8pts.c Thu Feb 16 23:36:58 2017 +0100
@@ -187,7 +187,7 @@
MPN_INCR_U (r7 + n, 2*n + 1, 1);
cy = 0;
}
-
+
cy = mpn_sub_nc (pp + 2*n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
MPN_DECR_U (r7 + 2*n, n + 1, cy);
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/atom/aorsmul_1.asm
--- a/mpn/x86_64/atom/aorsmul_1.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/atom/aorsmul_1.asm Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
C AMD K10 4.5
C AMD bull 4.73
C AMD pile 4.60 4.80
-C AMD steam
-C AMD excavator
+C AMD steam
+C AMD excavator
C AMD bobcat 5.48
C AMD jaguar 5.61
C Intel P4 16.6
@@ -49,7 +49,7 @@
C Intel SKL 2.76
C Intel atom 19.4
C Intel SLM 8
-C VIA nano
+C VIA nano
C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/bd1/aorsmul_1.asm
--- a/mpn/x86_64/bd1/aorsmul_1.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/bd1/aorsmul_1.asm Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
C AMD K10 3.09
C AMD bull 4.47 4.72
C AMD pile 4.66
-C AMD steam
-C AMD excavator
+C AMD steam
+C AMD excavator
C AMD bobcat 6.30
C AMD jaguar 6.29
C Intel P4 17.3 17.8
@@ -49,13 +49,13 @@
C Intel SKL 2.53
C Intel atom 20.3
C Intel SLM 9
-C VIA nano
+C VIA nano
C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
C TODO
-C * Try to make loop run closer to 4 c/l.
+C * Try to make loop run closer to 4 c/l in Bulldozer and Piledriver.
define(`rp', `%rdi') C rcx
define(`up', `%rsi') C rdx
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/bobcat/aorsmul_1.asm
--- a/mpn/x86_64/bobcat/aorsmul_1.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/bobcat/aorsmul_1.asm Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
C AMD K10 4.51
C AMD bull 4.66
C AMD pile 4.57
-C AMD steam
-C AMD excavator
+C AMD steam
+C AMD excavator
C AMD bobcat 5.05
C AMD jaguar 5.22
C Intel P4 16.8 18.6
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/core2/aorsmul_1.asm
--- a/mpn/x86_64/core2/aorsmul_1.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/core2/aorsmul_1.asm Thu Feb 16 23:36:58 2017 +0100
@@ -35,8 +35,8 @@
C AMD K10 4.01
C AMD bull 4.98
C AMD pile 4.83
-C AMD steam
-C AMD excavator
+C AMD steam
+C AMD excavator
C AMD bobcat 5.56
C AMD jaguar 5.54
C Intel P4 16.3 17.3
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/coreinhm/aorsmul_1.asm
--- a/mpn/x86_64/coreinhm/aorsmul_1.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/coreinhm/aorsmul_1.asm Thu Feb 16 23:36:58 2017 +0100
@@ -37,8 +37,8 @@
C AMD K10 4.0
C AMD bull 5.0
C AMD pile 4.84 5.39
-C AMD steam
-C AMD excavator
+C AMD steam
+C AMD excavator
C AMD bobcat 5.56
C AMD jaguar 5.30
C Intel P4 15.7 17.2
@@ -50,7 +50,7 @@
C Intel SKL 2.76
C Intel atom 21
C Intel SLM 11
-C VIA nano
+C VIA nano
C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
diff -r e8dc1d2d9cff -r 6c3d56354689 mpn/x86_64/coreisbr/aorsmul_1.asm
--- a/mpn/x86_64/coreisbr/aorsmul_1.asm Thu Feb 16 22:55:29 2017 +0100
+++ b/mpn/x86_64/coreisbr/aorsmul_1.asm Thu Feb 16 23:36:58 2017 +0100
@@ -37,8 +37,8 @@
C AMD K10 4.27 4.54
C AMD bull 4.76
C AMD pile 4.55
-C AMD steam
-C AMD excavator
+C AMD steam
+C AMD excavator
C AMD bobcat 5.30
C AMD jaguar 5.28
C Intel P4 16.2 17.1
@@ -51,7 +51,7 @@
C Intel SKL 2.76
C Intel atom 21.5
C Intel SLM 9.5
-C VIA nano
+C VIA nano
C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjörn Granlund.
More information about the gmp-commit mailing list