[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Feb 23 03:25:20 UTC 2017
details: /var/hg/gmp/rev/b742ff45f520
changeset: 17291:b742ff45f520
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Feb 22 23:44:59 2017 +0100
description:
Provide common aorsmul_1 which runs well on A53, A57, and X-Gene.
details: /var/hg/gmp/rev/1084c3789acf
changeset: 17292:1084c3789acf
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Feb 23 04:24:24 2017 +0100
description:
Provide silvermont gmp-mparam.h.
details: /var/hg/gmp/rev/dd1026a8cdfe
changeset: 17293:dd1026a8cdfe
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Feb 23 04:24:46 2017 +0100
description:
ChangeLog
diffstat:
ChangeLog | 33 +++++++
mpn/arm64/aorsmul_1.asm | 64 ++++++++-----
mpn/arm64/xgene1/aorsmul_1.asm | 125 ---------------------------
mpn/x86_64/silvermont/gmp-mparam.h | 170 +++++++++++++++++++++++++++++++++++++
4 files changed, 244 insertions(+), 148 deletions(-)
diffs (truncated from 462 to 300 lines):
diff -r 2567c1119e14 -r dd1026a8cdfe ChangeLog
--- a/ChangeLog Wed Feb 22 02:27:55 2017 +0100
+++ b/ChangeLog Thu Feb 23 04:24:46 2017 +0100
@@ -1,3 +1,36 @@
+2017-02-23 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/silvermont/gmp-mparam.h: New file.
+
+2017-02-22 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm64/aorsmul_1.asm: Rewrite.
+
+ * mpn/arm64/lshiftc.asm: New file.
+
+2017-02-21 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm64/lshift.asm: Rewrite.
+ * mpn/arm64/rshift.asm: Rewrite.
+
+ * mpn/arm64/rsh1aors_n.asm: New file.
+
+2017-02-19 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm64/mul_1.asm: Rewrite.
+ * mpn/arm64/xgene1/mul_1.asm: Remove.
+
+2017-02-17 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm64/cora53/cnd_aors_n.asm: Moved from "..".
+
+ * mpn/arm64/xgene1/cnd_aors_n.asm: Remove file since default code
+ performs better.
+
+ * mpn/arm64/cnd_aors_n.asm: Rewrite.
+
+ * mpn/arm64/logops_n.asm: Rewrite based on new aors_n.asm.
+
2017-02-16 Pedro Gimeno <pggimeno at wanadoo.es>
* rand/randmt.c (__gmp_randiset_mt): Set generator functions from
diff -r 2567c1119e14 -r dd1026a8cdfe mpn/arm64/aorsmul_1.asm
--- a/mpn/arm64/aorsmul_1.asm Wed Feb 22 02:27:55 2017 +0100
+++ b/mpn/arm64/aorsmul_1.asm Thu Feb 23 04:24:46 2017 +0100
@@ -1,8 +1,8 @@
-dnl ARM64 mpn_submul_1
+dnl ARM64 mpn_addmul_1 and mpn_submul_1
dnl Contributed to the GNU project by Torbjörn Granlund.
-dnl Copyright 2013 Free Software Foundation, Inc.
+dnl Copyright 2013, 2015, 2017 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -33,8 +33,16 @@
include(`../config.m4')
C cycles/limb
-C Cortex-A53 ?
-C Cortex-A57 ?
+C Cortex-A53 9.3-9.8
+C Cortex-A57 7.0
+C X-Gene 5.0
+
+C NOTES
+C * It is possible to keep the carry chain alive between the addition blocks
+C and thus avoid csinc, but only for addmul_1. Since that saves no time
+C on the tested pipelines, we keep addmul_1 and submul_1 similar.
+C * We could separate feed-in into 4 blocks, one for each residue (mod 4).
+C That is likely to save a few cycles.
changecom(blah)
@@ -57,15 +65,14 @@
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
PROLOGUE(func)
- mov x15, #0
+ adds x15, xzr, xzr
tbz n, #0, L(1)
ldr x4, [up],#8
mul x8, x4, v0
umulh x12, x4, v0
- adds x8, x8, x15
- ldr x4, [rp,#0]
+ ldr x4, [rp]
ADDSUB x8, x4, x8
csinc x15, x12, x12, COND
str x8, [rp],#8
@@ -79,19 +86,32 @@
umulh x13, x5, v0
adds x8, x8, x15
adcs x9, x9, x12
- ldp x4, x5, [rp,#0]
+ ldp x4, x5, [rp]
adc x15, x13, xzr
- sub n, n, #1
ADDSUB x8, x4, x8
ADDSUBC x9, x5, x9
csinc x15, x15, x15, COND
stp x8, x9, [rp],#16
L(2): lsr n, n, #2
- cbz n, L(end)
+ cbz n, L(le3)
+ ldp x4, x5, [up],#32
+ ldp x6, x7, [up,#-16]
+ b L(mid)
+L(le3): mov x0, x15
+ ret
-L(top): ldp x4, x5, [up],#16
- ldp x6, x7, [up],#16
+ ALIGN(16)
+L(top): ldp x4, x5, [up],#32
+ ldp x6, x7, [up,#-16]
+ ADDSUB x8, x16, x8
+ ADDSUBC x9, x17, x9
+ stp x8, x9, [rp],#32
+ ADDSUBC x10, x12, x10
+ ADDSUBC x11, x13, x11
+ stp x10, x11, [rp,#-16]
+ csinc x15, x15, x15, COND
+L(mid): sub n, n, #1
mul x8, x4, v0
umulh x12, x4, v0
mul x9, x5, v0
@@ -103,20 +123,18 @@
mul x11, x7, v0
umulh x15, x7, v0
adcs x10, x10, x13
- ldp x4, x5, [rp,#0]
+ ldp x16, x17, [rp]
adcs x11, x11, x14
- ldp x6, x7, [rp,#16]
+ ldp x12, x13, [rp,#16]
adc x15, x15, xzr
- sub n, n, #1
- ADDSUB x8, x4, x8
- ADDSUBC x9, x5, x9
- ADDSUBC x10, x6, x10
- ADDSUBC x11, x7, x11
- stp x8, x9, [rp],#16
- csinc x15, x15, x15, COND
- stp x10, x11, [rp],#16
cbnz n, L(top)
-L(end): mov x0, x15
+ ADDSUB x8, x16, x8
+ ADDSUBC x9, x17, x9
+ ADDSUBC x10, x12, x10
+ ADDSUBC x11, x13, x11
+ stp x8, x9, [rp]
+ stp x10, x11, [rp,#16]
+ csinc x0, x15, x15, COND
ret
EPILOGUE()
diff -r 2567c1119e14 -r dd1026a8cdfe mpn/arm64/xgene1/aorsmul_1.asm
--- a/mpn/arm64/xgene1/aorsmul_1.asm Wed Feb 22 02:27:55 2017 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-dnl ARM64 mpn_addmul_1 and mpn_submul_1
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013, 2015 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C Cortex-A53 ?
-C Cortex-A57 ?
-C X-Gene 5.75
-
-changecom(blah)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`n', `x2')
-define(`v0', `x3')
-
-ifdef(`OPERATION_addmul_1', `
- define(`ADDSUB', adds)
- define(`ADDSUBC', adcs)
- define(`COND', `cc')
- define(`func', mpn_addmul_1)')
-ifdef(`OPERATION_submul_1', `
- define(`ADDSUB', subs)
- define(`ADDSUBC', sbcs)
- define(`COND', `cs')
- define(`func', mpn_submul_1)')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-PROLOGUE(func)
- mov x15, #0
-
- tbz n, #0, L(1)
-
- ldr x4, [up],#8
- mul x8, x4, v0
- umulh x12, x4, v0
- adds x8, x8, x15
- ldr x4, [rp,#0]
- ADDSUB x8, x4, x8
- csinc x15, x12, x12, COND
- str x8, [rp],#8
-
-L(1): tbz n, #1, L(2)
-
- ldp x4, x5, [up],#16
- mul x8, x4, v0
- umulh x12, x4, v0
- mul x9, x5, v0
- umulh x13, x5, v0
- adds x8, x8, x15
- adcs x9, x9, x12
- ldp x4, x5, [rp,#0]
- adc x15, x13, xzr
- sub n, n, #1
- ADDSUB x8, x4, x8
- ADDSUBC x9, x5, x9
- csinc x15, x15, x15, COND
- stp x8, x9, [rp],#16
-
-L(2): lsr n, n, #2
- cbz n, L(end)
-
-L(top): ldp x4, x5, [up]
- ldp x6, x7, [up,#16]
- add up, up, #32
- mul x8, x4, v0
- umulh x12, x4, v0
- mul x9, x5, v0
- umulh x13, x5, v0
- adds x8, x8, x15
- mul x10, x6, v0
- umulh x14, x6, v0
- adcs x9, x9, x12
- mul x11, x7, v0
- umulh x15, x7, v0
- adcs x10, x10, x13
- ldp x4, x5, [rp,#0]
- adcs x11, x11, x14
- ldp x6, x7, [rp,#16]
- adc x15, x15, xzr
- ADDSUB x8, x4, x8
- ADDSUBC x9, x5, x9
- ADDSUBC x10, x6, x10
- ADDSUBC x11, x7, x11
- stp x8, x9, [rp]
- csinc x15, x15, x15, COND
- stp x10, x11, [rp,#16]
- add rp, rp, #32
- sub n, n, #1
- cbnz n, L(top)
-
-L(end): mov x0, x15
- ret
-EPILOGUE()
diff -r 2567c1119e14 -r dd1026a8cdfe mpn/x86_64/silvermont/gmp-mparam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/silvermont/gmp-mparam.h Thu Feb 23 04:24:46 2017 +0100
@@ -0,0 +1,170 @@
+/* Intel Silvermont gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
More information about the gmp-commit
mailing list