[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Apr 19 16:13:33 CEST 2013
details: /var/hg/gmp/rev/1f18124cb728
changeset: 15742:1f18124cb728
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 19 16:12:25 2013 +0200
description:
ARM A15 aors_n.
details: /var/hg/gmp/rev/b9e2ee245df9
changeset: 15743:b9e2ee245df9
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 19 16:12:39 2013 +0200
description:
ChangeLog
details: /var/hg/gmp/rev/782e3bfbcab0
changeset: 15744:782e3bfbcab0
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 19 16:13:19 2013 +0200
description:
Update cycle/arch tables.
diffstat:
ChangeLog | 2 +
mpn/arm/v7a/cora15/addmul_1.asm | 13 ++-
mpn/arm/v7a/cora15/aors_n.asm | 151 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 162 insertions(+), 4 deletions(-)
diffs (196 lines):
diff -r b81108588266 -r 782e3bfbcab0 ChangeLog
--- a/ChangeLog Fri Apr 19 00:35:37 2013 +0200
+++ b/ChangeLog Fri Apr 19 16:13:19 2013 +0200
@@ -1,5 +1,7 @@
2013-04-19 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/arm/v7a/cora15/aors_n.asm: New file.
+
* mpn/arm/v7a/cora15/addmul_1.asm: Rewrite.
2013-04-18 Torbjorn Granlund <tege at gmplib.org>
diff -r b81108588266 -r 782e3bfbcab0 mpn/arm/v7a/cora15/addmul_1.asm
--- a/mpn/arm/v7a/cora15/addmul_1.asm Fri Apr 19 00:35:37 2013 +0200
+++ b/mpn/arm/v7a/cora15/addmul_1.asm Fri Apr 19 16:13:19 2013 +0200
@@ -20,7 +20,7 @@
include(`../config.m4')
C cycles/limb best
-C StrongARM: ?
+C StrongARM: -
C XScale ?
C Cortex-A7 ?
C Cortex-A8 ?
@@ -37,9 +37,14 @@
C overhead).
C
C We don't use r12 due to ldrd and strd limitations.
-C
-C This is armv5 code, optimised for the armv7a cpu A15. Its location in the
-C GMP file structure might be misleading.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
define(`rp', `r0')
define(`up', `r1')
diff -r b81108588266 -r 782e3bfbcab0 mpn/arm/v7a/cora15/aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/aors_n.asm Fri Apr 19 16:13:19 2013 +0200
@@ -0,0 +1,151 @@
+dnl ARM mpn_add_n/mpn_sub_n optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 3.55 2.5
+C Cortex-A15 1.27 this
+
+C This was a major improvement compared to the code we had before, but it might
+C not be the best 8-way code possible. We've tried some permutations of auto-
+C increments and separate pointer updates, but they all ran at the same speed
+C on A15.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`rp', `r0')  C destination pointer (base of every str/strd below)
+define(`up', `r1')  C first source pointer
+define(`vp', `r2')  C second source pointer
+define(`n', `r3')  C limb count on entry, later the count of whole 4-limb groups
+C Operation-specific pieces; the set that gets defined depends on OPERATION_add_n or OPERATION_sub_n.
+ifdef(`OPERATION_add_n', `
+ define(`ADDSUBC', adcs)  C add with carry, setting flags
+ define(`IFADD', `$1')  C keeps its argument in the add variant
+ define(`SETCY', `cmp $1, #1')  C C flag ends up set when the carry-in register is nonzero
+ define(`RETVAL', `adc r0, n, #0')  C r0 = n + C flag; used where n is 0
+ define(`RETVAL2', `adc r0, n, #1')  C r0 = n + 1 + C flag; used where n is -1
+ define(`func', mpn_add_n)
+ define(`func_nc', mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(`ADDSUBC', sbcs)  C subtract with borrow (borrow is the inverted C flag), setting flags
+ define(`IFADD', `')  C drops its argument in the sub variant
+ define(`SETCY', `rsbs $1, $1, #0')  C computes 0 - carry-in, so the C flag ends up clear (borrow) when carry-in is nonzero
+ define(`RETVAL', `sbc r0, r0, r0
+ and r0, r0, #1')
+ define(`RETVAL2', `RETVAL')  C the sbc/and pair yields 1 when the C flag is clear and 0 when it is set, independent of n
+ define(`func', mpn_sub_n)
+ define(`func_nc', mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)  C entry point taking a carry-in (mpn_add_nc or mpn_sub_nc, per the func_nc define)
+ ldr r12, [sp]  C r12 = word at the top of the stack, consumed as carry-in by SETCY; presumably the fifth argument - confirm against the calling convention
+ b L(ent)  C join the common body inside func, skipping its r12 = 0 setup
+EPILOGUE()
+PROLOGUE(func)  C entry point without a carry-in argument (mpn_add_n or mpn_sub_n, per the func define)
+ mov r12, #0  C carry-in = 0
+L(ent): push { r4-r9 }  C common body, also reached from func_nc with r12 preloaded; save r4-r9
+C Split n into n mod 4 (selects the entry sequence) and n / 4 (count of whole 4-limb groups).
+ ands r6, n, #3  C r6 = n mod 4; Z set when it is 0
+ mov n, n, lsr #2  C n = n / 4; mov without s keeps the flags from ands
+ beq L(b00)  C n mod 4 == 0
+ cmp r6, #2
+ bcc L(b01)  C n mod 4 == 1
+ beq L(b10)  C n mod 4 == 2; otherwise fall through with n mod 4 == 3
+C n mod 4 == 3: do one limb, preload a pair, enter the loop at lo.
+L(b11): ldr r5, [up], #4  C r5 = first up limb, then up += 4
+ ldr r7, [vp], #4  C r7 = first vp limb, then vp += 4
+ SETCY( r12)  C set the C flag from the carry-in in r12
+ ADDSUBC r9, r5, r7  C first result limb
+ ldrd r4, r5, [up, #0]  C preload the next up pair
+ ldrd r6, r7, [vp, #0]  C preload the next vp pair
+ str r9, [rp], #-4  C store the first result limb, then rp -= 4 so the strd at [rp, 8] (top or end) hits the following limb
+ b L(lo)
+C n mod 4 == 0: preload the first pair and bias the pointers for entry at mid.
+L(b00): ldrd r4, r5, [up], #-8  C load the first up pair, then up -= 8
+ ldrd r6, r7, [vp], #-8  C load the first vp pair, then vp -= 8
+ SETCY( r12)  C set the C flag from the carry-in in r12
+ sub rp, rp, #16  C the strd at [rp, 16] after mid then lands on the first result pair
+ b L(mid)
+C n mod 4 == 1: do one limb; finish at wd1 if that was all, else preload a pair and enter at mid.
+L(b01): ldr r5, [up], #-4  C r5 = first up limb, then up -= 4
+ ldr r7, [vp], #-4  C r7 = first vp limb, then vp -= 4
+ SETCY( r12)  C set the C flag from the carry-in in r12
+ ADDSUBC r9, r5, r7  C first result limb
+ str r9, [rp], #-12  C store it, then rp -= 12 to match the store offsets used from mid
+ tst n, n  C any 4-limb groups left? tst with an unshifted register operand leaves the C flag as it was
+ beq L(wd1)  C n was 1: only the return value remains
+L(gt1): ldrd r4, r5, [up, #8]  C preload the pair after the first limb (up was moved back by 4)
+ ldrd r6, r7, [vp, #8]
+ b L(mid)
+C n mod 4 == 2: preload the first pair and enter the loop at lo.
+L(b10): ldrd r4, r5, [up]  C load the first up pair, pointer unchanged
+ ldrd r6, r7, [vp]  C load the first vp pair, pointer unchanged
+ SETCY( r12)  C set the C flag from the carry-in in r12
+ sub rp, rp, #8  C the strd at [rp, 8] (top or end) then lands on the first result pair
+ b L(lo)
+C Main loop. A full pass from top handles four limb pairs and lowers n by 2. Each result pair is stored only after the next source pair has been loaded.
+ ALIGN(16)
+L(top): ldrd r4, r5, [up, #8]  C next source pairs
+ ldrd r6, r7, [vp, #8]
+ strd r8, r9, [rp, #8]  C store the pair computed at lo
+L(mid): ADDSUBC r8, r4, r6  C low limb of the pair
+ ADDSUBC r9, r5, r7  C high limb of the pair, carry chained
+ ldrd r4, r5, [up, #16]
+ ldrd r6, r7, [vp, #16]
+ strd r8, r9, [rp, #16]
+ ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ sub n, n, #2  C two groups per full pass; sub without s leaves the carry chain alone
+ tst n, n  C updates N and Z for the branch, C flag untouched
+ bmi L(dne)  C n is now -1: the group count at mid was odd and the pair in r8/r9 is the last one
+ ldrd r4, r5, [up, #24]
+ ldrd r6, r7, [vp, #24]
+ strd r8, r9, [rp, #24]
+ ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ ldrd r4, r5, [up, #32]!  C pre-indexed with writeback: up += 32, then load
+ ldrd r6, r7, [vp, #32]!  C vp += 32, then load
+ strd r8, r9, [rp, #32]!  C rp += 32, then store
+L(lo): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ tst n, n
+ bne L(top)  C more groups remain
+C Exit path with n == 0.
+L(end): strd r8, r9, [rp, #8]  C store the last result pair
+L(wd1): RETVAL  C r0 = carry or borrow out (0 or 1); the add form adds n, which is 0 here
+ pop { r4-r9 }  C restore the registers saved at ent
+ bx r14  C return
+L(dne): strd r8, r9, [rp, #24]  C exit path with n == -1: store the last result pair
+ RETVAL2  C same result as RETVAL; the add form adds 1 to cancel n == -1
+ pop { r4-r9 }  C restore the registers saved at ent
+ bx r14  C return
+EPILOGUE()
More information about the gmp-commit
mailing list