[Gmp-commit] /var/hg/gmp: New A15 cnd-aors_n.
mercurial at gmplib.org
mercurial at gmplib.org
Fri Apr 26 21:18:42 CEST 2013
details: /var/hg/gmp/rev/1f8b8bd8b2d8
changeset: 15754:1f8b8bd8b2d8
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 26 21:18:30 2013 +0200
description:
New A15 cnd-aors_n.
diffstat:
mpn/arm/v7a/cora15/cnd-aors_n.asm | 147 ++++++++++++++++++++++++++++++++++++++
1 files changed, 147 insertions(+), 0 deletions(-)
diffs (151 lines):
diff -r cc2e2f6b5ed1 -r 1f8b8bd8b2d8 mpn/arm/v7a/cora15/cnd-aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/cnd-aors_n.asm Fri Apr 26 21:18:30 2013 +0200
@@ -0,0 +1,147 @@
+dnl ARM mpn_cnd_add_n/mpn_cnd_sub_n optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 3.75 3
+C Cortex-A15 1.78 this
+
+C This code does not run as well as one could have hoped, since 1.5 c/l seems
+C realistic for this insn mix.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`cnd',`r0')
+define(`rp', `r1')
+define(`up', `r2')
+define(`vp', `r3')
+define(`n', `r12')
+
+ifdef(`OPERATION_cnd_add_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`IFADD', `$1')
+ define(`INITCY', `cmn r0, #0')
+ define(`RETVAL', `adc r0, n, #0')
+ define(`RETVAL2', `adc r0, n, #1')
+ define(`func', mpn_cnd_add_n)
+ define(`func_nc', mpn_add_nc)')
+ifdef(`OPERATION_cnd_sub_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`IFADD', `')
+ define(`INITCY', `cmp r0, #0')
+ define(`RETVAL', `sbc r0, r0, r0
+ and r0, r0, #1')
+ define(`RETVAL2', `RETVAL')
+ define(`func', mpn_cnd_sub_n)
+ define(`func_nc', mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ ldr n, [sp]
+ push { r4-r9 }
+
+ cmp cnd, #1
+ sbc cnd, cnd, cnd C conditionally set to 0xffffffff
+
+ ands r6, n, #3
+ mov n, n, lsr #2
+ beq L(b00)
+ cmp r6, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): ldr r5, [up], #4
+ ldr r7, [vp], #4
+ bic r7, r7, cnd
+ ADDSUB r9, r5, r7
+ ldrd r4, r5, [up, #0]
+ ldrd r6, r7, [vp, #0]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ str r9, [rp], #-4
+ b L(lo)
+
+L(b00): ldrd r4, r5, [up], #-8
+ ldrd r6, r7, [vp], #-8
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ INITCY
+ sub rp, rp, #16
+ b L(mid)
+
+L(b01): ldr r5, [up], #-4
+ ldr r7, [vp], #-4
+ bic r7, r7, cnd
+ ADDSUB r9, r5, r7
+ str r9, [rp], #-12
+ tst n, n
+ beq L(wd1)
+L(gt1): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ b L(mid)
+
+L(b10): ldrd r4, r5, [up]
+ ldrd r6, r7, [vp]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ INITCY
+ sub rp, rp, #8
+ b L(lo)
+
+ ALIGN(16)
+L(top): ldrd r6, r7, [vp, #8]
+ ldrd r4, r5, [up, #8]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ strd r8, r9, [rp, #8]
+L(mid): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ ldrd r6, r7, [vp, #16]!
+ ldrd r4, r5, [up, #16]!
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ sub n, n, #1
+ strd r8, r9, [rp, #16]!
+L(lo): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ tst n, n
+ bne L(top)
+
+L(end): strd r8, r9, [rp, #8]
+L(wd1): RETVAL
+ pop { r4-r9 }
+ bx r14
+EPILOGUE()
More information about the gmp-commit
mailing list