[Gmp-commit] /var/hg/gmp: ARM Neon logops_n.
mercurial at gmplib.org
mercurial at gmplib.org
Fri Apr 26 18:57:59 CEST 2013
details: /var/hg/gmp/rev/cc2e2f6b5ed1
changeset: 15753:cc2e2f6b5ed1
user: Richard Henderson
date: Fri Apr 26 18:57:54 2013 +0200
description:
ARM Neon logops_n.
diffstat:
ChangeLog | 4 +
mpn/arm/neon/logops_n.asm | 139 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 143 insertions(+), 0 deletions(-)
diffs (154 lines):
diff -r fe5fa317ad04 -r cc2e2f6b5ed1 ChangeLog
--- a/ChangeLog Fri Apr 26 00:13:51 2013 +0200
+++ b/ChangeLog Fri Apr 26 18:57:54 2013 +0200
@@ -1,3 +1,7 @@
+2013-04-26 Richard Henderson <rth at twiddle.net>
+
+ * mpn/arm/neon/logops_n.asm: New file.
+
2013-04-25 Torbjorn Granlund <tege at gmplib.org>
* mpn/arm/mod_34lsub1.asm: Clear carry smarter.
diff -r fe5fa317ad04 -r cc2e2f6b5ed1 mpn/arm/neon/logops_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/neon/logops_n.asm Fri Apr 26 18:57:54 2013 +0200
@@ -0,0 +1,139 @@
+dnl ARM mpn_and_n, et al.
+
+dnl Contributed to the GNU project by Richard Henderson.
+
+dnl Copyright 2013 Richard Henderson.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb
+C and andn ior iorn xor nand nior xnor
+C StrongARM ? ?
+C XScale ? ?
+C Cortex-A8 ? ?
+C Cortex-A9 2.1 2.6
+C Cortex-A15 0.78 0.94
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+ define(`func', `mpn_and_n')
+ define(`LOGOP', `vand $1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+ define(`func', `mpn_andn_n')
+ define(`LOGOP', `vbic $1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+ define(`func', `mpn_nand_n')
+ define(`POSTOP', `vmvn $1, $1')
+ define(`LOGOP', `vand $1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+ define(`func', `mpn_ior_n')
+ define(`LOGOP', `vorr $1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func', `mpn_iorn_n')
+ define(`LOGOP', `vorn $1, $2, $3')')
+ifdef(`OPERATION_nior_n',`
+ define(`func', `mpn_nior_n')
+ define(`POSTOP', `vmvn $1, $1')
+ define(`LOGOP', `vorr $1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+ define(`func', `mpn_xor_n')
+ define(`LOGOP', `veor $1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func', `mpn_xnor_n')
+ define(`POSTOP', `vmvn $1, $1')
+ define(`LOGOP', `veor $1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+ .fpu neon
+PROLOGUE(func)
+ cmp n, #7
+ ble L(bc)
+
+C Perform a few initial operation until rp is 128-bit aligned
+ tst rp, #4
+ beq L(al1)
+ vld1.32 {d0[0]}, [up]!
+ vld1.32 {d1[0]}, [vp]!
+ sub n, n, #1
+ LOGOP( d0, d0, d1)
+ POSTOP( d0, d0)
+ vst1.32 {d0[0]}, [rp]!
+L(al1): tst rp, #8
+ beq L(al2)
+ vld1.32 {d0}, [up]!
+ vld1.32 {d1}, [vp]!
+ sub n, n, #2
+ LOGOP( d0, d0, d1)
+ POSTOP( d0, d0)
+ vst1.32 {d0}, [rp:64]!
+L(al2): vld1.32 {q2}, [up]!
+ vld1.32 {q3}, [vp]!
+ subs n, n, #12
+ blt L(end)
+
+ ALIGN(16)
+L(top): vld1.32 {q0}, [up]!
+ LOGOP( q2, q2, q3)
+ vld1.32 {q1}, [vp]!
+ POSTOP( q2, q2)
+ subs n, n, #8
+ vst1.32 {q2}, [rp:128]!
+ vld1.32 {q2}, [up]!
+ LOGOP( q0, q0, q1)
+ vld1.32 {q3}, [vp]!
+ POSTOP( q0, q0)
+ vst1.32 {q0}, [rp:128]!
+ bge L(top)
+
+L(end): LOGOP( q2, q2, q3)
+ POSTOP( q2, q2)
+ vst1.32 {q2}, [rp:128]!
+
+C Handle last 0-7 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc): tst n, #4
+ beq L(tl1)
+ vld1.32 {q0}, [up]!
+ vld1.32 {q1}, [vp]!
+ LOGOP( q0, q0, q1)
+ POSTOP( q0, q0)
+ vst1.32 {q0}, [rp]!
+L(tl1): tst n, #2
+ beq L(tl2)
+ vld1.32 {d0}, [up]!
+ vld1.32 {d1}, [vp]!
+ LOGOP( d0, d0, d1)
+ POSTOP( d0, d0)
+ vst1.32 {d0}, [rp]!
+L(tl2): tst n, #1
+ beq L(tl3)
+ vld1.32 {d0[0]}, [up]!
+ vld1.32 {d1[0]}, [vp]!
+ LOGOP( d0, d0, d1)
+ POSTOP( d0, d0)
+ vst1.32 {d0[0]}, [rp]!
+L(tl3): bx lr
+EPILOGUE()
More information about the gmp-commit
mailing list