[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Mar 3 16:31:30 CET 2013


details:   /var/hg/gmp/rev/2d16079c1f87
changeset: 15514:2d16079c1f87
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 03 16:30:39 2013 +0100
description:
Add arm/neon tabselect.  Currently not chosen for any config.

details:   /var/hg/gmp/rev/eacfcf96788a
changeset: 15515:eacfcf96788a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 03 16:31:03 2013 +0100
description:
Add an arm/neon README

details:   /var/hg/gmp/rev/dc0589921b80
changeset: 15516:dc0589921b80
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 03 16:31:25 2013 +0100
description:
ChangeLog

diffstat:

 ChangeLog                  |    2 +
 mpn/arm/neon/README        |    2 +
 mpn/arm/neon/tabselect.asm |  101 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 0 deletions(-)

diffs (122 lines):

diff -r 7ed9a2831799 -r dc0589921b80 ChangeLog
--- a/ChangeLog	Sun Mar 03 16:22:54 2013 +0100
+++ b/ChangeLog	Sun Mar 03 16:31:25 2013 +0100
@@ -1,5 +1,7 @@
 2013-03-03  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/arm/neon/tabselect.asm: New file.
+
 	* mpn/arm/copyi.asm: Software pipeline.
 	* mpn/arm/copyd.asm: Likewise.
 
diff -r 7ed9a2831799 -r dc0589921b80 mpn/arm/neon/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/neon/README	Sun Mar 03 16:31:25 2013 +0100
@@ -0,0 +1,2 @@
+This directory contains Neon code which runs and is efficient on all
+ARM CPUs which support Neon.
diff -r 7ed9a2831799 -r dc0589921b80 mpn/arm/neon/tabselect.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/neon/tabselect.asm	Sun Mar 03 16:31:25 2013 +0100
@@ -0,0 +1,101 @@
+dnl  ARM mpn_tabselect
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 2.25
+C Cortex-A15	 0.95
+
+C This is a basic implementation using 64-bit Neon, with shallow software
+C pipelining and no unrolling.  It is probably close to optimal for A9, while
+C 128-bit Neon code runs significantly faster on A15.
+
+define(`rp',    `r0')			C result area, read and rewritten once per table entry
+define(`tabp',  `r1')			C table pointer, post-incremented through every entry
+define(`n',     `r2')			C limbs per table entry
+define(`nents', `r3')			C table entries still to process, counts down to 0
+C      which  on stack (loaded into r7 by the prologue)
+
+define(`mask',  `r6')			C -1 while processing the selected entry, else 0
+define(`maskv', `d6')			C mask replicated into both 32-bit lanes
+C Copy the n-limb table entry with index which to rp; all nents entries are read.
+ASM_START()
+PROLOGUE(mpn_tabselect)
+	push	{r4-r7}			C save r4-r7, used as scratch below
+	ldr	r7, [sp, #16]		C r7 = which, just above the 16 bytes pushed
+	sub	r7, r7, nents		C r7 = which - nents (nents as passed in)
+L(outer):
+	add	mask, r7, nents		C = which - (entries done), 0 on the selected one
+	subs	mask, mask, #1		C borrow (carry clear) only if that was 0
+	sbc	mask, mask, mask	C mask = -1 on borrow, else 0
+	mov	r4, rp			C r4 = load pointer into rp[]
+	mov	r5, rp			C r5 = store pointer into rp[]
+	mov	r12, n			C r12 = limbs left in this entry
+
+	vdup.32	maskv, mask		C copy mask to both lanes of maskv
+
+	tst	rp, #4			C Is rp 64-bit aligned?
+	beq	L(ali)			C Yes, skip!
+	vld1.32	{d4[0]}, [tabp]!	C Else perform one 32-bit...
+	vld1.32	{d0[0]}, [r4]!		C ...operation in...
+	vbit	d0, d4, maskv		C ...order to make...
+	subs	r12, r12, #1		C ...aligned operations...
+	vst1.32	{d0[0]}, [r5]!		C ...on rp[] in loop.
+
+L(ali):	subs	r12, r12, #4		C at least 4 limbs left?
+	blt	L(ed1)			C no, go handle the last 0-3 limbs
+	subs	r12, r12, #4		C pre-decrement, sets the flags for the first bge
+	vld1.32	{d4,d5}, [tabp]!	C load the first 4 table limbs
+	b	L(mid)			C enter the loop at its second half
+
+L(top):	subs	r12, r12, #4		C flags for bge below, the Neon insns keep them
+	vld1.32	{d4,d5}, [tabp]!	C load the next 4 table limbs
+	vst1.32	{d0,d1}, [r5:64]!	C store the 4 limbs merged last time round
+L(mid):	vld1.32	{d0,d1}, [r4:64]!	C load 4 limbs of rp[]
+	vbit	d0, d4, maskv		C take table bits where maskv is set...
+	vbit	d1, d5, maskv		C ...else keep the rp[] bits
+	bge	L(top)			C loop while the last subs left r12 >= 0
+
+L(end):	vst1.32	{d0,d1}, [r5:64]!	C store the final 4 merged limbs
+C r12 is negative here, its two low bits still give the limbs left (0-3).
+L(ed1):	tst	r12, #2			C 2 or 3 more limbs to go?
+	beq	L(ed2)			C No, skip!
+	vld1.32	{d4}, [tabp]!		C Handle 2 limbs
+	vld1.32	{d0}, [r4:64]!
+	vbit	d0, d4, maskv
+	vst1.32	{d0}, [r5:64]!
+
+L(ed2):	tst	r12, #1			C One more limb to go?
+	beq	L(ed3)			C No, skip!
+	vld1.32	{d4[0]}, [tabp]!	C Handle last limb
+	vld1.32	{d0[0]}, [r4]		C no writeback, r4 and r5 are reset at L(outer)
+	vbit	d0, d4, maskv
+	vst1.32	{d0[0]}, [r5]
+
+L(ed3):	subs	nents, nents, #1	C one more entry done
+	bne	L(outer)		C repeat until all entries are processed
+	pop	{r4-r7}			C restore the registers saved on entry
+	bx	lr
+EPILOGUE()


More information about the gmp-commit mailing list