[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Mar 3 21:49:44 CET 2013


details:   /var/hg/gmp/rev/3a1989e4569d
changeset: 15522:3a1989e4569d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 03 21:49:21 2013 +0100
description:
Add arm A15 copyi and copyd.

details:   /var/hg/gmp/rev/14723c542cc4
changeset: 15523:14723c542cc4
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 03 21:49:40 2013 +0100
description:
ChangeLog

diffstat:

 ChangeLog                    |   3 +
 mpn/arm/v7a/cora15/copyd.asm |  99 ++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm/v7a/cora15/copyi.asm |  78 ++++++++++++++++++++++++++++++++++
 3 files changed, 180 insertions(+), 0 deletions(-)

diffs (197 lines):

diff -r c803ab4677f0 -r 14723c542cc4 ChangeLog
--- a/ChangeLog	Sun Mar 03 19:24:26 2013 +0100
+++ b/ChangeLog	Sun Mar 03 21:49:40 2013 +0100
@@ -1,5 +1,8 @@
 2013-03-03  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/arm/v7a/cora15/copyd.asm: New file.
+	* mpn/arm/v7a/cora15/copyi.asm: New file.
+
 	* mpn/arm64/logops_n.asm: New file.
 	* mpn/arm64/gcd_1.asm: New file.
 	* mpn/arm64/aorsmul_1.asm: New file.
diff -r c803ab4677f0 -r 14723c542cc4 mpn/arm/v7a/cora15/copyd.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/copyd.asm	Sun Mar 03 21:49:40 2013 +0100
@@ -0,0 +1,99 @@
+dnl  ARM mpn_copyd.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.75		slower than core register code
+C Cortex-A15	 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+	mov	r12, n, lsl #2
+	add	rp, rp, r12
+	add	up, up, r12
+
+	cmp	n, #7
+	ble	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tst	rp, #4
+	beq	L(al1)
+	sub	up, up, #4
+	vld1.32	{d22[0]}, [up]
+	sub	n, n, #1
+	sub	rp, rp, #4
+	vst1.32	{d22[0]}, [rp]
+L(al1):	tst	rp, #8
+	beq	L(al2)
+	sub	up, up, #8
+	vld1.32	{d22}, [up]
+	sub	n, n, #2
+	sub	rp, rp, #8
+	vst1.32	{d22}, [rp:64]
+L(al2):	sub	up, up, #16
+	vld1.32	{d26-d27}, [up]
+	subs	n, n, #12
+	sub	rp, rp, #16			C offset rp for loop
+	blt	L(end)
+
+	sub	up, up, #16			C offset up for loop
+	mov	r12, #-16
+
+	ALIGN(16)
+L(top):	vld1.32	{d22-d23}, [up], r12
+	vst1.32	{d26-d27}, [rp:128], r12
+	vld1.32	{d26-d27}, [up], r12
+	vst1.32	{d22-d23}, [rp:128], r12
+	subs	n, n, #8
+	bge	L(top)
+
+	add	up, up, #16			C undo up offset
+		    				C rp offset undoing folded
+L(end):	vst1.32	{d26-d27}, [rp:128]
+
+C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tst	n, #4
+	beq	L(tl1)
+	sub	up, up, #16
+	vld1.32	{d22-d23}, [up]
+	sub	rp, rp, #16
+	vst1.32	{d22-d23}, [rp]
+L(tl1):	tst	n, #2
+	beq	L(tl2)
+	sub	up, up, #8
+	vld1.32	{d22}, [up]
+	sub	rp, rp, #8
+	vst1.32	{d22}, [rp]
+L(tl2):	tst	n, #1
+	beq	L(tl3)
+	sub	up, up, #4
+	vld1.32	{d22[0]}, [up]
+	sub	rp, rp, #4
+	vst1.32	{d22[0]}, [rp]
+L(tl3):	bx	lr
+EPILOGUE()
diff -r c803ab4677f0 -r 14723c542cc4 mpn/arm/v7a/cora15/copyi.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/copyi.asm	Sun Mar 03 21:49:40 2013 +0100
@@ -0,0 +1,78 @@
+dnl  ARM mpn_copyi.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.75		slower than core register code
+C Cortex-A15	 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+	cmp	n, #7
+	ble	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tst	rp, #4
+	beq	L(al1)
+	vld1.32	{d22[0]}, [up]!
+	sub	n, n, #1
+	vst1.32	{d22[0]}, [rp]!
+L(al1):	tst	rp, #8
+	beq	L(al2)
+	vld1.32	{d22}, [up]!
+	sub	n, n, #2
+	vst1.32	{d22}, [rp:64]!
+L(al2):	vld1.32	{d26-d27}, [up]!
+	subs	n, n, #12
+	blt	L(end)
+
+	ALIGN(16)
+L(top):	vld1.32	{d22-d23}, [up]!
+	vst1.32	{d26-d27}, [rp:128]!
+	vld1.32	{d26-d27}, [up]!
+	vst1.32	{d22-d23}, [rp:128]!
+	subs	n, n, #8
+	bge	L(top)
+
+L(end):	vst1.32	{d26-d27}, [rp:128]!
+
+C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tst	n, #4
+	beq	L(tl1)
+	vld1.32	{d22-d23}, [up]!
+	vst1.32	{d22-d23}, [rp]!
+L(tl1):	tst	n, #2
+	beq	L(tl2)
+	vld1.32	{d22}, [up]!
+	vst1.32	{d22}, [rp]!
+L(tl2):	tst	n, #1
+	beq	L(tl3)
+	vld1.32	{d22[0]}, [up]!
+	vst1.32	{d22[0]}, [rp]!
+L(tl3):	bx	lr
+EPILOGUE()


More information about the gmp-commit mailing list