[Gmp-commit] /var/hg/gmp: Provide arm64 com.

mercurial at gmplib.org mercurial at gmplib.org
Sat Aug 23 16:59:46 UTC 2014


details:   /var/hg/gmp/rev/bc48f84721e5
changeset: 16476:bc48f84721e5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Aug 23 18:58:00 2014 +0200
description:
Provide arm64 com.

diffstat:

 mpn/arm64/com.asm |  83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 83 insertions(+), 0 deletions(-)

diffs (87 lines):

diff -r 5ae59fd07b0d -r bc48f84721e5 mpn/arm64/com.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/com.asm	Sat Aug 23 18:58:00 2014 +0200
@@ -0,0 +1,83 @@
+dnl  ARM64 mpn_com.
+
+dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 ?
+C Cortex-A57	 ?
+
+changecom(@&*$)			C set m4 comment start to @&*$, freeing #
+
+define(`rp', `x0')		C destination limb pointer
+define(`up', `x1')		C source limb pointer
+define(`n',  `x2')		C limb count
+
+ASM_START()
+PROLOGUE(mpn_com)		C rp[i] = NOT up[i] for n 64-bit limbs
+	cmp	n, #3			C signed test: n <= 3 is handled
+	b.le	L(bc)			C entirely by the tail code
+
+C n >= 4 here.  If needed, complement one limb to make rp 128-bit aligned
+	tbz	rp, #3, L(al2)		C only bit 3 tested; presumes rp % 8 == 0
+	ld1	{v22.1d}, [up], #8
+	sub	n, n, #1
+	mvn	v22.8b, v22.8b		C bitwise NOT of the single limb
+	st1	{v22.1d}, [rp], #8
+
+L(al2):	ld1	{v26.2d}, [up], #16	C preload a pair; stored later
+	subs	n, n, #6		C n = limbs not yet loaded, minus 4
+	b.lt	L(end)			C under 4 left to load: skip loop
+
+	ALIGN(16)
+L(top):	ld1	{v22.2d}, [up], #16	C each pass loads and stores 4 limbs
+	mvn	v26.16b, v26.16b
+	st1	{v26.2d}, [rp], #16	C store the previously loaded pair
+	ld1	{v26.2d}, [up], #16	C pair left pending for next pass
+	mvn	v22.16b, v22.16b
+	st1	{v22.2d}, [rp], #16
+	subs	n, n, #4
+	b.ge	L(top)			C repeat while >= 4 limbs unloaded
+
+L(end):	mvn	v26.16b, v26.16b	C flush the pair pending in v26
+	st1	{v26.2d}, [rp], #16	C n is -4..-1; low 2 bits = limbs left
+
+C Complement the last 0-3 limbs, chosen by bits 1 and 0 of n.  rp is aligned
+C after the loop, but maybe not when L(bc) is reached from the n <= 3 test
+L(bc):	tbz	n, #1, L(tl1)		C bit 1 clear: no pair to handle
+	ld1	{v22.2d}, [up], #16
+	mvn	v22.16b, v22.16b
+	st1	{v22.2d}, [rp], #16
+L(tl1):	tbz	n, #0, L(tl2)		C bit 0 clear: no single limb left
+	ld1	{v22.1d}, [up]
+	mvn	v22.8b, v22.8b
+	st1	{v22.1d}, [rp]
+L(tl2):	ret
+EPILOGUE()


More information about the gmp-commit mailing list