[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Aug 31 19:28:10 UTC 2014


details:   /var/hg/gmp/rev/08e648f847bc
changeset: 16480:08e648f847bc
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Aug 30 23:47:16 2014 +0200
description:
Provide arm64 mod_34lsub1.

details:   /var/hg/gmp/rev/dead77cd3b7c
changeset: 16481:dead77cd3b7c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Aug 31 21:28:07 2014 +0200
description:
Provide arm64 lshift/rshift.

diffstat:

 mpn/arm64/lshift.asm      |  122 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm64/mod_34lsub1.asm |  123 ++++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm64/rshift.asm      |  121 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 366 insertions(+), 0 deletions(-)

diffs (truncated from 378 to 300 lines):

diff -r 33ff2f5edbef -r dead77cd3b7c mpn/arm64/lshift.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/lshift.asm	Sun Aug 31 21:28:07 2014 +0200
@@ -0,0 +1,122 @@
+dnl  ARM64 mpn_lshift.
+
+dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 ?
+C Cortex-A57	 ?
+
+changecom(@&*$)
+
+define(`rp_arg', `x0')
+define(`up',     `x1')
+define(`n',      `x2')
+define(`cnt',    `x3')
+
+define(`rp',     `x16')
+
+define(`tnc',`x8')
+
+ASM_START()
+PROLOGUE(mpn_lshift)			C {rp_arg,n} = {up,n} << cnt; x0 returns the bits shifted out of the top limb
+	add	rp, rp_arg, n, lsl #3	C rp = one past the top result limb; limbs are handled from the top down
+	add	up, up, n, lsl #3	C up = one past the top source limb
+	sub	tnc, xzr, cnt		C tnc = -cnt; variable shifts take the count mod 64, so this acts as 64-cnt (assumes 1 <= cnt <= 63, confirm with callers)
+	tbz	n, #0, L(bx0)		C even n
+
+L(bx1):	ldr	x4, [up,#-8]		C odd n: x4 = top source limb
+	tbnz	n, #1, L(b11)
+
+L(b01):	lsr	x0, x4, tnc		C n = 1 mod 4: x0 = bits shifted out of the top limb
+	lsl	x12, x4, cnt		C x12 = part carried into the limb below; x12 is used since x18 is the reserved platform register on some ABIs
+	sub	n, n, #1
+	cbnz	n, L(gt1)
+	str	x12, [rp,#-8]		C n was 1: the single result limb
+	ret
+L(gt1):	ldp	x4, x5, [up,#-24]	C the two source limbs below the top one
+	add	up, up, #-8		C bias up and rp to suit the offsets used from L(lo2) on
+	add	rp, rp, #16
+	b	L(lo2)
+
+L(b11):	lsr	x0, x4, tnc		C n = 3 mod 4: x0 = bits shifted out of the top limb
+	lsl	x9, x4, cnt
+	ldp	x6, x7, [up,#-24]	C the two source limbs below the top one
+	add	n, n, #1		C round n up so that it reaches zero in steps of 4 at L(lo0)
+	add	up, up, #8		C bias up and rp to suit the offsets used from L(lo0) on
+	add	rp, rp, #32
+	b	L(lo0)
+
+L(bx0):	ldp	x4, x5, [up,#-16]	C even n: x5 = top source limb, x4 = the one below
+	tbz	n, #1, L(b00)
+
+L(b10):	lsr	x0, x5, tnc		C n = 2 mod 4: x0 = bits shifted out of the top limb
+	lsl	x13, x5, cnt
+	lsr	x10, x4, tnc
+	lsl	x12, x4, cnt		C x12 = part carried into the limb below (x18 avoided, see L(b01))
+	sub	n, n, #2
+	cbnz	n, L(gt2)
+	orr	x10, x10, x13
+	stp	x12, x10, [rp,#-16]	C n was 2: both result limbs
+	ret
+L(gt2):	ldp	x4, x5, [up,#-32]	C next two source limbs
+	orr	x10, x10, x13
+	str	x10, [rp,#-8]		C top result limb
+	add	up, up, #-16		C bias up and rp to suit the offsets used from L(lo2) on
+	add	rp, rp, #8
+	b	L(lo2)
+
+L(b00):	lsr	x0, x5, tnc		C n = 0 mod 4: x0 = bits shifted out of the top limb
+	lsl	x13, x5, cnt
+	lsr	x10, x4, tnc
+	lsl	x9, x4, cnt
+	ldp	x6, x7, [up,#-32]	C next two source limbs
+	orr	x10, x10, x13
+	str	x10, [rp,#-8]		C top result limb
+	add	rp, rp, #24		C bias rp to suit the offsets used from L(lo0) on
+	b	L(lo0)
+
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up,#-48]	C main loop: four result limbs per pass
+	add	rp, rp, #-32		C integrate with stp?
+	add	up, up, #-32		C integrate with ldp?
+	orr	x11, x11, x9		C combine the halves prepared at L(lo0) with the carry-in parts
+	orr	x10, x10, x13
+	stp	x10, x11, [rp,#-16]
+L(lo2):	lsr	x11, x5, tnc
+	lsl	x13, x5, cnt
+	lsr	x10, x4, tnc
+	lsl	x9, x4, cnt		C x9 = part carried into the pair handled at L(lo0)
+	ldp	x6, x7, [up,#-32]
+	orr	x11, x11, x12		C x12 = part carried in from the limb above
+	orr	x10, x10, x13
+	stp	x10, x11, [rp,#-32]
+L(lo0):	sub	n, n, #4
+	lsr	x11, x7, tnc
+	lsl	x13, x7, cnt
+	lsr	x10, x6, tnc
+	lsl	x12, x6, cnt		C x12 = part carried into the next pair, or the bottom result limb at L(end)
+	cbnz	n, L(top)
+
+L(end):	orr	x11, x11, x9		C wind-down: the last two combined limbs
+	orr	x10, x10, x13
+	stp	x10, x11, [rp,#-48]
+	str	x12, [rp,#-56]		C bottom result limb = lowest source limb << cnt
+	ret
+EPILOGUE()
diff -r 33ff2f5edbef -r dead77cd3b7c mpn/arm64/mod_34lsub1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/mod_34lsub1.asm	Sun Aug 31 21:28:07 2014 +0200
@@ -0,0 +1,123 @@
+dnl  ARM64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl  Copyright 2012-2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 ?
+C Cortex-A57	 ?
+
+define(`ap',	x0)
+define(`n',	x1)
+
+changecom(@&*$)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * An alternative inner loop which could run at 0.722 c/l:
+C	adds	x8, x8, x2
+C	adcs	x9, x9, x3
+C	ldp	x2, x3, [ap, #-32]
+C	adcs	x10, x10, x4
+C	adc	x12, x12, xzr
+C	adds	x8, x8, x5
+C	ldp	x4, x5, [ap, #-16]
+C	sub	n, n, #6
+C	adcs	x9, x9, x6
+C	adcs	x10, x10, x7
+C	ldp	x6, x7, [ap], #48
+C	adc	x12, x12, xzr
+C	tbz	n, #63, L(top)
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)		C x0 = value congruent to {ap,n} mod 2^48-1, not fully reduced
+	subs	n, n, #3
+	mov	x8, #0			C x8 collects the carry out of the 192-bit accumulator
+	b.lt	L(le2)			C n <= 2
+
+	ldp	x2, x3, [ap, #0]	C x4:x3:x2 = first three limbs, the 192-bit accumulator
+	ldr	x4, [ap, #16]
+	add	ap, ap, #24
+	subs	n, n, #3
+	b.lt	L(sum)			C n <= 5
+	cmn	x0, #0			C clear carry
+
+L(top):	ldp	x5, x6, [ap, #0]	C next three limbs
+	ldr	x7, [ap, #16]
+	add	ap, ap, #24
+	sub	n, n, #3		C sub and tbz leave the flags alone, so the carry survives across passes
+	adcs	x2, x2, x5		C carry-in is the carry out of x4 from the previous pass; 2^192 = 1 mod 2^48-1
+	adcs	x3, x3, x6
+	adcs	x4, x4, x7
+	tbz	n, #63, L(top)		C loop while n >= 0, i.e. at least three more limbs remain
+
+	adc	x8, xzr, xzr		C x8 <= 1
+
+L(sum):	cmn	n, #2			C n is -3, -2 or -1 here: 0, 1 or 2 limbs remain
+	mov	x5, #0			C mov does not change the flags set by cmn
+	b.lo	1f			C no limb remains
+	ldr	x5, [ap], #8
+1:	mov	x6, #0
+	b.ls	1f			C at most one limb remained
+	ldr	x6, [ap], #8
+1:	adds	x2, x2, x5
+	adcs	x3, x3, x6
+	adcs	x4, x4, xzr
+	adc	x8, x8, xzr		C x8 <= 2
+
+L(sum2):
+	and	x0, x2, #0xffffffffffff	C fold x4:x3:x2 and x8 into 48-bit pieces, using 2^48 = 1 mod 2^48-1
+	add	x0, x0, x2, lsr #48
+	add	x0, x0, x8		C x8 has weight 2^192 = 1
+
+	lsl	x8, x3, #16		C x3 has weight 2^64 = 2^16
+	and	x1, x8, #0xffffffffffff
+	add	x0, x0, x1
+	add	x0, x0, x3, lsr #32
+
+	lsl	x8, x4, #32		C x4 has weight 2^128 = 2^32
+	and	x1, x8, #0xffffffffffff
+	add	x0, x0, x1
+	add	x0, x0, x4, lsr #16
+	ret
+
+L(le2):	cmn	n, #1			C Z set when n = -1, i.e. two limbs in total
+	b.ne	L(1)			C canonical A64 spelling, matching the other conditional branches here
+	ldp	x2, x3, [ap]
+	mov	x4, #0
+	b	L(sum2)			C x8 is still 0 on this path
+L(1):	ldr	x2, [ap]		C single limb (assumes n >= 1, confirm with callers)
+	and	x0, x2, #0xffffffffffff
+	add	x0, x0, x2, lsr #48
+	ret
+EPILOGUE()
diff -r 33ff2f5edbef -r dead77cd3b7c mpn/arm64/rshift.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/rshift.asm	Sun Aug 31 21:28:07 2014 +0200
@@ -0,0 +1,121 @@
+dnl  ARM64 mpn_rshift.
+
+dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 ?
+C Cortex-A57	 ?
+
+changecom(@&*$)
+
+define(`rp_arg', `x0')
+define(`up',     `x1')
+define(`n',      `x2')
+define(`cnt',    `x3')
+
+define(`rp',     `x16')
+
+define(`tnc',`x8')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	mov	rp, rp_arg
+	sub	tnc, xzr, cnt
+	tbz	n, #0, L(bx0)
+
+L(bx1):	ldr	x4, [up,#0]


More information about the gmp-commit mailing list