[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Apr 24 01:13:54 CEST 2013


details:   /var/hg/gmp/rev/c34a35870cda
changeset: 15745:c34a35870cda
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Apr 24 01:06:28 2013 +0200
description:
ARM A15 logops_n.

details:   /var/hg/gmp/rev/46bfe0a1bb40
changeset: 15746:46bfe0a1bb40
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Apr 24 01:13:44 2013 +0200
description:
ChangeLog

diffstat:

 ChangeLog                       |    4 +
 mpn/arm/v7a/cora15/logops_n.asm |  240 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 244 insertions(+), 0 deletions(-)

diffs (255 lines):

diff -r 782e3bfbcab0 -r 46bfe0a1bb40 ChangeLog
--- a/ChangeLog	Fri Apr 19 16:13:19 2013 +0200
+++ b/ChangeLog	Wed Apr 24 01:13:44 2013 +0200
@@ -1,3 +1,7 @@
+2013-04-24  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/arm/v7a/cora15/logops_n.asm: New file.
+
 2013-04-19  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/arm/v7a/cora15/aors_n.asm: New file.
diff -r 782e3bfbcab0 -r 46bfe0a1bb40 mpn/arm/v7a/cora15/logops_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/logops_n.asm	Wed Apr 24 01:13:44 2013 +0200
@@ -0,0 +1,240 @@
+dnl  ARM mpn_and_n, mpn_andn_n. mpn_nand_n, etc, optimised for A15.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb             cycles/limb
+C          and andn ior xor         nand iorn nior xnor
+C StrongARM	 ?			 ?
+C XScale	 ?			 ?
+C Cortex-A7	 ?			 ?
+C Cortex-A8	 ?			 ?
+C Cortex-A9	3.5			3.56
+C Cortex-A15	1.27			1.64
+
+C This is great A15 core register code, but it is a bit large.
+C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
+define(`UNROLL', 4x2)	 	C alternatives: 4 4x2
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',    `mpn_and_n')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',    `mpn_andn_n')
+  define(`LOGOP',   `bic	$1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',    `mpn_nand_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `and	$1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',    `mpn_ior_n')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',    `mpn_iorn_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `bic	$1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',    `mpn_nior_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `orr	$1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',    `mpn_xor_n')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',    `mpn_xnor_n')
+  define(`POSTOP',  `mvn	$1, $1')
+  define(`LOGOP',   `eor	$1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+	push	{ r4-r9 }
+
+ifelse(FEEDIN_VARIANT,0,`
+	ands	r6, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00a)
+	tst	r6, #1
+	beq	L(bx0)
+	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	LOGOP(	r9, r5, r7)
+	POSTOP(	r9)
+	str	r9, [rp], #4
+	tst	r6, #2
+	beq	L(b00)
+L(bx0):	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	sub	rp, rp, #8
+	b	L(lo)
+L(b00):	tst	n, n
+	beq	L(wd1)
+L(b00a):ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+')
+ifelse(FEEDIN_VARIANT,1,`
+	and	r6, n, #3
+	mov	n, n, lsr #2
+	tst	r6, #1
+	beq	L(bx0)
+	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	LOGOP(	r9, r5, r7)
+	POSTOP(	r9)
+	str	r9, [rp], #4
+L(bx0):	tst	r6, #2
+	beq	L(b00)
+	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	sub	rp, rp, #8
+	b	L(lo)
+L(b00):	tst	n, n
+	beq	L(wd1)
+	ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+')
+ifelse(FEEDIN_VARIANT,2,`
+	ands	r6, n, #3
+	mov	n, n, lsr #2
+	beq	L(b00)
+	cmp	r6, #2
+	bcc	L(b01)
+	beq	L(b10)
+
+L(b11):	ldr	r5, [up], #4
+	ldr	r7, [vp], #4
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #0]
+	ldrd	r6, r7, [vp, #0]
+	POSTOP(	r9)
+	str	r9, [rp], #-4
+	b	L(lo)
+
+L(b00):	ldrd	r4, r5, [up], #-8
+	ldrd	r6, r7, [vp], #-8
+	sub	rp, rp, #16
+	b	L(mid)
+
+L(b01):	ldr	r5, [up], #-4
+	ldr	r7, [vp], #-4
+	LOGOP(	r9, r5, r7)
+	POSTOP(	r9)
+	str	r9, [rp], #-12
+	tst	n, n
+	beq	L(wd1)
+L(gt1):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	b	L(mid)
+
+L(b10):	ldrd	r4, r5, [up]
+	ldrd	r6, r7, [vp]
+	sub	rp, rp, #8
+	b	L(lo)
+')
+	ALIGN(16)
+ifelse(UNROLL,4,`
+L(top):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #8]
+L(mid):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #16]!
+	ldrd	r6, r7, [vp, #16]!
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #16]!
+	sub	n, n, #1
+L(lo):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	tst	n, n
+	bne	L(top)
+')
+ifelse(UNROLL,4x2,`
+L(top):	ldrd	r4, r5, [up, #8]
+	ldrd	r6, r7, [vp, #8]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #8]
+L(mid):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #16]
+	ldrd	r6, r7, [vp, #16]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #16]
+	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	sub	n, n, #2
+	tst	n, n
+	bmi	L(dne)
+	ldrd	r4, r5, [up, #24]
+	ldrd	r6, r7, [vp, #24]
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #24]
+	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	ldrd	r4, r5, [up, #32]!
+	ldrd	r6, r7, [vp, #32]!
+	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #32]!
+L(lo):	LOGOP(	r8, r4, r6)
+	LOGOP(	r9, r5, r7)
+	tst	n, n
+	bne	L(top)
+')
+
+L(end):	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #8]
+L(wd1):	pop	{ r4-r9 }
+	bx	r14
+L(dne):	POSTOP(	r8)
+	POSTOP(	r9)
+	strd	r8, r9, [rp, #24]
+	pop	{ r4-r9 }
+	bx	r14
+EPILOGUE()


More information about the gmp-commit mailing list