[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Apr 19 16:13:33 CEST 2013


details:   /var/hg/gmp/rev/1f18124cb728
changeset: 15742:1f18124cb728
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Apr 19 16:12:25 2013 +0200
description:
ARM A15 aors_n.

details:   /var/hg/gmp/rev/b9e2ee245df9
changeset: 15743:b9e2ee245df9
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Apr 19 16:12:39 2013 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/782e3bfbcab0
changeset: 15744:782e3bfbcab0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Apr 19 16:13:19 2013 +0200
description:
Update cycle/arch tables.

diffstat:

 ChangeLog                       |    2 +
 mpn/arm/v7a/cora15/addmul_1.asm |   13 ++-
 mpn/arm/v7a/cora15/aors_n.asm   |  151 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+), 4 deletions(-)

diffs (196 lines):

diff -r b81108588266 -r 782e3bfbcab0 ChangeLog
--- a/ChangeLog	Fri Apr 19 00:35:37 2013 +0200
+++ b/ChangeLog	Fri Apr 19 16:13:19 2013 +0200
@@ -1,5 +1,7 @@
 2013-04-19  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/arm/v7a/cora15/aors_n.asm: New file.
+
 	* mpn/arm/v7a/cora15/addmul_1.asm: Rewrite.
 
 2013-04-18  Torbjorn Granlund  <tege at gmplib.org>
diff -r b81108588266 -r 782e3bfbcab0 mpn/arm/v7a/cora15/addmul_1.asm
--- a/mpn/arm/v7a/cora15/addmul_1.asm	Fri Apr 19 00:35:37 2013 +0200
+++ b/mpn/arm/v7a/cora15/addmul_1.asm	Fri Apr 19 16:13:19 2013 +0200
@@ -20,7 +20,7 @@
 include(`../config.m4')
 
 C	     cycles/limb		best
-C StrongARM:     ?
+C StrongARM:     -
 C XScale	 ?
 C Cortex-A7	 ?
 C Cortex-A8	 ?
@@ -37,9 +37,14 @@
 C overhead).
 C
 C We don't use r12 due to ldrd and strd limitations.
-C
-C This is armv5 code, optimised for the armv7a cpu A15.  Its location in the
-C GMP file structure might be misleading.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
 
 define(`rp', `r0')
 define(`up', `r1')
diff -r b81108588266 -r 782e3bfbcab0 mpn/arm/v7a/cora15/aors_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/aors_n.asm	Fri Apr 19 16:13:19 2013 +0200
@@ -0,0 +1,151 @@
+dnl  ARM mpn_add_n/mpn_sub_n optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:     -
+C XScale	 ?
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 3.55			2.5
+C Cortex-A15	 1.27			this
+
+C This was a major improvement compared to the code we had before, but it might
+C not be the best 8-way code possible.  We've tried some permutations of auto-
+C increments and separate pointer updates, but they all ran at the same speed
+C on A15.
+
+C Architecture requirements:
+C v5	-
+C v5t	-
+C v5te	ldrd strd
+C v6	-
+C v6t2	-
+C v7a	-
+
+define(`rp', `r0')	C destination: result limbs are stored through rp
+define(`up', `r1')	C first source operand, read with ldr/ldrd
+define(`vp', `r2')	C second source operand, read with ldr/ldrd
+define(`n',  `r3')	C limb count; low 2 bits pick the entry path, n >> 2 drives the loop
+
+ifdef(`OPERATION_add_n', `
+  define(`ADDSUBC',	adcs)	C per-limb operation: add with carry, updating flags
+  define(`IFADD',	`$1')	C expands to its argument for add; not referenced in the code below
+  define(`SETCY',	`cmp	$1, #1')	C C flag = 1 when the register is nonzero, i.e. carry-in present
+  define(`RETVAL',	`adc	r0, n, #0')	C r0 = n + C; used on exits where n is 0, giving carry-out
+  define(`RETVAL2',	`adc	r0, n, #1')	C r0 = n + 1 + C; used on the exit where n is -1
+  define(`func',	mpn_add_n)
+  define(`func_nc',	mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(`ADDSUBC',	sbcs)	C per-limb operation: subtract with carry, updating flags
+  define(`IFADD',	`')	C expands to nothing for sub; not referenced in the code below
+  define(`SETCY',	`rsbs	$1, $1, #0')	C C flag = 1 only when the register is 0, i.e. no borrow-in
+  define(`RETVAL',	`sbc	r0, r0, r0
+			and	r0, r0, #1')	C sbc gives 0 or -1 from the C flag; the and reduces it to borrow 0 or 1
+  define(`RETVAL2',	`RETVAL')	C the sub result does not depend on n, so both exits share RETVAL
+  define(`func',	mpn_sub_n)
+  define(`func_nc',	mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+	ldr	r12, [sp]	C r12 = word at [sp] on entry, used by SETCY as incoming carry/borrow (presumably the 5th, stack-passed argument; confirm against the prototype)
+	b	L(ent)	C join the plain entry point just after its r12 = 0 setup
+EPILOGUE()
+PROLOGUE(func)
+	mov	r12, #0	C plain entry: no incoming carry/borrow
+L(ent):	push	{ r4-r9 }	C save r4-r9, used below as limb temporaries
+
+	ands	r6, n, #3	C r6 = n mod 4; Z set when it is 0
+	mov	n, n, lsr #2	C n = n / 4; plain mov, so the flags from ands survive
+	beq	L(b00)	C n mod 4 == 0
+	cmp	r6, #2
+	bcc	L(b01)	C n mod 4 == 1
+	beq	L(b10)	C n mod 4 == 2; fall through when it is 3
+
+L(b11):	ldr	r5, [up], #4	C load limb 0 of each operand, then advance up and vp past it
+	ldr	r7, [vp], #4
+	SETCY(	r12)	C turn r12 into the initial C flag for the carry chain
+	ADDSUBC	r9, r5, r7	C handle the odd leading limb
+	ldrd	r4, r5, [up, #0]	C preload the next limb pair; up and vp now point at it
+	ldrd	r6, r7, [vp, #0]
+	str	r9, [rp], #-4	C store limb 0, then bias rp so the pending pair lands at rp + 8
+	b	L(lo)
+
+L(b00):	ldrd	r4, r5, [up], #-8	C load the first pair, then bias up and vp so that pair sits at offset 8
+	ldrd	r6, r7, [vp], #-8
+	SETCY(	r12)	C turn r12 into the initial C flag for the carry chain
+	sub	rp, rp, #16	C bias rp so the first pair is stored at rp + 16
+	b	L(mid)
+
+L(b01):	ldr	r5, [up], #-4	C load limb 0, then bias up and vp so the next pair sits at offset 8
+	ldr	r7, [vp], #-4
+	SETCY(	r12)	C turn r12 into the initial C flag for the carry chain
+	ADDSUBC	r9, r5, r7	C handle the odd leading limb
+	str	r9, [rp], #-12	C store limb 0, then bias rp so the next pair is stored at rp + 16
+	tst	n, n	C sets N and Z only; the C flag from the limb op is kept
+	beq	L(wd1)	C only one limb in total: return with n == 0
+L(gt1):	ldrd	r4, r5, [up, #8]	C preload the pair following limb 0
+	ldrd	r6, r7, [vp, #8]
+	b	L(mid)
+
+L(b10):	ldrd	r4, r5, [up]	C preload the first pair; up and vp are left pointing at it
+	ldrd	r6, r7, [vp]
+	SETCY(	r12)	C turn r12 into the initial C flag for the carry chain
+	sub	rp, rp, #8	C bias rp so the pending pair lands at rp + 8
+	b	L(lo)
+
+	ALIGN(16)
+L(top):	ldrd	r4, r5, [up, #8]	C load next pair while the previous result is still in r8, r9
+	ldrd	r6, r7, [vp, #8]
+	strd	r8, r9, [rp, #8]	C store the result computed at L(lo)
+L(mid):	ADDSUBC	r8, r4, r6	C here the pair in r4-r7 came from offset 8 and its result goes to rp + 16
+	ADDSUBC	r9, r5, r7
+	ldrd	r4, r5, [up, #16]
+	ldrd	r6, r7, [vp, #16]
+	strd	r8, r9, [rp, #16]
+	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	sub	n, n, #2	C one full pass covers 8 limbs = 2 units of n; no flag update
+	tst	n, n	C sets N and Z only, so the carry chain in C is preserved
+	bmi	L(dne)	C n was 1: the data ends here, with the last result still in r8, r9
+	ldrd	r4, r5, [up, #24]
+	ldrd	r6, r7, [vp, #24]
+	strd	r8, r9, [rp, #24]
+	ADDSUBC	r8, r4, r6
+	ADDSUBC	r9, r5, r7
+	ldrd	r4, r5, [up, #32]!	C pre-index with writeback: advance up, vp and rp by 32 bytes
+	ldrd	r6, r7, [vp, #32]!
+	strd	r8, r9, [rp, #32]!
+L(lo):	ADDSUBC	r8, r4, r6	C here up and vp point at the pair in r4-r7 and its result goes to rp + 8
+	ADDSUBC	r9, r5, r7
+	tst	n, n	C sets N and Z only; C flag is kept for the return value
+	bne	L(top)	C more limbs remain
+
+L(end):	strd	r8, r9, [rp, #8]	C store the final pair; n is 0 on this path
+L(wd1):	RETVAL	C r0 = carry/borrow out; the add form relies on n being 0 here
+	pop	{ r4-r9 }	C restore the saved registers
+	bx	r14	C return to the caller through the link register
+L(dne):	strd	r8, r9, [rp, #24]	C store the final pair; n is -1 on this path
+	RETVAL2	C r0 = carry/borrow out; the add form compensates for n being -1
+	pop	{ r4-r9 }	C restore the saved registers
+	bx	r14	C return to the caller through the link register
+EPILOGUE()


More information about the gmp-commit mailing list