[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Apr 27 00:18:45 CEST 2012


details:   /var/hg/gmp/rev/56336a2c0e95
changeset: 14895:56336a2c0e95
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Apr 27 00:07:15 2012 +0200
description:
Define LEA for ARM.

details:   /var/hg/gmp/rev/06430c60bb82
changeset: 14896:06430c60bb82
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Apr 27 00:12:04 2012 +0200
description:
Add ARM mpn_modexact_1c_odd (v4 and slightly faster v6).

details:   /var/hg/gmp/rev/6a7e129fe145
changeset: 14897:6a7e129fe145
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Apr 27 00:18:27 2012 +0200
description:
Add ARM gcd_1 (v5 and slightly faster for v6t2).

diffstat:

 ChangeLog               |   11 ++++
 mpn/arm/arm-defs.m4     |   26 +++++++++++-
 mpn/arm/invert_limb.asm |    8 +--
 mpn/arm/mode1o.asm      |   72 ++++++++++++++++++++++++++++++++
 mpn/arm/v5/gcd_1.asm    |  106 ++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm/v6/mode1o.asm   |   75 +++++++++++++++++++++++++++++++++
 mpn/arm/v6t2/gcd_1.asm  |  100 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 391 insertions(+), 7 deletions(-)

diffs (truncated from 459 to 300 lines):

diff -r 20cf5542e78e -r 6a7e129fe145 ChangeLog
--- a/ChangeLog	Thu Apr 26 13:08:24 2012 +0200
+++ b/ChangeLog	Fri Apr 27 00:18:27 2012 +0200
@@ -1,3 +1,14 @@
+2012-04-27  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/arm/v5/gcd_1.asm: New file.
+	* mpn/arm/v6t2/gcd_1.asm: New file.
+
+	* mpn/arm/mode1o.asm: New file.
+	* mpn/arm/v6/mode1o.asm: New file.
+
+	* mpn/arm/arm-defs.m4 (LEA): New define.
+	* mpn/arm/invert_limb.asm: Use LEA.
+
 2012-04-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpz/bin_uiui.c (bc_bin_uiui): Nail support.
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/arm-defs.m4
--- a/mpn/arm/arm-defs.m4	Thu Apr 26 13:08:24 2012 +0200
+++ b/mpn/arm/arm-defs.m4	Fri Apr 27 00:18:27 2012 +0200
@@ -23,7 +23,7 @@
 dnl  Standard commenting is with @, the default m4 # is for constants and we
 dnl  don't want to disable macro expansions in or after them.
 
-changecom(@)
+changecom(@&*$)
 
 
 dnl  APCS register names.
@@ -47,4 +47,28 @@
 deflit(lr,r14)
 deflit(pc,r15)
 
+
+define(`lea_num',0)
+
+dnl  LEA(reg,gmp_symbol)
+dnl  
+dnl  Load the address of gmp_symbol into a register.  The gmp_symbol must be
+dnl  either local or protected/hidden, since we assume it has a fixed distance
+dnl  from the point of use.
+
+define(`LEA',`dnl
+ldr	$1, L(ptr`'lea_num)
+ifdef(`PIC',dnl
+`
+L(bas`'lea_num):dnl
+	add	$1, $1, pc`'dnl
+	define(`EPILOGUE_cpu',
+		L(ptr`'lea_num):	.word	GSYM_PREFIX`'$2-L(bas`'lea_num)-8)dnl
+	define(`lea_num', eval(lea_num+1))dnl
+',`dnl
+	define(`EPILOGUE_cpu',
+		L(ptr`'lea_num):	.word	GSYM_PREFIX`'$2)')dnl
+')
+
+
 divert
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/invert_limb.asm
--- a/mpn/arm/invert_limb.asm	Thu Apr 26 13:08:24 2012 +0200
+++ b/mpn/arm/invert_limb.asm	Fri Apr 27 00:18:27 2012 +0200
@@ -1,6 +1,6 @@
 dnl  ARM mpn_invert_limb -- Invert a normalized limb.
 
-dnl  Copyright 2001, 2009, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -21,8 +21,7 @@
 
 ASM_START()
 PROLOGUE(mpn_invert_limb)
-	ldr	r2, L(4)
-L(2):	add	r2, pc, r2
+	LEA(	r2, approx_tab-512)
 	mov	r3, r0, lsr #23
 	mov	r3, r3, asl #1
 	ldrh	r3, [r3, r2]
@@ -43,9 +42,6 @@
 	adc	r3, r3, r0
 	rsb	r0, r3, r2
 	bx	lr
-
-	ALIGN(4)
-L(4):	.word	approx_tab-8-512-L(2)
 EPILOGUE()
 
 	.section .rodata
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/mode1o.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/mode1o.asm	Fri Apr 27 00:18:27 2012 +0200
@@ -0,0 +1,72 @@
+dnl  ARM mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	10
+C Cortex-A15	 ?
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+	.protected	binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+	stmfd	sp!, {r4, r5}
+
+	LEA(	r4, binvert_limb_table)
+
+	ldr	r5, [up], #4		C up[0]
+
+	ubfx	r12, d, #1, #7
+	ldrb	r4, [r4, r12]
+	smulbb	r12, r4, r4
+	mul	r12, d, r12
+	rsb	r12, r12, r4, asl #1
+	mul	r4, r12, r12
+	mul	r4, d, r4
+	rsb	r4, r4, r12, asl #1	C r4 = inverse
+
+	subs	n, n, #1		C set carry as side-effect
+	beq	L(end)
+
+L(top):	sbcs	cy, r5, cy
+	ldr	r5, [up], #4
+	sub	n, n, #1
+	mul	r12, r4, cy
+	tst	n, n
+	umull	r12, cy, r12, d
+	bne	L(top)
+
+L(end):	sbcs	cy, r5, cy
+	mul	r12, r4, cy
+	umull	r12, r0, r12, d
+	addcc	r0, r0, #1
+
+	ldmfd	sp!, {r4, r5}
+	bx	r14
+EPILOGUE()
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/v5/gcd_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/gcd_1.asm	Fri Apr 27 00:18:27 2012 +0200
@@ -0,0 +1,106 @@
+dnl  ARM v5 mpn_gcd_1.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 5.9
+C Cortex-A15	 ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C TODO
+C  * Optimise inner-loop better.
+
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+C INPUT PARAMETERS
+define(`up',    `r0')
+define(`n',     `r1')
+define(`v0',    `r2')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+	push	{r4, r7, lr}
+	ldr	r3, [up]	C U low limb
+
+	orr	r3, r3, v0
+	rsb	r4, r3, #0
+	and	r4, r4, r3
+	clz	r4, r4		C min(ctz(u0),ctz(v0))
+	rsb	r4, r4, #31
+
+	rsb	r12, v0, #0
+	and	r12, r12, v0
+	clz	r12, r12
+	rsb	r12, r12, #31
+	lsr	v0, v0, r12
+
+	mov	r7, v0
+
+	cmp	n, #1
+	bne	L(nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+	ldr	r3, [up]
+	cmp	v0, r3, lsr #BMOD_THRES_LOG2
+	bhi	L(red1)
+
+L(bmod):mov	r3, #0		C carry argument
+	bl	mpn_modexact_1c_odd
+	b	L(red0)
+
+L(nby1):cmp	n, #BMOD_1_TO_MOD_1_THRESHOLD
+	blo	L(bmod)
+
+	bl	mpn_mod_1
+
+L(red0):mov	r3, r0
+L(red1):rsbs	r12, r3, #0
+	and	r12, r12, r3
+	clz	r12, r12
+	rsb	r12, r12, #31
+	bne	L(mid)
+	b	L(end)
+
+	ALIGN(8)
+L(top):	rsb	r12, r12, #31
+	movcc	r3, r1		C if x-y < 0
+	movcc	r7, r0		C use x,y-x
+L(mid):	lsr	r3, r3, r12	C
+	mov	r0, r3		C
+	sub	r1, r7, r3	C
+	rsbs	r3, r7, r3	C
+	and	r12, r1, r3	C
+	clz	r12, r12	C
+	bne	L(top)		C
+
+L(end):	lsl	r0, r7, r4
+	pop	{r4, r7, pc}
+EPILOGUE()
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/v6/mode1o.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6/mode1o.asm	Fri Apr 27 00:18:27 2012 +0200
@@ -0,0 +1,75 @@
+dnl  ARM mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


More information about the gmp-commit mailing list