[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Apr 28 20:07:18 CEST 2012


details:   /var/hg/gmp/rev/4334ec4c931c
changeset: 14902:4334ec4c931c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Apr 28 20:05:17 2012 +0200
description:
Rewrite for stable speed, smaller size.

details:   /var/hg/gmp/rev/2229dbbb2b06
changeset: 14903:2229dbbb2b06
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Apr 28 20:07:15 2012 +0200
description:
Replace broken thumb code.

diffstat:

 ChangeLog               |  10 ++++++
 mpn/arm/v6/addmul_1.asm |  77 +++++++++++++++++-------------------------------
 mpn/arm/v6/mul_1.asm    |  67 +++++++++++++++++++-----------------------
 mpn/thumb/add_n.asm     |  52 +++++++++++++++++++++++++++++++++
 mpn/thumb/add_n.s       |  48 ------------------------------
 mpn/thumb/sub_n.asm     |  52 +++++++++++++++++++++++++++++++++
 mpn/thumb/sub_n.s       |  48 ------------------------------
 7 files changed, 172 insertions(+), 182 deletions(-)

diffs (truncated from 468 to 300 lines):

diff -r 9068add0d4fb -r 2229dbbb2b06 ChangeLog
--- a/ChangeLog	Fri Apr 27 11:15:36 2012 +0200
+++ b/ChangeLog	Sat Apr 28 20:07:15 2012 +0200
@@ -1,3 +1,13 @@
+2012-04-28  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/thumb/add_n.asm: New file.
+	* mpn/thumb/sub_n.asm: New file.
+	* mpn/thumb/add_n.s: Remove broken code.
+	* mpn/thumb/sub_n.s: Likewise.
+
+	* mpn/arm/v6/addmul_1.asm: Rewrite for stable speed, smaller size.
+	* mpn/arm/v6/mul_1.asm: Likewise.
+
 2012-04-27  Torbjorn Granlund  <tege at gmplib.org>
 
 	* configure.in: Search arm/v6t2 for arm7.
diff -r 9068add0d4fb -r 2229dbbb2b06 mpn/arm/v6/addmul_1.asm
--- a/mpn/arm/v6/addmul_1.asm	Fri Apr 27 11:15:36 2012 +0200
+++ b/mpn/arm/v6/addmul_1.asm	Sat Apr 28 20:07:15 2012 +0200
@@ -30,7 +30,6 @@
 C  * Micro-optimise feed-in code.
 C  * Optimise for n=1,2 by delaying register saving.
 C  * Try using ldm/stm.
-C  * Performance degenerates to 4.49 c/l for some alignments.
 
 define(`rp',`r0')
 define(`up',`r1')
@@ -39,82 +38,62 @@
 
 ASM_START()
 PROLOGUE(mpn_addmul_1)
-	stmfd	sp!, { r4, r5, r6, r7, r8 }
+	stmfd	sp!, { r4, r5, r6, r7 }
 
-	ands	r12, n, #3
+	ands	r6, n, #3
+	mov	r12, #0
 	beq	L(fi0)
-	cmp	r12, #2
+	cmp	r6, #2
 	bcc	L(fi1)
 	beq	L(fi2)
 
 L(fi3):	ldr	r4, [up], #4
-	ldr	r12, [rp, #0]
+	ldr	r6, [rp, #0]
 	ldr	r5, [up], #4
-	ldr	r6, [rp, #4]
-	mov	r8, #0
-	umaal	r8, r12, r4, v0
 	b	L(lo3)
 
 L(fi0):	ldr	r5, [up], #4
-	ldr	r8, [rp, #0]
+	ldr	r7, [rp], #4
 	ldr	r4, [up], #4
-	ldr	r12, [rp, #4]
-	mov	r7, #0
-	umaal	r7, r8, r5, v0
-	add	rp, rp, #4
 	b	L(lo0)
 
 L(fi1):	ldr	r4, [up], #4
-	ldr	r7, [rp, #0]
+	ldr	r6, [rp], #8
 	subs	n, n, #1
-	mov	r6, #0
 	beq	L(1)
 	ldr	r5, [up], #4
-	ldr	r8, [rp, #4]
-	umaal	r6, r7, r4, v0
-	add	rp, rp, #8
 	b	L(lo1)
 
 L(fi2):	ldr	r5, [up], #4
-	ldr	r6, [rp, #0]
+	ldr	r7, [rp], #12
 	ldr	r4, [up], #4
-	ldr	r7, [rp, #4]
-	mov	r12, #0
-	umaal	r12, r6, r5, v0
-	subs	n, n, #2
-	add	rp, rp, #12
-	beq	L(end)
+	b	L(lo2)
 
 	ALIGN(16)
-L(top):	ldr	r5, [up], #4
-	ldr	r8, [rp, #-4]
-	umaal	r6, r7, r4, v0
-	str	r12, [rp, #-12]
-L(lo1):	ldr	r4, [up], #4
-	ldr	r12, [rp, #0]
-	umaal	r7, r8, r5, v0
+L(top):	ldr	r6, [rp, #-8]
+	ldr	r5, [up], #4
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0
+	ldr	r7, [rp, #-4]
+	ldr	r4, [up], #4
 	str	r6, [rp, #-8]
-L(lo0):	ldr	r5, [up], #4
-	ldr	r6, [rp, #4]
-	umaal	r8, r12, r4, v0
+L(lo0):	umaal	r7, r12, r5, v0
+	ldr	r6, [rp, #0]
+	ldr	r5, [up], #4
 	str	r7, [rp, #-4]
-L(lo3):	ldr	r4, [up], #4
-	ldr	r7, [rp, #8]
-	umaal	r12, r6, r5, v0
-	str	r8, [rp], #16
+L(lo3):	umaal	r6, r12, r4, v0
+	ldr	r7, [rp, #4]
+	ldr	r4, [up], #4
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
 	subs	n, n, #4
 	bhi	L(top)
 
-L(end):	umaal	r6, r7, r4, v0
-	str	r12, [rp, #-12]
+	ldr	r6, [rp, #-8]
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
 	str	r6, [rp, #-8]
-	mov	r0, r7
-	ldmfd	sp!, { r4, r5, r6, r7, r8 }
-	bx	lr
-
-L(1):	umaal	r6, r7, r4, v0
-	str	r6, [rp, #0]
-	mov	r0, r7
-	ldmfd	sp!, { r4, r5, r6, r7, r8 }
+	mov	r0, r12
+	ldmfd	sp!, { r4, r5, r6, r7 }
 	bx	lr
 EPILOGUE()
diff -r 9068add0d4fb -r 2229dbbb2b06 mpn/arm/v6/mul_1.asm
--- a/mpn/arm/v6/mul_1.asm	Fri Apr 27 11:15:36 2012 +0200
+++ b/mpn/arm/v6/mul_1.asm	Sat Apr 28 20:07:15 2012 +0200
@@ -30,7 +30,6 @@
 C  * Micro-optimise feed-in code.
 C  * Optimise for n=1,2 by delaying register saving.
 C  * Try using ldm/stm.
-C  * Performance degenerates to 4.49 c/l for some alignments.
 
 define(`rp',`r0')
 define(`up',`r1')
@@ -39,71 +38,65 @@
 
 ASM_START()
 PROLOGUE(mpn_mul_1)
-	stmfd	sp!, { r4, r5, r6, r7, r8 }
+	stmfd	sp!, { r4, r5, r6, r7 }
 
-	ands	r12, n, #3
+	ands	r6, n, #3
+	mov	r12, #0
 	beq	L(fi0)
-	cmp	r12, #2
+	cmp	r6, #2
 	bcc	L(fi1)
 	beq	L(fi2)
 
 L(fi3):	ldr	r4, [up], #4
+	mov	r6, #0
 	ldr	r5, [up], #4
-	umull	r8, r12, r4, v0
 	b	L(lo3)
 
 L(fi0):	ldr	r5, [up], #4
+	add	rp, rp, #4
+	mov	r7, #0
 	ldr	r4, [up], #4
-	umull	r7, r8, r5, v0
-	add	rp, rp, #4
 	b	L(lo0)
 
 L(fi1):	ldr	r4, [up], #4
+	mov	r6, #0
+	add	rp, rp, #8
 	subs	n, n, #1
 	beq	L(1)
 	ldr	r5, [up], #4
-	umull	r6, r7, r4, v0
-	add	rp, rp, #8
 	b	L(lo1)
 
 L(fi2):	ldr	r5, [up], #4
+	add	rp, rp, #12
+	mov	r7, #0
 	ldr	r4, [up], #4
-	umull	r12, r6, r5, v0
-	subs	n, n, #2
-	add	rp, rp, #12
-	beq	L(end)
+	b	L(lo2)
 
 	ALIGN(16)
-L(top):	ldr	r5, [up], #4
+L(top):	mov	r6, #0
+	ldr	r5, [up], #4
+	str	r7, [rp, #-12]
+L(lo1):	umaal	r6, r12, r4, v0
 	mov	r7, #0
-	umaal	r6, r7, r4, v0
-	str	r12, [rp, #-12]
-L(lo1):	ldr	r4, [up], #4
-	mov	r8, #0
-	umaal	r7, r8, r5, v0
+	ldr	r4, [up], #4
 	str	r6, [rp, #-8]
-L(lo0):	ldr	r5, [up], #4
-	mov	r12, #0
-	umaal	r8, r12, r4, v0
+L(lo0):	umaal	r7, r12, r5, v0
+	mov	r6, #0
+	ldr	r5, [up], #4
 	str	r7, [rp, #-4]
-L(lo3):	subs	n, n, #4
+L(lo3):	umaal	r6, r12, r4, v0
+	mov	r7, #0
 	ldr	r4, [up], #4
-	mov	r6, #0
-	umaal	r12, r6, r5, v0
-	str	r8, [rp], #16
+	str	r6, [rp], #16
+L(lo2):	umaal	r7, r12, r5, v0
+	subs	n, n, #4
 	bhi	L(top)
 
-L(end):	mov	r7, #0
-	umaal	r6, r7, r4, v0
-	str	r12, [rp, #-12]
+	mov	r6, #0
+	str	r7, [rp, #-12]
+L(1):	umaal	r6, r12, r4, v0
 	str	r6, [rp, #-8]
-	mov	r0, r7
-	ldmfd	sp!, { r4, r5, r6, r7, r8 }
-	bx	lr
-
-L(1):	umull	r6, r7, r4, v0
-	str	r6, [rp, #0]
-	mov	r0, r7
-	ldmfd	sp!, { r4, r5, r6, r7, r8 }
+	mov	r0, r12
+	ldmfd	sp!, { r4, r5, r6, r7 }
 	bx	lr
 EPILOGUE()
diff -r 9068add0d4fb -r 2229dbbb2b06 mpn/thumb/add_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/thumb/add_n.asm	Sat Apr 28 20:07:15 2012 +0200
@@ -0,0 +1,52 @@
+dnl  ARM/Thumb mpn_add_n.
+
+dnl  Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published by
+dnl  the Free Software Foundation; either version 3 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',	r0)
+define(`up',	r1)
+define(`vp',	r2)
+define(`n',	r3)
+
+ASM_START()
+	.thumb
+PROLOGUE(mpn_add_nc)
+	push	{r4, r5, r6}
+	ldr	r6, [sp, #12]		C init carry save register
+	sub	r6, #1
+	b	L(top)
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+	push	{r4, r5, r6}
+	neg	r6, n			C init carry save register
+
+L(top):	ldmia	up!, {r4}		C load next limb from S1
+	cmp	n, r6			C tricky carry restore
+	ldmia	vp!, {r5}		C load next limb from S2
+	adc	r4, r5
+	stmia	rp!, {r4}		C store result limb to RES


More information about the gmp-commit mailing list