[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
Sat Apr 28 20:07:18 CEST 2012
details:   /var/hg/gmp/rev/4334ec4c931c
changeset: 14902:4334ec4c931c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Apr 28 20:05:17 2012 +0200
description:
Rewrite for stable speed, smaller size.

details:   /var/hg/gmp/rev/2229dbbb2b06
changeset: 14903:2229dbbb2b06
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Apr 28 20:07:15 2012 +0200
description:
Replace broken thumb code.
diffstat:
 ChangeLog               |  10 ++++++
 mpn/arm/v6/addmul_1.asm |  77 +++++++++++++++++-------------------------------
 mpn/arm/v6/mul_1.asm    |  67 +++++++++++++++++++-----------------------
 mpn/thumb/add_n.asm     |  52 +++++++++++++++++++++++++++++++++
 mpn/thumb/add_n.s       |  48 ------------------------------
 mpn/thumb/sub_n.asm     |  52 +++++++++++++++++++++++++++++++++
 mpn/thumb/sub_n.s       |  48 ------------------------------
 7 files changed, 172 insertions(+), 182 deletions(-)
diffs (truncated from 468 to 300 lines):
diff -r 9068add0d4fb -r 2229dbbb2b06 ChangeLog
--- a/ChangeLog Fri Apr 27 11:15:36 2012 +0200
+++ b/ChangeLog Sat Apr 28 20:07:15 2012 +0200
@@ -1,3 +1,13 @@
+2012-04-28 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/thumb/add_n.asm: New file.
+ * mpn/thumb/sub_n.asm: New file.
+ * mpn/thumb/add_n.s: Remove broken code.
+ * mpn/thumb/sub_n.s: Likewise.
+
+ * mpn/arm/v6/addmul_1.asm: Rewrite for stable speed, smaller size.
+ * mpn/arm/v6/mul_1.asm: Likewise.
+
2012-04-27 Torbjorn Granlund <tege at gmplib.org>
* configure.in: Search arm/v6t2 for arm7.
diff -r 9068add0d4fb -r 2229dbbb2b06 mpn/arm/v6/addmul_1.asm
--- a/mpn/arm/v6/addmul_1.asm Fri Apr 27 11:15:36 2012 +0200
+++ b/mpn/arm/v6/addmul_1.asm Sat Apr 28 20:07:15 2012 +0200
@@ -30,7 +30,6 @@
C * Micro-optimise feed-in code.
C * Optimise for n=1,2 by delaying register saving.
C * Try using ldm/stm.
-C * Performance degenerates to 4.49 c/l for some alignments.
define(`rp',`r0')
define(`up',`r1')
@@ -39,82 +38,62 @@
ASM_START()
PROLOGUE(mpn_addmul_1)
- stmfd sp!, { r4, r5, r6, r7, r8 }
+ stmfd sp!, { r4, r5, r6, r7 }
- ands r12, n, #3
+ ands r6, n, #3
+ mov r12, #0
beq L(fi0)
- cmp r12, #2
+ cmp r6, #2
bcc L(fi1)
beq L(fi2)
L(fi3): ldr r4, [up], #4
- ldr r12, [rp, #0]
+ ldr r6, [rp, #0]
ldr r5, [up], #4
- ldr r6, [rp, #4]
- mov r8, #0
- umaal r8, r12, r4, v0
b L(lo3)
L(fi0): ldr r5, [up], #4
- ldr r8, [rp, #0]
+ ldr r7, [rp], #4
ldr r4, [up], #4
- ldr r12, [rp, #4]
- mov r7, #0
- umaal r7, r8, r5, v0
- add rp, rp, #4
b L(lo0)
L(fi1): ldr r4, [up], #4
- ldr r7, [rp, #0]
+ ldr r6, [rp], #8
subs n, n, #1
- mov r6, #0
beq L(1)
ldr r5, [up], #4
- ldr r8, [rp, #4]
- umaal r6, r7, r4, v0
- add rp, rp, #8
b L(lo1)
L(fi2): ldr r5, [up], #4
- ldr r6, [rp, #0]
+ ldr r7, [rp], #12
ldr r4, [up], #4
- ldr r7, [rp, #4]
- mov r12, #0
- umaal r12, r6, r5, v0
- subs n, n, #2
- add rp, rp, #12
- beq L(end)
+ b L(lo2)
ALIGN(16)
-L(top): ldr r5, [up], #4
- ldr r8, [rp, #-4]
- umaal r6, r7, r4, v0
- str r12, [rp, #-12]
-L(lo1): ldr r4, [up], #4
- ldr r12, [rp, #0]
- umaal r7, r8, r5, v0
+L(top): ldr r6, [rp, #-8]
+ ldr r5, [up], #4
+ str r7, [rp, #-12]
+L(lo1): umaal r6, r12, r4, v0
+ ldr r7, [rp, #-4]
+ ldr r4, [up], #4
str r6, [rp, #-8]
-L(lo0): ldr r5, [up], #4
- ldr r6, [rp, #4]
- umaal r8, r12, r4, v0
+L(lo0): umaal r7, r12, r5, v0
+ ldr r6, [rp, #0]
+ ldr r5, [up], #4
str r7, [rp, #-4]
-L(lo3): ldr r4, [up], #4
- ldr r7, [rp, #8]
- umaal r12, r6, r5, v0
- str r8, [rp], #16
+L(lo3): umaal r6, r12, r4, v0
+ ldr r7, [rp, #4]
+ ldr r4, [up], #4
+ str r6, [rp], #16
+L(lo2): umaal r7, r12, r5, v0
subs n, n, #4
bhi L(top)
-L(end): umaal r6, r7, r4, v0
- str r12, [rp, #-12]
+ ldr r6, [rp, #-8]
+ str r7, [rp, #-12]
+L(1): umaal r6, r12, r4, v0
str r6, [rp, #-8]
- mov r0, r7
- ldmfd sp!, { r4, r5, r6, r7, r8 }
- bx lr
-
-L(1): umaal r6, r7, r4, v0
- str r6, [rp, #0]
- mov r0, r7
- ldmfd sp!, { r4, r5, r6, r7, r8 }
+ mov r0, r12
+ ldmfd sp!, { r4, r5, r6, r7 }
bx lr
EPILOGUE()
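
The rewritten loop leans on the ARMv6 umaal instruction (unsigned multiply
accumulate accumulate long), which computes {hi,lo} = lo + hi + a*b in a
single step and can never overflow, since (2^32-1)^2 + 2*(2^32-1) = 2^64-1.
For reference, mpn_addmul_1's contract in C is roughly the sketch below;
ref_addmul_1 and its scaffolding are illustrative names, not GMP code:

#include <stdint.h>
#include <stddef.h>

typedef uint32_t mp_limb_t;   /* 32-bit limbs, as on ARM */

/* rp[0..n-1] += up[0..n-1] * v0, returning the carry-out limb.
   Each step mirrors one umaal: {hi,lo} = lo + hi + a*b. */
mp_limb_t
ref_addmul_1 (mp_limb_t *rp, const mp_limb_t *up, size_t n, mp_limb_t v0)
{
  mp_limb_t cy = 0;
  for (size_t i = 0; i < n; i++)
    {
      uint64_t t = (uint64_t) up[i] * v0 + rp[i] + cy;
      rp[i] = (mp_limb_t) t;
      cy = (mp_limb_t) (t >> 32);
    }
  return cy;
}

Because umaal folds the running carry into the multiply itself, the
four-way unrolled loop needs no separate add-with-carry chain, which is
presumably what removes the alignment-dependent 4.49 c/l slowdown noted
in the deleted comment.
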
diff -r 9068add0d4fb -r 2229dbbb2b06 mpn/arm/v6/mul_1.asm
--- a/mpn/arm/v6/mul_1.asm Fri Apr 27 11:15:36 2012 +0200
+++ b/mpn/arm/v6/mul_1.asm Sat Apr 28 20:07:15 2012 +0200
@@ -30,7 +30,6 @@
C * Micro-optimise feed-in code.
C * Optimise for n=1,2 by delaying register saving.
C * Try using ldm/stm.
-C * Performance degenerates to 4.49 c/l for some alignments.
define(`rp',`r0')
define(`up',`r1')
@@ -39,71 +38,65 @@
ASM_START()
PROLOGUE(mpn_mul_1)
- stmfd sp!, { r4, r5, r6, r7, r8 }
+ stmfd sp!, { r4, r5, r6, r7 }
- ands r12, n, #3
+ ands r6, n, #3
+ mov r12, #0
beq L(fi0)
- cmp r12, #2
+ cmp r6, #2
bcc L(fi1)
beq L(fi2)
L(fi3): ldr r4, [up], #4
+ mov r6, #0
ldr r5, [up], #4
- umull r8, r12, r4, v0
b L(lo3)
L(fi0): ldr r5, [up], #4
+ add rp, rp, #4
+ mov r7, #0
ldr r4, [up], #4
- umull r7, r8, r5, v0
- add rp, rp, #4
b L(lo0)
L(fi1): ldr r4, [up], #4
+ mov r6, #0
+ add rp, rp, #8
subs n, n, #1
beq L(1)
ldr r5, [up], #4
- umull r6, r7, r4, v0
- add rp, rp, #8
b L(lo1)
L(fi2): ldr r5, [up], #4
+ add rp, rp, #12
+ mov r7, #0
ldr r4, [up], #4
- umull r12, r6, r5, v0
- subs n, n, #2
- add rp, rp, #12
- beq L(end)
+ b L(lo2)
ALIGN(16)
-L(top): ldr r5, [up], #4
+L(top): mov r6, #0
+ ldr r5, [up], #4
+ str r7, [rp, #-12]
+L(lo1): umaal r6, r12, r4, v0
mov r7, #0
- umaal r6, r7, r4, v0
- str r12, [rp, #-12]
-L(lo1): ldr r4, [up], #4
- mov r8, #0
- umaal r7, r8, r5, v0
+ ldr r4, [up], #4
str r6, [rp, #-8]
-L(lo0): ldr r5, [up], #4
- mov r12, #0
- umaal r8, r12, r4, v0
+L(lo0): umaal r7, r12, r5, v0
+ mov r6, #0
+ ldr r5, [up], #4
str r7, [rp, #-4]
-L(lo3): subs n, n, #4
+L(lo3): umaal r6, r12, r4, v0
+ mov r7, #0
ldr r4, [up], #4
- mov r6, #0
- umaal r12, r6, r5, v0
- str r8, [rp], #16
+ str r6, [rp], #16
+L(lo2): umaal r7, r12, r5, v0
+ subs n, n, #4
bhi L(top)
-L(end): mov r7, #0
- umaal r6, r7, r4, v0
- str r12, [rp, #-12]
+ mov r6, #0
+ str r7, [rp, #-12]
+L(1): umaal r6, r12, r4, v0
str r6, [rp, #-8]
- mov r0, r7
- ldmfd sp!, { r4, r5, r6, r7, r8 }
- bx lr
-
-L(1): umull r6, r7, r4, v0
- str r6, [rp, #0]
- mov r0, r7
- ldmfd sp!, { r4, r5, r6, r7, r8 }
+ mov r0, r12
+ ldmfd sp!, { r4, r5, r6, r7 }
bx lr
EPILOGUE()
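
mpn_mul_1 is the same recurrence without reading the destination:
rp[] = up[] * v0, plus a returned carry limb. The rewrite shares the
addmul skeleton by zeroing one umaal accumulator before each multiply
(the mov rX, #0 instructions above), since {hi,lo} = 0 + hi + a*b is
exactly a mul-with-carry step. A sketch under the same caveats as the
ref_addmul_1 model earlier:

#include <stdint.h>
#include <stddef.h>

typedef uint32_t mp_limb_t;

/* rp[0..n-1] = up[0..n-1] * v0, returning the high carry limb. */
mp_limb_t
ref_mul_1 (mp_limb_t *rp, const mp_limb_t *up, size_t n, mp_limb_t v0)
{
  mp_limb_t cy = 0;
  for (size_t i = 0; i < n; i++)
    {
      uint64_t t = (uint64_t) up[i] * v0 + cy;
      rp[i] = (mp_limb_t) t;
      cy = (mp_limb_t) (t >> 32);
    }
  return cy;
}
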
diff -r 9068add0d4fb -r 2229dbbb2b06 mpn/thumb/add_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/thumb/add_n.asm Sat Apr 28 20:07:15 2012 +0200
@@ -0,0 +1,52 @@
+dnl ARM/Thumb mpn_add_n.
+
+dnl Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', r0)
+define(`up', r1)
+define(`vp', r2)
+define(`n', r3)
+
+ASM_START()
+ .thumb
+PROLOGUE(mpn_add_nc)
+ push {r4, r5, r6}
+ ldr r6, [sp, #12] C init carry save register
+ sub r6, #1
+ b L(top)
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+ push {r4, r5, r6}
+ neg r6, n C init carry save register
+
+L(top): ldmia up!, {r4} C load next limb from S1
+ cmp n, r6 C tricky carry restore
+ ldmia vp!, {r5} C load next limb from S2
+ adc r4, r5
+ stmia rp!, {r4} C store result limb to RES
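
The "tricky carry restore" commented on the cmp exists because Thumb-1
has no instruction to save or restore the carry flag directly. Instead,
r6 holds a value chosen so that cmp n, r6 recreates the flag: r6 = -n
leaves carry clear after the compare for any n below 2^31 (the mpn_add_n
entry), while r6 = cy - 1 makes the compare set carry exactly when the
incoming carry cy is 1 (the mpn_add_nc entry). A small C model of the
flag behaviour; the helper name is illustrative:

#include <assert.h>
#include <stdint.h>

/* ARM convention: after "cmp a, b" the carry flag C is set
   iff no borrow occurred, i.e. iff a >= b unsigned. */
static int
carry_after_cmp (uint32_t a, uint32_t b)
{
  return a >= b;
}

int
main (void)
{
  uint32_t n = 1000;  /* any limb count below 2^31 */

  /* mpn_add_n: r6 = -n, so the first compare leaves C clear. */
  assert (carry_after_cmp (n, -n) == 0);

  /* mpn_add_nc: r6 = cy - 1, so the compare reproduces cy. */
  assert (carry_after_cmp (n, 1u - 1) == 1);  /* cy = 1 */
  assert (carry_after_cmp (n, 0u - 1) == 0);  /* cy = 0 */
  return 0;
}

The loop step that maintains this invariant as n counts down, along with
the rest of add_n.asm and both sub_n files, lies past the 300-line
truncation point of the quoted diff.
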