[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu May 2 21:39:03 CEST 2013
details: /var/hg/gmp/rev/cc0037a9e93d
changeset: 15784:cc0037a9e93d
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu May 02 21:03:48 2013 +0200
description:
ARM Neon com for A15.
details: /var/hg/gmp/rev/ad543a795a09
changeset: 15785:ad543a795a09
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu May 02 21:03:55 2013 +0200
description:
ChangeLog
details: /var/hg/gmp/rev/22159501ed34
changeset: 15786:22159501ed34
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu May 02 21:38:32 2013 +0200
description:
Suppress dead pointer update.
details: /var/hg/gmp/rev/80437a229f4d
changeset: 15787:80437a229f4d
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu May 02 21:38:47 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 15 ++++++-
mpn/arm/copyd.asm | 2 +-
mpn/arm/copyi.asm | 2 +-
mpn/arm/neon/logops_n.asm | 6 +-
mpn/arm/neon/tabselect.asm | 2 +-
mpn/arm/rshift.asm | 2 +-
mpn/arm/tabselect.asm | 2 +-
mpn/arm/v6/dive_1.asm | 2 +-
mpn/arm/v7a/cora15/neon/com.asm | 86 +++++++++++++++++++++++++++++++++++++++
mpn/arm/v7a/cora15/neon/copyi.asm | 4 +-
10 files changed, 111 insertions(+), 12 deletions(-)
diffs (221 lines):
diff -r b5fac5fb3c21 -r 80437a229f4d ChangeLog
--- a/ChangeLog Wed May 01 20:54:14 2013 +0200
+++ b/ChangeLog Thu May 02 21:38:47 2013 +0200
@@ -1,3 +1,16 @@
+2013-05-02 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/arm/copyd.asm: Suppress dead pointer update.
+ * mpn/arm/copyi.asm: Likewise.
+ * mpn/arm/neon/logops_n.asm: Likewise.
+ * mpn/arm/neon/tabselect.asm: Likewise.
+ * mpn/arm/rshift.asm: Likewise.
+ * mpn/arm/tabselect.asm: Likewise.
+ * mpn/arm/v6/dive_1.asm: Likewise.
+ * mpn/arm/v7a/cora15/neon/copyi.asm: Likewise.
+
+ * mpn/arm/v7a/cora15/neon/com.asm: New file.
+
2013-05-01 Torbjorn Granlund <tege at gmplib.org>
* mpn/sparc64/ultrasparct3/aormul_4.asm: New file.
@@ -31,7 +44,7 @@
2013-04-27 Mike Frysinger <vapier at gentoo.org>
- * configure.ac (arm*-*-*): Set up path also for plainest CPU varaints.
+ * configure.ac (arm*-*-*): Set up path also for plainest CPU variants.
2013-04-27 Torbjorn Granlund <tege at gmplib.org>
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/copyd.asm
--- a/mpn/arm/copyd.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/copyd.asm Thu May 02 21:38:47 2013 +0200
@@ -67,7 +67,7 @@
ldmda up!, { r3,r4,r5,r12 }
bne L(top)
-L(end): stmda rp!, { r3,r4,r5,r12 }
+L(end): stmda rp, { r3,r4,r5,r12 }
pop { r4-r5 }
L(rtn): bx lr
EPILOGUE()
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/copyi.asm
--- a/mpn/arm/copyi.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/copyi.asm Thu May 02 21:38:47 2013 +0200
@@ -62,7 +62,7 @@
ldmia up!, { r3,r4,r5,r12 }
bne L(top)
-L(end): stm rp!, { r3,r4,r5,r12 }
+L(end): stm rp, { r3,r4,r5,r12 }
pop { r4-r5 }
L(rtn): bx lr
EPILOGUE()
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/neon/logops_n.asm
--- a/mpn/arm/neon/logops_n.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/neon/logops_n.asm Thu May 02 21:38:47 2013 +0200
@@ -130,10 +130,10 @@
vst1.32 {d0}, [rp]!
L(tl2): tst n, #1
beq L(tl3)
- vld1.32 {d0[0]}, [up]!
- vld1.32 {d1[0]}, [vp]!
+ vld1.32 {d0[0]}, [up]
+ vld1.32 {d1[0]}, [vp]
LOGOP( d0, d0, d1)
POSTOP( d0, d0)
- vst1.32 {d0[0]}, [rp]!
+ vst1.32 {d0[0]}, [rp]
L(tl3): bx lr
EPILOGUE()
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/neon/tabselect.asm
--- a/mpn/arm/neon/tabselect.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/neon/tabselect.asm Thu May 02 21:38:47 2013 +0200
@@ -122,7 +122,7 @@
add tp, tp, n, lsl #2
subs i, i, #1
bne L(tp1)
- vst1.32 {d4[0]}, [rp]!
+ vst1.32 {d4[0]}, [rp]
L(b000):pop {r4-r5}
bx r14
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/rshift.asm
--- a/mpn/arm/rshift.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/rshift.asm Thu May 02 21:38:47 2013 +0200
@@ -68,7 +68,7 @@
L(end): orr r7, r7, r6, lsl tnc
str r7, [rp], #4
mov r7, r6, lsr cnt
-L(1): str r7, [rp], #4
+L(1): str r7, [rp]
mov r0, r4, lsl tnc
pop {r4, r6, r7, r8}
bx r14
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/tabselect.asm
--- a/mpn/arm/tabselect.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/tabselect.asm Thu May 02 21:38:47 2013 +0200
@@ -96,7 +96,7 @@
orr r8, r8, r4
orr r9, r9, r5
bge L(tp2)
- stmia rp!, {r8,r9}
+ stmia rp, {r8,r9}
pop {r4-r11, r14}
bx lr
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/v6/dive_1.asm
--- a/mpn/arm/v6/dive_1.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/v6/dive_1.asm Thu May 02 21:38:47 2013 +0200
@@ -90,7 +90,7 @@
subs n, n, #1
bne L(top)
-L(end): str r5, [rp], #4
+L(end): str r5, [rp]
pop {r4,r5,r6,r7,r8,r9}
bx r14
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/v7a/cora15/neon/com.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/neon/com.asm Thu May 02 21:38:47 2013 +0200
@@ -0,0 +1,86 @@
+dnl ARM Neon mpn_com optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 2.1
+C Cortex-A15 0.65
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+ cmp n, #7
+ ble L(bc)
+
+C Perform a few initial operations until rp is 128-bit aligned
+ tst rp, #4
+ beq L(al1)
+ vld1.32 {d0[0]}, [up]!
+ sub n, n, #1
+ vmvn d0, d0
+ vst1.32 {d0[0]}, [rp]!
+L(al1): tst rp, #8
+ beq L(al2)
+ vld1.32 {d0}, [up]!
+ sub n, n, #2
+ vmvn d0, d0
+ vst1.32 {d0}, [rp:64]!
+L(al2): vld1.32 {q2}, [up]!
+ subs n, n, #12
+ blt L(end)
+
+ ALIGN(16)
+L(top): vld1.32 {q0}, [up]!
+ vmvn q2, q2
+ subs n, n, #8
+ vst1.32 {q2}, [rp:128]!
+ vld1.32 {q2}, [up]!
+ vmvn q0, q0
+ vst1.32 {q0}, [rp:128]!
+ bge L(top)
+
+L(end): vmvn q2, q2
+ vst1.32 {q2}, [rp:128]!
+
+C Handle last 0-7 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc): tst n, #4
+ beq L(tl1)
+ vld1.32 {q0}, [up]!
+ vmvn q0, q0
+ vst1.32 {q0}, [rp]!
+L(tl1): tst n, #2
+ beq L(tl2)
+ vld1.32 {d0}, [up]!
+ vmvn d0, d0
+ vst1.32 {d0}, [rp]!
+L(tl2): tst n, #1
+ beq L(tl3)
+ vld1.32 {d0[0]}, [up]
+ vmvn d0, d0
+ vst1.32 {d0[0]}, [rp]
+L(tl3): bx lr
+EPILOGUE()
diff -r b5fac5fb3c21 -r 80437a229f4d mpn/arm/v7a/cora15/neon/copyi.asm
--- a/mpn/arm/v7a/cora15/neon/copyi.asm Wed May 01 20:54:14 2013 +0200
+++ b/mpn/arm/v7a/cora15/neon/copyi.asm Thu May 02 21:38:47 2013 +0200
@@ -73,7 +73,7 @@
vst1.32 {d22}, [rp]!
L(tl2): tst n, #1
beq L(tl3)
- vld1.32 {d22[0]}, [up]!
- vst1.32 {d22[0]}, [rp]!
+ vld1.32 {d22[0]}, [up]
+ vst1.32 {d22[0]}, [rp]
L(tl3): bx lr
EPILOGUE()
More information about the gmp-commit
mailing list