[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Mar 20 00:54:30 CET 2013
details: /var/hg/gmp/rev/05d5d9392bc7
changeset: 15621:05d5d9392bc7
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 20 00:51:02 2013 +0100
description:
Reorganise neon code, in order to pass proper compiler options.
details: /var/hg/gmp/rev/e019df52e18b
changeset: 15622:e019df52e18b
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 20 00:53:16 2013 +0100
description:
Move cora15 neon code into "neon" subdirectory.
details: /var/hg/gmp/rev/f2549c92f8d1
changeset: 15623:f2549c92f8d1
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 20 00:54:25 2013 +0100
description:
ChangeLog
diffstat:
ChangeLog | 3 +
configure.ac | 47 ++++++++++++-----
mpn/arm/v7a/cora15/copyd.asm | 98 ---------------------------------------
mpn/arm/v7a/cora15/copyi.asm | 78 -------------------------------
mpn/arm/v7a/cora15/neon/copyd.asm | 98 +++++++++++++++++++++++++++++++++++++++
mpn/arm/v7a/cora15/neon/copyi.asm | 78 +++++++++++++++++++++++++++++++
6 files changed, 211 insertions(+), 191 deletions(-)
diffs (truncated from 462 to 300 lines):
diff -r ba0d8ba83621 -r f2549c92f8d1 ChangeLog
--- a/ChangeLog Wed Mar 20 00:23:49 2013 +0100
+++ b/ChangeLog Wed Mar 20 00:54:25 2013 +0100
@@ -1,5 +1,8 @@
2013-03-20 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/arm/v7a/cora15/neon/copyi.asm: Move from "..".
+ * mpn/arm/v7a/cora15/neon/copyd.asm: Likewise.
+
* config.guess: Tack on "neon" for appropriate arm CPUs.
* configure.ac (arm*-*-*): Recognise neon suffix for a8, a9, and a15.
diff -r ba0d8ba83621 -r f2549c92f8d1 configure.ac
--- a/configure.ac Wed Mar 20 00:23:49 2013 +0100
+++ b/configure.ac Wed Mar 20 00:54:25 2013 +0100
@@ -566,11 +566,15 @@
arm*-*-*)
gcc_cflags="$gcc_cflags $fomit_frame_pointer"
- gcc_cflags_optlist="arch tune"
+ gcc_cflags_optlist="arch neon tune"
gcc_cflags_maybe="-marm"
gcc_testlist="gcc-arm-umodsi"
GMP_INCLUDE_MPN(arm/arm-defs.m4)
CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+
+ # FIXME: We make mandatory compiler options optional here. We should
+ # either enforce them, or organise to strip paths as the corresponding
+ # options fail.
case $host_cpu in
armsa1 | armv4*)
;;
@@ -586,34 +590,47 @@
path="arm/v6t2 arm/v6 arm/v5 arm"
gcc_cflags_arch="-march=armv6"
;;
- armcortexa9 | armcortexa9neon)
+ armcortexa5 | armv7a*)
+ path="arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ ;;
+ armcortexa8)
+ path="arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_tune="-mtune=cortex-a8"
+ ;;
+ armcortexa8neon)
+ path="arm/v6t2 arm/v6 arm/v5 arm/neon arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_neon="-mfpu=neon"
+ gcc_cflags_tune="-mtune=cortex-a8"
+ ;;
+ armcortexa9)
path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
gcc_cflags_arch="-march=armv7-a"
gcc_cflags_tune="-mtune=cortex-a9"
;;
- armcortexa15 | armcortexa15neon)
+ armcortexa9neon)
+ path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm/neon arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_neon="-mfpu=neon"
+ gcc_cflags_tune="-mtune=cortex-a9"
+ ;;
+ armcortexa15)
path="arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
gcc_cflags_arch="-march=armv7-a"
gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
;;
- armcortexa5 | armcortexa8 | armcortexa8neon | armv7a*)
- path="arm/v6t2 arm/v6 arm/v5 arm"
+ armcortexa15neon)
+ path="arm/v7a/cora15/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm/neon arm"
gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_neon="-mfpu=neon"
+ gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
;;
*)
path="arm"
;;
esac
-
- case $host_cpu in
- *neon)
- path2=""
- for p in $path
- do path2="$path2 $p/neon $p"
- done
- path="$path2"
- ;;
- esac
;;
diff -r ba0d8ba83621 -r f2549c92f8d1 mpn/arm/v7a/cora15/copyd.asm
--- a/mpn/arm/v7a/cora15/copyd.asm Wed Mar 20 00:23:49 2013 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-dnl ARM mpn_copyd.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM ?
-C XScale ?
-C Cortex-A8 ?
-C Cortex-A9 1.75 slower than core register code
-C Cortex-A15 0.52
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n', `r2')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
- add rp, rp, n, lsl #2
- add up, up, n, lsl #2
-
- cmp n, #7
- ble L(bc)
-
-C Copy until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- sub up, up, #4
- vld1.32 {d22[0]}, [up]
- sub n, n, #1
- sub rp, rp, #4
- vst1.32 {d22[0]}, [rp]
-L(al1): tst rp, #8
- beq L(al2)
- sub up, up, #8
- vld1.32 {d22}, [up]
- sub n, n, #2
- sub rp, rp, #8
- vst1.32 {d22}, [rp:64]
-L(al2): sub up, up, #16
- vld1.32 {d26-d27}, [up]
- subs n, n, #12
- sub rp, rp, #16 C offset rp for loop
- blt L(end)
-
- sub up, up, #16 C offset up for loop
- mov r12, #-16
-
- ALIGN(16)
-L(top): vld1.32 {d22-d23}, [up], r12
- vst1.32 {d26-d27}, [rp:128], r12
- vld1.32 {d26-d27}, [up], r12
- vst1.32 {d22-d23}, [rp:128], r12
- subs n, n, #8
- bge L(top)
-
- add up, up, #16 C undo up offset
- C rp offset undoing folded
-L(end): vst1.32 {d26-d27}, [rp:128]
-
-C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc): tst n, #4
- beq L(tl1)
- sub up, up, #16
- vld1.32 {d22-d23}, [up]
- sub rp, rp, #16
- vst1.32 {d22-d23}, [rp]
-L(tl1): tst n, #2
- beq L(tl2)
- sub up, up, #8
- vld1.32 {d22}, [up]
- sub rp, rp, #8
- vst1.32 {d22}, [rp]
-L(tl2): tst n, #1
- beq L(tl3)
- sub up, up, #4
- vld1.32 {d22[0]}, [up]
- sub rp, rp, #4
- vst1.32 {d22[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff -r ba0d8ba83621 -r f2549c92f8d1 mpn/arm/v7a/cora15/copyi.asm
--- a/mpn/arm/v7a/cora15/copyi.asm Wed Mar 20 00:23:49 2013 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-dnl ARM mpn_copyi.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM ?
-C XScale ?
-C Cortex-A8 ?
-C Cortex-A9 1.75 slower than core register code
-C Cortex-A15 0.52
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n', `r2')
-
-ASM_START()
-PROLOGUE(mpn_copyi)
- cmp n, #7
- ble L(bc)
-
-C Copy until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- vld1.32 {d22[0]}, [up]!
- sub n, n, #1
- vst1.32 {d22[0]}, [rp]!
-L(al1): tst rp, #8
- beq L(al2)
- vld1.32 {d22}, [up]!
- sub n, n, #2
- vst1.32 {d22}, [rp:64]!
-L(al2): vld1.32 {d26-d27}, [up]!
- subs n, n, #12
- blt L(end)
-
- ALIGN(16)
-L(top): vld1.32 {d22-d23}, [up]!
- vst1.32 {d26-d27}, [rp:128]!
- vld1.32 {d26-d27}, [up]!
- vst1.32 {d22-d23}, [rp:128]!
- subs n, n, #8
- bge L(top)
-
-L(end): vst1.32 {d26-d27}, [rp:128]!
-
-C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc): tst n, #4
- beq L(tl1)
- vld1.32 {d22-d23}, [up]!
- vst1.32 {d22-d23}, [rp]!
-L(tl1): tst n, #2
- beq L(tl2)
- vld1.32 {d22}, [up]!
- vst1.32 {d22}, [rp]!
-L(tl2): tst n, #1
- beq L(tl3)
- vld1.32 {d22[0]}, [up]!
- vst1.32 {d22[0]}, [rp]!
-L(tl3): bx lr
-EPILOGUE()
diff -r ba0d8ba83621 -r f2549c92f8d1 mpn/arm/v7a/cora15/neon/copyd.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/neon/copyd.asm Wed Mar 20 00:54:25 2013 +0100
@@ -0,0 +1,98 @@
+dnl ARM mpn_copyd.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
More information about the gmp-commit mailing list