[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org
Wed Mar 20 00:54:30 CET 2013


details:   /var/hg/gmp/rev/05d5d9392bc7
changeset: 15621:05d5d9392bc7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 20 00:51:02 2013 +0100
description:
Reorganise neon code, in order to pass proper compiler options.

details:   /var/hg/gmp/rev/e019df52e18b
changeset: 15622:e019df52e18b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 20 00:53:16 2013 +0100
description:
Move cora15 neon code into "neon" subdirectory.

details:   /var/hg/gmp/rev/f2549c92f8d1
changeset: 15623:f2549c92f8d1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 20 00:54:25 2013 +0100
description:
ChangeLog

diffstat:

 ChangeLog                         |   3 +
 configure.ac                      |  47 ++++++++++++-----
 mpn/arm/v7a/cora15/copyd.asm      |  98 ---------------------------------------
 mpn/arm/v7a/cora15/copyi.asm      |  78 -------------------------------
 mpn/arm/v7a/cora15/neon/copyd.asm |  98 +++++++++++++++++++++++++++++++++++++++
 mpn/arm/v7a/cora15/neon/copyi.asm |  78 +++++++++++++++++++++++++++++++
 6 files changed, 211 insertions(+), 191 deletions(-)

diffs (truncated from 462 to 300 lines):

diff -r ba0d8ba83621 -r f2549c92f8d1 ChangeLog
--- a/ChangeLog	Wed Mar 20 00:23:49 2013 +0100
+++ b/ChangeLog	Wed Mar 20 00:54:25 2013 +0100
@@ -1,5 +1,8 @@
 2013-03-20  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/arm/v7a/cora15/neon/copyi.asm: Move from "..".
+	* mpn/arm/v7a/cora15/neon/copyd.asm: Likewise.
+
 	* config.guess: Tack on "neon" for appropriate arm CPUs.
 	* configure.ac (arm*-*-*): Recognise neon suffix for a8, a9, and a15.
 
diff -r ba0d8ba83621 -r f2549c92f8d1 configure.ac
--- a/configure.ac	Wed Mar 20 00:23:49 2013 +0100
+++ b/configure.ac	Wed Mar 20 00:54:25 2013 +0100
@@ -566,11 +566,15 @@
 
   arm*-*-*)
     gcc_cflags="$gcc_cflags $fomit_frame_pointer"
-    gcc_cflags_optlist="arch tune"
+    gcc_cflags_optlist="arch neon tune"
     gcc_cflags_maybe="-marm"
     gcc_testlist="gcc-arm-umodsi"
     GMP_INCLUDE_MPN(arm/arm-defs.m4)
     CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+
+    # FIXME: We make mandatory compiler options optional here.  We should
+    # either enforce them, or organise to strip paths as the corresponding
+    # options fail.
     case $host_cpu in
       armsa1 | armv4*)
 	;;
@@ -586,34 +590,47 @@
 	path="arm/v6t2 arm/v6 arm/v5 arm"
 	gcc_cflags_arch="-march=armv6"
 	;;
-      armcortexa9 | armcortexa9neon)
+      armcortexa5 | armv7a*)
+	path="arm/v6t2 arm/v6 arm/v5 arm"
+	gcc_cflags_arch="-march=armv7-a"
+	;;
+      armcortexa8)
+	path="arm/v6t2 arm/v6 arm/v5 arm"
+	gcc_cflags_arch="-march=armv7-a"
+	gcc_cflags_tune="-mtune=cortex-a8"
+	;;
+      armcortexa8neon)
+	path="arm/v6t2 arm/v6 arm/v5 arm/neon arm"
+	gcc_cflags_arch="-march=armv7-a"
+	gcc_cflags_neon="-mfpu=neon"
+	gcc_cflags_tune="-mtune=cortex-a8"
+	;;
+      armcortexa9)
 	path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
 	gcc_cflags_arch="-march=armv7-a"
 	gcc_cflags_tune="-mtune=cortex-a9"
 	;;
-      armcortexa15 | armcortexa15neon)
+      armcortexa9neon)
+	path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm/neon arm"
+	gcc_cflags_arch="-march=armv7-a"
+	gcc_cflags_neon="-mfpu=neon"
+	gcc_cflags_tune="-mtune=cortex-a9"
+	;;
+      armcortexa15)
 	path="arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
 	gcc_cflags_arch="-march=armv7-a"
 	gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
 	;;
-      armcortexa5 | armcortexa8 | armcortexa8neon | armv7a*)
-	path="arm/v6t2 arm/v6 arm/v5 arm"
+      armcortexa15neon)
+	path="arm/v7a/cora15/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm/neon arm"
 	gcc_cflags_arch="-march=armv7-a"
+	gcc_cflags_neon="-mfpu=neon"
+	gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
 	;;
       *)
 	path="arm"
 	;;
     esac
-
-    case $host_cpu in
-      *neon)
-	path2=""
-	for p in $path
-	  do path2="$path2 $p/neon $p"
-	  done
-	path="$path2"
-	;;
-    esac
     ;;
 
 
diff -r ba0d8ba83621 -r f2549c92f8d1 mpn/arm/v7a/cora15/copyd.asm
--- a/mpn/arm/v7a/cora15/copyd.asm	Wed Mar 20 00:23:49 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-dnl  ARM mpn_copyd.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.75		slower than core register code
-C Cortex-A15	 0.52
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
-	add	rp, rp, n, lsl #2
-	add	up, up, n, lsl #2
-
-	cmp	n, #7
-	ble	L(bc)
-
-C Copy until rp is 128-bit aligned
-	tst	rp, #4
-	beq	L(al1)
-	sub	up, up, #4
-	vld1.32	{d22[0]}, [up]
-	sub	n, n, #1
-	sub	rp, rp, #4
-	vst1.32	{d22[0]}, [rp]
-L(al1):	tst	rp, #8
-	beq	L(al2)
-	sub	up, up, #8
-	vld1.32	{d22}, [up]
-	sub	n, n, #2
-	sub	rp, rp, #8
-	vst1.32	{d22}, [rp:64]
-L(al2):	sub	up, up, #16
-	vld1.32	{d26-d27}, [up]
-	subs	n, n, #12
-	sub	rp, rp, #16			C offset rp for loop
-	blt	L(end)
-
-	sub	up, up, #16			C offset up for loop
-	mov	r12, #-16
-
-	ALIGN(16)
-L(top):	vld1.32	{d22-d23}, [up], r12
-	vst1.32	{d26-d27}, [rp:128], r12
-	vld1.32	{d26-d27}, [up], r12
-	vst1.32	{d22-d23}, [rp:128], r12
-	subs	n, n, #8
-	bge	L(top)
-
-	add	up, up, #16			C undo up offset
-						C rp offset undoing folded
-L(end):	vst1.32	{d26-d27}, [rp:128]
-
-C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tst	n, #4
-	beq	L(tl1)
-	sub	up, up, #16
-	vld1.32	{d22-d23}, [up]
-	sub	rp, rp, #16
-	vst1.32	{d22-d23}, [rp]
-L(tl1):	tst	n, #2
-	beq	L(tl2)
-	sub	up, up, #8
-	vld1.32	{d22}, [up]
-	sub	rp, rp, #8
-	vst1.32	{d22}, [rp]
-L(tl2):	tst	n, #1
-	beq	L(tl3)
-	sub	up, up, #4
-	vld1.32	{d22[0]}, [up]
-	sub	rp, rp, #4
-	vst1.32	{d22[0]}, [rp]
-L(tl3):	bx	lr
-EPILOGUE()
diff -r ba0d8ba83621 -r f2549c92f8d1 mpn/arm/v7a/cora15/copyi.asm
--- a/mpn/arm/v7a/cora15/copyi.asm	Wed Mar 20 00:23:49 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-dnl  ARM mpn_copyi.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.75		slower than core register code
-C Cortex-A15	 0.52
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_copyi)
-	cmp	n, #7
-	ble	L(bc)
-
-C Copy until rp is 128-bit aligned
-	tst	rp, #4
-	beq	L(al1)
-	vld1.32	{d22[0]}, [up]!
-	sub	n, n, #1
-	vst1.32	{d22[0]}, [rp]!
-L(al1):	tst	rp, #8
-	beq	L(al2)
-	vld1.32	{d22}, [up]!
-	sub	n, n, #2
-	vst1.32	{d22}, [rp:64]!
-L(al2):	vld1.32	{d26-d27}, [up]!
-	subs	n, n, #12
-	blt	L(end)
-
-	ALIGN(16)
-L(top):	vld1.32	{d22-d23}, [up]!
-	vst1.32	{d26-d27}, [rp:128]!
-	vld1.32	{d26-d27}, [up]!
-	vst1.32	{d22-d23}, [rp:128]!
-	subs	n, n, #8
-	bge	L(top)
-
-L(end):	vst1.32	{d26-d27}, [rp:128]!
-
-C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tst	n, #4
-	beq	L(tl1)
-	vld1.32	{d22-d23}, [up]!
-	vst1.32	{d22-d23}, [rp]!
-L(tl1):	tst	n, #2
-	beq	L(tl2)
-	vld1.32	{d22}, [up]!
-	vst1.32	{d22}, [rp]!
-L(tl2):	tst	n, #1
-	beq	L(tl3)
-	vld1.32	{d22[0]}, [up]!
-	vst1.32	{d22[0]}, [rp]!
-L(tl3):	bx	lr
-EPILOGUE()
diff -r ba0d8ba83621 -r f2549c92f8d1 mpn/arm/v7a/cora15/neon/copyd.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/neon/copyd.asm	Wed Mar 20 00:54:25 2013 +0100
@@ -0,0 +1,98 @@
+dnl  ARM mpn_copyd.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


More information about the gmp-commit mailing list