[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Apr 15 21:56:30 CEST 2013


details:   /var/hg/gmp/rev/149b7cb35a5a
changeset: 15719:149b7cb35a5a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 15 21:32:51 2013 +0200
description:
Add cycle counts.

details:   /var/hg/gmp/rev/b30bd9ea2c13
changeset: 15720:b30bd9ea2c13
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 15 21:41:12 2013 +0200
description:
Add an x86/mmx tabselect.

details:   /var/hg/gmp/rev/a4bffd7a3cc0
changeset: 15721:a4bffd7a3cc0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 15 21:41:52 2013 +0200
description:
Add an x86/mmx tabselect.

details:   /var/hg/gmp/rev/0b90b31bd2b7
changeset: 15722:0b90b31bd2b7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 15 21:53:23 2013 +0200
description:
Add generic sparc64 tabselect.

diffstat:

 ChangeLog                   |    5 +
 configure.ac                |   46 ++++++------
 mpn/powerpc64/tabselect.asm |    2 +-
 mpn/sparc64/tabselect.asm   |  151 +++++++++++++++++++++++++++++++++++++++++++
 mpn/x86/mmx/tabselect.asm   |  152 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 332 insertions(+), 24 deletions(-)

diffs (truncated from 505 to 300 lines):

diff -r 827551142828 -r 0b90b31bd2b7 ChangeLog
--- a/ChangeLog	Mon Apr 15 21:32:16 2013 +0200
+++ b/ChangeLog	Mon Apr 15 21:53:23 2013 +0200
@@ -1,5 +1,10 @@
 2013-04-15  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/sparc64/tabselect.asm: New file.
+
+	* mpn/x86/mmx/tabselect.asm: New file.
+	* configure.ac (x86): Add x86/mmx to path for relevant CPUs.
+
 	* mpn/sparc64/gcd_1.asm: Use rdpc for PIC.
 	* mpn/sparc64/ultrasparct3/mode1o.asm: Use rdpc for PIC.
 	* mpn/sparc64/ultrasparct3/dive_1.asm: Use rdpc for PIC.
diff -r 827551142828 -r 0b90b31bd2b7 configure.ac
--- a/configure.ac	Mon Apr 15 21:32:16 2013 +0200
+++ b/configure.ac	Mon Apr 15 21:53:23 2013 +0200
@@ -1572,7 +1572,7 @@
       pentiummmx)
 	gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
 	gcc_cflags_arch="-march=pentium-mmx -march=pentium"
-	path="x86/pentium/mmx x86/pentium x86"
+	path="x86/pentium/mmx x86/pentium x86/mmx x86"
 	;;
       i686 | pentiumpro)
 	gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
@@ -1582,37 +1582,37 @@
       pentium2)
 	gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
-	path="x86/p6/mmx x86/p6 x86"
+	path="x86/p6/mmx x86/p6 x86/mmx x86"
 	;;
       pentium3)
 	gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
-	path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	;;
       pentiumm)
 	gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
-	path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	;;
       k6)
 	gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=k6"
-	path="x86/k6/mmx x86/k6 x86"
+	path="x86/k6/mmx x86/k6 x86/mmx x86"
 	;;
       k62)
 	gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=k6-2 -march=k6"
-	path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+	path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86/mmx x86"
 	;;
       k63)
 	gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=k6-3 -march=k6"
-	path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+	path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86/mmx x86"
 	;;
       geode)
 	gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=k6-3 -march=k6"
-	path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+	path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86/mmx x86"
 	;;
       athlon)
 	# Athlon instruction costs are close to P6 (3 cycle load latency,
@@ -1620,7 +1620,7 @@
 	# know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
 	gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
-	path="x86/k7/mmx x86/k7 x86"
+	path="x86/k7/mmx x86/k7 x86/mmx x86"
 	;;
       i786 | pentium4)
 	# pentiumpro is the primary fallback when gcc doesn't know pentium4.
@@ -1630,7 +1630,7 @@
 	gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
 	gcc_64_cflags_cpu="-mtune=nocona"
-	path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
+	path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86/mmx x86"
 	path_64="x86_64/pentium4 x86_64"
 	;;
       viac32)
@@ -1638,79 +1638,79 @@
 	# c3-2 has sse and mmx, so pentium3 is good for -march.
 	gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
-	path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	;;
       viac3*)
 	# Not sure of the best fallbacks here.
 	gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
-	path="x86/pentium/mmx x86/pentium x86"
+	path="x86/pentium/mmx x86/pentium x86/mmx x86"
 	;;
       athlon64 | k8 | x86_64)
 	gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
 	gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
-	path="x86/k8 x86/k7/mmx x86/k7 x86"
+	path="x86/k8 x86/k7/mmx x86/k7 x86/mmx x86"
 	path_64="x86_64/k8 x86_64"
 	;;
       k10)
 	gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
 	gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
-	path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
+	path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86/mmx x86"
 	path_64="x86_64/k10 x86_64/k8 x86_64"
 	;;
       bobcat)
 	gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
 	gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
-	path="x86/bobcat x86/k7/mmx x86/k7 x86"
+	path="x86/bobcat x86/k7/mmx x86/k7 x86/mmx x86"
 	path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
 	;;
       bulldozer | bd1)
 	gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
 	gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
-	path="x86/bd1 x86/k7/mmx x86/k7 x86"
+	path="x86/bd1 x86/k7/mmx x86/k7 x86/mmx x86"
 	path_64="x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
 	;;
       core2)
 	gcc_cflags_cpu="-mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/core2 x86_64"
        ;;
       corei | coreinhm | coreiwsm)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       coreisbr)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       coreihwl)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       coreibwl)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
 	path_64="x86_64/mulx/adx x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	extra_functions_64="missing"	     # FIXME: remove when qemu's adx flags handling works
 	;;
       atom)
 	gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
 	gcc_cflags_arch="-march=atom -march=pentium3"
-	path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
+	path="x86/atom/sse2 x86/atom/mmx x86/atom x86/mmx x86"
 	path_64="x86_64/atom x86_64"
 	;;
       nano)
 	gcc_cflags_cpu="-mtune=nano"
 	gcc_cflags_arch="-march=nano"
-	path="x86/nano x86"
+	path="x86/nano x86/mmx x86"
 	path_64="x86_64/nano x86_64"
 	;;
       *)
diff -r 827551142828 -r 0b90b31bd2b7 mpn/powerpc64/tabselect.asm
--- a/mpn/powerpc64/tabselect.asm	Mon Apr 15 21:32:16 2013 +0200
+++ b/mpn/powerpc64/tabselect.asm	Mon Apr 15 21:53:23 2013 +0200
@@ -22,7 +22,7 @@
 include(`../config.m4')
 
 C                   cycles/limb
-C POWER3/PPC630		 ?
+C POWER3/PPC630		 1.75
 C POWER4/PPC970		 2.0
 C POWER5		 ?
 C POWER6		 5.0
diff -r 827551142828 -r 0b90b31bd2b7 mpn/sparc64/tabselect.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/tabselect.asm	Mon Apr 15 21:53:23 2013 +0200
@@ -0,0 +1,151 @@
+dnl  SPARC v9 mpn_tabselect.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund and David Miller.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC 1&2:	 2 hopefully
+C UltraSPARC 3:		 3
+C UltraSPARC T1:	17
+C UltraSPARC T3:	 ?
+C UltraSPARC T4/T5:	 2.25 hopefully
+
+C INPUT PARAMETERS
+define(`rp',     `%i0')
+define(`tp',     `%i1')
+define(`n',      `%i2')
+define(`nents',  `%i3')
+define(`which',  `%i4')
+
+define(`i',      `%g1')
+define(`j',      `%g3')
+define(`stride', `%g4')
+define(`tporig', `%g5')
+define(`mask',   `%o0')
+
+define(`data0',  `%l0')
+define(`data1',  `%l1')
+define(`data2',  `%l2')
+define(`data3',  `%l3')
+define(`t0',     `%l4')
+define(`t1',     `%l5')
+define(`t2',     `%l6')
+define(`t3',     `%l7')
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(mpn_tabselect)
+	save	%sp, -176, %sp
+
+	sllx	n, 3, stride
+	sub	n, 4, j
+	brlz	j, L(outer_end)
+	 mov	tp, tporig
+
+L(outer_loop):
+	clr	data0
+	clr	data1
+	clr	data2
+	clr	data3
+	mov	tporig, tp
+	mov	nents, i
+	mov	which, %o1
+
+L(top):	subcc	%o1, 1, %o1		C carry set iff o1 was 0 before this decrement
+	ldx	[tp + 0], t0
+	subc	%g0, %g0, mask
+	ldx	[tp + 8], t1
+	sub	i, 1, i
+	ldx	[tp + 16], t2
+	ldx	[tp + 24], t3
+	add	tp, stride, tp
+	and	t0, mask, t0
+	and	t1, mask, t1
+	or	t0, data0, data0
+	and	t2, mask, t2
+	or	t1, data1, data1
+	and	t3, mask, t3
+	or	t2, data2, data2
+	brnz	i, L(top)
+	 or	t3, data3, data3
+
+	stx	data0, [rp + 0]
+	subcc	j, 4, j
+	stx	data1, [rp + 8]
+	stx	data2, [rp + 16]
+	stx	data3, [rp + 24]
+	add	tporig, (4 * 8), tporig
+
+	brgez	j, L(outer_loop)
+	 add	rp, (4 * 8), rp
+L(outer_end):
+
+


More information about the gmp-commit mailing list