[PATCH 1/2] Add 64-bit sparc multiply routines for T3 and later.
David Miller
davem at davemloft.net
Wed Mar 6 00:28:13 CET 2013
This is a respin of patch #2 from last night; it incorporates all of
the improvements either explicitly or implicitly suggested :-)
Torbjorn, I'm leaving out the configure regeneration from the patch,
so that the patch is not so large, since I'm pretty sure you're going
to regenerate it yourself. Let me know if this is not acceptable. :-)
* config.guess: Recognize UltraSparc T4 under Linux.
* configure.ac: Add sparc64/ultrasparct3 to path_64 when T3 or T4. Append
-xarch=v8plusd or -xarch=v9d to command line, as needed.
* configure: Regenerate.
* mpn/sparc64/ultrasparct3/mul_1.asm: New file.
* mpn/sparc64/ultrasparct3/addmul_1.asm: New file.
* mpn/sparc64/ultrasparct3/submul_1.asm: New file.
---
diff --git a/config.guess b/config.guess
index 7147bf6..bcd1d89 100755
--- a/config.guess
+++ b/config.guess
@@ -581,6 +581,8 @@ sparc-*-* | sparc64-*-*)
elif grep 'cpu.*MB86904' /proc/cpuinfo >/dev/null; then
# actually MicroSPARC-II
exact_cpu=microsparc
+ elif grep 'cpu.*UltraSparc T4' /proc/cpuinfo >/dev/null; then
+ exact_cpu="ultrasparct4"
elif grep 'cpu.*UltraSparc T3' /proc/cpuinfo >/dev/null; then
exact_cpu="ultrasparct3"
elif grep 'cpu.*UltraSparc T2' /proc/cpuinfo >/dev/null; then
diff --git a/configure.ac b/configure.ac
index 189363f..3559056 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1284,6 +1284,8 @@ case $host in
# abilist="64" only.
#
case $host_cpu in
+ [ultrasparct[34]])
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*)
gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
esac
@@ -1324,6 +1326,7 @@ case $host in
case $host_cpu in
sparcv8 | microsparc | supersparc | turbosparc)
cc_cflags_arch="-xarch=v8" ;;
+ [ultrasparct[34]]) cc_cflags_arch="-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*) cc_cflags_arch="-xarch=v8plus" ;;
*) cc_cflags_arch="-xarch=v7" ;;
esac
@@ -1380,8 +1383,10 @@ case $host in
path_64="sparc64/ultrasparc1234 sparc64" ;;
[ultrasparc[34]])
path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
- [ultrasparct[1234]])
+ [ultrasparct[12]])
path_64="sparc64/ultrasparct1 sparc64" ;;
+ [ultrasparct[34]])
+ path_64="sparc64/ultrasparct3 sparc64/ultrasparct1 sparc64" ;;
*)
path_64="sparc64"
esac
@@ -1404,6 +1409,11 @@ case $host in
gcc_64_ldflags="-Wc,-m64"
gcc_64_cflags_optlist="cpu"
+ case $host_cpu in
+ [ultrasparct[34]])
+ gcc_64_cflags="$gcc_64_cflags -Wa,-xarch=v9d" ;;
+ esac
+
case $host in
*-*-solaris*)
# Sun cc.
@@ -1413,8 +1423,13 @@ case $host in
# -fast is documented as miscompiling things for the sake of speed.
#
cclist_64="$cclist_64 cc"
- cc_64_cflags="-xO3 -xarch=v9"
cc_64_cflags_optlist="cpu"
+ case $host_cpu in
+ [ultrasparct[34]])
+ cc_64_cflags="$cc_64_cflags -xO3 -xarch=v9d" ;;
+ *)
+ cc_64_cflags="-xO3 -xarch=v9" ;;
+ esac
;;
esac
diff --git a/mpn/sparc64/ultrasparct3/addmul_1.asm b/mpn/sparc64/ultrasparct3/addmul_1.asm
new file mode 100644
index 0000000..98a9fe4
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/addmul_1.asm
@@ -0,0 +1,80 @@
+dnl SPARC v9 mpn_addmul_1 for T3/T4.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 28
+C UltraSPARC T4: 5.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+ ALIGN(32)
+PROLOGUE(mpn_addmul_1) C rp[0..n-1] += up[0..n-1] * v0; the final carry limb is returned in %o0
+ save %sp, -176, %sp C new register window with a 176-byte frame; arguments are now in %i0-%i3
+ subcc n, 1, n C n = count - 1, flags set for the branch below
+ be L(final_one) C count == 1: skip the two-limb loop (be tests icc, i.e. the low 32 bits)
+ subcc %g0, %g0, %o5 C delay slot: carry limb %o5 = 0 and carry flag cleared
+
+L(top): C main loop, two limbs per iteration
+ ldx [up+0], %l0 C u0
+ ldx [up+8], %l1 C u1
+ ldx [rp+0], %l2 C r0
+ ldx [rp+8], %l3 C r1
+ mulx %l0, v0, %o0 C lo0 = low 64 bits of u0 * v0
+ add up, 16, up
+ umulxhi %l0, v0, %o1 C hi0 = high 64 bits of u0 * v0 (unsigned)
+ add rp, 16, rp C rp already advanced, so the stores below use negative offsets
+ mulx %l1, v0, %o2 C lo1 = low 64 bits of u1 * v0
+ sub n, 2, n C plain sub, so the carry flag is left alone
+ umulxhi %l1, v0, %o3 C hi1 = high 64 bits of u1 * v0 (unsigned)
+ addxccc %o0, %o5, %o0 C lo0 + carry limb + carry flag (flag comes from entry or the previous iteration)
+ addxccc %o2, %o1, %o2 C lo1 + hi0 + carry
+ addxc %g0, %o3, %o5 C new carry limb = hi1 + carry; flags not modified
+ addcc %o0, %l2, %o0 C add r0; this starts a second carry chain
+ stx %o0, [rp-16]
+ addxccc %o2, %l3, %o2 C add r1 + carry; the carry out stays live past the loop branch
+ brgz n, L(top) C loop again while n > 0
+ stx %o2, [rp-8] C delay slot: executed whether or not the branch is taken
+
+ brlz,pt n, L(done) C n < 0: count was even, no limb left
+ nop
+
+L(final_one): C reached with one limb left: n == 0 after the loop, or branched here from entry
+ ldx [up+0], %l0
+ ldx [rp+0], %l2
+ mulx %l0, v0, %o0 C low 64 bits of the product
+ umulxhi %l0, v0, %o1 C high 64 bits of the product
+ addxccc %o5, %o0, %o0 C low word + carry limb + pending carry flag
+ addxc %g0, %o1, %o5 C carry limb = high word + carry
+ addcc %l2, %o0, %o0 C add the destination limb
+ stx %o0, [rp+0]
+
+L(done):
+ addxc %g0, %o5, %o5 C fold the last carry flag into the carry limb
+ ret
+ restore %o5, 0, %o0 C delay slot: pop the window and hand the carry limb back in %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct3/mul_1.asm b/mpn/sparc64/ultrasparct3/mul_1.asm
new file mode 100644
index 0000000..55cb8d8
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/mul_1.asm
@@ -0,0 +1,70 @@
+dnl SPARC v9 mpn_mul_1 for T3/T4.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 25.5
+C UltraSPARC T4: 3.8
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`v0', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+ ALIGN(32)
+PROLOGUE(mpn_mul_1) C rp[0..n-1] = up[0..n-1] * v0; the final high limb plus carry is returned in %o0
+ subcc n, 1, n C n = count - 1, flags set for the branch below
+ be L(final_one) C count == 1: skip the two-limb loop (be tests icc, i.e. the low 32 bits)
+ subcc %g0, %g0, %o5 C delay slot: carry limb %o5 = 0 and carry flag cleared
+
+L(top): C main loop, two limbs per iteration
+ ldx [up+0], %g1 C u0
+ sub n, 2, n C plain sub, so the carry flag is left alone
+ ldx [up+8], %o4 C u1
+ mulx %g1, v0, %g3 C lo0 = low 64 bits of u0 * v0
+ add up, 16, up
+ umulxhi %g1, v0, %g2 C hi0 = high 64 bits of u0 * v0 (unsigned)
+ mulx %o4, v0, %g1 C lo1; %g1 is reused because u0 is no longer needed
+ add rp, 16, rp C rp already advanced, so the stores below use negative offsets
+ addxccc %g3, %o5, %g3 C lo0 + carry limb + carry flag; must read %o5 before it is overwritten
+ umulxhi %o4, v0, %o5 C hi1 becomes the next carry limb
+ stx %g3, [rp-16]
+ addxccc %g1, %g2, %g1 C lo1 + hi0 + carry; the carry out stays live past the loop branch
+ brgz n, L(top) C loop again while n > 0
+ stx %g1, [rp-8] C delay slot: executed whether or not the branch is taken
+
+ brlz,pt n, L(done) C n < 0: count was even, no limb left
+ nop
+
+L(final_one): C reached with one limb left: n == 0 after the loop, or branched here from entry
+ ldx [up+0], %g1
+ mulx %g1, v0, %g3 C low 64 bits of the product
+ addxccc %g3, %o5, %g3 C low word + carry limb + pending carry flag
+ umulxhi %g1, v0, %o5 C high 64 bits of the product
+ stx %g3, [rp+0]
+
+L(done):
+ retl C leaf return: this routine never executed a save
+ addxc %g0, %o5, %o0 C delay slot: return value = last high limb + carry flag
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct3/submul_1.asm b/mpn/sparc64/ultrasparct3/submul_1.asm
new file mode 100644
index 0000000..fb7abb2
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/submul_1.asm
@@ -0,0 +1,83 @@
+dnl SPARC v9 mpn_submul_1 for T3/T4.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 29.5
+C UltraSPARC T4: 6.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+ ALIGN(32)
+PROLOGUE(mpn_submul_1) C rp[0..n-1] -= up[0..n-1] * v0; the final borrow limb is returned in %o0
+ save %sp, -176, %sp C new register window with a 176-byte frame; arguments are now in %i0-%i3
+ subcc n, 1, n C n = count - 1, flags set for the branch below
+ be L(final_one) C count == 1: skip the two-limb loop (be tests icc, i.e. the low 32 bits)
+ clr %o5 C delay slot: borrow limb %o5 = 0
+
+L(top): C main loop, two limbs per iteration
+ ldx [up+0], %l0 C u0
+ ldx [rp+0], %l2 C r0
+ ldx [up+8], %l1 C u1
+ ldx [rp+8], %l3 C r1
+ mulx %l0, v0, %o0 C lo0 = low 64 bits of u0 * v0
+ add up, 16, up
+ umulxhi %l0, v0, %o1 C hi0 = high 64 bits of u0 * v0 (unsigned)
+ add rp, 16, rp C rp already advanced, so the stores below use negative offsets
+ mulx %l1, v0, %o2 C lo1 = low 64 bits of u1 * v0
+ sub n, 2, n
+ umulxhi %l1, v0, %o3 C hi1 = high 64 bits of u1 * v0 (unsigned)
+ addcc %o5, %o0, %o0 C lo0 + borrow limb carried in from the previous limb
+ addxc %g0, %o1, %o5 C hi0 + carry out of that addition
+ subcc %l2, %o0, %o0 C r0 - (lo0 + borrow limb); carry flag now holds the borrow
+ addxc %g0, %o5, %o5 C fold the borrow into the borrow limb
+ stx %o0, [rp-16]
+ addcc %o5, %o2, %o2 C same four-step sequence for the second limb
+ addxc %g0, %o3, %o5 C hi1 + carry
+ subcc %l3, %o2, %o2 C r1 - (lo1 + borrow limb)
+ addxc %g0, %o5, %o5 C borrow limb is complete here; no flag is live across iterations
+ brgz n, L(top) C loop again while n > 0
+ stx %o2, [rp-8] C delay slot: executed whether or not the branch is taken
+
+ brlz,pt n, L(done) C n < 0: count was even, no limb left
+ nop
+
+L(final_one): C reached with one limb left: n == 0 after the loop, or branched here from entry
+ ldx [up+0], %l0
+ ldx [rp+0], %l2
+ mulx %l0, v0, %o0 C low 64 bits of the product
+ umulxhi %l0, v0, %o1 C high 64 bits of the product
+ addcc %o5, %o0, %o0 C low word + incoming borrow limb
+ addxc %g0, %o1, %o5 C high word + carry
+ subcc %l2, %o0, %o0 C subtract from the destination limb; carry flag = borrow
+ addxc %g0, %o5, %o5 C fold the borrow into the borrow limb
+ stx %o0, [rp+0]
+
+L(done):
+ ret
+ restore %o5, 0, %o0 C delay slot: pop the window and hand the borrow limb back in %o0
+EPILOGUE()
--
1.8.1.2
More information about the gmp-devel
mailing list