[PATCH] Add 64-bit sparc multiply routines for T3 and later.
David Miller
davem at davemloft.net
Thu Jan 3 11:11:47 CET 2013
2013-01-02 David S. Miller <davem at davemloft.net>
* config.guess: Recognize UltraSparc T4 under Linux.
* configure.ac: Add sparc64/ultrasparct3 to path_64 when T3 or T4. Append
-xarch=v8plusd or -xarch=v9d to command line, as needed.
* configure: Regenerate.
* mpn/sparc64/ultrasparct3/mul_1.asm: New file.
* mpn/sparc64/ultrasparct3/addmul_1.asm: New file.
* mpn/sparc64/ultrasparct3/submul_1.asm: New file.
diff --git a/config.guess b/config.guess
index 2832cdb..3cedfd3 100755
--- a/config.guess
+++ b/config.guess
@@ -573,6 +573,8 @@ sparc-*-* | sparc64-*-*)
elif grep 'cpu.*MB86904' /proc/cpuinfo >/dev/null; then
# actually MicroSPARC-II
exact_cpu=microsparc
+ elif grep 'cpu.*UltraSparc T4' /proc/cpuinfo >/dev/null; then
+ exact_cpu="ultrasparct4"
elif grep 'cpu.*UltraSparc T3' /proc/cpuinfo >/dev/null; then
exact_cpu="ultrasparct3"
elif grep 'cpu.*UltraSparc T2' /proc/cpuinfo >/dev/null; then
diff --git a/configure b/configure
index 3ac2bb7..e83a3a0 100755
--- a/configure
+++ b/configure
@@ -4861,6 +4861,8 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
# abilist="64" only.
#
case $host_cpu in
+ ultrasparct[34])
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*)
gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
esac
@@ -4901,6 +4903,7 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
case $host_cpu in
sparcv8 | microsparc | supersparc | turbosparc)
cc_cflags_arch="-xarch=v8" ;;
+ ultrasparct[34]) cc_cflags_arch="-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*) cc_cflags_arch="-xarch=v8plus" ;;
*) cc_cflags_arch="-xarch=v7" ;;
esac
@@ -4957,8 +4960,10 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
path_64="sparc64/ultrasparc1234 sparc64" ;;
ultrasparc[34])
path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
- ultrasparct[1234])
+ ultrasparct[12])
path_64="sparc64/ultrasparct1 sparc64" ;;
+ ultrasparct[34])
+ path_64="sparc64/ultrasparct3 sparc64/ultrasparct1 sparc64" ;;
*)
path_64="sparc64"
esac
@@ -4981,6 +4986,11 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
gcc_64_ldflags="-Wc,-m64"
gcc_64_cflags_optlist="cpu"
+ case $host_cpu in
+ ultrasparct[34])
+ gcc_64_cflags="$gcc_64_cflags -Wa,-xarch=v9d" ;;
+ esac
+
case $host in
*-*-solaris*)
# Sun cc.
@@ -4990,8 +5000,13 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
# -fast is documented as miscompiling things for the sake of speed.
#
cclist_64="$cclist_64 cc"
- cc_64_cflags="-xO3 -xarch=v9"
cc_64_cflags_optlist="cpu"
+ case $host_cpu in
+ ultrasparct[34])
+ cc_64_cflags="$cc_64_cflags -xO3 -xarch=v9d" ;;
+ *)
+ cc_64_cflags="-xO3 -xarch=v9" ;;
+ esac
;;
esac
diff --git a/configure.ac b/configure.ac
index 9425f60..53dd518 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1279,6 +1279,8 @@ case $host in
# abilist="64" only.
#
case $host_cpu in
+ [ultrasparct[34]])
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*)
gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
esac
@@ -1319,6 +1321,7 @@ case $host in
case $host_cpu in
sparcv8 | microsparc | supersparc | turbosparc)
cc_cflags_arch="-xarch=v8" ;;
+ [ultrasparct[34]]) cc_cflags_arch="-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*) cc_cflags_arch="-xarch=v8plus" ;;
*) cc_cflags_arch="-xarch=v7" ;;
esac
@@ -1375,8 +1378,10 @@ case $host in
path_64="sparc64/ultrasparc1234 sparc64" ;;
[ultrasparc[34]])
path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
- [ultrasparct[1234]])
+ [ultrasparct[12]])
path_64="sparc64/ultrasparct1 sparc64" ;;
+ [ultrasparct[34]])
+ path_64="sparc64/ultrasparct3 sparc64/ultrasparct1 sparc64" ;;
*)
path_64="sparc64"
esac
@@ -1399,6 +1404,11 @@ case $host in
gcc_64_ldflags="-Wc,-m64"
gcc_64_cflags_optlist="cpu"
+ case $host_cpu in
+ [ultrasparct[34]])
+ gcc_64_cflags="$gcc_64_cflags -Wa,-xarch=v9d" ;;
+ esac
+
case $host in
*-*-solaris*)
# Sun cc.
@@ -1408,8 +1418,13 @@ case $host in
# -fast is documented as miscompiling things for the sake of speed.
#
cclist_64="$cclist_64 cc"
- cc_64_cflags="-xO3 -xarch=v9"
cc_64_cflags_optlist="cpu"
+ case $host_cpu in
+ [ultrasparct[34]])
+ cc_64_cflags="$cc_64_cflags -xO3 -xarch=v9d" ;;
+ *)
+ cc_64_cflags="-xO3 -xarch=v9" ;;
+ esac
;;
esac
diff --git a/mpn/sparc64/ultrasparct3/addmul_1.asm b/mpn/sparc64/ultrasparct3/addmul_1.asm
new file mode 100644
index 0000000..45daaa0
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/addmul_1.asm
@@ -0,0 +1,83 @@
+dnl SPARC v9 mpn_addmul_1 for T3/T4.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 29.5
+C UltraSPARC T4: 6.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+ ALIGN(32)
+PROLOGUE(mpn_addmul_1)          C rp[] += up[] * v0 over n limbs, carry limb is returned
+ save %sp, -176, %sp            C new register window, arguments are now in %i0-%i3
+ subcc n, 1, n                  C n = limb count - 1
+ be,pn %xcc, L(final_one)       C single limb; %xcc tests all 64 bits of n (plain be reads %icc)
+ clr %o5                        C delay slot: carry limb = 0
+
+L(top):                         C two limbs per pass, %o5 holds the carry limb between limbs
+ ldx [up+0], %l0                C u0
+ ldx [rp+0], %l2                C r0
+ ldx [up+8], %l1                C u1
+ ldx [rp+8], %l3                C r1
+ mulx %l0, v0, %o0              C low 64 bits of u0 * v0
+ add up, 16, up
+ umulxhi %l0, v0, %o1           C high 64 bits of u0 * v0
+ add rp, 16, rp
+ mulx %l1, v0, %o2              C low 64 bits of u1 * v0
+ sub n, 2, n                    C two limbs consumed
+ umulxhi %l1, v0, %o3           C high 64 bits of u1 * v0
+ addcc %o5, %o0, %o0            C low0 += incoming carry limb
+ addxc %g0, %o1, %o5            C carry limb = high0 + carry flag
+ addcc %l2, %o0, %o0            C low0 += r0
+ addxc %g0, %o5, %o5            C fold that carry flag into the carry limb
+ stx %o0, [rp-16]               C rp was advanced above, hence the negative offset
+ addcc %o5, %o2, %o2            C same four-step sequence for the second limb
+ addxc %g0, %o3, %o5
+ addcc %l3, %o2, %o2
+ addxc %g0, %o5, %o5
+ brgz n, L(top)                 C n > 0: at least two limbs remain
+ stx %o2, [rp-8]                C delay slot: store the second result limb
+
+ brlz,pt n, L(done)             C n < 0: no limb left; n == 0 falls through for the last one
+ nop
+
+L(final_one):                   C handle exactly one remaining limb
+ ldx [up+0], %l0
+ ldx [rp+0], %l2
+ mulx %l0, v0, %o0
+ umulxhi %l0, v0, %o1
+ addcc %o5, %o0, %o0
+ addxc %g0, %o1, %o5
+ addcc %l2, %o0, %o0
+ addxc %g0, %o5, %o5
+ stx %o0, [rp+0]
+
+L(done):
+ ret
+ restore %o5, 0, %o0            C delay slot: carry limb becomes the return value in caller %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct3/mul_1.asm b/mpn/sparc64/ultrasparct3/mul_1.asm
new file mode 100644
index 0000000..df52647
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/mul_1.asm
@@ -0,0 +1,73 @@
+dnl SPARC v9 mpn_mul_1 for T3/T4.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 27
+C UltraSPARC T4: 4
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`v0', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)         C %g2 and %g3 hold product halves below
+ REGISTER(%g3,#scratch)
+ ALIGN(32)
+PROLOGUE(mpn_mul_1)             C rp[] = up[] * v0 over n limbs, carry limb is returned
+ subcc n, 1, n                  C leaf routine, no save; n = limb count - 1
+ be,pn %xcc, L(final_one)       C single limb; %xcc tests all 64 bits of n (plain be reads %icc)
+ clr %o5                        C delay slot: carry limb = 0
+
+L(top):                         C two limbs per pass, %o5 holds the carry limb between limbs
+ ldx [up+0], %g1                C u0
+ ldx [up+8], %o4                C u1
+ mulx %g1, v0, %g3              C low 64 bits of u0 * v0
+ add up, 16, up
+ umulxhi %g1, v0, %g2           C high 64 bits of u0 * v0
+ sub n, 2, n                    C two limbs consumed
+ mulx %o4, v0, %g1              C low 64 bits of u1 * v0, %g1 is reused
+ add rp, 16, rp
+ umulxhi %o4, v0, %o4           C high 64 bits of u1 * v0, overwrites u1
+ addcc %o5, %g3, %g3            C low0 += incoming carry limb
+ stx %g3, [rp-16]               C store leaves the condition codes alone
+ addxc %g0, %g2, %o5            C carry limb = high0 + carry flag
+ addcc %o5, %g1, %g1            C low1 += carry limb
+ addxc %g0, %o4, %o5            C carry limb = high1 + carry flag
+ brgz n, L(top)                 C n > 0: at least two limbs remain
+ stx %g1, [rp-8]                C delay slot: store the second result limb
+
+ brlz,pt n, L(done)             C n < 0: no limb left; n == 0 falls through for the last one
+ nop
+
+L(final_one):                   C handle exactly one remaining limb
+ ldx [up+0], %g1
+ mulx %g1, v0, %g3
+ umulxhi %g1, v0, %g2
+ addcc %o5, %g3, %g3
+ addxc %g0, %g2, %o5
+ stx %g3, [rp+0]
+
+L(done):
+ retl
+ mov %o5, %o0                   C delay slot: return the carry limb
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct3/submul_1.asm b/mpn/sparc64/ultrasparct3/submul_1.asm
new file mode 100644
index 0000000..fb7abb2
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/submul_1.asm
@@ -0,0 +1,83 @@
+dnl SPARC v9 mpn_submul_1 for T3/T4.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 29.5
+C UltraSPARC T4: 6.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+ ALIGN(32)
+PROLOGUE(mpn_submul_1)          C rp[] -= up[] * v0 over n limbs, borrow limb is returned
+ save %sp, -176, %sp            C new register window, arguments are now in %i0-%i3
+ subcc n, 1, n                  C n = limb count - 1
+ be,pn %xcc, L(final_one)       C single limb; %xcc tests all 64 bits of n (plain be reads %icc)
+ clr %o5                        C delay slot: borrow limb = 0
+
+L(top):                         C two limbs per pass, %o5 holds the borrow limb between limbs
+ ldx [up+0], %l0                C u0
+ ldx [rp+0], %l2                C r0
+ ldx [up+8], %l1                C u1
+ ldx [rp+8], %l3                C r1
+ mulx %l0, v0, %o0              C low 64 bits of u0 * v0
+ add up, 16, up
+ umulxhi %l0, v0, %o1           C high 64 bits of u0 * v0
+ add rp, 16, rp
+ mulx %l1, v0, %o2              C low 64 bits of u1 * v0
+ sub n, 2, n                    C two limbs consumed
+ umulxhi %l1, v0, %o3           C high 64 bits of u1 * v0
+ addcc %o5, %o0, %o0            C low0 += incoming borrow limb
+ addxc %g0, %o1, %o5            C borrow limb = high0 + carry flag
+ subcc %l2, %o0, %o0            C r0 - low0, carry flag is set on borrow
+ addxc %g0, %o5, %o5            C fold that borrow into the borrow limb
+ stx %o0, [rp-16]               C rp was advanced above, hence the negative offset
+ addcc %o5, %o2, %o2            C same four-step sequence for the second limb
+ addxc %g0, %o3, %o5
+ subcc %l3, %o2, %o2
+ addxc %g0, %o5, %o5
+ brgz n, L(top)                 C n > 0: at least two limbs remain
+ stx %o2, [rp-8]                C delay slot: store the second result limb
+
+ brlz,pt n, L(done)             C n < 0: no limb left; n == 0 falls through for the last one
+ nop
+
+L(final_one):                   C handle exactly one remaining limb
+ ldx [up+0], %l0
+ ldx [rp+0], %l2
+ mulx %l0, v0, %o0
+ umulxhi %l0, v0, %o1
+ addcc %o5, %o0, %o0
+ addxc %g0, %o1, %o5
+ subcc %l2, %o0, %o0
+ addxc %g0, %o5, %o5
+ stx %o0, [rp+0]
+
+L(done):
+ ret
+ restore %o5, 0, %o0            C delay slot: borrow limb becomes the return value in caller %o0
+EPILOGUE()
--
1.7.10.4
More information about the gmp-devel
mailing list