[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Oct 16 23:50:15 CEST 2011
details: /var/hg/gmp/rev/3c8490c6318b
changeset: 14348:3c8490c6318b
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 23:41:35 2011 +0200
description:
Further s390 improvements.
details: /var/hg/gmp/rev/fd708d43ef7e
changeset: 14349:fd708d43ef7e
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 23:48:42 2011 +0200
description:
New file.
details: /var/hg/gmp/rev/33895d7848dc
changeset: 14350:33895d7848dc
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 23:48:55 2011 +0200
description:
New file.
details: /var/hg/gmp/rev/571eabed4de3
changeset: 14351:571eabed4de3
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 23:49:01 2011 +0200
description:
New file.
details: /var/hg/gmp/rev/7b6f108f792f
changeset: 14352:7b6f108f792f
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Oct 16 23:49:49 2011 +0200
description:
*** empty log message ***
diffstat:
ChangeLog | 5 +
configure.in | 11 ++-
mpn/s390_32/copyd.asm | 129 +++++++++++++++++++++++++++++++++
mpn/s390_32/copyi.asm | 54 +++++++++++++
mpn/s390_32/esame/sqr_diag_addlsh1.asm | 73 ++++++++++++++++++
5 files changed, 271 insertions(+), 1 deletions(-)
diffs (truncated from 338 to 300 lines):
diff -r 765aaa80da2c -r 7b6f108f792f ChangeLog
--- a/ChangeLog Sun Oct 16 18:48:31 2011 +0200
+++ b/ChangeLog Sun Oct 16 23:49:49 2011 +0200
@@ -1,5 +1,10 @@
2011-10-16 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/s390_32/esame/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/s390_32/copyi.asm: New file.
+ * mpn/s390_32/copyd.asm: New file.
+
* mpn/s390_64/copyd.asm: Optimise.
* mpn/s390_64/copyi.asm: Rewrite along the lines of glibc memcpy.
diff -r 765aaa80da2c -r 7b6f108f792f configure.in
--- a/configure.in Sun Oct 16 18:48:31 2011 +0200
+++ b/configure.in Sun Oct 16 23:49:49 2011 +0200
@@ -1139,6 +1139,8 @@
path="s390_32/esame/$cpu s390_32/esame s390_32"
gcc_cflags_arch="-march=$gccarch"
AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
;;
z990 | z990esa)
cpu="z990"
@@ -1146,6 +1148,8 @@
path="s390_32/esame/$cpu s390_32/esame s390_32"
gcc_cflags_arch="-march=$gccarch"
AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
;;
z9 | z9esa)
cpu="z9"
@@ -1153,6 +1157,8 @@
path="s390_32/esame/$cpu s390_32/esame s390_32"
gcc_cflags_arch="-march=$gccarch"
AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
;;
z10 | z10esa)
cpu="z10"
@@ -1160,6 +1166,8 @@
path="s390_32/esame/$cpu s390_32/esame s390_32"
gcc_cflags_arch="-march=$gccarch"
AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
;;
z196 | z196esa)
cpu="z196"
@@ -1167,6 +1175,8 @@
path="s390_32/esame/$cpu s390_32/esame s390_32"
gcc_cflags_arch="-march=$gccarch"
AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
;;
esac
@@ -1179,7 +1189,6 @@
gcc_32_cflags="$gcc_cflags -m31"
path_64="s390_64/$host_cpu s390_64"
extra_functions=""
- AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
;;
esac
;;
diff -r 765aaa80da2c -r 7b6f108f792f mpn/s390_32/copyd.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_32/copyd.asm Sun Oct 16 23:49:49 2011 +0200
@@ -0,0 +1,129 @@
+dnl S/390-32 mpn_copyd
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z990 1.125
+
+C FIXME:
+C * Avoid saving/restoring callee-saves registers for n < 3. This could be
+C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
+C We could then use r3...r10 in main loop.
+
+C INPUT PARAMETERS (rp_param = destination, up_param = source, n = limb count)
+define(`rp_param', `%r2')
+define(`up_param', `%r3')
+define(`n', `%r4')
+
+define(`rp', `%r8')
+define(`up', `%r9')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+ stm %r6, %r11, 24(%r15) C save r6-r11; they are reloaded at L(end)
+
+ lr %r1, n
+ sll %r1, 2 C r1 = 4n = operand size in bytes
+ la %r10, 8(n) C r10 = n + 8
+ ahi %r1, -32 C r1 = 4n - 32 = offset of the highest 8-limb block
+ srl %r10, 3 C r10 = n/8 + 1; each entry path does one brct first
+ lhi %r11, -32 C r11 = -32 = pointer step per loop iteration
+
+ la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later
+ la up, 0(%r1,up_param) C FIXME use lay on z990 and later
+
+ lhi %r7, 7
+ nr %r7, n C n mod 8
+ chi %r7, 2 C dispatch on n mod 8; n itself is not read after this
+ jh L(b34567) C n mod 8 > 2
+ chi %r7, 1
+ je L(b1) C n mod 8 = 1
+ jh L(b2) C n mod 8 = 2; fall through when n mod 8 = 0
+
+L(b0): brct %r10, L(top) C no odd limbs; enter loop if n/8 > 0
+ j L(end) C n = 0
+
+L(b1): l %r0, 28(up) C 28(up) is the highest limb
+ ahi up, -4 C step down so 0(up) is the next lower 8-limb block
+ st %r0, 28(rp)
+ ahi rp, -4
+ brct %r10, L(top) C enter loop if 8-limb blocks remain
+ j L(end)
+
+L(b2): lm %r0, %r1, 24(up) C load the 2 highest limbs
+ ahi up, -8
+ stm %r0, %r1, 24(rp)
+ ahi rp, -8
+ brct %r10, L(top)
+ j L(end)
+
+L(b34567):
+ chi %r7, 4
+ jl L(b3) C n mod 8 = 3
+ je L(b4) C n mod 8 = 4
+ chi %r7, 6
+ je L(b6) C n mod 8 = 6
+ jh L(b7) C n mod 8 = 7; fall through when n mod 8 = 5
+
+L(b5): lm %r0, %r4, 12(up) C load the 5 highest limbs; overwrites n which is dead here
+ ahi up, -20
+ stm %r0, %r4, 12(rp)
+ ahi rp, -20
+ brct %r10, L(top)
+ j L(end)
+
+L(b3): lm %r0, %r2, 20(up) C load the 3 highest limbs; r2 was already copied to rp
+ ahi up, -12
+ stm %r0, %r2, 20(rp)
+ ahi rp, -12
+ brct %r10, L(top)
+ j L(end)
+
+L(b4): lm %r0, %r3, 16(up) C load the 4 highest limbs; r2 and r3 already copied to rp and up
+ ahi up, -16
+ stm %r0, %r3, 16(rp)
+ ahi rp, -16
+ brct %r10, L(top)
+ j L(end)
+
+L(b6): lm %r0, %r5, 8(up) C load the 6 highest limbs
+ ahi up, -24
+ stm %r0, %r5, 8(rp)
+ ahi rp, -24
+ brct %r10, L(top)
+ j L(end)
+
+L(b7): lm %r0, %r6, 4(up) C load the 7 highest limbs; r6 was saved on entry
+ ahi up, -28
+ stm %r0, %r6, 4(rp)
+ ahi rp, -28
+ brct %r10, L(top)
+ j L(end)
+
+L(top): lm %r0, %r7, 0(up) C load 8 limbs; r7 no longer holds n mod 8
+ la up, 0(%r11,up) C up -= 32
+ stm %r0, %r7, 0(rp) C store the 8 limbs
+ la rp, 0(%r11,rp) C rp -= 32
+ brct %r10, L(top) C repeat while 8-limb blocks remain
+
+L(end): lm %r6, %r11, 24(%r15) C reload the registers saved on entry
+ br %r14 C return
+EPILOGUE()
diff -r 765aaa80da2c -r 7b6f108f792f mpn/s390_32/copyi.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_32/copyi.asm Sun Oct 16 23:49:49 2011 +0200
@@ -0,0 +1,54 @@
+dnl S/390-32 mpn_copyi
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z990 0.375
+
+C NOTE
+C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
+
+C INPUT PARAMETERS (rp = destination, up = source, n = limb count)
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+ ltr %r4, %r4 C set condition code from n
+ sll %r4, 2 C r4 = 4n bytes; the je below still tests the ltr result
+ je L(rtn) C n = 0, nothing to copy
+ ahi %r4, -1 C r4 = byte count - 1
+ lr %r5, %r4
+ srl %r5, 8 C r5 = number of 256-byte blocks before the final 1..256 bytes
+ ltr %r5, %r5 C <= 256 bytes to copy?
+ je L(1) C yes, skip the block loop
+
+L(top): mvc 0(256, rp), 0(up) C copy one 256-byte block
+ la rp, 256(rp)
+ la up, 256(up)
+ brct %r5, L(top)
+
+L(1): bras %r5, L(2) C make r5 point to mvc insn
+ mvc 0(1, rp), 0(up) C template only; jumped over by bras and run via ex
+L(2): ex %r4, 0(%r5) C execute mvc with length ((4n-1) mod 256)+1 bytes
+L(rtn): br %r14 C return
+EPILOGUE()
diff -r 765aaa80da2c -r 7b6f108f792f mpn/s390_32/esame/sqr_diag_addlsh1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_32/esame/sqr_diag_addlsh1.asm Sun Oct 16 23:49:49 2011 +0200
@@ -0,0 +1,73 @@
+dnl S/390-32 mpn_sqr_diag_addlsh1
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z990 8
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`tp', `%r3')
+define(`up', `%r4')
+define(`n', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diag_addlsh1)
+ stm %r6, %r11, 24(%r15)
+
+ lhi %r11, -1 C save non-carry state
More information about the gmp-commit
mailing list