[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Aug 31 19:28:10 UTC 2014
details: /var/hg/gmp/rev/08e648f847bc
changeset: 16480:08e648f847bc
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Aug 30 23:47:16 2014 +0200
description:
Provide arm64 mod_34lsub1.
details: /var/hg/gmp/rev/dead77cd3b7c
changeset: 16481:dead77cd3b7c
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Aug 31 21:28:07 2014 +0200
description:
Provide arm64 lshift/rshift.
diffstat:
mpn/arm64/lshift.asm | 122 +++++++++++++++++++++++++++++++++++++++++++++
mpn/arm64/mod_34lsub1.asm | 123 ++++++++++++++++++++++++++++++++++++++++++++++
mpn/arm64/rshift.asm | 121 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 366 insertions(+), 0 deletions(-)
diffs (truncated from 378 to 300 lines):
diff -r 33ff2f5edbef -r dead77cd3b7c mpn/arm64/lshift.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/lshift.asm Sun Aug 31 21:28:07 2014 +0200
@@ -0,0 +1,122 @@
+dnl ARM64 mpn_lshift.
+
+dnl Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 ?
+C Cortex-A57 ?
+
+changecom(@&*$)
+
+define(`rp_arg', `x0') C incoming result pointer
+define(`up', `x1') C source pointer
+define(`n', `x2') C limb count
+define(`cnt', `x3') C shift count in bits
+
+define(`rp', `x16') C working result pointer, x0 itself is overwritten with the return value
+
+define(`tnc',`x8') C negated shift count, used for the right shifts
+
+ASM_START()
+PROLOGUE(mpn_lshift) C shift n limbs at up left by cnt bits into rp_arg, x0 returns the bits shifted out
+ add rp, rp_arg, n, lsl #3 C rp = rp_arg + 8*n, just past the top result limb
+ add up, up, n, lsl #3 C up = up + 8*n, just past the top source limb
+ sub tnc, xzr, cnt C tnc = -cnt, acts as 64-cnt since variable shifts use the amount mod 64
+ tbz n, #0, L(bx0) C dispatch on n mod 4, bit 0 clear means even n
+
+L(bx1): ldr x4, [up,#-8] C odd n: x4 = top source limb
+ tbnz n, #1, L(b11)
+
+L(b01): lsr x0, x4, tnc C x0 = bits shifted out of the top limb, still in x0 at ret
+ lsl x12, x4, cnt C x12 = pending low part, x12 is used because x18 is a platform-reserved register
+ sub n, n, #1
+ cbnz n, L(gt1)
+ str x12, [rp,#-8] C n was 1: the single result limb
+ ret
+L(gt1): ldp x4, x5, [up,#-24] C next two limbs below the top one
+ add up, up, #-8 C bias up and rp to match the offsets used from lo2 on
+ add rp, rp, #16
+ b L(lo2)
+
+L(b11): lsr x0, x4, tnc C x0 = bits shifted out of the top limb
+ lsl x9, x4, cnt C x9 = pending low part, merged after lo0
+ ldp x6, x7, [up,#-24]
+ add n, n, #1 C round n up to a multiple of 4 for the loop counter
+ add up, up, #8 C bias up and rp to match the offsets used from lo0 on
+ add rp, rp, #32
+ b L(lo0)
+
+L(bx0): ldp x4, x5, [up,#-16] C even n: x5 = top source limb, x4 = the one below
+ tbz n, #1, L(b00)
+
+L(b10): lsr x0, x5, tnc C x0 = bits shifted out of the top limb
+ lsl x13, x5, cnt
+ lsr x10, x4, tnc
+ lsl x12, x4, cnt C x12 = pending low part
+ sub n, n, #2
+ cbnz n, L(gt2)
+ orr x10, x10, x13 C n was 2: x10 = top result limb, x12 = bottom result limb
+ stp x12, x10, [rp,#-16]
+ ret
+L(gt2): ldp x4, x5, [up,#-32]
+ orr x10, x10, x13
+ str x10, [rp,#-8] C store the top result limb
+ add up, up, #-16 C bias up and rp to match the offsets used from lo2 on
+ add rp, rp, #8
+ b L(lo2)
+
+L(b00): lsr x0, x5, tnc C x0 = bits shifted out of the top limb
+ lsl x13, x5, cnt
+ lsr x10, x4, tnc
+ lsl x9, x4, cnt C x9 = pending low part, merged after lo0
+ ldp x6, x7, [up,#-32]
+ orr x10, x10, x13
+ str x10, [rp,#-8] C store the top result limb
+ add rp, rp, #24 C bias rp to match the offsets used from lo0 on
+ b L(lo0)
+
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#-48] C main loop: four limbs per pass, moving towards lower addresses
+ add rp, rp, #-32 C integrate with stp?
+ add up, up, #-32 C integrate with ldp?
+ orr x11, x11, x9 C merge the parts left pending by the previous pass
+ orr x10, x10, x13
+ stp x10, x11, [rp,#-16]
+L(lo2): lsr x11, x5, tnc
+ lsl x13, x5, cnt
+ lsr x10, x4, tnc
+ lsl x9, x4, cnt C x9 = pending low part for the pair handled after lo0
+ ldp x6, x7, [up,#-32]
+ orr x11, x11, x12 C merge the pending low part held in x12
+ orr x10, x10, x13
+ stp x10, x11, [rp,#-32]
+L(lo0): sub n, n, #4
+ lsr x11, x7, tnc
+ lsl x13, x7, cnt
+ lsr x10, x6, tnc
+ lsl x12, x6, cnt C x12 = pending low part for the next pass or the final store
+ cbnz n, L(top)
+
+L(end): orr x11, x11, x9
+ orr x10, x10, x13
+ stp x10, x11, [rp,#-48]
+ str x12, [rp,#-56] C lowest result limb, zeros shifted in at the bottom
+ ret
+EPILOGUE()
diff -r 33ff2f5edbef -r dead77cd3b7c mpn/arm64/mod_34lsub1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/mod_34lsub1.asm Sun Aug 31 21:28:07 2014 +0200
@@ -0,0 +1,123 @@
+dnl ARM64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl Copyright 2012-2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 ?
+C Cortex-A57 ?
+
+define(`ap', x0) C source pointer, advanced as limbs are consumed
+define(`n', x1) C limb count, biased downwards as the code proceeds
+
+changecom(@&*$)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr ap, mp_size_t n)
+
+C TODO
+C * An alternative inner loop which could run at 0.722 c/l:
+C adds x8, x8, x2
+C adcs x9, x9, x3
+C ldp x2, x3, [ap, #-32]
+C adcs x10, x10, x4
+C adc x12, x12, xzr
+C adds x8, x8, x5
+C ldp x4, x5, [ap, #-16]
+C sub n, n, #6
+C adcs x9, x9, x6
+C adcs x10, x10, x7
+C ldp x6, x7, [ap], #48
+C adc x12, x12, xzr
+C tbz n, #63, L(top)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1) C fold n limbs at ap into a value congruent to them mod 2^48-1, returned in x0
+ subs n, n, #3 C n -= 3, flags feed the branch below
+ mov x8, #0 C x8 collects carries out of the x2,x3,x4 accumulator
+ b.lt L(le2) C n <= 2
+
+ ldp x2, x3, [ap, #0] C x2,x3,x4 = first three limbs, the running three-limb sum
+ ldr x4, [ap, #16]
+ add ap, ap, #24
+ subs n, n, #3
+ b.lt L(sum) C n <= 5
+ cmn x0, #0 C clear carry (adding zero never carries)
+
+L(top): ldp x5, x6, [ap, #0] C loop: add three more limbs per pass
+ ldr x7, [ap, #16]
+ add ap, ap, #24
+ sub n, n, #3 C plain sub, so the carry flag survives into the adcs chain
+ adcs x2, x2, x5 C carry out of x4 from the previous pass re-enters here, 2^192 = 1 mod 2^48-1
+ adcs x3, x3, x6
+ adcs x4, x4, x7
+ tbz n, #63, L(top) C loop while n has not gone negative
+
+ adc x8, xzr, xzr C x8 <= 1
+
+L(sum): cmn n, #2 C n is -3, -2 or -1 here, meaning 0, 1 or 2 limbs remain
+ mov x5, #0
+ b.lo 1f C carry clear: no limbs remain
+ ldr x5, [ap], #8
+1: mov x6, #0 C mov and ldr leave the flags from cmn intact
+ b.ls 1f C carry clear or zero: fewer than two limbs remain
+ ldr x6, [ap], #8
+1: adds x2, x2, x5
+ adcs x3, x3, x6
+ adcs x4, x4, xzr
+ adc x8, x8, xzr C x8 <= 2
+
+L(sum2):
+ and x0, x2, #0xffffffffffff C x0 = low 48 bits of x2
+ add x0, x0, x2, lsr #48 C plus the top 16 bits of x2, weight 2^48 = 1
+ add x0, x0, x8 C plus the collected carries, weight 2^192 = 1
+
+ lsl x8, x3, #16 C x3 has weight 2^64 = 2^16, x8 is free for reuse here
+ and x1, x8, #0xffffffffffff C low 32 bits of x3 moved to bits 16..47
+ add x0, x0, x1
+ add x0, x0, x3, lsr #32 C high 32 bits of x3, weight 2^96 = 1
+
+ lsl x8, x4, #32 C x4 has weight 2^128 = 2^32
+ and x1, x8, #0xffffffffffff C low 16 bits of x4 moved to bits 32..47
+ add x0, x0, x1
+ add x0, x0, x4, lsr #16 C high 48 bits of x4, weight 2^144 = 1
+ ret
+
+L(le2): cmn n, #1 C n holds the original count minus 3, so -1 means two limbs
+ bne L(1)
+ ldp x2, x3, [ap] C two limbs: load them, zero the third and share the folding code
+ mov x4, #0
+ b L(sum2)
+L(1): ldr x2, [ap] C otherwise fold a single limb directly
+ and x0, x2, #0xffffffffffff
+ add x0, x0, x2, lsr #48
+ ret
+EPILOGUE()
diff -r 33ff2f5edbef -r dead77cd3b7c mpn/arm64/rshift.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/rshift.asm Sun Aug 31 21:28:07 2014 +0200
@@ -0,0 +1,121 @@
+dnl ARM64 mpn_rshift.
+
+dnl Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 ?
+C Cortex-A57 ?
+
+changecom(@&*$)
+
+define(`rp_arg', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`cnt', `x3')
+
+define(`rp', `x16')
+
+define(`tnc',`x8')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ mov rp, rp_arg
+ sub tnc, xzr, cnt
+ tbz n, #0, L(bx0)
+
+L(bx1): ldr x4, [up,#0]
More information about the gmp-commit
mailing list