[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Sep 3 23:33:22 UTC 2019
details: /var/hg/gmp/rev/7e07a3cbec2a
changeset: 17858:7e07a3cbec2a
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Sep 04 01:23:13 2019 +0200
description:
New file.
details: /var/hg/gmp/rev/4c638f7bebcb
changeset: 17859:4c638f7bebcb
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Sep 04 01:30:07 2019 +0200
description:
New file.
details: /var/hg/gmp/rev/5b9461face51
changeset: 17860:5b9461face51
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Sep 04 01:30:18 2019 +0200
description:
New file.
diffstat:
mpn/arm/v6t2/gcd_22.asm | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
mpn/arm64/gcd_22.asm | 115 +++++++++++++++++++++++++++++++++++++++++++++
mpn/ia64/gcd_11.asm | 110 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 346 insertions(+), 0 deletions(-)
diffs (truncated from 358 to 300 lines):
diff -r 228585220bca -r 5b9461face51 mpn/arm/v6t2/gcd_22.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6t2/gcd_22.asm Wed Sep 04 01:30:18 2019 +0200
@@ -0,0 +1,121 @@
+dnl ARM v6t2 mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale -
+C Cortex-A5 13.2
+C Cortex-A7 10.3
+C Cortex-A8 8.4
+C Cortex-A9 ?
+C Cortex-A12 9.1
+C Cortex-A15 8.0
+C Cortex-A17 ?
+C Cortex-A53 8.5
+
+
+define(`gp', `r0') C base address used for the two result limb stores
+
+define(`u1', `r1') C first operand, high limb
+define(`u0', `r2') C first operand, low limb
+define(`v1', `r3') C second operand, high limb
+define(`v0', `r4') C second operand, low limb (loaded from the stack by the routine)
+
+define(`t0', `r5') C low limb of u - v
+define(`t1', `r6') C high limb of u - v
+define(`s0', `r7') C copy of u0 taken before u is overwritten
+define(`s1', `r8') C copy of u1 taken before u is overwritten
+define(`cnt', `r9') C number of trailing zero bits in t0
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+ push { r4-r9 } C save r4-r9 (6 words = 24 bytes); they hold v0, t0, t1, s0, s1, cnt
+C Two-limb binary gcd: replace (u,v) by (|u-v| >> cnt, min(u,v)) until both high limbs are 0.
+ ldr v0, [sp,#24] C v0 = stack-passed argument, 24 bytes above sp after the push
+
+L(top): subs t0, u0, v0 C 0 7  t0 = u0 - v0, borrow goes to the sbc below
+ beq L(lowz) C low limbs equal: handled at L(lowz)
+ sbc t1, u1, v1 C 1 8  {t1,t0} = u - v
+
+ mov s0, u0 C keep the old u0 (mov leaves the flags alone)
+ subs u0, v0, u0 C 0  u0 = v0 - u0
+ mov s1, u1 C keep the old u1
+ sbcs u1, v1, u1 C 1  {u1,u0} = v - u; carry clear iff v < u
+
+L(bck): rbit cnt, t0 C 1  bit-reverse so that clz counts trailing zeros
+ clz cnt, cnt C 2  cnt = number of trailing zero bits in t0
+
+ movcc u0, t0 C 6  v < u: u = u - v
+ movcc u1, t1 C 4
+ rsb r12, cnt, #32 C 3  r12 = 32 - cnt
+ movcs v0, s0 C 6  otherwise: v = old u
+ movcs v1, s1 C 4
+
+ lsr u0, u0, cnt C 3
+ lsl r12, u1, r12 C 4  bits of u1 that move down into u0
+ lsr u1, u1, cnt C 3
+ orr u0, u0, r12 C 5  {u1,u0} = {u1,u0} >> cnt
+
+ orrs r12, u1, v1 C r12 = u1 | v1, zero once both high limbs are zero
+ bne L(top)
+
+
+ str r12, [gp,#4] C high result limb <- 0 (r12 is zero here)
+
+ mov r8, gp C keep the result pointer; r8 is read back after the call
+ mov r0, u0 C pass 1st argument
+ mov r1, v0 C pass 2nd argument
+ mov r9, r14 C preserve link register
+ bl mpn_gcd_11 C finish on the two low limbs
+ str r0, [r8,#0] C low result limb = value returned in r0
+ mov r14, r9 C restore link register
+ pop { r4-r9 }
+ bx r14
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ subs t0, u1, v1 C t0 = u1 - v1
+ beq L(end) C u = v: store it as the result
+ mov t1, #0
+ mov s0, u0
+ mov s1, u1
+ subs u0, v1, u1 C u0 = v1 - u1; sets the carry tested by the movcc/movcs at L(bck)
+ mov u1, #0
+ b L(bck)
+
+L(end): str v0, [gp,#0] C low result limb
+ str v1, [gp,#4] C high result limb
+ pop { r4-r9 }
+ bx r14
+EPILOGUE()
diff -r 228585220bca -r 5b9461face51 mpn/arm64/gcd_22.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/gcd_22.asm Wed Sep 04 01:30:18 2019 +0200
@@ -0,0 +1,115 @@
+dnl ARM v8a mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+changecom(blah)
+
+C cycles/bit (approx)
+C Cortex-A35 ?
+C Cortex-A53 7.26
+C Cortex-A55 ?
+C Cortex-A57 ?
+C Cortex-A72 5.72
+C Cortex-A73 7.12
+C Cortex-A75 ?
+C Cortex-A76 ?
+C Cortex-A77 ?
+
+
+define(`u1', `x0') C first operand, high limb
+define(`u0', `x1') C first operand, low limb
+define(`v1', `x2') C second operand, high limb
+define(`v0', `x3') C second operand, low limb
+
+define(`t0', `x5') C low limb of u - v
+define(`t1', `x6') C high limb of u - v
+define(`s0', `x7') C low limb of v - u, then of |u - v|
+define(`s1', `x8') C high limb of v - u, then of |u - v|
+define(`cnt', `x9') C number of trailing zero bits in t0
+define(`tnc', `x10') C negated cnt, used as the left shift amount
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+C Two-limb binary gcd of {u1,u0} and {v1,v0}; the result is returned in x0 (low) and x1 (high).
+ ALIGN(16)
+L(top): subs t0, u0, v0 C 0 6  t0 = u0 - v0
+ cbz t0, L(lowz) C low limbs equal: handled at L(lowz)
+ sbc t1, u1, v1 C 1 7  {t1,t0} = u - v
+
+ subs s0, v0, u0 C 0
+ sbcs s1, v1, u1 C 1 s = v - u, cs = (u < v)
+
+L(bck): rbit cnt, t0 C 1  bit-reverse so that clz counts trailing zeros
+ clz cnt, cnt C 2  cnt = number of trailing zero bits in t0
+ csel s0, t0, s0, cc C 2
+ csel s1, t1, s1, cc C 2 u = |u - v|
+ sub tnc, xzr, cnt C 3  tnc = -cnt
+ csel v0, v0, u0, cc C 2
+ csel v1, v1, u1, cc C 2 v = min(u,v)
+
+ lsr u0, s0, cnt C 3
+ lsl x14, s1, tnc C 4  bits of s1 that move down into the low limb
+ lsr u1, s1, cnt C 3
+ orr u0, u0, x14 C 5  {u1,u0} = {s1,s0} >> cnt
+
+ orr x11, u1, v1 C x11 = u1 | v1
+ cbnz x11, L(top) C loop while either high limb is nonzero
+
+
+ subs x4, u0, v0 C 0  both high limbs are zero: continue on single limbs
+ b.eq L(end1) C
+
+ ALIGN(16)
+L(top1):rbit x12, x4 C 1,5
+ clz x12, x12 C 2
+ csneg x4, x4, x4, cs C v = abs(u-v), even 1
+ csel u0, v0, u0, cs C u = min(u,v) 1
+ lsr v0, x4, x12 C 3
+ subs x4, u0, v0 C 4
+ b.ne L(top1) C
+L(end1):mov x0, u0 C low result limb
+ mov x1, #0 C high result limb is zero
+ ret
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ subs t0, u1, v1 C t0 = u1 - v1
+ b.eq L(end) C u = v: return it
+ mov t1, #0
+ subs s0, v1, u1 C s0 = v1 - u1; sets the carry tested by the csel group at L(bck)
+ mov s1, #0
+ b L(bck) C FIXME: make conditional
+
+L(end): mov x0, v0 C low result limb
+ mov x1, v1 C high result limb
+ ret
+EPILOGUE()
diff -r 228585220bca -r 5b9461face51 mpn/ia64/gcd_11.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/ia64/gcd_11.asm Wed Sep 04 01:30:18 2019 +0200
@@ -0,0 +1,110 @@
+dnl Itanium-2 mpn_gcd_11
+
+dnl Copyright 2002-2005, 2012, 2013, 2015, 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bitpair (1x1 gcd)
+C Itanium: ?
+C Itanium 2: 4.5
+
+
+ASM_START()
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C NOTE(review): entries for n >= 1 are emitted as m4_count_trailing_zeros(n)-1; confirm against the code that reads the table.
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+ .rodata
+ ALIGN(m4_lshift(1,MAXSHIFT)) C align table to allow using dep
+ctz_table:
+ data1 MAXSHIFT C entry for n == 0
+forloop(i,1,MASK,
+` data1 m4_count_trailing_zeros(i)-1
+')
More information about the gmp-commit
mailing list