[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Feb 11 20:52:49 UTC 2017
details: /var/hg/gmp/rev/8abfd697f191
changeset: 17253:8abfd697f191
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 11 21:32:11 2017 +0100
description:
(SETCY, RETVAL): Shorten insn sequences.
details: /var/hg/gmp/rev/e136ed2d4f49
changeset: 17254:e136ed2d4f49
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 11 21:35:28 2017 +0100
description:
Cosmetic changes to minimise edit distance.
details: /var/hg/gmp/rev/f52324bfe60f
changeset: 17255:f52324bfe60f
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 11 21:52:24 2017 +0100
description:
Move new v6/bdiv_q_1.asm code to v7/cora8 and grab that from cora9, cora15.
details: /var/hg/gmp/rev/b6f94acb23c1
changeset: 17256:b6f94acb23c1
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 11 21:52:47 2017 +0100
description:
ChangeLog
diffstat:
ChangeLog | 10 ++-
mpn/arm/v6/bdiv_q_1.asm | 164 ----------------------------------------
mpn/arm/v7a/cora15/bdiv_q_1.asm | 36 ++++++++
mpn/arm/v7a/cora8/bdiv_q_1.asm | 158 ++++++++++++++++++++++++++++++++++++++
mpn/arm/v7a/cora9/bdiv_q_1.asm | 36 ++++++++
mpn/arm64/aors_n.asm | 14 +-
mpn/arm64/cnd_aors_n.asm | 20 ++--
mpn/arm64/xgene1/aors_n.asm | 13 +-
mpn/arm64/xgene1/cnd_aors_n.asm | 19 ++--
mpn/powerpc64/mode64/dive_1.asm | 19 ++-
10 files changed, 282 insertions(+), 207 deletions(-)
diffs (truncated from 660 to 300 lines):
diff -r 10a86ac116c4 -r b6f94acb23c1 ChangeLog
--- a/ChangeLog Sat Feb 11 04:14:03 2017 +0100
+++ b/ChangeLog Sat Feb 11 21:52:47 2017 +0100
@@ -1,5 +1,14 @@
2017-02-11 Torbjörn Granlund <tg at gmplib.org>
+ * mpn/arm/v7a/cora8/bdiv_q_1.asm: New file, based on v6/dive_1.asm.
+ * mpn/arm/v7a/cora9/bdiv_q_1.asm: New file, grabbing cora8 code.
+ * mpn/arm/v7a/cora15/bdiv_q_1.asm: Likewise.
+
+ * mpn/arm64/aors_n.asm: (SETCY, RETVAL): Shorten insn sequences.
+ * mpn/arm64/cnd_aors_n.asm: Likewise.
+ * mpn/arm64/xgene1/aors_n.asm: Likewise.
+ * mpn/arm64/xgene1/cnd_aors_n.asm: Likewise.
+
* mpn/arm/bdiv_q_1.asm: New file.
* mpn/generic/bdiv_q_1.c (mpn_bdiv_q_1): Remove odd d special case.
@@ -10,7 +19,6 @@
2017-02-10 Torbjörn Granlund <tg at gmplib.org>
- * mpn/arm/v6/bdiv_q_1.asm: New file, based on dive_1.asm.
* mpn/arm/arm-defs.m4 (EPILOGUE_cpu): Zap lea_list to avoid repetition.
diff -r 10a86ac116c4 -r b6f94acb23c1 mpn/arm/v6/bdiv_q_1.asm
--- a/mpn/arm/v6/bdiv_q_1.asm Sat Feb 11 04:14:03 2017 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-dnl ARM v6 mpn_bdiv_q_1
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb cycles/limb
-C norm unorm modexact_1c_odd
-C StrongARM - -
-C XScale - -
-C Cortex-A7 ? ?
-C Cortex-A8 ? ?
-C Cortex-A9 9 10 9
-C Cortex-A15 7 7 7
-
-C Architecture requirements:
-C v5 -
-C v5t clz
-C v5te -
-C v6 umaal
-C v6t2 -
-C v7a -
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n', `r2')
-define(`d', `r3')
-define(`di_arg', `sp[0]')
-define(`cnt_arg', `sp[4]')
-
-define(`cy', `r7')
-define(`cnt', `r6')
-define(`tnc', `r4')
-
-ASM_START()
-PROLOGUE(mpn_bdiv_q_1)
- push {r6,r7,r8,r9,r10,r11}
-
- tst d, #1
-
- rsb r10, d, #0
- and r10, r10, d
- clz r10, r10
- rsb cnt, r10, #31 C count_trailing_zeros
- mov d, d, lsr cnt
-
-C binvert limb
- LEA( r10, binvert_limb_table)
- and r12, d, #254
- ldrb r10, [r10, r12, lsr #1]
- mul r12, r10, r10
- mul r12, d, r12
- rsb r12, r12, r10, lsl #1
- mul r10, r12, r12
- mul r10, d, r10
- rsb r10, r10, r12, lsl #1 C r10 = inverse
-
- ldr r11, [up], #4 C up[0]
- mov cy, #0
- rsb r8, r10, #0 C r8 = -inverse
- bne L(norm)
- b L(unnorm)
-EPILOGUE()
-
-PROLOGUE(mpn_pi1_bdiv_q_1)
- push {r6,r7,r8,r9,r10,r11}
-
- ldr cnt, [sp, #28]
- ldr r10, [sp, #24]
- cmp cnt, #0
-
- ldr r11, [up], #4 C up[0]
- mov cy, #0
- rsb r8, r10, #0 C r8 = -inverse
-
- bne L(unnorm)
-
-L(norm):
- subs n, n, #1
- mul r11, r11, r10
- beq L(end)
-
- ALIGN(16)
-L(top): ldr r9, [up], #4
- mov r12, #0
- str r11, [rp], #4
- umaal r12, cy, r11, d
- mul r11, r9, r10
- mla r11, cy, r8, r11
- subs n, n, #1
- bne L(top)
-
-L(end): str r11, [rp]
- pop {r10,r11,r6,r7,r8,r9}
- bx r14
-
-L(unnorm):
- push {r4,r5}
- rsb tnc, cnt, #32
- mov r5, r11, lsr cnt
- subs n, n, #1
- beq L(edx)
-
- ldr r12, [up], #4
- orr r9, r5, r12, lsl tnc
- mov r5, r12, lsr cnt
- mul r11, r9, r10
- subs n, n, #1
- beq L(edu)
-
- ALIGN(16)
-L(tpu): ldr r12, [up], #4
- orr r9, r5, r12, lsl tnc
- mov r5, r12, lsr cnt
- mov r12, #0
- str r11, [rp], #4
- umaal r12, cy, r11, d
- mul r11, r9, r10
- mla r11, cy, r8, r11
- subs n, n, #1
- bne L(tpu)
-
-L(edu): str r11, [rp], #4
- mov r12, #0
- umaal r12, cy, r11, d
- mul r11, r5, r10
- mla r11, cy, r8, r11
- str r11, [rp]
- pop {r4,r5,r6,r7,r8,r9,r10,r11}
- bx r14
-
-L(edx): mul r11, r5, r10
- str r11, [rp]
- pop {r4,r5,r6,r7,r8,r9,r10,r11}
- bx r14
-EPILOGUE()
diff -r 10a86ac116c4 -r b6f94acb23c1 mpn/arm/v7a/cora15/bdiv_q_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/bdiv_q_1.asm Sat Feb 11 21:52:47 2017 +0100
@@ -0,0 +1,36 @@
+dnl ARM mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`arm/v7a/cora8/bdiv_q_1.asm')
diff -r 10a86ac116c4 -r b6f94acb23c1 mpn/arm/v7a/cora8/bdiv_q_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora8/bdiv_q_1.asm Sat Feb 11 21:52:47 2017 +0100
@@ -0,0 +1,158 @@
+dnl ARM v6 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+dnl This is v6 code, but it runs well only on the v7a Cortex-A8, A9, and A15.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C norm unorm
+C 1176 - -
+C Cortex-A5 9 13
+C Cortex-A7 12 18
+C Cortex-A8 13 14
+C Cortex-A9 9 10 not measured since latest edits
+C Cortex-A15 7 7
+C Cortex-A53 16 24
+
+C Architecture requirements:
+C v5 -
+C v5t clz
+C v5te -
+C v6 umaal
+C v6t2 -
+C v7a -
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`d', `r3')
+define(`di_arg', `sp[0]') C just mpn_pi1_bdiv_q_1
+define(`cnt_arg', `sp[4]') C just mpn_pi1_bdiv_q_1
+
+define(`cy', `r7')
+define(`cnt', `r6')
More information about the gmp-commit
mailing list