[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Aug 7 15:22:00 UTC 2019
details: /var/hg/gmp/rev/fc9c25c468d8
changeset: 17791:fc9c25c468d8
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Aug 07 17:17:55 2019 +0200
description:
Add gcd_11.
details: /var/hg/gmp/rev/b6a7b65c8596
changeset: 17792:b6a7b65c8596
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Aug 07 17:21:10 2019 +0200
description:
Provide many gcd_11.
details: /var/hg/gmp/rev/fc77a3dc184b
changeset: 17793:fc77a3dc184b
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Aug 07 17:21:54 2019 +0200
description:
ChangeLog
diffstat:
ChangeLog | 49 ++++++++++++++++
mpn/alpha/ev67/gcd_11.asm | 79 +++++++++++++++++++++++++++
mpn/arm/v5/gcd_11.asm | 70 ++++++++++++++++++++++++
mpn/arm/v6t2/gcd_11.asm | 68 +++++++++++++++++++++++
mpn/arm64/gcd_11.asm | 70 ++++++++++++++++++++++++
mpn/asm-defs.m4 | 1 +
mpn/powerpc64/mode64/gcd_11.asm | 77 ++++++++++++++++++++++++++
mpn/powerpc64/mode64/p7/gcd_11.asm | 67 +++++++++++++++++++++++
mpn/powerpc64/mode64/p9/gcd_11.asm | 64 ++++++++++++++++++++++
mpn/sparc64/gcd_11.asm | 88 ++++++++++++++++++++++++++++++
mpn/x86/k7/gcd_11.asm | 107 +++++++++++++++++++++++++++++++++++++
mpn/x86/p6/gcd_11.asm | 83 ++++++++++++++++++++++++++++
mpn/x86_64/bd2/gcd_11.asm | 93 ++++++++++++++++++++++++++++++++
mpn/x86_64/core2/gcd_11.asm | 93 ++++++++++++++++++++++++++++++++
mpn/x86_64/gcd_11.asm | 102 +++++++++++++++++++++++++++++++++++
15 files changed, 1111 insertions(+), 0 deletions(-)
diffs (truncated from 1193 to 300 lines):
diff -r cdf9e11a028b -r fc77a3dc184b ChangeLog
--- a/ChangeLog Tue Aug 06 17:16:07 2019 +0200
+++ b/ChangeLog Wed Aug 07 17:21:54 2019 +0200
@@ -1,3 +1,19 @@
+2019-08-07 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/alpha/ev67/gcd_11.asm: New file, mostly extracted from gcd_1.asm.
+ * mpn/arm/v5/gcd_11.asm: Likewise.
+ * mpn/arm/v6t2/gcd_11.asm: Likewise.
+ * mpn/arm64/gcd_11.asm: Likewise.
+ * mpn/powerpc64/mode64/gcd_11.asm: Likewise.
+ * mpn/powerpc64/mode64/p7/gcd_11.asm: Likewise.
+ * mpn/powerpc64/mode64/p9/gcd_11.asm: Likewise.
+ * mpn/sparc64/gcd_11.asm: Likewise.
+ * mpn/x86/k7/gcd_11.asm: Likewise.
+ * mpn/x86/p6/gcd_11.asm: Likewise.
+ * mpn/x86_64/bd2/gcd_11.asm: Likewise.
+ * mpn/x86_64/core2/gcd_11.asm: Likewise.
+ * mpn/x86_64/gcd_11.asm: Likewise.
+
2019-08-06 Niels Möller <nisse at lysator.liu.se>
* tune/common.c (speed_mpn_gcd_11): New function.
@@ -12,12 +28,45 @@
* gmp-h.in (mpn_gcd_11): Declare it.
* mpn/generic/gcd_1.c (mpn_gcd_1): Adapted to call mpn_gcd_11.
+2019-08-04 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bt2/gcd_1.asm: New grabber file.
+ * mpn/x86_64/zen/gcd_1.asm: Grab from "bd2" directory, was "core2".
+
+2019-08-02 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bd2/gcd_1.asm: New file.
+
+2019-08-01 Torbjörn Granlund <tg at gmplib.org>
+
+ * tests/mpf/t-conv.c: Add several more fixed test cases.
+
+ * mpf/set_str.c: Ignore leading zeros including ones after radix point
+ to avoid invalid output formats.
+
+2019-07-30 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/powerpc64/mode64/p9/gcd_1.asm: New file.
+
2019-07-30 Niels Möller <nisse at lysator.liu.se>
From Seth Troisi:
* doc/gmp.texi (Jacobi Symbol): Update algorithm documentation.
* tests/mpz/t-jac.c: Comment update.
+2019-07-13 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac (arm): Generalise arm a72 pattern to match a73...a79.
+
+2019-07-08 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm/arm-defs.m4 (ASM_START): Rewrite (fix broken error handling).
+
+2019-07-02 Torbjörn Granlund <tg at gmplib.org>
+
+ * acinclude.m4 (GMP_C_DOUBLE_FORMAT): Compile conftest.c to executable
+ in order to trigger final compile in case of LTO.
+
2019-06-17 Torbjörn Granlund <tg at gmplib.org>
* config.guess: Work around upstream configfsf.guess's regression wrt
diff -r cdf9e11a028b -r fc77a3dc184b mpn/alpha/ev67/gcd_11.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/alpha/ev67/gcd_11.asm Wed Aug 07 17:21:54 2019 +0200
@@ -0,0 +1,79 @@
+dnl Alpha ev67 mpn_gcd_11 -- 1x1 greatest common divisor.
+
+dnl Copyright 2003, 2004 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C ev67: 3.4 cycles/bitpair for 1x1 part
+
+
+C mp_limb_t mpn_gcd_11 (mp_limb_t x, mp_limb_t y);
+C
+C In the 1x1 part, the algorithm is to change x,y to abs(x-y),min(x,y) and
+C strip trailing zeros from abs(x-y) to maintain x and y both odd.
+C
+C The trailing zeros are calculated from just x-y, since in twos-complement
+C there's the same number of trailing zeros on d or -d. This means the cttz
+C runs in parallel with abs(x-y).
+C
+C The loop takes 5 cycles, and at 0.68 iterations per bit for two N-bit
+C operands with this algorithm gives the measured 3.4 c/l.
+C
+C The slottings shown are for SVR4 style systems, Unicos differs in the
+C initial gp setup and the LEA.
+
+
+ASM_START()
+PROLOGUE(mpn_gcd_11)
+ mov r16, r0 C x = r16 (presumably the first argument)
+ mov r17, r1 C y = r17 (presumably the second argument)
+ C Loop: x,y become abs(x-y) >> twos, min(x,y); exit once x-y is zero
+ ALIGN(16)
+L(top): subq r0, r1, r7 C l0 d = x - y
+ cmpult r0, r1, r16 C u0 r16 = (x < y), unsigned
+
+ subq r1, r0, r4 C l0 new_x = y - x
+ cttz r7, r8 C U0 d twos
+
+ cmoveq r16, r7, r4 C l0 new_x = d if x>=y
+ cmovne r16, r0, r1 C u0 y = x if x<y
+ unop C l \ force cmoveq into l0
+ unop C u /
+
+ C C cmoveq2 L0, cmovne2 U0
+
+ srl r4, r8, r0 C U0 x = new_x >> twos
+ bne r7, L(top) C U1 stop when d==0
+
+
+L(end): mov r1, r0 C U0 return y (no shift is applied here)
+ ret r31, (r26), 1 C L0
+EPILOGUE()
+ASM_END()
diff -r cdf9e11a028b -r fc77a3dc184b mpn/arm/v5/gcd_11.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/gcd_11.asm Wed Aug 07 17:21:54 2019 +0200
@@ -0,0 +1,70 @@
+dnl ARM v5 mpn_gcd_11.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjörn
+dnl Granlund.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale ?
+C Cortex-A5 6.45 obsolete
+C Cortex-A7 6.41 obsolete
+C Cortex-A8 5.0 obsolete
+C Cortex-A9 5.9 obsolete
+C Cortex-A15 4.40 obsolete
+C Cortex-A17 5.68 obsolete
+C Cortex-A53 4.37 obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0', `r0')
+define(`v0', `r1')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+ subs r3, u0, v0 C r3 = u - v, sets flags 0
+ beq L(end) C u == v already, skip the loop
+ C Loop: u,v become min(u,v), abs(u-v) >> twos; exit once u == v
+ ALIGN(16)
+L(top): sub r2, v0, u0 C r2 = v - u 0,5
+ and r12, r2, r3 C (v-u) & (u-v) isolates lowest set bit 1
+ clz r12, r12 C leading zeros above that bit 2
+ rsb r12, r12, #31 C r12 = 31 - clz = trailing zeros of u-v 3
+ rsbcc r3, r3, #0 C v = abs(u-v), even 1
+ movcs u0, v0 C u = min(u,v) 1
+ lsr v0, r3, r12 C v = abs(u-v) >> twos 4
+ subs r3, u0, v0 C r3 = u - v, sets flags for next pass 5
+ bne L(top) C repeat until u == v
+
+L(end): bx lr C return; u0 and v0 are equal here
+EPILOGUE()
diff -r cdf9e11a028b -r fc77a3dc184b mpn/arm/v6t2/gcd_11.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6t2/gcd_11.asm Wed Aug 07 17:21:54 2019 +0200
@@ -0,0 +1,68 @@
+dnl ARM v6t2 mpn_gcd_11.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjörn
+dnl Granlund.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM -
+C XScale -
+C Cortex-A5 5.75 obsolete
+C Cortex-A7 6.38 obsolete
+C Cortex-A8 5.0 obsolete
+C Cortex-A9 5.3 obsolete
+C Cortex-A15 2.92 obsolete
+C Cortex-A17 5.63 obsolete
+C Cortex-A53 4.25 obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0', `r0')
+define(`v0', `r1')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+ subs r3, u0, v0 C r3 = u - v, sets flags 0
+ beq L(end) C u == v already, skip the loop
+ C Loop: u,v become min(u,v), abs(u-v) >> twos; exit once u == v
+ ALIGN(16)
+L(top): rbit r12, r3 C bit-reverse u-v 1,5
+ clz r12, r12 C r12 = trailing zeros of u-v 2
+ rsbcc r3, r3, #0 C v = abs(u-v), even 1
+ movcs u0, v0 C u = min(u,v) 1
+ lsr v0, r3, r12 C v = abs(u-v) >> twos 3
+ subs r3, u0, v0 C r3 = u - v, sets flags for next pass 4
+ bne L(top) C repeat until u == v
+
+L(end): bx lr C return; u0 and v0 are equal here
+EPILOGUE()
diff -r cdf9e11a028b -r fc77a3dc184b mpn/arm64/gcd_11.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
More information about the gmp-commit
mailing list