[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Aug 7 15:22:00 UTC 2019


details:   /var/hg/gmp/rev/fc9c25c468d8
changeset: 17791:fc9c25c468d8
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Aug 07 17:17:55 2019 +0200
description:
Add gcd_11.

details:   /var/hg/gmp/rev/b6a7b65c8596
changeset: 17792:b6a7b65c8596
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Aug 07 17:21:10 2019 +0200
description:
Provide many gcd_11.

details:   /var/hg/gmp/rev/fc77a3dc184b
changeset: 17793:fc77a3dc184b
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Aug 07 17:21:54 2019 +0200
description:
ChangeLog

diffstat:

 ChangeLog                          |   49 ++++++++++++++++
 mpn/alpha/ev67/gcd_11.asm          |   79 +++++++++++++++++++++++++++
 mpn/arm/v5/gcd_11.asm              |   70 ++++++++++++++++++++++++
 mpn/arm/v6t2/gcd_11.asm            |   68 +++++++++++++++++++++++
 mpn/arm64/gcd_11.asm               |   70 ++++++++++++++++++++++++
 mpn/asm-defs.m4                    |    1 +
 mpn/powerpc64/mode64/gcd_11.asm    |   77 ++++++++++++++++++++++++++
 mpn/powerpc64/mode64/p7/gcd_11.asm |   67 +++++++++++++++++++++++
 mpn/powerpc64/mode64/p9/gcd_11.asm |   64 ++++++++++++++++++++++
 mpn/sparc64/gcd_11.asm             |   88 ++++++++++++++++++++++++++++++
 mpn/x86/k7/gcd_11.asm              |  107 +++++++++++++++++++++++++++++++++++++
 mpn/x86/p6/gcd_11.asm              |   83 ++++++++++++++++++++++++++++
 mpn/x86_64/bd2/gcd_11.asm          |   93 ++++++++++++++++++++++++++++++++
 mpn/x86_64/core2/gcd_11.asm        |   93 ++++++++++++++++++++++++++++++++
 mpn/x86_64/gcd_11.asm              |  102 +++++++++++++++++++++++++++++++++++
 15 files changed, 1111 insertions(+), 0 deletions(-)

diffs (truncated from 1193 to 300 lines):

diff -r cdf9e11a028b -r fc77a3dc184b ChangeLog
--- a/ChangeLog	Tue Aug 06 17:16:07 2019 +0200
+++ b/ChangeLog	Wed Aug 07 17:21:54 2019 +0200
@@ -1,3 +1,19 @@
+2019-08-07  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/alpha/ev67/gcd_11.asm: New file, mostly extracted from gcd_1.asm.
+	* mpn/arm/v5/gcd_11.asm: Likewise.
+	* mpn/arm/v6t2/gcd_11.asm: Likewise.
+	* mpn/arm64/gcd_11.asm: Likewise.
+	* mpn/powerpc64/mode64/gcd_11.asm: Likewise.
+	* mpn/powerpc64/mode64/p7/gcd_11.asm: Likewise.
+	* mpn/powerpc64/mode64/p9/gcd_11.asm: Likewise.
+	* mpn/sparc64/gcd_11.asm: Likewise.
+	* mpn/x86/k7/gcd_11.asm: Likewise.
+	* mpn/x86/p6/gcd_11.asm: Likewise.
+	* mpn/x86_64/bd2/gcd_11.asm: Likewise.
+	* mpn/x86_64/core2/gcd_11.asm: Likewise.
+	* mpn/x86_64/gcd_11.asm: Likewise.
+
 2019-08-06  Niels Möller  <nisse at lysator.liu.se>
 
 	* tune/common.c (speed_mpn_gcd_11): New function.
@@ -12,12 +28,45 @@
 	* gmp-h.in (mpn_gcd_11): Declare it.
 	* mpn/generic/gcd_1.c (mpn_gcd_1): Adapted to call mpn_gcd_11.
 
+2019-08-04  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bt2/gcd_1.asm: New grabber file.
+	* mpn/x86_64/zen/gcd_1.asm: Grab from "bd2" directory, was "core2".
+
+2019-08-02  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bd2/gcd_1.asm: New file.
+
+2019-08-01  Torbjörn Granlund  <tg at gmplib.org>
+
+	* tests/mpf/t-conv.c: Add several more fixed test cases.
+
+	* mpf/set_str.c: Ignore leading zeros including ones after radix point
+	to avoid invalid output formats.
+
+2019-07-30  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/powerpc64/mode64/p9/gcd_1.asm: New file.
+
 2019-07-30  Niels Möller  <nisse at lysator.liu.se>
 
 	From Seth Troisi:
 	* doc/gmp.texi (Jacobi Symbol): Update algorithm documentation.
 	* tests/mpz/t-jac.c: Comment update.
 
+2019-07-13  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac (arm): Generalise arm a72 pattern to match a73...a79.
+
+2019-07-08  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm/arm-defs.m4 (ASM_START): Rewrite (fix broken error handling).
+
+2019-07-02  Torbjörn Granlund  <tg at gmplib.org>
+
+	* acinclude.m4 (GMP_C_DOUBLE_FORMAT): Compile conftest.c to executable
+	in order to trigger final compile in case of LTO.
+
 2019-06-17  Torbjörn Granlund  <tg at gmplib.org>
 
 	* config.guess: Work around upstream configfsf.guess's regression wrt
diff -r cdf9e11a028b -r fc77a3dc184b mpn/alpha/ev67/gcd_11.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/alpha/ev67/gcd_11.asm	Wed Aug 07 17:21:54 2019 +0200
@@ -0,0 +1,79 @@
+dnl  Alpha ev67 mpn_gcd_11 -- 1x1 greatest common divisor.
+
+dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C ev67: 3.4 cycles/bitpair for 1x1 part
+
+
+C mp_limb_t mpn_gcd_11 (mp_limb_t x, mp_limb_t y);
+C
+C The algorithm is to change x,y to abs(x-y),min(x,y) and strip trailing
+C zeros from abs(x-y) to maintain x and y both odd.
+C
+C The trailing zeros are calculated from just x-y, since in twos-complement
+C there's the same number of trailing zeros on d or -d.  This means the cttz
+C runs in parallel with abs(x-y).
+C
+C The loop takes 5 cycles, which at 0.68 iterations per bit for two N-bit
+C operands with this algorithm gives the measured 3.4 cycles/bitpair.
+C
+C The slottings shown are for SVR4 style systems, Unicos differs in the
+C initial gp setup and the LEA.
+
+
+ASM_START()
+PROLOGUE(mpn_gcd_11)
+	mov	r16, r0			C     x = incoming r16
+	mov	r17, r1			C     y = incoming r17
+
+	ALIGN(16)
+L(top):	subq	r0, r1, r7		C l0  d = x - y
+	cmpult	r0, r1, r16		C u0  r16 = (x < y), unsigned
+
+	subq	r1, r0, r4		C l0  new_x = y - x
+	cttz	r7, r8			C U0  d twos
+
+	cmoveq	r16, r7, r4		C l0  new_x = d if x>=y
+	cmovne	r16, r0, r1		C u0  y = x if x<y
+	unop				C l   \ force cmoveq into l0
+	unop				C u   /
+
+	C				C cmoveq2 L0, cmovne2 U0
+
+	srl	r4, r8, r0		C U0  x = new_x >> twos
+	bne	r7, L(top)		C U1  stop when d==0
+
+
+L(end):	mov	r1, r0			C U0  return y (loop exits when x - y == 0)
+	ret	r31, (r26), 1		C L0
+EPILOGUE()
+ASM_END()
diff -r cdf9e11a028b -r fc77a3dc184b mpn/arm/v5/gcd_11.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/gcd_11.asm	Wed Aug 07 17:21:54 2019 +0200
@@ -0,0 +1,70 @@
+dnl  ARM v5 mpn_gcd_11.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 ?
+C Cortex-A5	 6.45	obsolete
+C Cortex-A7	 6.41	obsolete
+C Cortex-A8	 5.0	obsolete
+C Cortex-A9	 5.9	obsolete
+C Cortex-A15	 4.40	obsolete
+C Cortex-A17	 5.68	obsolete
+C Cortex-A53	 4.37	obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0',    `r0')
+define(`v0',    `r1')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+	subs	r3, u0, v0	C r3 = u - v, set flags	0
+	beq	L(end)		C done if u == v
+
+	ALIGN(16)
+L(top):	sub	r2, v0, u0	C r2 = v - u		0,5
+	and	r12, r2, r3	C lowest set bit of u-v	1
+	clz	r12, r12	C			2
+	rsb	r12, r12, #31	C r12 = ctz(u-v)	3
+	rsbcc	r3, r3, #0	C v = abs(u-v), even	1
+	movcs	u0, v0		C u = min(u,v)		1
+	lsr	v0, r3, r12	C v = abs(u-v) >> ctz	4
+	subs	r3, u0, v0	C r3 = u - v, set flags	5
+	bne	L(top)		C loop until u == v
+
+L(end):	bx	lr		C return, result in u0
+EPILOGUE()
diff -r cdf9e11a028b -r fc77a3dc184b mpn/arm/v6t2/gcd_11.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6t2/gcd_11.asm	Wed Aug 07 17:21:54 2019 +0200
@@ -0,0 +1,68 @@
+dnl  ARM v6t2 mpn_gcd_11.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
+dnl  Granlund.
+
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 -
+C Cortex-A5	 5.75	obsolete
+C Cortex-A7	 6.38	obsolete
+C Cortex-A8	 5.0	obsolete
+C Cortex-A9	 5.3	obsolete
+C Cortex-A15	 2.92	obsolete
+C Cortex-A17	 5.63	obsolete
+C Cortex-A53	 4.25	obsolete
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+define(`u0',    `r0')
+define(`v0',    `r1')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_11)
+	subs	r3, u0, v0	C r3 = u - v, set flags	0
+	beq	L(end)		C done if u == v
+
+	ALIGN(16)
+L(top):	rbit	r12, r3		C bit-reverse u-v	1,5
+	clz	r12, r12	C r12 = ctz(u-v)	2
+	rsbcc	r3, r3, #0	C v = abs(u-v), even	1
+	movcs	u0, v0		C u = min(u,v)		1
+	lsr	v0, r3, r12	C v = abs(u-v) >> ctz	3
+	subs	r3, u0, v0	C r3 = u - v, set flags	4
+	bne	L(top)		C loop until u == v
+
+L(end):	bx	lr		C return, result in u0
+EPILOGUE()
diff -r cdf9e11a028b -r fc77a3dc184b mpn/arm64/gcd_11.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000


More information about the gmp-commit mailing list