[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Sep 3 23:33:22 UTC 2019


details:   /var/hg/gmp/rev/7e07a3cbec2a
changeset: 17858:7e07a3cbec2a
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Sep 04 01:23:13 2019 +0200
description:
New file.

details:   /var/hg/gmp/rev/4c638f7bebcb
changeset: 17859:4c638f7bebcb
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Sep 04 01:30:07 2019 +0200
description:
New file.

details:   /var/hg/gmp/rev/5b9461face51
changeset: 17860:5b9461face51
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Sep 04 01:30:18 2019 +0200
description:
New file.

diffstat:

 mpn/arm/v6t2/gcd_22.asm |  121 ++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm64/gcd_22.asm    |  115 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/ia64/gcd_11.asm     |  110 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 346 insertions(+), 0 deletions(-)

diffs (truncated from 358 to 300 lines):

diff -r 228585220bca -r 5b9461face51 mpn/arm/v6t2/gcd_22.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6t2/gcd_22.asm	Wed Sep 04 01:30:18 2019 +0200
@@ -0,0 +1,121 @@
+dnl  ARM v6t2 mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/bit (approx)
+C StrongARM	 -
+C XScale	 -
+C Cortex-A5	13.2
+C Cortex-A7	10.3
+C Cortex-A8	 8.4
+C Cortex-A9	 ?
+C Cortex-A12	 9.1
+C Cortex-A15	 8.0
+C Cortex-A17	 ?
+C Cortex-A53	 8.5
+
+
+define(`gp',    `r0')
+
+define(`u1',    `r1')
+define(`u0',    `r2')
+define(`v1',    `r3')
+define(`v0',    `r4')
+
+define(`t0',    `r5')
+define(`t1',    `r6')
+define(`s0',    `r7')
+define(`s1',    `r8')
+define(`cnt',   `r9')
+C mpn_gcd_22 (ARM v6t2): reduce {u1,u0} and {v1,v0} by subtract-and-shift until both high limbs are zero, then call mpn_gcd_11 on the low limbs; the two result limbs are stored at gp.
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+	push	{ r4-r9 }		C save the six registers used as v0, t0, t1, s0, s1, cnt
+
+	ldr	v0, [sp,#24]		C v0 is read from the caller stack: 6 words were just pushed, so this is the word at sp on entry
+
+L(top):	subs	t0, u0, v0		C 0 7
+	beq	L(lowz)			C low limbs are equal, handled separately
+	sbc	t1, u1, v1		C 1 8 t = u - v
+
+	mov	s0, u0			C s = copy of u, becomes the new v when u <= v
+	subs	u0, v0, u0		C 0
+	mov	s1, u1
+	sbcs	u1, v1, u1		C 1 u = v - u, cs = no borrow (u <= v)
+
+L(bck):	rbit	cnt, t0			C 1
+	clz	cnt, cnt		C 2 cnt = number of trailing zero bits in t0
+
+	movcc	u0, t0			C 6
+	movcc	u1, t1			C 4 cc: u = t, so u = |u - v|
+	rsb	r12, cnt, #32		C 3 r12 = 32 - cnt
+	movcs	v0, s0			C 6
+	movcs	v1, s1			C 4 cs: v = s, so v = min(u,v)
+
+	lsr	u0, u0, cnt		C 3
+	lsl	r12, u1, r12		C 4 low bits of u1 that move down into u0
+	lsr	u1, u1, cnt		C 3
+	orr	u0, u0, r12		C 5 {u1,u0} >>= cnt
+
+	orrs	r12, u1, v1		C r12 = 0 only when both high limbs are zero
+	bne	L(top)
+
+
+	str	r12, [gp,#4]		C high result limb = 0 (r12 is zero on this path)
+
+	mov	r8, gp			C result pointer is needed after the call; NOTE(review): relies on mpn_gcd_11 preserving r8 and r9
+	mov	r0, u0			C pass 1st argument
+	mov	r1, v0			C pass 2nd argument
+	mov	r9, r14			C preserve link register
+	bl	mpn_gcd_11
+	str	r0, [r8,#0]		C low result limb = value returned in r0
+	mov	r14, r9			C restore link register
+	pop	{ r4-r9 }
+	bx	r14
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	subs	t0, u1, v1		C t0 = u1 - v1
+	beq	L(end)			C high limbs equal too, so u = v
+	mov	t1, #0
+	mov	s0, u0
+	mov	s1, u1			C s = copy of u, as in the main loop
+	subs	u0, v1, u1		C u0 = v1 - u1, cs = (u1 <= v1)
+	mov	u1, #0			C the difference is carried on as a single limb
+	b	L(bck)
+
+L(end):	str	v0, [gp,#0]		C u = v here, so v is stored as the result
+	str	v1, [gp,#4]
+	pop	{ r4-r9 }
+	bx	r14
+EPILOGUE()
diff -r 228585220bca -r 5b9461face51 mpn/arm64/gcd_22.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/gcd_22.asm	Wed Sep 04 01:30:18 2019 +0200
@@ -0,0 +1,115 @@
+dnl  ARM v8a mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+changecom(blah)
+
+C	     cycles/bit (approx)
+C Cortex-A35	 ?
+C Cortex-A53	 7.26
+C Cortex-A55	 ?
+C Cortex-A57	 ?
+C Cortex-A72	 5.72
+C Cortex-A73	 7.12
+C Cortex-A75	 ?
+C Cortex-A76	 ?
+C Cortex-A77	 ?
+
+
+define(`u1',    `x0')
+define(`u0',    `x1')
+define(`v1',    `x2')
+define(`v0',    `x3')
+
+define(`t0',    `x5')
+define(`t1',    `x6')
+define(`s0',    `x7')
+define(`s1',    `x8')
+define(`cnt',   `x9')
+define(`tnc',   `x10')
+C mpn_gcd_22 (ARM64): u is passed in x0 (u1) and x1 (u0), v in x2 (v1) and x3 (v0); the result is returned in x0 (low limb) and x1 (high limb).
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+
+	ALIGN(16)
+L(top):	subs	t0, u0, v0		C 0 6
+	cbz	t0, L(lowz)		C low limbs are equal, handled separately
+	sbc	t1, u1, v1		C 1 7 t = u - v
+
+	subs	s0, v0, u0		C 0
+	sbcs	s1, v1, u1		C 1 s = v - u, cs = (u < v)
+
+L(bck):	rbit	cnt, t0			C 1
+	clz	cnt, cnt		C 2 cnt = number of trailing zero bits in t0
+	csel	s0, t0, s0, cc		C 2
+	csel	s1, t1, s1, cc		C 2 u = |u - v|
+	sub	tnc, xzr, cnt		C 3 tnc = -cnt, used as 64 - cnt by the register shift below
+	csel	v0, v0, u0, cc		C 2
+	csel	v1, v1, u1, cc		C 2 v = min(u,v)
+
+	lsr	u0, s0, cnt		C 3
+	lsl	x14, s1, tnc		C 4 NOTE(review): only correct when cnt != 0 or s1 = 0 -- confirm operands are odd
+	lsr	u1, s1, cnt		C 3
+	orr	u0, u0, x14		C 5 {u1,u0} = {s1,s0} >> cnt
+
+	orr	x11, u1, v1
+	cbnz	x11, L(top)		C loop while either high limb is nonzero
+
+
+	subs	x4, u0, v0		C			0
+	b.eq	L(end1)			C
+
+	ALIGN(16)
+L(top1):rbit	x12, x4			C			1,5
+	clz	x12, x12		C			2
+	csneg	x4, x4, x4, cs		C v = abs(u-v), even	1
+	csel	u0, v0, u0, cs		C u = min(u,v)		1
+	lsr	v0, x4, x12		C			3
+	subs	x4, u0, v0		C			4
+	b.ne	L(top1)			C
+L(end1):mov	x0, u0			C single-limb result in the low limb
+	mov	x1, #0			C high result limb = 0
+	ret
+
+L(lowz):C We come here when v0 - u0 = 0
+	C 1. If v1 - u1 = 0, then gcd is u = v.
+	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+	subs	t0, u1, v1		C t0 = u1 - v1
+	b.eq	L(end)			C high limbs equal too, so u = v
+	mov	t1, #0
+	subs	s0, v1, u1		C s0 = v1 - u1, cs = (u1 <= v1)
+	mov	s1, #0			C the difference is carried on as a single limb
+	b	L(bck)			C FIXME: make conditional
+
+L(end):	mov	x0, v0			C u = v here, so v is returned
+	mov	x1, v1
+	ret
+EPILOGUE()
diff -r 228585220bca -r 5b9461face51 mpn/ia64/gcd_11.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/ia64/gcd_11.asm	Wed Sep 04 01:30:18 2019 +0200
@@ -0,0 +1,110 @@
+dnl  Itanium-2 mpn_gcd_11
+
+dnl  Copyright 2002-2005, 2012, 2013, 2015, 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/bitpair (1x1 gcd)
+C Itanium:       ?
+C Itanium 2:     4.5
+
+
+ASM_START()
+
+C ctz_table: entry 0 is MAXSHIFT; entry i (1 <= i <= MASK) is m4_count_trailing_zeros(i)-1.
+C NOTE(review): the -1 makes each entry one less than the trailing zero count of its index -- confirm against the lookup code, which is not visible here.
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+	.rodata
+	ALIGN(m4_lshift(1,MAXSHIFT))	C align table to allow using dep
+ctz_table:
+	data1	MAXSHIFT
+forloop(i,1,MASK,
+`	data1	m4_count_trailing_zeros(i)-1
+')


More information about the gmp-commit mailing list