[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Feb 11 02:40:48 UTC 2017


details:   /var/hg/gmp/rev/67ec2317e891
changeset: 17244:67ec2317e891
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Feb 10 18:49:37 2017 +0100
description:
(EPILOGUE_cpu): Zap lea_list to avoid repetition.

details:   /var/hg/gmp/rev/62969fff192e
changeset: 17245:62969fff192e
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Feb 10 18:50:32 2017 +0100
description:
New file, based on dive_1.asm.

details:   /var/hg/gmp/rev/4ab74d8ba4d5
changeset: 17246:4ab74d8ba4d5
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 11 03:40:32 2017 +0100
description:
New file.

diffstat:

 mpn/arm/arm-defs.m4     |    3 +-
 mpn/arm/v6/bdiv_q_1.asm |  164 ++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/arm64/bdiv_q_1.asm  |  128 +++++++++++++++++++++++++++++++++++++
 3 files changed, 294 insertions(+), 1 deletions(-)

diffs (truncated from 312 to 300 lines):

diff -r ca8b7ecb35ee -r 4ab74d8ba4d5 mpn/arm/arm-defs.m4
--- a/mpn/arm/arm-defs.m4	Fri Feb 10 02:06:27 2017 +0100
+++ b/mpn/arm/arm-defs.m4	Sat Feb 11 03:40:32 2017 +0100
@@ -89,6 +89,7 @@
 
 define(`EPILOGUE_cpu',
 `lea_list
-	SIZE(`$1',.-`$1')')
+	SIZE(`$1',.-`$1')'
+`define(`lea_list', `')')
 
 divert
diff -r ca8b7ecb35ee -r 4ab74d8ba4d5 mpn/arm/v6/bdiv_q_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6/bdiv_q_1.asm	Sat Feb 11 03:40:32 2017 +0100
@@ -0,0 +1,164 @@
+dnl  ARM v6 mpn_bdiv_q_1
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb       cycles/limb
+C               norm    unorm    modexact_1c_odd
+C StrongARM	 -	 -
+C XScale	 -	 -
+C Cortex-A7	 ?	 ?
+C Cortex-A8	 ?	 ?
+C Cortex-A9	 9	10		 9
+C Cortex-A15	 7	 7		 7
+
+C Architecture requirements:
+C v5	-
+C v5t	clz
+C v5te	-
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`rp',   `r0')
+define(`up',   `r1')
+define(`n',    `r2')
+define(`d',    `r3')
+define(`di_arg',  `sp[0]')
+define(`cnt_arg', `sp[4]')
+
+define(`cy',  `r7')
+define(`cnt', `r6')
+define(`tnc', `r4')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+	push	{r6,r7,r8,r9,r10,r11}
+
+	tst	d, #1
+
+	rsb	r10, d, #0
+	and	r10, r10, d
+	clz	r10, r10
+	rsb	cnt, r10, #31		C count_trailing_zeros
+	mov	d, d, lsr cnt
+
+C binvert limb
+	LEA(	r10, binvert_limb_table)
+	and	r12, d, #254
+	ldrb	r10, [r10, r12, lsr #1]
+	mul	r12, r10, r10
+	mul	r12, d, r12
+	rsb	r12, r12, r10, lsl #1
+	mul	r10, r12, r12
+	mul	r10, d, r10
+	rsb	r10, r10, r12, lsl #1	C r10 = inverse
+
+	ldr	r11, [up], #4		C up[0]
+	mov	cy, #0
+	rsb	r8, r10, #0		C r8 = -inverse
+	bne	L(norm)
+	b	L(unnorm)
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	push	{r6,r7,r8,r9,r10,r11}
+
+	ldr	cnt, [sp, #28]
+	ldr	r10, [sp, #24]
+	cmp	cnt, #0
+
+	ldr	r11, [up], #4		C up[0]
+	mov	cy, #0
+	rsb	r8, r10, #0		C r8 = -inverse
+
+	bne	L(unnorm)
+
+L(norm):
+	subs	n, n, #1
+	mul	r11, r11, r10
+	beq	L(end)
+
+	ALIGN(16)
+L(top):	ldr	r9, [up], #4
+	mov	r12, #0
+	str	r11, [rp], #4
+	umaal	r12, cy, r11, d
+	mul	r11, r9, r10
+	mla	r11, cy, r8, r11
+	subs	n, n, #1
+	bne	L(top)
+
+L(end):	str	r11, [rp]
+	pop	{r10,r11,r6,r7,r8,r9}
+	bx	r14
+
+L(unnorm):
+	push	{r4,r5}
+	rsb	tnc, cnt, #32
+	mov	r5, r11, lsr cnt
+	subs	n, n, #1
+	beq	L(edx)
+
+	ldr	r12, [up], #4
+	orr	r9, r5, r12, lsl tnc
+	mov	r5, r12, lsr cnt
+	mul	r11, r9, r10
+	subs	n, n, #1
+	beq	L(edu)
+
+	ALIGN(16)
+L(tpu):	ldr	r12, [up], #4
+	orr	r9, r5, r12, lsl tnc
+	mov	r5, r12, lsr cnt
+	mov	r12, #0
+	str	r11, [rp], #4
+	umaal	r12, cy, r11, d
+	mul	r11, r9, r10
+	mla	r11, cy, r8, r11
+	subs	n, n, #1
+	bne	L(tpu)
+
+L(edu):	str	r11, [rp], #4
+	mov	r12, #0
+	umaal	r12, cy, r11, d
+	mul	r11, r5, r10
+	mla	r11, cy, r8, r11
+	str	r11, [rp]
+	pop	{r4,r5,r6,r7,r8,r9,r10,r11}
+	bx	r14
+
+L(edx):	mul	r11, r5, r10
+	str	r11, [rp]
+	pop	{r4,r5,r6,r7,r8,r9,r10,r11}
+	bx	r14
+EPILOGUE()
diff -r ca8b7ecb35ee -r 4ab74d8ba4d5 mpn/arm64/bdiv_q_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/bdiv_q_1.asm	Sat Feb 11 03:40:32 2017 +0100
@@ -0,0 +1,128 @@
+dnl  ARM64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C               norm   unorm
+C Cortex-A53	12	15
+C Cortex-A57	12	12
+C Cortex-A72
+C Cortex-A73
+C X-Gene	11	11
+
+C TODO
+C  * Scheduling of umulh later in the unorm loop brings A53 time to 12 c/l.
+C    Unfortunately, that requires software pipelining.
+
+define(`rp',  `x0')
+define(`up',  `x1')
+define(`n',   `x2')
+define(`d',   `x3')
+define(`di',  `x4')		C	just mpn_pi1_bdiv_q_1
+define(`cnt', `x5')		C	just mpn_pi1_bdiv_q_1
+
+define(`cy',  `r7')
+define(`tnc', `x8')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+
+	rbit	x6, d
+	clz	cnt, x6
+	lsr	d, d, cnt
+
+ifdef(`PIC',`
+	adrp	x7, :got:__gmp_binvert_limb_table
+	ubfx	x6, d, 1, 7
+	ldr	x7, [x7, #:got_lo12:__gmp_binvert_limb_table]
+',`
+	adrp	x7, __gmp_binvert_limb_table
+	ubfx	x6, d, 1, 7
+	add	x7, x7, :lo12:__gmp_binvert_limb_table
+')
+	ldrb	w6, [x7, x6]
+	ubfiz	x7, x6, 1, 8
+	umull	x6, w6, w6
+	msub	x6, x6, d, x7
+	lsl	x7, x6, 1
+	mul	x6, x6, x6
+	msub	x6, x6, d, x7
+	lsl	x7, x6, 1
+	mul	x6, x6, x6
+	msub	di, x6, d, x7
+
+	b	mpn_pi1_bdiv_q_1
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+	add	n, n, #-1
+	subs	x6, x6, x6		C clear r6 and C flag
+	ldr	x9, [up],#8
+	cbz	cnt, L(norm)
+
+L(unorm):
+	lsr	x12, x9, cnt
+	cbz	n, L(eu1)
+	sub	tnc, xzr, cnt
+
+L(tpu):	ldr	x9, [up],#8
+	lsl	x7, x9, tnc
+	orr	x7, x7, x12
+	sbcs	x6, x7, x6
+	mul	x7, x6, di
+	str	x7, [rp],#8
+	lsr	x12, x9, cnt
+	umulh	x6, x7, d
+	add	n, n, #-1
+	cbnz	n, L(tpu)
+
+L(eu1):	sbcs	x6, x12, x6
+	mul	x6, x6, di
+	str	x6, [rp]
+	ret
+
+L(norm):
+	mul	x5, x9, di
+	str	x5, [rp],#8


More information about the gmp-commit mailing list