[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Feb 11 20:52:49 UTC 2017


details:   /var/hg/gmp/rev/8abfd697f191
changeset: 17253:8abfd697f191
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 11 21:32:11 2017 +0100
description:
(SETCY, RETVAL): Shorten insn sequences.

details:   /var/hg/gmp/rev/e136ed2d4f49
changeset: 17254:e136ed2d4f49
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 11 21:35:28 2017 +0100
description:
Cosmetic changes to minimise edit distance.

details:   /var/hg/gmp/rev/f52324bfe60f
changeset: 17255:f52324bfe60f
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 11 21:52:24 2017 +0100
description:
Move new v6/bdiv_q_1.asm code to v7a/cora8 and include it from cora9 and cora15.

details:   /var/hg/gmp/rev/b6f94acb23c1
changeset: 17256:b6f94acb23c1
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 11 21:52:47 2017 +0100
description:
ChangeLog

diffstat:

 ChangeLog                       |   10 ++-
 mpn/arm/v6/bdiv_q_1.asm         |  164 ----------------------------------------
 mpn/arm/v7a/cora15/bdiv_q_1.asm |   36 ++++++++
 mpn/arm/v7a/cora8/bdiv_q_1.asm  |  158 ++++++++++++++++++++++++++++++++++++++
 mpn/arm/v7a/cora9/bdiv_q_1.asm  |   36 ++++++++
 mpn/arm64/aors_n.asm            |   14 +-
 mpn/arm64/cnd_aors_n.asm        |   20 ++--
 mpn/arm64/xgene1/aors_n.asm     |   13 +-
 mpn/arm64/xgene1/cnd_aors_n.asm |   19 ++--
 mpn/powerpc64/mode64/dive_1.asm |   19 ++-
 10 files changed, 282 insertions(+), 207 deletions(-)

diffs (truncated from 660 to 300 lines):

diff -r 10a86ac116c4 -r b6f94acb23c1 ChangeLog
--- a/ChangeLog	Sat Feb 11 04:14:03 2017 +0100
+++ b/ChangeLog	Sat Feb 11 21:52:47 2017 +0100
@@ -1,5 +1,14 @@
 2017-02-11  Torbjörn Granlund  <tg at gmplib.org>
 
+	* mpn/arm/v7a/cora8/bdiv_q_1.asm: New file, based on v6/dive_1.asm.
+	* mpn/arm/v7a/cora9/bdiv_q_1.asm: New file, grabbing cora8 code.
+	* mpn/arm/v7a/cora15/bdiv_q_1.asm: Likewise.
+
+	* mpn/arm64/aors_n.asm: (SETCY, RETVAL): Shorten insn sequences.
+	* mpn/arm64/cnd_aors_n.asm: Likewise.
+	* mpn/arm64/xgene1/aors_n.asm: Likewise.
+	* mpn/arm64/xgene1/cnd_aors_n.asm: Likewise.
+
 	* mpn/arm/bdiv_q_1.asm: New file.
 
 	* mpn/generic/bdiv_q_1.c (mpn_bdiv_q_1): Remove odd d special case.
@@ -10,7 +19,6 @@
 
 2017-02-10  Torbjörn Granlund  <tg at gmplib.org>
 
-	* mpn/arm/v6/bdiv_q_1.asm: New file, based on dive_1.asm.
 
 	* mpn/arm/arm-defs.m4 (EPILOGUE_cpu): Zap lea_list to avoid repetition.
 
diff -r 10a86ac116c4 -r b6f94acb23c1 mpn/arm/v6/bdiv_q_1.asm
--- a/mpn/arm/v6/bdiv_q_1.asm	Sat Feb 11 04:14:03 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-dnl  ARM v6 mpn_bdiv_q_1
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C               cycles/limb       cycles/limb
-C               norm    unorm    modexact_1c_odd
-C StrongARM	 -	 -
-C XScale	 -	 -
-C Cortex-A7	 ?	 ?
-C Cortex-A8	 ?	 ?
-C Cortex-A9	 9	10		 9
-C Cortex-A15	 7	 7		 7
-
-C Architecture requirements:
-C v5	-
-C v5t	clz
-C v5te	-
-C v6	umaal
-C v6t2	-
-C v7a	-
-
-define(`rp',   `r0')
-define(`up',   `r1')
-define(`n',    `r2')
-define(`d',    `r3')
-define(`di_arg',  `sp[0]')
-define(`cnt_arg', `sp[4]')
-
-define(`cy',  `r7')
-define(`cnt', `r6')
-define(`tnc', `r4')
-
-ASM_START()
-PROLOGUE(mpn_bdiv_q_1)
-	push	{r6,r7,r8,r9,r10,r11}
-
-	tst	d, #1
-
-	rsb	r10, d, #0
-	and	r10, r10, d
-	clz	r10, r10
-	rsb	cnt, r10, #31		C count_trailing_zeros
-	mov	d, d, lsr cnt
-
-C binvert limb
-	LEA(	r10, binvert_limb_table)
-	and	r12, d, #254
-	ldrb	r10, [r10, r12, lsr #1]
-	mul	r12, r10, r10
-	mul	r12, d, r12
-	rsb	r12, r12, r10, lsl #1
-	mul	r10, r12, r12
-	mul	r10, d, r10
-	rsb	r10, r10, r12, lsl #1	C r10 = inverse
-
-	ldr	r11, [up], #4		C up[0]
-	mov	cy, #0
-	rsb	r8, r10, #0		C r8 = -inverse
-	bne	L(norm)
-	b	L(unnorm)
-EPILOGUE()
-
-PROLOGUE(mpn_pi1_bdiv_q_1)
-	push	{r6,r7,r8,r9,r10,r11}
-
-	ldr	cnt, [sp, #28]
-	ldr	r10, [sp, #24]
-	cmp	cnt, #0
-
-	ldr	r11, [up], #4		C up[0]
-	mov	cy, #0
-	rsb	r8, r10, #0		C r8 = -inverse
-
-	bne	L(unnorm)
-
-L(norm):
-	subs	n, n, #1
-	mul	r11, r11, r10
-	beq	L(end)
-
-	ALIGN(16)
-L(top):	ldr	r9, [up], #4
-	mov	r12, #0
-	str	r11, [rp], #4
-	umaal	r12, cy, r11, d
-	mul	r11, r9, r10
-	mla	r11, cy, r8, r11
-	subs	n, n, #1
-	bne	L(top)
-
-L(end):	str	r11, [rp]
-	pop	{r10,r11,r6,r7,r8,r9}
-	bx	r14
-
-L(unnorm):
-	push	{r4,r5}
-	rsb	tnc, cnt, #32
-	mov	r5, r11, lsr cnt
-	subs	n, n, #1
-	beq	L(edx)
-
-	ldr	r12, [up], #4
-	orr	r9, r5, r12, lsl tnc
-	mov	r5, r12, lsr cnt
-	mul	r11, r9, r10
-	subs	n, n, #1
-	beq	L(edu)
-
-	ALIGN(16)
-L(tpu):	ldr	r12, [up], #4
-	orr	r9, r5, r12, lsl tnc
-	mov	r5, r12, lsr cnt
-	mov	r12, #0
-	str	r11, [rp], #4
-	umaal	r12, cy, r11, d
-	mul	r11, r9, r10
-	mla	r11, cy, r8, r11
-	subs	n, n, #1
-	bne	L(tpu)
-
-L(edu):	str	r11, [rp], #4
-	mov	r12, #0
-	umaal	r12, cy, r11, d
-	mul	r11, r5, r10
-	mla	r11, cy, r8, r11
-	str	r11, [rp]
-	pop	{r4,r5,r6,r7,r8,r9,r10,r11}
-	bx	r14
-
-L(edx):	mul	r11, r5, r10
-	str	r11, [rp]
-	pop	{r4,r5,r6,r7,r8,r9,r10,r11}
-	bx	r14
-EPILOGUE()
diff -r 10a86ac116c4 -r b6f94acb23c1 mpn/arm/v7a/cora15/bdiv_q_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/bdiv_q_1.asm	Sat Feb 11 21:52:47 2017 +0100
@@ -0,0 +1,36 @@
+dnl  ARM mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`arm/v7a/cora8/bdiv_q_1.asm')
diff -r 10a86ac116c4 -r b6f94acb23c1 mpn/arm/v7a/cora8/bdiv_q_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora8/bdiv_q_1.asm	Sat Feb 11 21:52:47 2017 +0100
@@ -0,0 +1,158 @@
+dnl  ARM v6 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+dnl  This is v6 code, but it performs well only on the v7a Cortex-A8, A9, and A15.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C               norm   unorm
+C 1176		 -	 -
+C Cortex-A5	 9	13
+C Cortex-A7	12	18
+C Cortex-A8	13	14
+C Cortex-A9	 9	10		not measured since latest edits
+C Cortex-A15	 7	 7
+C Cortex-A53	16	24
+
+C Architecture requirements:
+C v5	-
+C v5t	clz
+C v5te	-
+C v6	umaal
+C v6t2	-
+C v7a	-
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`d',   `r3')
+define(`di_arg',  `sp[0]')		C	just mpn_pi1_bdiv_q_1
+define(`cnt_arg', `sp[4]')		C	just mpn_pi1_bdiv_q_1
+
+define(`cy',  `r7')
+define(`cnt', `r6')


More information about the gmp-commit mailing list