[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Feb 23 03:25:20 UTC 2017


details:   /var/hg/gmp/rev/b742ff45f520
changeset: 17291:b742ff45f520
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Feb 22 23:44:59 2017 +0100
description:
Provide common aorsmul_1 which runs well on A53, A57, and X-Gene.

details:   /var/hg/gmp/rev/1084c3789acf
changeset: 17292:1084c3789acf
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Feb 23 04:24:24 2017 +0100
description:
Provide silvermont gmp-mparam.h.

details:   /var/hg/gmp/rev/dd1026a8cdfe
changeset: 17293:dd1026a8cdfe
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Feb 23 04:24:46 2017 +0100
description:
ChangeLog

diffstat:

 ChangeLog                          |   33 +++++++
 mpn/arm64/aorsmul_1.asm            |   64 ++++++++-----
 mpn/arm64/xgene1/aorsmul_1.asm     |  125 ---------------------------
 mpn/x86_64/silvermont/gmp-mparam.h |  170 +++++++++++++++++++++++++++++++++++++
 4 files changed, 244 insertions(+), 148 deletions(-)

diffs (truncated from 462 to 300 lines):

diff -r 2567c1119e14 -r dd1026a8cdfe ChangeLog
--- a/ChangeLog	Wed Feb 22 02:27:55 2017 +0100
+++ b/ChangeLog	Thu Feb 23 04:24:46 2017 +0100
@@ -1,3 +1,36 @@
+2017-02-23  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/silvermont/gmp-mparam.h: New file.
+
+2017-02-22  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm64/aorsmul_1.asm: Rewrite.
+
+	* mpn/arm64/lshiftc.asm: New file.
+
+2017-02-21  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm64/lshift.asm: Rewrite.
+	* mpn/arm64/rshift.asm: Rewrite.
+
+	* mpn/arm64/rsh1aors_n.asm: New file.
+
+2017-02-19  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm64/mul_1.asm: Rewrite.
+	* mpn/arm64/xgene1/mul_1.asm: Remove.
+
+2017-02-17  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm64/cora53/cnd_aors_n.asm: Moved from "..".
+
+	* mpn/arm64/xgene1/cnd_aors_n.asm: Remove file since default code
+	performs better.
+
+	* mpn/arm64/cnd_aors_n.asm: Rewrite.
+
+	* mpn/arm64/logops_n.asm: Rewrite based on new aors_n.asm.
+
 2017-02-16  Pedro Gimeno  <pggimeno at wanadoo.es>
 
 	* rand/randmt.c (__gmp_randiset_mt): Set generator functions from
diff -r 2567c1119e14 -r dd1026a8cdfe mpn/arm64/aorsmul_1.asm
--- a/mpn/arm64/aorsmul_1.asm	Wed Feb 22 02:27:55 2017 +0100
+++ b/mpn/arm64/aorsmul_1.asm	Thu Feb 23 04:24:46 2017 +0100
@@ -1,8 +1,8 @@
-dnl  ARM64 mpn_submul_1
+dnl  ARM64 mpn_addmul_1 and mpn_submul_1
 
 dnl  Contributed to the GNU project by Torbjörn Granlund.
 
-dnl  Copyright 2013 Free Software Foundation, Inc.
+dnl  Copyright 2013, 2015, 2017 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -33,8 +33,16 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
+C Cortex-A53	9.3-9.8
+C Cortex-A57	 7.0
+C X-Gene	 5.0
+
+C NOTES
+C  * It is possible to keep the carry chain alive between the addition blocks
+C    and thus avoid csinc, but only for addmul_1.  Since that saves no time
+C    on the tested pipelines, we keep addmul_1 and submul_1 similar.
+C  * We could separate feed-in into 4 blocks, one for each residue (mod 4).
+C    That is likely to save a few cycles.
 
 changecom(blah)
 
@@ -57,15 +65,14 @@
 MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
 
 PROLOGUE(func)
-	mov	x15, #0
+	adds	x15, xzr, xzr
 
 	tbz	n, #0, L(1)
 
 	ldr	x4, [up],#8
 	mul	x8, x4, v0
 	umulh	x12, x4, v0
-	adds	x8, x8, x15
-	ldr	x4, [rp,#0]
+	ldr	x4, [rp]
 	ADDSUB	x8, x4, x8
 	csinc	x15, x12, x12, COND
 	str	x8, [rp],#8
@@ -79,19 +86,32 @@
 	umulh	x13, x5, v0
 	adds	x8, x8, x15
 	adcs	x9, x9, x12
-	ldp	x4, x5, [rp,#0]
+	ldp	x4, x5, [rp]
 	adc	x15, x13, xzr
-	sub	n, n, #1
 	ADDSUB	x8, x4, x8
 	ADDSUBC	x9, x5, x9
 	csinc	x15, x15, x15, COND
 	stp	x8, x9, [rp],#16
 
 L(2):	lsr	n, n, #2
-	cbz	n, L(end)
+	cbz	n, L(le3)
+	ldp	x4, x5, [up],#32
+	ldp	x6, x7, [up,#-16]
+	b	L(mid)
+L(le3):	mov	x0, x15
+	ret
 
-L(top):	ldp	x4, x5, [up],#16
-	ldp	x6, x7, [up],#16
+	ALIGN(16)
+L(top):	ldp	x4, x5, [up],#32
+	ldp	x6, x7, [up,#-16]
+	ADDSUB	x8, x16, x8
+	ADDSUBC	x9, x17, x9
+	stp	x8, x9, [rp],#32
+	ADDSUBC	x10, x12, x10
+	ADDSUBC	x11, x13, x11
+	stp	x10, x11, [rp,#-16]
+	csinc	x15, x15, x15, COND
+L(mid):	sub	n, n, #1
 	mul	x8, x4, v0
 	umulh	x12, x4, v0
 	mul	x9, x5, v0
@@ -103,20 +123,18 @@
 	mul	x11, x7, v0
 	umulh	x15, x7, v0
 	adcs	x10, x10, x13
-	ldp	x4, x5, [rp,#0]
+	ldp	x16, x17, [rp]
 	adcs	x11, x11, x14
-	ldp	x6, x7, [rp,#16]
+	ldp	x12, x13, [rp,#16]
 	adc	x15, x15, xzr
-	sub	n, n, #1
-	ADDSUB	x8, x4, x8
-	ADDSUBC	x9, x5, x9
-	ADDSUBC	x10, x6, x10
-	ADDSUBC	x11, x7, x11
-	stp	x8, x9, [rp],#16
-	csinc	x15, x15, x15, COND
-	stp	x10, x11, [rp],#16
 	cbnz	n, L(top)
 
-L(end):	mov	x0, x15
+	ADDSUB	x8, x16, x8
+	ADDSUBC	x9, x17, x9
+	ADDSUBC	x10, x12, x10
+	ADDSUBC	x11, x13, x11
+	stp	x8, x9, [rp]
+	stp	x10, x11, [rp,#16]
+	csinc	x0, x15, x15, COND
 	ret
 EPILOGUE()
diff -r 2567c1119e14 -r dd1026a8cdfe mpn/arm64/xgene1/aorsmul_1.asm
--- a/mpn/arm64/xgene1/aorsmul_1.asm	Wed Feb 22 02:27:55 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-dnl  ARM64 mpn_addmul_1 and mpn_submul_1
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013, 2015 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-C X-Gene	 5.75
-
-changecom(blah)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`n',  `x2')
-define(`v0', `x3')
-
-ifdef(`OPERATION_addmul_1', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`COND',	`cc')
-  define(`func',	mpn_addmul_1)')
-ifdef(`OPERATION_submul_1', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`COND',	`cs')
-  define(`func',	mpn_submul_1)')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-PROLOGUE(func)
-	mov	x15, #0
-
-	tbz	n, #0, L(1)
-
-	ldr	x4, [up],#8
-	mul	x8, x4, v0
-	umulh	x12, x4, v0
-	adds	x8, x8, x15
-	ldr	x4, [rp,#0]
-	ADDSUB	x8, x4, x8
-	csinc	x15, x12, x12, COND
-	str	x8, [rp],#8
-
-L(1):	tbz	n, #1, L(2)
-
-	ldp	x4, x5, [up],#16
-	mul	x8, x4, v0
-	umulh	x12, x4, v0
-	mul	x9, x5, v0
-	umulh	x13, x5, v0
-	adds	x8, x8, x15
-	adcs	x9, x9, x12
-	ldp	x4, x5, [rp,#0]
-	adc	x15, x13, xzr
-	sub	n, n, #1
-	ADDSUB	x8, x4, x8
-	ADDSUBC	x9, x5, x9
-	csinc	x15, x15, x15, COND
-	stp	x8, x9, [rp],#16
-
-L(2):	lsr	n, n, #2
-	cbz	n, L(end)
-
-L(top):	ldp	x4, x5, [up]
-	ldp	x6, x7, [up,#16]
-	add	up, up, #32
-	mul	x8, x4, v0
-	umulh	x12, x4, v0
-	mul	x9, x5, v0
-	umulh	x13, x5, v0
-	adds	x8, x8, x15
-	mul	x10, x6, v0
-	umulh	x14, x6, v0
-	adcs	x9, x9, x12
-	mul	x11, x7, v0
-	umulh	x15, x7, v0
-	adcs	x10, x10, x13
-	ldp	x4, x5, [rp,#0]
-	adcs	x11, x11, x14
-	ldp	x6, x7, [rp,#16]
-	adc	x15, x15, xzr
-	ADDSUB	x8, x4, x8
-	ADDSUBC	x9, x5, x9
-	ADDSUBC	x10, x6, x10
-	ADDSUBC	x11, x7, x11
-	stp	x8, x9, [rp]
-	csinc	x15, x15, x15, COND
-	stp	x10, x11, [rp,#16]
-	add	rp, rp, #32
-	sub	n, n, #1
-	cbnz	n, L(top)
-
-L(end):	mov	x0, x15
-	ret
-EPILOGUE()
diff -r 2567c1119e14 -r dd1026a8cdfe mpn/x86_64/silvermont/gmp-mparam.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/silvermont/gmp-mparam.h	Thu Feb 23 04:24:46 2017 +0100
@@ -0,0 +1,170 @@
+/* Intel Silvermont gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:


More information about the gmp-commit mailing list