[Gmp-commit] /var/hg/gmp: Move new p9/addmul_1.asm to proper place.

mercurial at gmplib.org mercurial at gmplib.org
Sun Dec 31 16:46:51 UTC 2017


details:   /var/hg/gmp/rev/bb6ab005e1fe
changeset: 17518:bb6ab005e1fe
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sun Dec 31 16:00:26 2017 +0100
description:
Move new p9/addmul_1.asm to proper place.

diffstat:

 mpn/powerpc64/mode64/p9/addmul_1.asm |  136 +++++++++++++++++++++++++++++++++++
 mpn/powerpc64/p9/addmul_1.asm        |  136 -----------------------------------
 2 files changed, 136 insertions(+), 136 deletions(-)

diffs (280 lines):

diff -r 8df00acd12ca -r bb6ab005e1fe mpn/powerpc64/mode64/p9/addmul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/p9/addmul_1.asm	Sun Dec 31 16:00:26 2017 +0100
@@ -0,0 +1,136 @@
+dnl  POWER9 mpn_addmul_1.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630		 -
+C POWER4/PPC970		 -
+C POWER5		 -
+C POWER6		 -
+C POWER7		 -
+C POWER8		 -
+C POWER9		 ?
+
+C TODO
+C  * Schedule for POWER9 pipeline.
+C  * Unroll to at least 4x if that proves beneficial.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`v0', `r6')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)	C {rp,n} += {up,n} * v0, limb by limb; the carry-out limb is left in r3
+	std	r31, -8(r1)	C save r31 just below the stack pointer; it is reloaded before every blr
+
+	cmpdi	cr6, n, 2	C cr6 = n compared with 2, consumed by the ble in L(b0) and L(b1)
+
+	addi	r0, n, -1
+	srdi	r0, r0, 1
+	mtctr	r0		C ctr = (n-1) >> 1
+
+	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
+	bne	cr0, L(b1)	C odd n enters at L(b1), even n falls through into L(b0)
+
+L(b0):	ld	r10, 0(rp)	C even-n entry: preload the first two limbs of rp and up
+	ld	r12, 0(up)
+	ld	r11, 8(rp)
+	ld	r31, 8(up)
+	maddld	r0, r12, v0, r10	C r0 = low limb of up[0]*v0 + rp[0]
+	maddhdu	r7, r12, v0, r10	C r7 = high limb of the same sum
+	ble	cr6, L(2)		C n <= 2 here: finish in the two-limb tail
+	ld	r10, 16(rp)
+	ld	r12, 16(up)
+	maddld	r8, r31, v0, r11
+	maddhdu	r5, r31, v0, r11
+	addic	up, up, 16		C addic also writes CA; NOTE(review): presumably relied on to clear CA for the adde at L(mid) -- confirm
+	addi	rp, rp, -8		C bias rp so the offsets used from L(mid) onward address the right limbs
+	b	L(mid)
+
+L(b1):	ld	r11, 0(rp)	C odd-n entry
+	ld	r31, 0(up)
+	ble	cr6, L(1)		C odd n <= 2 here: finish in the one-limb tail
+	ld	r10, 8(rp)
+	ld	r12, 8(up)
+	maddld	r0, r31, v0, r11
+	maddhdu	r5, r31, v0, r11
+	ld	r11, 16(rp)
+	ld	r31, 16(up)
+	maddld	r9, r12, v0, r10
+	maddhdu	r7, r12, v0, r10
+	addic	up, up, 24		C same CA side effect as the addic in L(b0)
+	bdz	L(end)			C decrement ctr; when it reaches 0 skip the loop entirely
+
+	ALIGN(16)
+L(top):	ld	r10, 24(rp)	C each pass handles two limbs: the L(top) half, then the L(mid) half
+	ld	r12, 0(up)
+	std	r0, 0(rp)
+	maddld	r8, r31, v0, r11	C W:0,2,4
+	adde	r0, r5, r9		C earlier high limb + following low limb + CA
+	maddhdu	r5, r31, v0, r11	C W:1,3,5
+L(mid):	ld	r11, 32(rp)	C L(b0) joins the loop here
+	ld	r31, 8(up)
+	std	r0, 8(rp)
+	maddld	r9, r12, v0, r10	C W:1,3,5
+	adde	r0, r7, r8		C earlier high limb + following low limb + CA
+	maddhdu	r7, r12, v0, r10	C W:2,4,6
+	addi	rp, rp, 16		C addi does not write CA, so the carry chain survives the pointer updates
+	addi	up, up, 16
+	bdnz	L(top)
+
+L(end):	std	r0, 0(rp)	C wind-down: three result limbs are stored from here on
+	maddld	r8, r31, v0, r11
+	adde	r0, r5, r9
+	maddhdu	r5, r31, v0, r11
+	std	r0, 8(rp)
+	adde	r0, r7, r8
+	std	r0, 16(rp)
+	addze	r3, r5			C r3 = last high limb + CA
+	ld	r31, -8(r1)
+	blr
+
+L(2):	maddld	r8, r31, v0, r11	C tail for even n <= 2, reached from L(b0) only
+	maddhdu	r5, r31, v0, r11
+	std	r0, 0(rp)
+	addc	r0, r7, r8		C addc ignores the incoming CA and sets it from r7 + r8
+	std	r0, 8(rp)
+	addze	r3, r5			C r3 = last high limb + CA
+	ld	r31, -8(r1)
+	blr
+
+L(1):	maddld	r0,  r31, v0, r11	C tail for odd n <= 2, reached from L(b1) only
+	std	r0, 0(rp)
+	maddhdu	r3, r31, v0, r11	C the high limb goes straight into r3
+	ld	r31, -8(r1)
+	blr
+EPILOGUE()
diff -r 8df00acd12ca -r bb6ab005e1fe mpn/powerpc64/p9/addmul_1.asm
--- a/mpn/powerpc64/p9/addmul_1.asm	Sat Dec 30 15:48:40 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-dnl  POWER9 mpn_addmul_1.
-
-dnl  Copyright 2017 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630		 -
-C POWER4/PPC970		 -
-C POWER5		 -
-C POWER6		 -
-C POWER7		 -
-C POWER8		 -
-C POWER9		 ?
-
-C TODO
-C  * Schedule for POWER9 pipeline.
-C  * Unroll to at least 4x if that proves beneficial.
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n',  `r5')
-define(`v0', `r6')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)	C {rp,n} += {up,n} * v0, limb by limb; the carry-out limb is left in r3
-	std	r31, -8(r1)	C save r31 just below the stack pointer; it is reloaded before every blr
-
-	cmpdi	cr6, n, 2	C cr6 = n compared with 2, consumed by the ble in L(b0) and L(b1)
-
-	addi	r0, n, -1
-	srdi	r0, r0, 1
-	mtctr	r0		C ctr = (n-1) >> 1
-
-	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
-	bne	cr0, L(b1)	C odd n enters at L(b1), even n falls through into L(b0)
-
-L(b0):	ld	r10, 0(rp)	C even-n entry: preload the first two limbs of rp and up
-	ld	r12, 0(up)
-	ld	r11, 8(rp)
-	ld	r31, 8(up)
-	maddld	r0, r12, v0, r10	C r0 = low limb of up[0]*v0 + rp[0]
-	maddhdu	r7, r12, v0, r10	C r7 = high limb of the same sum
-	ble	cr6, L(2)		C n <= 2 here: finish in the two-limb tail
-	ld	r10, 16(rp)
-	ld	r12, 16(up)
-	maddld	r8, r31, v0, r11
-	maddhdu	r5, r31, v0, r11
-	addic	up, up, 16		C addic also writes CA; NOTE(review): presumably relied on to clear CA for the adde at L(mid) -- confirm
-	addi	rp, rp, -8		C bias rp so the offsets used from L(mid) onward address the right limbs
-	b	L(mid)
-
-L(b1):	ld	r11, 0(rp)	C odd-n entry
-	ld	r31, 0(up)
-	ble	cr6, L(1)		C odd n <= 2 here: finish in the one-limb tail
-	ld	r10, 8(rp)
-	ld	r12, 8(up)
-	maddld	r0, r31, v0, r11
-	maddhdu	r5, r31, v0, r11
-	ld	r11, 16(rp)
-	ld	r31, 16(up)
-	maddld	r9, r12, v0, r10
-	maddhdu	r7, r12, v0, r10
-	addic	up, up, 24		C same CA side effect as the addic in L(b0)
-	bdz	L(end)			C decrement ctr; when it reaches 0 skip the loop entirely
-
-	ALIGN(16)
-L(top):	ld	r10, 24(rp)	C each pass handles two limbs: the L(top) half, then the L(mid) half
-	ld	r12, 0(up)
-	std	r0, 0(rp)
-	maddld	r8, r31, v0, r11	C W:0,2,4
-	adde	r0, r5, r9		C earlier high limb + following low limb + CA
-	maddhdu	r5, r31, v0, r11	C W:1,3,5
-L(mid):	ld	r11, 32(rp)	C L(b0) joins the loop here
-	ld	r31, 8(up)
-	std	r0, 8(rp)
-	maddld	r9, r12, v0, r10	C W:1,3,5
-	adde	r0, r7, r8		C earlier high limb + following low limb + CA
-	maddhdu	r7, r12, v0, r10	C W:2,4,6
-	addi	rp, rp, 16		C addi does not write CA, so the carry chain survives the pointer updates
-	addi	up, up, 16
-	bdnz	L(top)
-
-L(end):	std	r0, 0(rp)	C wind-down: three result limbs are stored from here on
-	maddld	r8, r31, v0, r11
-	adde	r0, r5, r9
-	maddhdu	r5, r31, v0, r11
-	std	r0, 8(rp)
-	adde	r0, r7, r8
-	std	r0, 16(rp)
-	addze	r3, r5			C r3 = last high limb + CA
-	ld	r31, -8(r1)
-	blr
-
-L(2):	maddld	r8, r31, v0, r11	C tail for even n <= 2, reached from L(b0) only
-	maddhdu	r5, r31, v0, r11
-	std	r0, 0(rp)
-	addc	r0, r7, r8		C addc ignores the incoming CA and sets it from r7 + r8
-	std	r0, 8(rp)
-	addze	r3, r5			C r3 = last high limb + CA
-	ld	r31, -8(r1)
-	blr
-
-L(1):	maddld	r0,  r31, v0, r11	C tail for odd n <= 2, reached from L(b1) only
-	std	r0, 0(rp)
-	maddhdu	r3, r31, v0, r11	C the high limb goes straight into r3
-	ld	r31, -8(r1)
-	blr
-EPILOGUE()


More information about the gmp-commit mailing list