[Gmp-commit] /var/hg/gmp: Add mpn_tabselect assembly support for powerpc64, x...

mercurial at gmplib.org mercurial at gmplib.org
Tue Nov 15 00:53:14 CET 2011


details:   /var/hg/gmp/rev/bf8095efcd35
changeset: 14441:bf8095efcd35
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Nov 15 00:53:06 2011 +0100
description:
Add mpn_tabselect assembly support for powerpc64, x86, x86_64, ia64.

diffstat:

 ChangeLog                   |    7 +-
 mpn/ia64/tabselect.asm      |  139 ++++++++++++++++++++++++++++++++++++++++++++
 mpn/powerpc64/tabselect.asm |   95 ++++++++++++++++++++++++++++++
 mpn/x86/tabselect.asm       |  104 ++++++++++++++++++++++++++++++++
 mpn/x86_64/tabselect.asm    |  108 ++++++++++++++++++++++++++++++++++
 5 files changed, 452 insertions(+), 1 deletions(-)

diffs (truncated from 482 to 300 lines):

diff -r 1df3d8fef666 -r bf8095efcd35 ChangeLog
--- a/ChangeLog	Tue Nov 15 00:49:29 2011 +0100
+++ b/ChangeLog	Tue Nov 15 00:53:06 2011 +0100
@@ -1,10 +1,15 @@
 2011-11-15  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/powerpc64/tabselect.asm: New file.
+	* mpn/x86_64/tabselect.asm: New file.
+	* mpn/x86/tabselect.asm: New file.
+	* mpn/ia64/tabselect.asm: New file.
+
 	* mpn/asm-defs.m4 (define_mpn): Add tabselect.
 
 	* configure.in (gmp_mpn_functions): Add tabselect.
 	(HAVE_NATIVE): Add entries for addcnd_n, subcnd_n, tabselect.
-	
+
 	* mpn/generic/powm_sec.c: Remove mpn_tabselect implementation.
 	* mpn/generic/tabselect.c: New file with removed code.
 
diff -r 1df3d8fef666 -r bf8095efcd35 mpn/ia64/tabselect.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/ia64/tabselect.asm	Tue Nov 15 00:53:06 2011 +0100
@@ -0,0 +1,139 @@
+dnl  IA-64 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:       ?
+C Itanium 2:     5  (estimated)
+
+C NOTES
+C  * Using software pipelining could trivially yield 3 c/l even without
+C    unrolling.  (This code was modelled after the powerpc64 code, for
+C    simplicity.)
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `r32')
+define(`tp',     `r33')
+define(`n',      `r34')
+define(`nents',  `r35')
+define(`which',  `r36')
+
+define(`mask',   `r8')
+
+define(`rp1',     `r32')
+define(`tp1',     `r33')
+define(`rp2',     `r14')
+define(`tp2',     `r15')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C Walk all nents table entries of n limbs each, stored back to back at tp.
+C Each entry is merged into rp as rp[i] = (tp[i] & mask) | (rp[i] & ~mask),
+C where mask is all ones for entry number which and zero for all others.
+C Every entry is read and rp is rewritten on every pass.
+C
+C Both loops are bottom-tested and the inner count is (n-2)/2 unsigned, so
+C n >= 1 and nents >= 1 are assumed; neither is checked here.
+C
+C Register use beyond the arguments: r2 holds the incoming ar.lc, r8 is the
+C mask, r9 is a temporary, r14/r15 are the second rp/tp pointers, r16-r19
+C hold limbs, r20 is the inner loop count.  r4-r7 and ar.lc are preserved
+C across calls in the IA-64 software conventions, hence the scratch r20 for
+C the count and the save/restore of ar.lc around the loops.
+PROLOGUE(mpn_tabselect)
+	.prologue
+	.save	ar.lc, r2
+	mov	r2 = ar.lc		C keep the incoming loop counter	I0
+	;;
+	.body
+ifdef(`HAVE_ABI_32',`
+.mmi;	addp4	rp = 0, rp		C			M I
+	addp4	tp = 0, tp		C			M I
+	zxt4	n = n			C			I
+.mii;	nop	0
+	zxt4	nents = nents		C			I
+	zxt4	which = which		C			I
+	;;
+')
+.mmi;	add	rp2 = 8, rp1		C second pointer, one limb ahead
+	add	tp2 = 8, tp1		C second pointer, one limb ahead
+	add	r20 = -2, n
+	;;
+.mmi;	cmp.eq	p10, p0 = 1, n		C p10 set iff n is 1
+	and	r9 = 1, n		C isolate the low bit of n
+	shr.u	r20 = r20, 1		C inner loop count
+	;;
+.mmi;	cmp.eq	p8, p0 = 0, r9		C p8 set iff n is even
+	sub	which = nents, which	C compared against the nents countdown
+	shl	n = n, 3		C n in bytes, used to rewind rp
+	;;
+
+L(outer):
+.mmi;	cmp.eq	p6, p7 = which, nents	C are we at the selected table entry?
+	nop	0
+	mov	ar.lc = r20		C			I0
+	;;
+.mmb;
+  (p6)	mov	mask = -1
+  (p7)	mov	mask = 0
+  (p8)	br.dptk	L(top)			C branch to loop entry if n even
+	;;
+
+C n is odd: handle a single limb first so the rest is a multiple of two.
+.mmi;	ld8	r16 = [tp1], 8
+	add	tp2 = 8, tp2
+	nop	0
+	;;
+.mmi;	ld8	r18 = [rp1]
+	and	r16 = r16, mask
+	nop	0
+	;;
+.mmi;	andcm	r18 = r18, mask
+	;;
+	or	r16 = r16, r18
+	nop	0
+	;;
+.mmb;	st8	[rp1] = r16, 8
+	add	rp2 = 8, rp2
+  (p10)	br.dpnt	L(end)			C n is 1, nothing left for the loop
+
+	ALIGN(32)
+L(top):
+.mmi;	ld8	r16 = [tp1], 16
+	ld8	r17 = [tp2], 16
+	nop	0
+	;;
+.mmi;	ld8	r18 = [rp1]
+	and	r16 = r16, mask
+	nop	0
+.mmi;	ld8	r19 = [rp2]
+	and	r17 = r17, mask
+	nop	0
+	;;
+.mmi;	andcm	r18 = r18, mask
+	andcm	r19 = r19, mask
+	nop	0
+	;;
+.mmi;	or	r16 = r16, r18
+	or	r17 = r17, r19
+	nop	0
+	;;
+.mmb;	st8	[rp1] = r16, 16
+	st8	[rp2] = r17, 16
+	br.cloop.dptk	L(top)
+	;;
+L(end):
+.mmi;	sub	rp1 = rp1, n		C move rp1 back to beginning
+	sub	rp2 = rp2, n		C move rp2 back to beginning
+	cmp.ne	p9, p0 = 1, nents	C more table entries to go?
+.mmb;	add	nents = -1, nents
+	nop	0
+  (p9)	br.dptk	L(outer)
+	;;
+
+.mib;	nop	0
+	mov	ar.lc = r2		C restore the incoming loop counter	I0
+	br.ret.sptk.many b0
+EPILOGUE()
diff -r 1df3d8fef666 -r bf8095efcd35 mpn/powerpc64/tabselect.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/tabselect.asm	Tue Nov 15 00:53:06 2011 +0100
@@ -0,0 +1,95 @@
+dnl  PowerPC-64 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          ?
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 ?
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using VMX could result in significant speedup for certain CPUs.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `r3')
+define(`tp',     `r4')
+define(`n',      `r5')
+define(`nents',  `r6')
+define(`which',  `r7')
+
+define(`mask',   `r8')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+C Walk all nents table entries of n limbs each, stored back to back at tp.
+C Each entry is merged into rp as rp[i] = (tp[i] & mask) | (rp[i] & ~mask),
+C where mask is all ones for entry number which and zero for all others.
+C Every entry is read and rp is rewritten on every pass.
+C
+C Both loops are bottom-tested, so n >= 1 and nents >= 1 are assumed;
+C neither is checked here.
+C
+C cr0 is set once below and must stay untouched for the whole routine; the
+C outer loop test therefore uses cr6, and the mask is built through CA.
+PROLOGUE(mpn_tabselect)
+	addi	r0, n, 1
+	srdi	r0, r0, 1		C inner loop count, (n+1)/2
+	andi.	r9, n, 1		C set cr0 for use in inner loop
+	subf	which, nents, which	C which = which - nents
+	sldi	n, n, 3			C n in bytes, used to rewind rp
+
+L(outer):
+	mtctr	r0			C put inner loop count in ctr
+
+	add	r9, which, nents	C are we at the selected table entry?
+	addic	r9, r9, -1		C set CA iff r9 != 0, i.e. not selected entry
+	subfe	mask, r0, r0		C mask = CA - 1: all ones if selected, else 0
+
+	beq	cr0, L(top)		C branch to loop entry if n even
+
+C n is odd: handle a single limb first so the rest is a multiple of two.
+	ld	r9, 0(tp)
+	and	r9, r9, mask
+	ld	r11, 0(rp)
+	andc	r11, r11, mask
+	or	r9, r9, r11
+	std	r9, 0(rp)
+	addi	tp, tp, 8
+	addi	rp, rp, 8
+	bdz	L(end)			C this limb used one ctr count; done if n is 1
+
+	ALIGN(16)
+L(top):	ld	r9, 0(tp)
+	ld	r10, 8(tp)
+	and	r9, r9, mask
+	and	r10, r10, mask
+	ld	r11, 0(rp)
+	ld	r12, 8(rp)
+	andc	r11, r11, mask
+	andc	r12, r12, mask
+	or	r9, r9, r11
+	or	r10, r10, r12
+	std	r9, 0(rp)
+	std	r10, 8(rp)
+	addi	tp, tp, 16		C tp is never rewound; it runs on into the next entry
+	addi	rp, rp, 16
+	bdnz	L(top)
+
+L(end):	subf	rp, n, rp		C move rp back to beginning
+	addi	nents, nents, -1
+	cmpdi	cr6, nents, 0		C cr6, so that cr0 keeps the n parity
+	bne	cr6, L(outer)
+
+	blr
+EPILOGUE()
diff -r 1df3d8fef666 -r bf8095efcd35 mpn/x86/tabselect.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/tabselect.asm	Tue Nov 15 00:53:06 2011 +0100
@@ -0,0 +1,104 @@
+dnl  x86 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9  (Banias)		 ?
+C P6 model 13 (Dothan)		 ?
+C P4 model 0  (Willamette)	 ?
+C P4 model 1  (?)		 ?
+C P4 model 2  (Northwood)	 ?
+C P4 model 3  (Prescott)	 ?
+C P4 model 4  (Nocona)		 ?
+C Intel Atom			 ?
+C AMD K6			 ?


More information about the gmp-commit mailing list