[Gmp-commit] /home/hgfiles/gmp: New files.
mercurial at gmplib.org
mercurial at gmplib.org
Sun May 2 11:24:33 CEST 2010
details: /home/hgfiles/gmp/rev/3ee4dc316ee6
changeset: 13586:3ee4dc316ee6
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun May 02 11:24:00 2010 +0200
description:
New files.
diffstat:
ChangeLog | 6 ++
mpn/powerpc64/p6/lshift.asm | 113 +++++++++++++++++++++++++++++++++++++++++
mpn/powerpc64/p6/lshiftc.asm | 116 +++++++++++++++++++++++++++++++++++++++++++
mpn/powerpc64/p6/rshift.asm | 111 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 346 insertions(+), 0 deletions(-)
diffs (truncated from 365 to 300 lines):
diff -r c2615a31960d -r 3ee4dc316ee6 ChangeLog
--- a/ChangeLog Fri Apr 30 19:27:41 2010 +0200
+++ b/ChangeLog Sun May 02 11:24:00 2010 +0200
@@ -1,3 +1,9 @@
+2010-05-02 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/p6/lshift.asm: New file.
+ * mpn/powerpc64/p6/lshiftc.asm: Likewise.
+ * mpn/powerpc64/p6/rshift.asm: Likewise.
+
2010-04-30 Torbjorn Granlund <tege at gmplib.org>
* configure.in (powerpc64): Support CPU specific mode-less subdirs.
diff -r c2615a31960d -r 3ee4dc316ee6 mpn/powerpc64/p6/lshift.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/p6/lshift.asm Sun May 02 11:24:00 2010 +0200
@@ -0,0 +1,113 @@
+dnl PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
+
+dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 4
+
+C TODO
+C * Micro-optimise header code
+C * Write analogous lshiftc.asm
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`retval',`r3')
+define(`rp', `r7')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ mflr r12
+ bcl 20, 31, L(r) C get pc using a local "call"
+L(r): mflr r11
+ sldi r0, n, 3
+ sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
+ addi r11, r11, L(e1)-Lr-64 C address of L(e1) label in SHIFT(1)
+ add up, up, r0 C make up point at end of up[]
+ add r11, r11, r10 C address of L(oN) for N = cnt
+ add rp, rp_param, r0 C make rp point at end of rp[]
+ subfic tnc, cnt, 64
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ beq L(evn)
+
+L(odd): ld r9, -8(up)
+ cmpdi cr0, n, 1 C n = 1?
+ beq L(1)
+ ld r8, -16(up)
+ addi r11, r11, L(o1)-L(e1)
+ mtlr r11
+ srdi r11, n, 1
+ srd r3, r9, tnc C retval
+ addi up, up, 8
+ addi rp, rp, -8
+ mtctr r11
+ blr C branch to L(oN)
+
+L(evn): ld r8, -8(up)
+ ld r9, -16(up)
+ mtlr r11
+ addi n, n, 1
+ srdi r10, n, 1
+ srd r3, r8, tnc C retval
+ mtctr r10
+ blr C branch to L(eN)
+
+L(1): srd r3, r9, tnc C retval
+ sld r8, r9, cnt
+ std r8, -8(rp)
+ mtlr r12
+ blr
+
+
+define(SHIFT,`
+L(lo$1):ld r8, -24(up)
+ std r11, -8(rp)
+ addi rp, rp, -16
+L(o$1): srdi r10, r8, eval(64-$1)
+ rldimi r10, r9, $1, 0
+ ld r9, -32(up)
+ addi up, up, -16
+ std r10, 0(rp)
+L(e$1): srdi r11, r9, eval(64-$1)
+ rldimi r11, r8, $1, 0
+ bdnz L(lo$1)
+ std r11, -8(rp)
+ sldi r10, r9, $1
+ b L(com)
+ nop
+ nop
+')
+
+ ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com): std r10, -16(rp)
+ mtlr r12
+ blr
+EPILOGUE()
diff -r c2615a31960d -r 3ee4dc316ee6 mpn/powerpc64/p6/lshiftc.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/p6/lshiftc.asm Sun May 02 11:24:00 2010 +0200
@@ -0,0 +1,116 @@
+dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
+
+dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 4
+
+C TODO
+C * Micro-optimise header code
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`retval',`r3')
+define(`rp', `r7')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ mflr r12
+ bcl 20, 31, L(r) C get pc using a local "call"
+L(r): mflr r11
+ sldi r0, n, 3
+ sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
+ addi r11, r11, L(e1)-Lr-64 C address of L(e1) label in SHIFT(1)
+ add up, up, r0 C make up point at end of up[]
+ add r11, r11, r10 C address of L(oN) for N = cnt
+ add rp, rp_param, r0 C make rp point at end of rp[]
+ subfic tnc, cnt, 64
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ beq L(evn)
+
+L(odd): ld r9, -8(up)
+ cmpdi cr0, n, 1 C n = 1?
+ beq L(1)
+ ld r8, -16(up)
+ addi r11, r11, L(o1)-L(e1)
+ mtlr r11
+ srdi r11, n, 1
+ srd r3, r9, tnc C retval
+ addi up, up, 8
+ addi rp, rp, -8
+ mtctr r11
+ blr C branch to L(oN)
+
+L(evn): ld r8, -8(up)
+ ld r9, -16(up)
+ mtlr r11
+ addi n, n, 1
+ srdi r10, n, 1
+ srd r3, r8, tnc C retval
+ mtctr r10
+ blr C branch to L(eN)
+
+L(1): srd r3, r9, tnc C retval
+ sld r8, r9, cnt
+ nor r8, r8, r8
+ std r8, -8(rp)
+ mtlr r12
+ blr
+
+
+define(SHIFT,`
+L(lo$1):ld r8, -24(up)
+ nor r11, r11, r11
+ std r11, -8(rp)
+ addi rp, rp, -16
+L(o$1): srdi r10, r8, eval(64-$1)
+ rldimi r10, r9, $1, 0
+ ld r9, -32(up)
+ addi up, up, -16
+ nor r10, r10, r10
+ std r10, 0(rp)
+L(e$1): srdi r11, r9, eval(64-$1)
+ rldimi r11, r8, $1, 0
+ bdnz L(lo$1)
+ sldi r10, r9, $1
+ b L(com)
+ nop
+')
+
+ ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com): nor r11, r11, r11
+ nor r10, r10, r10
+ std r11, -8(rp)
+ std r10, -16(rp)
+ mtlr r12
+ blr
+EPILOGUE()
diff -r c2615a31960d -r 3ee4dc316ee6 mpn/powerpc64/p6/rshift.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/p6/rshift.asm Sun May 02 11:24:00 2010 +0200
@@ -0,0 +1,111 @@
+dnl PowerPC-64 mpn_rshift -- rp[] = up[] << cnt
+
+dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2
+C POWER6 3.5 (mysteriously 3.0 for cnt=1)
+
+C TODO
+C * Micro-optimise header code
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`retval',`r3')
+define(`rp', `r7')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ mflr r12
+ bcl 20, 31, L(r) C get pc using a local "call"
More information about the gmp-commit
mailing list