[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun May 13 01:17:55 CEST 2012
details: /var/hg/gmp/rev/b5f1ad1e1745
changeset: 14965:b5f1ad1e1745
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun May 13 01:16:38 2012 +0200
description:
Add ARM v5 mod_1s_2p.
details: /var/hg/gmp/rev/b124cc447937
changeset: 14966:b124cc447937
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun May 13 01:17:52 2012 +0200
description:
Add ARM rsh1add_n and rsh1sub_n.
diffstat:
ChangeLog | 5 +
mpn/arm/rsh1aors_n.asm | 112 ++++++++++++++++++++++++++++++++++++++
mpn/arm/v5/mod_1_2.asm | 144 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 261 insertions(+), 0 deletions(-)
diffs (276 lines):
diff -r 96ad88f37fa2 -r b124cc447937 ChangeLog
--- a/ChangeLog Fri May 11 19:51:40 2012 +0200
+++ b/ChangeLog Sun May 13 01:17:52 2012 +0200
@@ -1,3 +1,8 @@
+2012-05-13 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/arm/rsh1aors_n.asm: New file.
+ * mpn/arm/v5/mod_1_2.asm: New file.
+
2012-05-11 Marc Glisse <marc.glisse at inria.fr>
* gmpxx.h (explicit operator bool): New functions.
diff -r 96ad88f37fa2 -r b124cc447937 mpn/arm/rsh1aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/rsh1aors_n.asm Sun May 13 01:17:52 2012 +0200
@@ -0,0 +1,112 @@
+dnl ARM mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 3.64-3.7
+C Cortex-A15 ?
+
+C TODO
+C * Not optimised.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`RSTCY', `cmn $1, $1')
+ define(`func', mpn_rsh1add_n)
+ define(`func_nc', mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`RSTCY',
+ `mvn $2, #0x80000000
+ cmp $2, $1')
+ define(`func', mpn_rsh1sub_n)
+ define(`func_nc', mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ push {r4-r11}
+ ldr r4, [up], #4
+ ldr r8, [vp], #4
+ ADDSUB r4, r4, r8
+ rrxs r12, r7
+ and r11, r4, #1 C return value
+ subs n, n, #4
+ blo L(end)
+
+L(top): ldmia up!, {r5,r6,r7}
+ ldmia vp!, {r8,r9,r10}
+ cmn r12, r12
+ ADDSUBC r5, r5, r8
+ ADDSUBC r6, r6, r9
+ ADDSUBC r7, r7, r10
+ rrxs r12, r7
+ rrxs r6, r6
+ rrxs r5, r5
+ rrxs r4, r4
+ subs n, n, #3
+ stmia rp!, {r4,r5,r6}
+ mov r4, r7
+ bhs L(top)
+
+L(end): cmn n, #2
+ bls L(e2)
+ ldm up, {r5,r6}
+ ldm vp, {r8,r9}
+ cmn r12, r12
+ ADDSUBC r5, r5, r8
+ ADDSUBC r6, r6, r9
+ rrxs r12, r6
+ rrxs r5, r5
+ rrxs r4, r4
+ stmia rp!, {r4,r5}
+ mov r4, r6
+ b L(e1)
+
+L(e2): bne L(e1)
+ ldr r5, [up, #0]
+ ldr r8, [vp, #0]
+ cmn r12, r12
+ ADDSUBC r5, r5, r8
+ rrxs r12, r5
+ rrxs r4, r4
+ str r4, [rp], #4
+ mov r4, r5
+
+L(e1): RSTCY( r12, r1)
+ rrxs r4, r4
+ str r4, [rp, #0]
+ mov r0, r11
+ pop {r4-r11}
+ bx r14
+EPILOGUE()
diff -r 96ad88f37fa2 -r b124cc447937 mpn/arm/v5/mod_1_2.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/mod_1_2.asm Sun May 13 01:17:52 2012 +0200
@@ -0,0 +1,144 @@
+dnl ARM mpn_mod_1s_2p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 4.25
+C Cortex-A15 ?
+
+define(`ap', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p)
+ push {r4-r10}
+ tst n, #1
+ add r7, r3, #8
+ ldmia r7, {r7, r8, r12} C load B1, B2, B3
+ add ap, ap, n, lsl #2 C put ap at operand end
+ beq L(evn)
+
+L(odd): subs n, n, #1
+ beq L(1)
+ ldmdb ap!, {r4,r6,r9}
+ mov r10, #0
+ umlal r4, r10, r6, r7
+ umlal r4, r10, r9, r8
+ b L(com)
+
+L(evn): ldmdb ap!, {r4,r10}
+L(com): subs n, n, #2
+ ble L(end)
+ ldmdb ap!, {r5,r6}
+ b L(mid)
+
+L(top): mov r9, #0
+ umlal r5, r9, r6, r7 C B1
+ umlal r5, r9, r4, r8 C B2
+ ldmdb ap!, {r4,r6}
+ umlal r5, r9, r10, r12 C B3
+ ble L(xit)
+ mov r10, #0
+ umlal r4, r10, r6, r7 C B1
+ umlal r4, r10, r5, r8 C B2
+ ldmdb ap!, {r5,r6}
+ umlal r4, r10, r9, r12 C B3
+L(mid): subs n, n, #4
+ bge L(top)
+
+ mov r9, #0
+ umlal r5, r9, r6, r7 C B1
+ umlal r5, r9, r4, r8 C B2
+ umlal r5, r9, r10, r12 C B3
+ mov r4, r5
+
+L(end): movge r9, r10 C executed iff coming via xit
+ ldr r6, [r3, #4] C cps[1] = cnt
+ mov r5, #0
+ umlal r4, r5, r9, r7
+ mov r7, r5, lsl r6
+L(x): rsb r1, r6, #32
+ orr r8, r7, r4, lsr r1
+ mov r9, r4, lsl r6
+ ldr r5, [r3, #0]
+ add r0, r8, #1
+ umull r12, r1, r8, r5
+ adds r4, r12, r9
+ adc r1, r1, r0
+ mul r5, r2, r1
+ sub r9, r9, r5
+ cmp r9, r4
+ addhi r9, r9, r2
+ cmp r2, r9
+ subls r9, r9, r2
+ mov r0, r9, lsr r6
+ pop {r4-r10}
+ bx r14
+
+L(xit): mov r10, #0
+ umlal r4, r10, r6, r7 C B1
+ umlal r4, r10, r5, r8 C B2
+ umlal r4, r10, r9, r12 C B3
+ b L(end)
+
+L(1): ldr r6, [r3, #4] C cps[1] = cnt
+ ldr r4, [ap, #-4] C ap[0]
+ mov r7, #0
+ b L(x)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_2p_cps)
+ push {r4-r8, r14}
+ clz r4, r1
+ mov r5, r1, lsl r4 C b <<= cnt
+ mov r6, r0 C r6 = cps
+ mov r0, r5
+ bl mpn_invert_limb
+ rsb r3, r4, #32
+ mov r3, r0, lsr r3
+ mov r2, #1
+ orr r3, r3, r2, lsl r4
+ rsb r1, r5, #0
+ mul r2, r1, r3
+ umull r3, r12, r2, r0
+ add r12, r2, r12
+ mvn r12, r12
+ mul r1, r5, r12
+ cmp r1, r3
+ addhi r1, r1, r5
+ umull r12, r7, r1, r0
+ add r7, r1, r7
+ mvn r7, r7
+ mul r3, r5, r7
+ cmp r3, r12
+ addhi r3, r3, r5
+ mov r5, r2, lsr r4
+ mov r7, r1, lsr r4
+ mov r8, r3, lsr r4
+ stmia r6, {r0,r4,r5,r7,r8} C fill cps
+ pop {r4-r8, pc}
+EPILOGUE()
More information about the gmp-commit
mailing list