[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Apr 29 03:05:54 CEST 2012
details: /var/hg/gmp/rev/3f1237b265bd
changeset: 14905:3f1237b265bd
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 29 00:05:19 2012 +0200
description:
Correct header comment.
details: /var/hg/gmp/rev/521b092f6517
changeset: 14906:521b092f6517
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 29 03:05:52 2012 +0200
description:
Add ARM mod_34lsub1.
diffstat:
ChangeLog | 2 +
mpn/arm/mod_34lsub1.asm | 109 ++++++++++++++++++++++++++++++++++++++++++++++
mpn/arm/v6t2/divrem_1.asm | 3 +-
3 files changed, 112 insertions(+), 2 deletions(-)
diffs (141 lines):
diff -r f331c6cb447a -r 521b092f6517 ChangeLog
--- a/ChangeLog Sun Apr 29 00:03:29 2012 +0200
+++ b/ChangeLog Sun Apr 29 03:05:52 2012 +0200
@@ -1,5 +1,7 @@
2012-04-29 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/arm/mod_34lsub1.asm: New file.
+
* mpn/arm/v6t2/divrem_1.asm: New file.
2012-04-28 Torbjorn Granlund <tege at gmplib.org>
diff -r f331c6cb447a -r 521b092f6517 mpn/arm/mod_34lsub1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/mod_34lsub1.asm Sun Apr 29 03:05:52 2012 +0200
@@ -0,0 +1,109 @@
+dnl ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 1.33
+C Cortex-A15 ?
+
+define(`ap', r0) C source limb pointer, advanced by the post-increment loads
+define(`n', r1) C limb count, kept biased downwards while limbs are consumed
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C * Write cleverer summation code.
+C * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+ push { r4, r5, r6, r7 } C save r4-r7, used as scratch and restored before each return
+
+ subs n, n, #3 C n -= 3, flags tell whether at least 3 limbs exist
+ mov r7, #0 C r7 counts carries out of the top accumulator
+ blt L(le2) C n <= 2
+
+ ldmia ap!, { r2, r3, r12 } C accumulators r2, r3, r12 = first three limbs
+ subs n, n, #3 C are three more limbs available?
+ blt L(sum) C n <= 5
+ adds r0, r0, #0 C clear carry, the pointer value in r0 is unchanged
+ sub n, n, #3 C no flag update, carry stays clear
+ b L(mid) C enter the loop at its load
+
+L(top): adcs r2, r2, r4 C add a limb triple, carry chains r2 -> r3 -> r12
+ adcs r3, r3, r5
+ adcs r12, r12, r6 C carry out re-enters at r2, 2^96 is congruent to 1 mod 2^24-1
+L(mid): ldmia ap!, { r4, r5, r6 } C fetch the next three limbs
+ tst n, n C sets N from n, carry flag is left intact
+ sub n, n, #3 C no flag update
+ bpl L(top) C loop while n was >= 0 at the tst
+
+ add n, n, #3 C undo the last decrement, n is now -3, -2 or -1
+
+ adcs r2, r2, r4 C add the last triple fetched
+ adcs r3, r3, r5
+ adcs r12, r12, r6
+ movcs r7, #1 C r7 <= 1
+
+L(sum): cmn n, #2 C flags from n+2, n+3 limbs (0, 1 or 2) remain
+ movlo r4, #0 C n = -3, nothing left for r4
+ ldrhs r4, [ap], #4 C n >= -2, load one remaining limb
+ movls r5, #0 C n <= -2, nothing left for r5
+ ldrhi r5, [ap], #4 C n = -1, load the second remaining limb
+
+ adds r2, r2, r4 C fold the tail limbs into the accumulators
+ adcs r3, r3, r5
+ adcs r12, r12, #0
+ adc r7, r7, #0 C r7 <= 2
+
+L(sum2):
+ bic r0, r2, #0xff000000 C r0 = low 24 bits of r2
+ add r0, r0, r2, lsr #24 C plus the high 8 bits of r2
+ add r0, r0, r7 C plus the carry count, weight 1
+
+ lsl r7, r3, #8 C r3 has weight 2^32, congruent to 2^8 mod 2^24-1
+ bic r1, r7, #0xff000000 C low 16 bits of r3 moved up by 8, r1 is free as scratch now
+ add r0, r0, r1
+ add r0, r0, r3, lsr #16 C plus the high 16 bits of r3
+
+ lsl r7, r12, #16 C r12 has weight 2^64, congruent to 2^16 mod 2^24-1
+ bic r1, r7, #0xff000000 C low 8 bits of r12 moved up by 16
+ add r0, r0, r1
+ add r0, r0, r12, lsr #8 C plus the high 24 bits of r12
+
+ pop { r4, r5, r6, r7 }
+ bx lr C r0 is congruent to the operand mod 2^24-1, not fully reduced
+
+L(le2): cmn n, #1 C n+1 == 0 means exactly two limbs
+ bne L(1) C otherwise take the single-limb path
+ ldmia ap!, { r2, r3 }
+ mov r12, #0 C no third limb
+ b L(sum2) C r7 is still 0 here
+L(1): ldr r2, [ap]
+ bic r0, r2, #0xff000000 C low 24 bits of the limb
+ add r0, r0, r2, lsr #24 C plus its high 8 bits
+ pop { r4, r5, r6, r7 }
+ bx lr
+EPILOGUE()
diff -r f331c6cb447a -r 521b092f6517 mpn/arm/v6t2/divrem_1.asm
--- a/mpn/arm/v6t2/divrem_1.asm Sun Apr 29 00:03:29 2012 +0200
+++ b/mpn/arm/v6t2/divrem_1.asm Sun Apr 29 03:05:52 2012 +0200
@@ -1,4 +1,4 @@
-dnl ARM mpn_add_n and mpn_sub_n
+dnl ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
dnl Contributed to the GNU project by Torbjorn Granlund.
@@ -32,7 +32,6 @@
C * Optimise inner-loops better, they could likely run a cycle or two faster.
C * Decrease register usage, streamline non-loop code.
-
define(`qp_arg', `r0')
define(`fn', `r1')
define(`up_arg', `r2')
More information about the gmp-commit
mailing list