[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Apr 27 00:18:45 CEST 2012
details: /var/hg/gmp/rev/56336a2c0e95
changeset: 14895:56336a2c0e95
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 27 00:07:15 2012 +0200
description:
Define LEA for ARM.
details: /var/hg/gmp/rev/06430c60bb82
changeset: 14896:06430c60bb82
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 27 00:12:04 2012 +0200
description:
Add ARM mpn_modexact_1c_odd (v4 and slightly faster v6).
details: /var/hg/gmp/rev/6a7e129fe145
changeset: 14897:6a7e129fe145
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Apr 27 00:18:27 2012 +0200
description:
Add ARM gcd_1 (v5 and slightly faster for v6t2).
diffstat:
ChangeLog | 11 ++++
mpn/arm/arm-defs.m4 | 26 +++++++++++-
mpn/arm/invert_limb.asm | 8 +--
mpn/arm/mode1o.asm | 72 ++++++++++++++++++++++++++++++++
mpn/arm/v5/gcd_1.asm | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
mpn/arm/v6/mode1o.asm | 75 +++++++++++++++++++++++++++++++++
mpn/arm/v6t2/gcd_1.asm | 100 +++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 391 insertions(+), 7 deletions(-)
diffs (truncated from 459 to 300 lines):
diff -r 20cf5542e78e -r 6a7e129fe145 ChangeLog
--- a/ChangeLog Thu Apr 26 13:08:24 2012 +0200
+++ b/ChangeLog Fri Apr 27 00:18:27 2012 +0200
@@ -1,3 +1,14 @@
+2012-04-27 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/arm/v5/gcd_1.asm: New file.
+ * mpn/arm/v6t2/gcd_1.asm: New file.
+
+ * mpn/arm/mode1o.asm: New file.
+ * mpn/arm/v6/mode1o.asm: New file.
+
+ * mpn/arm/arm-defs.m4 (LEA): New define.
+ * mpn/arm/invert_limb.asm: Use LEA.
+
2012-04-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpz/bin_uiui.c (bc_bin_uiui): Nail support.
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/arm-defs.m4
--- a/mpn/arm/arm-defs.m4 Thu Apr 26 13:08:24 2012 +0200
+++ b/mpn/arm/arm-defs.m4 Fri Apr 27 00:18:27 2012 +0200
@@ -23,7 +23,7 @@
dnl Standard commenting is with @, the default m4 # is for constants and we
dnl don't want to disable macro expansions in or after them.
-changecom(@)
+changecom(@&*$)
dnl APCS register names.
@@ -47,4 +47,28 @@
deflit(lr,r14)
deflit(pc,r15)
+
+define(`lea_num',0)
+
+dnl LEA(reg,gmp_symbol)
+dnl
+dnl Load the address of gmp_symbol into reg from a literal word L(ptrN) that
+dnl is stored in EPILOGUE_cpu; lea_num (N) is bumped on every use, PIC or not.
+dnl The gmp_symbol must be either local or protected/hidden, since we assume
+dnl it has a fixed distance from the point of use.
+define(`LEA',`dnl
+ldr $1, L(ptr`'lea_num)
+ifdef(`PIC',dnl
+`
+L(bas`'lea_num):dnl
+ add $1, $1, pc`'dnl
+ define(`EPILOGUE_cpu',
+ L(ptr`'lea_num): .word GSYM_PREFIX`'$2-L(bas`'lea_num)-8)dnl
+',`dnl
+ define(`EPILOGUE_cpu',
+ L(ptr`'lea_num): .word GSYM_PREFIX`'$2)')dnl
+define(`lea_num', eval(lea_num+1))dnl
+')
+
+
divert
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/invert_limb.asm
--- a/mpn/arm/invert_limb.asm Thu Apr 26 13:08:24 2012 +0200
+++ b/mpn/arm/invert_limb.asm Fri Apr 27 00:18:27 2012 +0200
@@ -1,6 +1,6 @@
dnl ARM mpn_invert_limb -- Invert a normalized limb.
-dnl Copyright 2001, 2009, 2011 Free Software Foundation, Inc.
+dnl Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -21,8 +21,7 @@
ASM_START()
PROLOGUE(mpn_invert_limb)
- ldr r2, L(4)
-L(2): add r2, pc, r2
+ LEA( r2, approx_tab-512)
mov r3, r0, lsr #23
mov r3, r3, asl #1
ldrh r3, [r3, r2]
@@ -43,9 +42,6 @@
adc r3, r3, r0
rsb r0, r3, r2
bx lr
-
- ALIGN(4)
-L(4): .word approx_tab-8-512-L(2)
EPILOGUE()
.section .rodata
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/mode1o.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/mode1o.asm Fri Apr 27 00:18:27 2012 +0200
@@ -0,0 +1,72 @@
+dnl ARM mpn_modexact_1c_odd
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 10
+C Cortex-A15 ?
+
+define(`up', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cy', `r3')
+
+ .protected binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+ stmfd sp!, {r4, r5} C save r4, r5; restored before return
+
+ LEA( r4, binvert_limb_table)
+
+ ldr r5, [up], #4 C up[0]
+
+ and r12, d, #254 C bits 1..7 of d (no ubfx: needs ARMv6T2)
+ ldrb r4, [r4, r12, lsr #1] C table byte at index (d >> 1) & 127
+ mul r12, r4, r4 C square of a byte (no smulbb: needs ARMv5TE)
+ mul r12, d, r12
+ rsb r12, r12, r4, asl #1 C r12 = 2*r4 - d*r4^2
+ mul r4, r12, r12
+ mul r4, d, r4
+ rsb r4, r4, r12, asl #1 C r4 = inverse
+
+ subs n, n, #1 C set carry as side-effect
+ beq L(end)
+
+L(top): sbcs cy, r5, cy C limb minus cy, with borrow from the flags
+ ldr r5, [up], #4
+ sub n, n, #1
+ mul r12, r4, cy C low word of inverse * difference
+ tst n, n C Z for the loop test; carry left untouched
+ umull r12, cy, r12, d C cy = high word of that product times d
+ bne L(top)
+
+L(end): sbcs cy, r5, cy
+ mul r12, r4, cy
+ umull r12, r0, r12, d C r0 = high word, the return value
+ addcc r0, r0, #1 C one more if the last sbcs borrowed
+
+ ldmfd sp!, {r4, r5}
+ bx r14
+EPILOGUE()
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/v5/gcd_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/gcd_1.asm Fri Apr 27 00:18:27 2012 +0200
@@ -0,0 +1,106 @@
+dnl ARM v5 mpn_gcd_1.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 5.9
+C Cortex-A15 ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C TODO
+C * Optimise inner-loop better.
+
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+C INPUT PARAMETERS
+define(`up', `r0')
+define(`n', `r1')
+define(`v0', `r2')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+ push {r4, r7, lr} C save r4, r7 and the return address
+ ldr r3, [up] C U low limb
+
+ orr r3, r3, v0 C u0 | v0
+ rsb r4, r3, #0 C -(u0 | v0)
+ and r4, r4, r3 C isolate lowest set bit of u0 | v0
+ clz r4, r4 C leading zeros above that bit
+ rsb r4, r4, #31 C r4 = min(ctz(u0),ctz(v0))
+
+ rsb r12, v0, #0
+ and r12, r12, v0 C isolate lowest set bit of v0
+ clz r12, r12
+ rsb r12, r12, #31 C r12 = ctz(v0)
+ lsr v0, v0, r12 C shift out v0's trailing zero bits
+
+ mov r7, v0 C copy of v0 that the reduction loop reads as y
+
+ cmp n, #1
+ bne L(nby1) C n != 1 is handled at L(nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ ldr r3, [up]
+ cmp v0, r3, lsr #BMOD_THRES_LOG2
+ bhi L(red1) C v0 > u0 >> BMOD_THRES_LOG2: skip the call, r3 = u0
+
+L(bmod):mov r3, #0 C carry argument
+ bl mpn_modexact_1c_odd
+ b L(red0)
+
+L(nby1):cmp n, #BMOD_1_TO_MOD_1_THRESHOLD
+ blo L(bmod) C below the threshold use mpn_modexact_1c_odd
+
+ bl mpn_mod_1
+
+L(red0):mov r3, r0 C r3 = value left in r0 by the call
+L(red1):rsbs r12, r3, #0 C Z set iff r3 == 0
+ and r12, r12, r3 C isolate lowest set bit of r3
+ clz r12, r12
+ rsb r12, r12, #31 C r12 = ctz(r3)
+ bne L(mid) C r3 != 0: enter the loop
+ b L(end) C r3 == 0: result comes from r7 alone
+
+ ALIGN(8)
+L(top): rsb r12, r12, #31 C r12 = ctz of the difference
+ movcc r3, r1 C if x-y < 0
+ movcc r7, r0 C use x,y-x
+L(mid): lsr r3, r3, r12 C shift out trailing zero bits of x
+ mov r0, r3 C r0 = x, for the movcc at L(top)
+ sub r1, r7, r3 C r1 = y - x
+ rsbs r3, r7, r3 C r3 = x - y, setting the flags
+ and r12, r1, r3 C lowest set bit of the difference
+ clz r12, r12 C
+ bne L(top) C loop until x == y
+
+L(end): lsl r0, r7, r4 C return r7 << r4
+ pop {r4, r7, pc} C restore r4, r7 and return
+EPILOGUE()
diff -r 20cf5542e78e -r 6a7e129fe145 mpn/arm/v6/mode1o.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6/mode1o.asm Fri Apr 27 00:18:27 2012 +0200
@@ -0,0 +1,75 @@
+dnl ARM mpn_modexact_1c_odd
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
More information about the gmp-commit
mailing list