[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Mar 14 22:45:47 CET 2013
details: /var/hg/gmp/rev/9150ec471dd6
changeset: 15578:9150ec471dd6
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 14 15:34:55 2013 +0100
description:
Avoid ARM conditional insn execution.
details: /var/hg/gmp/rev/581c2fb98e17
changeset: 15579:581c2fb98e17
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 14 22:25:47 2013 +0100
description:
(ADDSUB): Remove unused definition.
details: /var/hg/gmp/rev/a5395f469f70
changeset: 15580:a5395f469f70
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 14 22:26:35 2013 +0100
description:
Remove a redundant ASM_START.
details: /var/hg/gmp/rev/abab3a171532
changeset: 15581:abab3a171532
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 14 22:44:48 2013 +0100
description:
Provide ARM64 cnd_add_n and cnd_sub_n.
details: /var/hg/gmp/rev/127b13d0a7bb
changeset: 15582:127b13d0a7bb
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 14 22:45:29 2013 +0100
description:
Provide IA64 cnd_add_n and cnd_sub_n.
details: /var/hg/gmp/rev/6dbd2044d1ce
changeset: 15583:6dbd2044d1ce
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 14 22:45:42 2013 +0100
description:
ChangeLog
diffstat:
ChangeLog | 10 +
mpn/arm/cnd_aors_n.asm | 28 ++--
mpn/arm64/aors_n.asm | 2 -
mpn/arm64/cnd_aors_n.asm | 88 ++++++++++++++++
mpn/ia64/aors_n.asm | 3 +-
mpn/ia64/cnd_aors_n.asm | 248 +++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 361 insertions(+), 18 deletions(-)
diffs (truncated from 476 to 300 lines):
diff -r d9d55fa98fd5 -r 6dbd2044d1ce ChangeLog
--- a/ChangeLog Thu Mar 14 10:35:21 2013 +0100
+++ b/ChangeLog Thu Mar 14 22:45:42 2013 +0100
@@ -1,5 +1,15 @@
2013-03-14 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/ia64/cnd_aors_n.asm: New file.
+
+ * mpn/arm64/cnd_aors_n.asm: New file.
+
+ * mpn/arm64/aors_n.asm (ADDSUB): Remove unused definition.
+
+ * mpn/ia64/aors_n.asm: Remove a redundant ASM_START.
+
+ * mpn/arm/cnd_aors_n.asm: Avoid ARM conditional insn execution.
+
* mpn/x86_64/missing.asm: Move from mulx/adx since we cannot currently
prune missing.asm from path.
* mpn/x86_64/mulx/adx/missing-call.m4: Likewise.
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/arm/cnd_aors_n.asm
--- a/mpn/arm/cnd_aors_n.asm Thu Mar 14 10:35:21 2013 +0100
+++ b/mpn/arm/cnd_aors_n.asm Thu Mar 14 22:45:42 2013 +0100
@@ -1,6 +1,6 @@
dnl ARM mpn_cnd_add_n, mpn_cnd_sub_n
-dnl Copyright 2012 Free Software Foundation, Inc.
+dnl Copyright 2012, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -55,11 +55,11 @@
push {r4-r11}
ldr n, [sp, #32]
+ cmp cnd, #1
+ sbc cnd, cnd, cnd C conditionally set to 0xffffffff
+
INITCY C really only needed for n = 0 (mod 4)
- teq cnd, #0 C could use this for clearing/setting cy
- mvnne cnd, #0 C conditionally set to 0xffffffff
-
ands r4, n, #3
beq L(top)
cmp r4, #2
@@ -68,9 +68,9 @@
L(b3): ldm vp!, {r4,r5,r6}
ldm up!, {r8,r9,r10}
- and r4, r4, cnd
- and r5, r5, cnd
- and r6, r6, cnd
+ bic r4, r4, cnd
+ bic r5, r5, cnd
+ bic r6, r6, cnd
ADDSUB r8, r8, r4
ADDSUBC r9, r9, r5
ADDSUBC r10, r10, r6
@@ -82,8 +82,8 @@
L(b2): ldm vp!, {r4,r5}
ldm up!, {r8,r9}
- and r4, r4, cnd
- and r5, r5, cnd
+ bic r4, r4, cnd
+ bic r5, r5, cnd
ADDSUB r8, r8, r4
ADDSUBC r9, r9, r5
stm rp!, {r8,r9}
@@ -94,7 +94,7 @@
L(b1): ldr r4, [vp], #4
ldr r8, [up], #4
- and r4, r4, cnd
+ bic r4, r4, cnd
ADDSUB r8, r8, r4
str r8, [rp], #4
sub n, n, #1
@@ -103,10 +103,10 @@
L(top): ldm vp!, {r4,r5,r6,r7}
ldm up!, {r8,r9,r10,r11}
- and r4, r4, cnd
- and r5, r5, cnd
- and r6, r6, cnd
- and r7, r7, cnd
+ bic r4, r4, cnd
+ bic r5, r5, cnd
+ bic r6, r6, cnd
+ bic r7, r7, cnd
ADDSUBC r8, r8, r4
ADDSUBC r9, r9, r5
ADDSUBC r10, r10, r6
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/arm64/aors_n.asm
--- a/mpn/arm64/aors_n.asm Thu Mar 14 10:35:21 2013 +0100
+++ b/mpn/arm64/aors_n.asm Thu Mar 14 22:45:42 2013 +0100
@@ -33,7 +33,6 @@
define(`n', `x3')
ifdef(`OPERATION_add_n', `
- define(`ADDSUB', adds)
define(`ADDSUBC', adcs)
define(`CLRCY', `cmn xzr, xzr')
define(`SETCY', `cmp $1, #1')
@@ -41,7 +40,6 @@
define(`func', mpn_add_n)
define(`func_nc', mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
- define(`ADDSUB', subs)
define(`ADDSUBC', sbcs)
define(`CLRCY', `cmp xzr, xzr')
define(`SETCY', `subs $1, xzr, $1')
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/arm64/cnd_aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/cnd_aors_n.asm Thu Mar 14 22:45:42 2013 +0100
@@ -0,0 +1,88 @@
+dnl ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 ?
+C Cortex-A57 ?
+
+changecom(@&*$)
+
+define(`cnd', `x0')
+define(`rp', `x1')
+define(`up', `x2')
+define(`vp', `x3')
+define(`n', `x4')
+
+ifdef(`OPERATION_cnd_add_n', `
+ define(`ADDSUBC', adcs)
+ define(`CLRCY', `cmn xzr, xzr')
+ define(`RETVAL', `adc x0, xzr, xzr')
+ define(func, mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+ define(`ADDSUBC', sbcs)
+ define(`CLRCY', `cmp xzr, xzr')
+ define(`RETVAL', `sbc x0, xzr, xzr
+ and x0, x0, #1')
+ define(func, mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ cmp cnd, #1
+ sbc cnd, cnd, cnd
+
+ CLRCY C really only needed for n = 0 (mod 4)
+
+ tbz n, #0, L(1)
+ ldr x10, [up], #8
+ ldr x12, [vp], #8
+ bic x6, x12, cnd
+ ADDSUBC x8, x10, x6
+ sub n, n, #1
+ str x8, [rp], #8
+ cbz n, L(rt)
+
+L(1): ldp x10, x11, [up], #16
+ ldp x12, x13, [vp], #16
+ sub n, n, #2
+ cbz n, L(end)
+
+L(top): bic x6, x12, cnd
+ bic x7, x13, cnd
+ ldp x12, x13, [vp], #16
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ ldp x10, x11, [up], #16
+ sub n, n, #2
+ stp x8, x9, [rp], #16
+ cbnz n, L(top)
+
+L(end): bic x6, x12, cnd
+ bic x7, x13, cnd
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ stp x8, x9, [rp]
+L(rt): RETVAL
+ ret
+EPILOGUE()
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/ia64/aors_n.asm
--- a/mpn/ia64/aors_n.asm Thu Mar 14 10:35:21 2013 +0100
+++ b/mpn/ia64/aors_n.asm Thu Mar 14 22:45:42 2013 +0100
@@ -30,7 +30,7 @@
C "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
C * The non-nc code was trimmed cycle for cycle to its current state. It is
C probably hard to save more than an odd cycle there. The nc code is much
-C rawer (since tune/speed doesn't have any applicable direct measurements).
+C cruder (since tune/speed doesn't have any applicable direct measurements).
C * Without the nc entry points, this becomes around 1800 bytes of object
C code; the nc code adds over 1000 bytes. We should perhaps sacrifice a
C few cycles for the non-nc code and let it fall into the nc code.
@@ -406,7 +406,6 @@
EPILOGUE()
-ASM_START()
PROLOGUE(func)
.prologue
.save ar.lc, r2
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/ia64/cnd_aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/ia64/cnd_aors_n.asm Thu Mar 14 22:45:42 2013 +0100
@@ -0,0 +1,248 @@
+dnl IA-64 mpn_cnd_add_n/mpn_cnd_sub_n.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 1.5
+
+C INPUT PARAMETERS
+define(`cnd', `r32')
+define(`rp', `r33')
+define(`up', `r34')
+define(`vp', `r35')
+define(`n', `r36')
+
+ifdef(`OPERATION_cnd_add_n',`
+ define(ADDSUB, add)
+ define(CND, ltu)
+ define(INCR, 1)
+ define(LIM, -1)
+ define(func, mpn_cnd_add_n)
+')
+ifdef(`OPERATION_cnd_sub_n',`
+ define(ADDSUB, sub)
+ define(CND, gtu)
+ define(INCR, -1)
+ define(LIM, 0)
+ define(func, mpn_cnd_sub_n)
+')
+
+define(cmpeqor, `cmp.eq.or')
+define(PFDIST, 160)
+
+C Some useful aliases for registers we use
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`x0',`r20') define(`x1',`r21') define(`x2',`r22') define(`x3',`r23')
+define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
+define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
+define(`up1',`up') define(`up2',`r8') define(`upadv',`r1')
+define(`vp1',`vp') define(`vp2',`r9') define(`vpadv',`r11')
+define(`rp1',`rp') define(`rp2',`r10')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ .prologue
+ .save ar.lc, r2
+ .body
+ifdef(`HAVE_ABI_32',`
+ addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
More information about the gmp-commit mailing list