[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Nov 8 00:42:24 UTC 2018
details: /var/hg/gmp/rev/5680ffe364d4
changeset: 17679:5680ffe364d4
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Nov 08 01:25:46 2018 +0100
description:
(EXTRA_PROGRAMS): Add missing files.
details: /var/hg/gmp/rev/1db067f8fb23
changeset: 17680:1db067f8fb23
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Nov 08 01:29:29 2018 +0100
description:
Fix comment typo.
details: /var/hg/gmp/rev/bd1912fb6496
changeset: 17681:bd1912fb6496
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Nov 08 01:39:54 2018 +0100
description:
Declare use of neon insns.
details: /var/hg/gmp/rev/3230453a6131
changeset: 17682:3230453a6131
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Nov 08 01:40:15 2018 +0100
description:
Declare use of neon insns. Cleanup.
diffstat:
mpn/arm/neon/lorrshift.asm | 6 ++--
mpn/arm/neon/lshiftc.asm | 47 +++++++++++++++------------------------------
mpn/generic/mod_1_4.c | 2 +-
tests/devel/Makefile.am | 4 +-
4 files changed, 22 insertions(+), 37 deletions(-)
diffs (155 lines):
diff -r f4ae84f1db39 -r 3230453a6131 mpn/arm/neon/lorrshift.asm
--- a/mpn/arm/neon/lorrshift.asm Thu Nov 08 01:23:37 2018 +0100
+++ b/mpn/arm/neon/lorrshift.asm Thu Nov 08 01:40:15 2018 +0100
@@ -56,8 +56,8 @@
C * Try using 128-bit operations. Note that Neon lacks pure 128-bit shifts,
C which might make it tricky.
C * Clean up and simplify.
-C * Consider sharing most of the code for lshift and rshift, since the feed-in code,
-C the loop, and most of the wind-down code are identical.
+C * Consider sharing most of the code for lshift and rshift, since the feed-in
+C code, the loop, and most of the wind-down code are identical.
C * Replace the basecase code with code using 'extension' registers.
C * Optimise. It is not clear that this loop insn permutation is optimal for
C either A9 or A15.
@@ -85,7 +85,7 @@
MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
-ASM_START()
+ASM_START(neon)
TEXT
ALIGN(64)
PROLOGUE(func)
diff -r f4ae84f1db39 -r 3230453a6131 mpn/arm/neon/lshiftc.asm
--- a/mpn/arm/neon/lshiftc.asm Thu Nov 08 01:23:37 2018 +0100
+++ b/mpn/arm/neon/lshiftc.asm Thu Nov 08 01:40:15 2018 +0100
@@ -68,53 +68,38 @@
define(`n', `r2')
define(`cnt', `r3')
- define(`IFLSH', `$1')
- define(`IFRSH', `')
- define(`X',`0')
- define(`Y',`1')
- define(`func',`mpn_lshiftc')
-define(`OPERATION_lshiftc',1)
-
-ASM_START()
+ASM_START(neon)
TEXT
ALIGN(64)
PROLOGUE(mpn_lshiftc)
-IFLSH(` mov r12, n, lsl #2 ')
-IFLSH(` add rp, rp, r12 ')
-IFLSH(` add ap, ap, r12 ')
+ mov r12, n, lsl #2
+ add rp, rp, r12
+ add ap, ap, r12
cmp n, #4 C SIMD code n limit
ble L(base)
-ifdef(`OPERATION_lshiftc',`
vdup.32 d6, r3 C left shift count is positive
sub r3, r3, #64 C right shift count is negative
vdup.32 d7, r3
- mov r12, #-8') C lshift pointer update offset
-ifdef(`OPERATION_rshift',`
- rsb r3, r3, #0 C right shift count is negative
- vdup.32 d6, r3
- add r3, r3, #64 C left shift count is positive
- vdup.32 d7, r3
- mov r12, #8') C rshift pointer update offset
+ mov r12, #-8 C lshift pointer update offset
-IFLSH(` sub ap, ap, #8 ')
+ sub ap, ap, #8
vld1.32 {d19}, [ap], r12 C load initial 2 limbs
vshl.u64 d18, d19, d7 C retval
tst rp, #4 C is rp 64-bit aligned already?
beq L(rp_aligned) C yes, skip
vmvn d19, d19
-IFLSH(` add ap, ap, #4 ') C move back ap pointer
-IFRSH(` sub ap, ap, #4 ') C move back ap pointer
+ add ap, ap, #4 C move back ap pointer
vshl.u64 d4, d19, d6
sub n, n, #1 C first limb handled
-IFLSH(` sub rp, rp, #4 ')
- vst1.32 {d4[Y]}, [rp]IFRSH(!) C store first limb, rp gets aligned
+ sub rp, rp, #4
+ vst1.32 {d4[1]}, [rp] C store first limb, rp gets aligned
vld1.32 {d19}, [ap], r12 C load ap[1] and ap[2]
L(rp_aligned):
-IFLSH(` sub rp, rp, #8 ')
+ sub rp, rp, #8
subs n, n, #6
vmvn d19, d19
blt L(two_or_three_more)
@@ -180,9 +165,9 @@
L(l3): vshl.u64 d5, d19, d6
vld1.32 {d17}, [ap], r12
L(cj1): vmov.u8 d16, #0
-IFLSH(` add ap, ap, #4 ')
+ add ap, ap, #4
vmvn d17, d17
- vld1.32 {d16[Y]}, [ap], r12
+ vld1.32 {d16[1]}, [ap], r12
vshl.u64 d0, d17, d7
vshl.u64 d4, d17, d6
vmvn d16, d16
@@ -192,9 +177,9 @@
vst1.32 {d3}, [rp:64], r12
vorr d2, d4, d1
vst1.32 {d2}, [rp:64], r12
-IFLSH(` add rp, rp, #4 ')
- vst1.32 {d5[Y]}, [rp]
- vmov.32 r0, d18[X]
+ add rp, rp, #4
+ vst1.32 {d5[1]}, [rp]
+ vmov.32 r0, d18[0]
bx lr
L(l2): vld1.32 {d16}, [ap], r12
@@ -208,7 +193,7 @@
vorr d3, d5, d0
L(cj2): vst1.32 {d2}, [rp:64], r12
vst1.32 {d3}, [rp]
- vmov.32 r0, d18[X]
+ vmov.32 r0, d18[0]
bx lr
diff -r f4ae84f1db39 -r 3230453a6131 mpn/generic/mod_1_4.c
--- a/mpn/generic/mod_1_4.c Thu Nov 08 01:23:37 2018 +0100
+++ b/mpn/generic/mod_1_4.c Thu Nov 08 01:40:15 2018 +0100
@@ -1,6 +1,6 @@
/* mpn_mod_1s_4p (ap, n, b, cps)
Divide (ap,,n) by b. Return the single-limb remainder.
- Requires that d < B / 4.
+ Requires that b < B / 4.
Contributed to the GNU project by Torbjorn Granlund.
Based on a suggestion by Peter L. Montgomery.
diff -r f4ae84f1db39 -r 3230453a6131 tests/devel/Makefile.am
--- a/tests/devel/Makefile.am Thu Nov 08 01:23:37 2018 +0100
+++ b/tests/devel/Makefile.am Thu Nov 08 01:40:15 2018 +0100
@@ -1,6 +1,6 @@
## Process this file with automake to generate Makefile.in
-# Copyright 2000-2002 Free Software Foundation, Inc.
+# Copyright 2000-2002, 2018 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library test suite.
#
@@ -25,7 +25,7 @@
# add_n_sub_n add_n_sub_n_2 not yet built since mpn_add_n_sub_n doesn't yet exist
#
EXTRA_PROGRAMS = \
- aors_n anymul_1 copy divmod_1 divrem shift logops_n sqrtrem_1_2 tst-addsub try
+ aors_n anymul_1 copy divmod_1 divrem shift logops_n sqrtrem_1_2 tst-addsub try addmul_N mul_N cnd_aors_n
allprogs: $(EXTRA_PROGRAMS)
More information about the gmp-commit mailing list