[Gmp-commit] /var/hg/gmp: 4 new changesets

Thu Nov 8 00:42:24 UTC 2018

details:   /var/hg/gmp/rev/5680ffe364d4
changeset: 17679:5680ffe364d4
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Nov 08 01:25:46 2018 +0100
description:
(EXTRA_PROGRAMS): Add missing files.

details:   /var/hg/gmp/rev/1db067f8fb23
changeset: 17680:1db067f8fb23
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Nov 08 01:29:29 2018 +0100
description:
Fix comment typo.

details:   /var/hg/gmp/rev/bd1912fb6496
changeset: 17681:bd1912fb6496
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Nov 08 01:39:54 2018 +0100
description:
Declare use of neon insns.

details:   /var/hg/gmp/rev/3230453a6131
changeset: 17682:3230453a6131
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Nov 08 01:40:15 2018 +0100
description:
Declare use of neon insns. Cleanup.

diffstat:

 mpn/arm/neon/lorrshift.asm |   6 ++--
 mpn/arm/neon/lshiftc.asm   |  47 +++++++++++++++------------------------------
 mpn/generic/mod_1_4.c      |   2 +-
 tests/devel/Makefile.am    |   4 +-
 4 files changed, 22 insertions(+), 37 deletions(-)

diffs (155 lines):

diff -r f4ae84f1db39 -r 3230453a6131 mpn/arm/neon/lorrshift.asm
--- a/mpn/arm/neon/lorrshift.asm	Thu Nov 08 01:23:37 2018 +0100
+++ b/mpn/arm/neon/lorrshift.asm	Thu Nov 08 01:40:15 2018 +0100
@@ -56,8 +56,8 @@
 C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
 C    which might make it tricky.
 C  * Clean up and simplify.
-C  * Consider sharing most of the code for lshift and rshift, since the feed-in code,
-C    the loop, and most of the wind-down code are identical.
+C  * Consider sharing most of the code for lshift and rshift, since the feed-in
+C    code, the loop, and most of the wind-down code are identical.
 C  * Replace the basecase code with code using 'extension' registers.
 C  * Optimise.  It is not clear that this loop insn permutation is optimal for
 C    either A9 or A15.
@@ -85,7 +85,7 @@
 
 MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
 
-ASM_START()
+ASM_START(neon)
 	TEXT
 	ALIGN(64)
 PROLOGUE(func)
diff -r f4ae84f1db39 -r 3230453a6131 mpn/arm/neon/lshiftc.asm
--- a/mpn/arm/neon/lshiftc.asm	Thu Nov 08 01:23:37 2018 +0100
+++ b/mpn/arm/neon/lshiftc.asm	Thu Nov 08 01:40:15 2018 +0100
@@ -68,53 +68,38 @@
 define(`n',   `r2')
 define(`cnt', `r3')
 
-	define(`IFLSH', `$1')
-	define(`IFRSH', `')
-	define(`X',`0')
-	define(`Y',`1')
-	define(`func',`mpn_lshiftc')
-define(`OPERATION_lshiftc',1)
-
-ASM_START()
+ASM_START(neon)
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_lshiftc)
-IFLSH(`	mov	r12, n, lsl #2	')
-IFLSH(`	add	rp, rp, r12	')
-IFLSH(`	add	ap, ap, r12	')
+	mov	r12, n, lsl #2
+	add	rp, rp, r12
+	add	ap, ap, r12
 
 	cmp	n, #4			C SIMD code n limit
 	ble	L(base)
 
-ifdef(`OPERATION_lshiftc',`
 	vdup.32	d6, r3			C left shift count is positive
 	sub	r3, r3, #64		C right shift count is negative
 	vdup.32	d7, r3
-	mov	r12, #-8')		C lshift pointer update offset
-ifdef(`OPERATION_rshift',`
-	rsb	r3, r3, #0		C right shift count is negative
-	vdup.32	d6, r3
-	add	r3, r3, #64		C left shift count is positive
-	vdup.32	d7, r3
-	mov	r12, #8')		C rshift pointer update offset
+	mov	r12, #-8		C lshift pointer update offset
 
-IFLSH(`	sub	ap, ap, #8	')
+	sub	ap, ap, #8
 	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
 	vshl.u64 d18, d19, d7		C retval
 
 	tst	rp, #4			C is rp 64-bit aligned already?
 	beq	L(rp_aligned)		C yes, skip
 	vmvn	 d19, d19
-IFLSH(`	add	ap, ap, #4	')	C move back ap pointer
-IFRSH(`	sub	ap, ap, #4	')	C move back ap pointer
+	add	ap, ap, #4		C move back ap pointer
 	vshl.u64 d4, d19, d6
 	sub	n, n, #1		C first limb handled
-IFLSH(`	sub	 rp, rp, #4	')
-	vst1.32	 {d4[Y]}, [rp]IFRSH(!)	C store first limb, rp gets aligned
+	sub	 rp, rp, #4
+	vst1.32	 {d4[1]}, [rp]		C store first limb, rp gets aligned
 	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
 
 L(rp_aligned):
-IFLSH(`	sub	rp, rp, #8	')
+	sub	rp, rp, #8
 	subs	n, n, #6
 	vmvn	 d19, d19
 	blt	L(two_or_three_more)
@@ -180,9 +165,9 @@
 L(l3):	vshl.u64 d5, d19, d6
 	vld1.32	 {d17}, [ap], r12
 L(cj1):	vmov.u8	 d16, #0
-IFLSH(`	add	 ap, ap, #4	')
+	add	 ap, ap, #4
 	vmvn	 d17, d17
-	vld1.32	 {d16[Y]}, [ap], r12
+	vld1.32	 {d16[1]}, [ap], r12
 	vshl.u64 d0, d17, d7
 	vshl.u64 d4, d17, d6
 	vmvn	 d16, d16
@@ -192,9 +177,9 @@
 	vst1.32	 {d3}, [rp:64], r12
 	vorr	 d2, d4, d1
 	vst1.32	 {d2}, [rp:64], r12
-IFLSH(`	add	 rp, rp, #4	')
-	vst1.32	 {d5[Y]}, [rp]
-	vmov.32	 r0, d18[X]
+	add	 rp, rp, #4
+	vst1.32	 {d5[1]}, [rp]
+	vmov.32	 r0, d18[0]
 	bx	lr
 
 L(l2):	vld1.32	 {d16}, [ap], r12
@@ -208,7 +193,7 @@
 	vorr	 d3, d5, d0
 L(cj2):	vst1.32	 {d2}, [rp:64], r12
 	vst1.32	 {d3}, [rp]
-	vmov.32	 r0, d18[X]
+	vmov.32	 r0, d18[0]
 	bx	lr
 
 
diff -r f4ae84f1db39 -r 3230453a6131 mpn/generic/mod_1_4.c
--- a/mpn/generic/mod_1_4.c	Thu Nov 08 01:23:37 2018 +0100
+++ b/mpn/generic/mod_1_4.c	Thu Nov 08 01:40:15 2018 +0100
@@ -1,6 +1,6 @@
 /* mpn_mod_1s_4p (ap, n, b, cps)
    Divide (ap,,n) by b.  Return the single-limb remainder.
-   Requires that d < B / 4.
+   Requires that b < B / 4.
 
    Contributed to the GNU project by Torbjorn Granlund.
    Based on a suggestion by Peter L. Montgomery.
diff -r f4ae84f1db39 -r 3230453a6131 tests/devel/Makefile.am
--- a/tests/devel/Makefile.am	Thu Nov 08 01:23:37 2018 +0100
+++ b/tests/devel/Makefile.am	Thu Nov 08 01:40:15 2018 +0100
@@ -1,6 +1,6 @@
 ## Process this file with automake to generate Makefile.in
 
-# Copyright 2000-2002 Free Software Foundation, Inc.
+# Copyright 2000-2002, 2018 Free Software Foundation, Inc.
 #
 # This file is part of the GNU MP Library test suite.
 #
@@ -25,7 +25,7 @@
 # add_n_sub_n add_n_sub_n_2 not yet built since mpn_add_n_sub_n doesn't yet exist
 #
 EXTRA_PROGRAMS = \
-  aors_n anymul_1 copy divmod_1 divrem shift logops_n sqrtrem_1_2 tst-addsub try
+  aors_n anymul_1 copy divmod_1 divrem shift logops_n sqrtrem_1_2 tst-addsub try addmul_N mul_N cnd_aors_n
 
 allprogs: $(EXTRA_PROGRAMS)