[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Apr 8 05:58:37 CEST 2013


details:   /var/hg/gmp/rev/92b87a14e382
changeset: 15695:92b87a14e382
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 08 05:09:20 2013 +0200
description:
Rewrite count-trailing-zeros code, using private table.

details:   /var/hg/gmp/rev/672697a024a8
changeset: 15696:672697a024a8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 08 05:52:50 2013 +0200
description:
Add cycle numbers.

details:   /var/hg/gmp/rev/849dfa2b37ee
changeset: 15697:849dfa2b37ee
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 08 05:57:59 2013 +0200
description:
Canonicalise arm assembly to use old style "mov ... lsl" for shift ops.

details:   /var/hg/gmp/rev/26afaf8703c5
changeset: 15698:26afaf8703c5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Apr 08 05:58:31 2013 +0200
description:
ChangeLog

diffstat:

 ChangeLog                                |   5 +++++
 mpn/arm/dive_1.asm                       |  26 ++++++++++++++++----------
 mpn/arm/lshift.asm                       |  10 +++++-----
 mpn/arm/lshiftc.asm                      |  10 +++++-----
 mpn/arm/mod_34lsub1.asm                  |   4 ++--
 mpn/arm/neon/lorrshift.asm               |  20 ++++++++++----------
 mpn/arm/neon/lshiftc.asm                 |  10 +++++-----
 mpn/arm/rsh1aors_n.asm                   |  24 ++++++++++++------------
 mpn/arm/rshift.asm                       |  10 +++++-----
 mpn/arm/v5/gcd_1.asm                     |   6 +++---
 mpn/arm/v6/dive_1.asm                    |   8 ++++----
 mpn/arm/v6t2/gcd_1.asm                   |   6 +++---
 mpn/arm/v7a/cora15/neon/rsh1aors_n.asm   |   2 +-
 mpn/sparc64/ultrasparct3/mod_34lsub1.asm |   4 ++--
 14 files changed, 78 insertions(+), 67 deletions(-)

diffs (truncated from 472 to 300 lines):

diff -r 95358c679785 -r 26afaf8703c5 ChangeLog
--- a/ChangeLog	Mon Apr 08 00:03:35 2013 +0200
+++ b/ChangeLog	Mon Apr 08 05:58:31 2013 +0200
@@ -1,3 +1,8 @@
+2013-04-08  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/arm/dive_1.asm: Rewrite count-trailing-zeros code, using private
+	table.
+
 2013-04-07  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/sparc64/ultrasparct3/mod_34lsub1.asm: New file.
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/dive_1.asm
--- a/mpn/arm/dive_1.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/dive_1.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -55,20 +55,15 @@
 	bne	L(inv)
 
 C count trailing zeros
+	movs	r4, d, lsl #16
+	moveq	d, d, lsr #16
+	moveq	cnt, #16
 	tst	d, #0xff
 	moveq	d, d, lsr #8
 	addeq	cnt, cnt, #8
-	tst	d, #0xff
-	moveq	d, d, lsr #8
-	addeq	cnt, cnt, #8
-	tst	d, #0xff
-	moveq	d, d, lsr #8
-	addeq	cnt, cnt, #8
-	rsb	r5, d, #0
-	and	r5, r5, d
-	LEA(	r4, __clz_tab)
+	LEA(	r4, ctz_tab)
+	and	r5, d, #0xff
 	ldrb	r4, [r4, r5]
-	sub	r4, r4, #2
 	mov	d, d, lsr r4
 	add	cnt, cnt, r4
 
@@ -132,3 +127,14 @@
 	pop	{r4-r9}
 	bx	r14
 EPILOGUE()
+
+	.section .rodata
+ctz_tab:
+	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/lshift.asm
--- a/mpn/arm/lshift.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/lshift.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -43,7 +43,7 @@
 	add	rp, rp, n, lsl #2
 	rsb	tnc, cnt, #32
 
-	lsl	r7, r4, cnt
+	mov	r7, r4, lsl cnt
 	tst	n, #1
 	beq	L(evn)			C n even
 
@@ -59,19 +59,19 @@
 L(top):	ldr	r8, [up, #-4]!
 	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(mid):	ldr	r6, [up, #-4]!
 	orr	r7, r7, r8, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r8, cnt
+	mov	r7, r8, lsl cnt
 	subs	n, n, #2
 	bgt	L(top)
 
 L(end):	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(1):	str	r7, [rp, #-4]
-	lsr	r0, r4, tnc
+	mov	r0, r4, lsr tnc
 	pop	{r4, r6, r7, r8}
 	bx	r14
 EPILOGUE()
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/lshiftc.asm
--- a/mpn/arm/lshiftc.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/lshiftc.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -44,7 +44,7 @@
 	rsb	tnc, cnt, #32
 	mvn	r6, r4
 
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 	tst	n, #1
 	beq	L(evn)			C n even
 
@@ -63,22 +63,22 @@
 	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
 	mvn	r8, r8
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(mid):	ldr	r6, [up, #-4]!
 	orr	r7, r7, r8, lsr tnc
 	str	r7, [rp, #-4]!
 	mvn	r6, r6
-	lsl	r7, r8, cnt
+	mov	r7, r8, lsl cnt
 	subs	n, n, #2
 	bgt	L(top)
 
 L(end):	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(1):	mvn	r6, #0
 	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]
-	lsr	r0, r4, tnc
+	mov	r0, r4, lsr tnc
 	pop	{r4, r6, r7, r8}
 	bx	r14
 EPILOGUE()
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/mod_34lsub1.asm
--- a/mpn/arm/mod_34lsub1.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/mod_34lsub1.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -84,12 +84,12 @@
 	add	r0, r0, r2, lsr #24
 	add	r0, r0, r7
 
-	lsl	r7, r3, #8
+	mov	r7, r3, lsl #8
 	bic	r1, r7, #0xff000000
 	add	r0, r0, r1
 	add	r0, r0, r3, lsr #16
 
-	lsl	r7, r12, #16
+	mov	r7, r12, lsl #16
 	bic	r1, r7, #0xff000000
 	add	r0, r0, r1
 	add	r0, r0, r12, lsr #8
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/neon/lorrshift.asm
--- a/mpn/arm/neon/lorrshift.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/neon/lorrshift.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -202,7 +202,7 @@
 	ldr	r4, [ap, #-4]!
 	rsb	tnc, cnt, #32
 
-	lsl	r7, r4, cnt
+	mov	r7, r4, lsl cnt
 	tst	n, #1
 	beq	L(ev)			C n even
 
@@ -218,23 +218,23 @@
 L(tp):	ldr	r8, [ap, #-4]!
 	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(md):	ldr	r6, [ap, #-4]!
 	orr	r7, r7, r8, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r8, cnt
+	mov	r7, r8, lsl cnt
 
 L(ed):	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(ed1):	str	r7, [rp, #-4]
-	lsr	r0, r4, tnc
+	mov	r0, r4, lsr tnc
 ')
 ifdef(`OPERATION_rshift',`
 	ldr	r4, [ap]
 	rsb	tnc, cnt, #32
 
-	lsr	r7, r4, cnt
+	mov	r7, r4, lsr cnt
 	tst	n, #1
 	beq	L(ev)			C n even
 
@@ -251,17 +251,17 @@
 L(tp):	ldr	r8, [ap, #4]!
 	orr	r7, r7, r6, lsl tnc
 	str	r7, [rp], #4
-	lsr	r7, r6, cnt
+	mov	r7, r6, lsr cnt
 L(md):	ldr	r6, [ap, #4]!
 	orr	r7, r7, r8, lsl tnc
 	str	r7, [rp], #4
-	lsr	r7, r8, cnt
+	mov	r7, r8, lsr cnt
 
 L(ed):	orr	r7, r7, r6, lsl tnc
 	str	r7, [rp], #4
-	lsr	r7, r6, cnt
+	mov	r7, r6, lsr cnt
 L(ed1):	str	r7, [rp], #4
-	lsl	r0, r4, tnc
+	mov	r0, r4, lsl tnc
 ')
 	pop	{r4, r6, r7, r8}
 	bx	r14
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/neon/lshiftc.asm
--- a/mpn/arm/neon/lshiftc.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/neon/lshiftc.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -208,7 +208,7 @@
 	rsb	tnc, cnt, #32
 	mvn	r6, r4
 
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 	tst	n, #1
 	beq	L(ev)			C n even
 
@@ -227,20 +227,20 @@
 	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
 	mvn	r8, r8
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(md):	ldr	r6, [ap, #-4]!
 	orr	r7, r7, r8, lsr tnc
 	str	r7, [rp, #-4]!
 	mvn	r6, r6
-	lsl	r7, r8, cnt
+	mov	r7, r8, lsl cnt
 
 L(ed):	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]!
-	lsl	r7, r6, cnt
+	mov	r7, r6, lsl cnt
 L(ed1):	mvn	r6, #0
 	orr	r7, r7, r6, lsr tnc
 	str	r7, [rp, #-4]
-	lsr	r0, r4, tnc
+	mov	r0, r4, lsr tnc
 	pop	{r4, r6, r7, r8}
 	bx	r14
 EPILOGUE()
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/rsh1aors_n.asm
--- a/mpn/arm/rsh1aors_n.asm	Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/rsh1aors_n.asm	Mon Apr 08 05:58:31 2013 +0200
@@ -27,7 +27,7 @@
 C Cortex-A7	 ?
 C Cortex-A8	 ?
 C Cortex-A9	3.64-3.7
-C Cortex-A15	 ?
+C Cortex-A15	 2.5
 
 C TODO
 C  * Not optimised.
@@ -60,7 +60,7 @@
 	ldr	r4, [up], #4
 	ldr	r8, [vp], #4
 	ADDSUB	r4, r4, r8
-	rrxs	r12, r7
+	movs	r12, r7, rrx
 	and	r11, r4, #1	C return value
 	subs	n, n, #4
 	blo	L(end)
@@ -71,10 +71,10 @@
 	ADDSUBC	r5, r5, r8
 	ADDSUBC	r6, r6, r9
 	ADDSUBC	r7, r7, r10
-	rrxs	r12, r7
-	rrxs	r6, r6
-	rrxs	r5, r5
-	rrxs	r4, r4
+	movs	r12, r7, rrx
+	movs	r6, r6, rrx
+	movs	r5, r5, rrx
+	movs	r4, r4, rrx
 	subs	n, n, #3
 	stmia	rp!, {r4,r5,r6}
 	mov	r4, r7
@@ -87,9 +87,9 @@
 	cmn	r12, r12
 	ADDSUBC	r5, r5, r8
 	ADDSUBC	r6, r6, r9
-	rrxs	r12, r6
-	rrxs	r5, r5
-	rrxs	r4, r4
+	movs	r12, r6, rrx
+	movs	r5, r5, rrx
+	movs	r4, r4, rrx
 	stmia	rp!, {r4,r5}
 	mov	r4, r6
 	b	L(e1)
@@ -99,13 +99,13 @@
 	ldr	r8, [vp, #0]


More information about the gmp-commit mailing list