[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Apr 8 05:58:37 CEST 2013
details: /var/hg/gmp/rev/92b87a14e382
changeset: 15695:92b87a14e382
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 08 05:09:20 2013 +0200
description:
Rewrite count-trailing-zeros code, using private table.
details: /var/hg/gmp/rev/672697a024a8
changeset: 15696:672697a024a8
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 08 05:52:50 2013 +0200
description:
Add cycle numbers.
details: /var/hg/gmp/rev/849dfa2b37ee
changeset: 15697:849dfa2b37ee
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 08 05:57:59 2013 +0200
description:
Canonicalise arm assembly to use old style "mov ... lsl" for shift ops.
details: /var/hg/gmp/rev/26afaf8703c5
changeset: 15698:26afaf8703c5
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 08 05:58:31 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 5 +++++
mpn/arm/dive_1.asm | 26 ++++++++++++++++----------
mpn/arm/lshift.asm | 10 +++++-----
mpn/arm/lshiftc.asm | 10 +++++-----
mpn/arm/mod_34lsub1.asm | 4 ++--
mpn/arm/neon/lorrshift.asm | 20 ++++++++++----------
mpn/arm/neon/lshiftc.asm | 10 +++++-----
mpn/arm/rsh1aors_n.asm | 24 ++++++++++++------------
mpn/arm/rshift.asm | 10 +++++-----
mpn/arm/v5/gcd_1.asm | 6 +++---
mpn/arm/v6/dive_1.asm | 8 ++++----
mpn/arm/v6t2/gcd_1.asm | 6 +++---
mpn/arm/v7a/cora15/neon/rsh1aors_n.asm | 2 +-
mpn/sparc64/ultrasparct3/mod_34lsub1.asm | 4 ++--
14 files changed, 78 insertions(+), 67 deletions(-)
diffs (truncated from 472 to 300 lines):
diff -r 95358c679785 -r 26afaf8703c5 ChangeLog
--- a/ChangeLog Mon Apr 08 00:03:35 2013 +0200
+++ b/ChangeLog Mon Apr 08 05:58:31 2013 +0200
@@ -1,3 +1,8 @@
+2013-04-08 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/arm/dive_1.asm: Rewrite count-trailing-zeros code, using private
+ table.
+
2013-04-07 Torbjorn Granlund <tege at gmplib.org>
* mpn/sparc64/ultrasparct3/mod_34lsub1.asm: New file.
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/dive_1.asm
--- a/mpn/arm/dive_1.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/dive_1.asm Mon Apr 08 05:58:31 2013 +0200
@@ -55,20 +55,15 @@
bne L(inv)
C count trailing zeros
+ movs r4, d, lsl #16
+ moveq d, d, lsr #16
+ moveq cnt, #16
tst d, #0xff
moveq d, d, lsr #8
addeq cnt, cnt, #8
- tst d, #0xff
- moveq d, d, lsr #8
- addeq cnt, cnt, #8
- tst d, #0xff
- moveq d, d, lsr #8
- addeq cnt, cnt, #8
- rsb r5, d, #0
- and r5, r5, d
- LEA( r4, __clz_tab)
+ LEA( r4, ctz_tab)
+ and r5, d, #0xff
ldrb r4, [r4, r5]
- sub r4, r4, #2
mov d, d, lsr r4
add cnt, cnt, r4
@@ -132,3 +127,14 @@
pop {r4-r9}
bx r14
EPILOGUE()
+
+ .section .rodata
+ctz_tab:
+ .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
+ .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/lshift.asm
--- a/mpn/arm/lshift.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/lshift.asm Mon Apr 08 05:58:31 2013 +0200
@@ -43,7 +43,7 @@
add rp, rp, n, lsl #2
rsb tnc, cnt, #32
- lsl r7, r4, cnt
+ mov r7, r4, lsl cnt
tst n, #1
beq L(evn) C n even
@@ -59,19 +59,19 @@
L(top): ldr r8, [up, #-4]!
orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(mid): ldr r6, [up, #-4]!
orr r7, r7, r8, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r8, cnt
+ mov r7, r8, lsl cnt
subs n, n, #2
bgt L(top)
L(end): orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(1): str r7, [rp, #-4]
- lsr r0, r4, tnc
+ mov r0, r4, lsr tnc
pop {r4, r6, r7, r8}
bx r14
EPILOGUE()
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/lshiftc.asm
--- a/mpn/arm/lshiftc.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/lshiftc.asm Mon Apr 08 05:58:31 2013 +0200
@@ -44,7 +44,7 @@
rsb tnc, cnt, #32
mvn r6, r4
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
tst n, #1
beq L(evn) C n even
@@ -63,22 +63,22 @@
orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
mvn r8, r8
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(mid): ldr r6, [up, #-4]!
orr r7, r7, r8, lsr tnc
str r7, [rp, #-4]!
mvn r6, r6
- lsl r7, r8, cnt
+ mov r7, r8, lsl cnt
subs n, n, #2
bgt L(top)
L(end): orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(1): mvn r6, #0
orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]
- lsr r0, r4, tnc
+ mov r0, r4, lsr tnc
pop {r4, r6, r7, r8}
bx r14
EPILOGUE()
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/mod_34lsub1.asm
--- a/mpn/arm/mod_34lsub1.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/mod_34lsub1.asm Mon Apr 08 05:58:31 2013 +0200
@@ -84,12 +84,12 @@
add r0, r0, r2, lsr #24
add r0, r0, r7
- lsl r7, r3, #8
+ mov r7, r3, lsl #8
bic r1, r7, #0xff000000
add r0, r0, r1
add r0, r0, r3, lsr #16
- lsl r7, r12, #16
+ mov r7, r12, lsl #16
bic r1, r7, #0xff000000
add r0, r0, r1
add r0, r0, r12, lsr #8
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/neon/lorrshift.asm
--- a/mpn/arm/neon/lorrshift.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/neon/lorrshift.asm Mon Apr 08 05:58:31 2013 +0200
@@ -202,7 +202,7 @@
ldr r4, [ap, #-4]!
rsb tnc, cnt, #32
- lsl r7, r4, cnt
+ mov r7, r4, lsl cnt
tst n, #1
beq L(ev) C n even
@@ -218,23 +218,23 @@
L(tp): ldr r8, [ap, #-4]!
orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(md): ldr r6, [ap, #-4]!
orr r7, r7, r8, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r8, cnt
+ mov r7, r8, lsl cnt
L(ed): orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(ed1): str r7, [rp, #-4]
- lsr r0, r4, tnc
+ mov r0, r4, lsr tnc
')
ifdef(`OPERATION_rshift',`
ldr r4, [ap]
rsb tnc, cnt, #32
- lsr r7, r4, cnt
+ mov r7, r4, lsr cnt
tst n, #1
beq L(ev) C n even
@@ -251,17 +251,17 @@
L(tp): ldr r8, [ap, #4]!
orr r7, r7, r6, lsl tnc
str r7, [rp], #4
- lsr r7, r6, cnt
+ mov r7, r6, lsr cnt
L(md): ldr r6, [ap, #4]!
orr r7, r7, r8, lsl tnc
str r7, [rp], #4
- lsr r7, r8, cnt
+ mov r7, r8, lsr cnt
L(ed): orr r7, r7, r6, lsl tnc
str r7, [rp], #4
- lsr r7, r6, cnt
+ mov r7, r6, lsr cnt
L(ed1): str r7, [rp], #4
- lsl r0, r4, tnc
+ mov r0, r4, lsl tnc
')
pop {r4, r6, r7, r8}
bx r14
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/neon/lshiftc.asm
--- a/mpn/arm/neon/lshiftc.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/neon/lshiftc.asm Mon Apr 08 05:58:31 2013 +0200
@@ -208,7 +208,7 @@
rsb tnc, cnt, #32
mvn r6, r4
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
tst n, #1
beq L(ev) C n even
@@ -227,20 +227,20 @@
orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
mvn r8, r8
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(md): ldr r6, [ap, #-4]!
orr r7, r7, r8, lsr tnc
str r7, [rp, #-4]!
mvn r6, r6
- lsl r7, r8, cnt
+ mov r7, r8, lsl cnt
L(ed): orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]!
- lsl r7, r6, cnt
+ mov r7, r6, lsl cnt
L(ed1): mvn r6, #0
orr r7, r7, r6, lsr tnc
str r7, [rp, #-4]
- lsr r0, r4, tnc
+ mov r0, r4, lsr tnc
pop {r4, r6, r7, r8}
bx r14
EPILOGUE()
diff -r 95358c679785 -r 26afaf8703c5 mpn/arm/rsh1aors_n.asm
--- a/mpn/arm/rsh1aors_n.asm Mon Apr 08 00:03:35 2013 +0200
+++ b/mpn/arm/rsh1aors_n.asm Mon Apr 08 05:58:31 2013 +0200
@@ -27,7 +27,7 @@
C Cortex-A7 ?
C Cortex-A8 ?
C Cortex-A9 3.64-3.7
-C Cortex-A15 ?
+C Cortex-A15 2.5
C TODO
C * Not optimised.
@@ -60,7 +60,7 @@
ldr r4, [up], #4
ldr r8, [vp], #4
ADDSUB r4, r4, r8
- rrxs r12, r7
+ movs r12, r7, rrx
and r11, r4, #1 C return value
subs n, n, #4
blo L(end)
@@ -71,10 +71,10 @@
ADDSUBC r5, r5, r8
ADDSUBC r6, r6, r9
ADDSUBC r7, r7, r10
- rrxs r12, r7
- rrxs r6, r6
- rrxs r5, r5
- rrxs r4, r4
+ movs r12, r7, rrx
+ movs r6, r6, rrx
+ movs r5, r5, rrx
+ movs r4, r4, rrx
subs n, n, #3
stmia rp!, {r4,r5,r6}
mov r4, r7
@@ -87,9 +87,9 @@
cmn r12, r12
ADDSUBC r5, r5, r8
ADDSUBC r6, r6, r9
- rrxs r12, r6
- rrxs r5, r5
- rrxs r4, r4
+ movs r12, r6, rrx
+ movs r5, r5, rrx
+ movs r4, r4, rrx
stmia rp!, {r4,r5}
mov r4, r6
b L(e1)
@@ -99,13 +99,13 @@
ldr r8, [vp, #0]
More information about the gmp-commit mailing list