[Gmp-commit] /var/hg/gmp: Rewrite feed-in code, reducing code size and making it work for v8plus.
mercurial at gmplib.org
mercurial at gmplib.org
Thu Sep 2 19:57:16 UTC 2021
details: /var/hg/gmp/rev/3be14764fc52
changeset: 18231:3be14764fc52
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Sep 02 21:57:13 2021 +0200
description:
Rewrite feed-in code, reducing code size and making it work for v8plus.
diffstat:
mpn/sparc32/v8/addmul_1.asm | 68 +++++++++++++++-----------------------------
mpn/sparc32/v8/mul_1.asm | 63 ++++++++++++++---------------------------
2 files changed, 45 insertions(+), 86 deletions(-)
diffs (207 lines):
diff -r 8cd6cd6cdd8d -r 3be14764fc52 mpn/sparc32/v8/addmul_1.asm
--- a/mpn/sparc32/v8/addmul_1.asm Sat Aug 21 18:17:23 2021 +0200
+++ b/mpn/sparc32/v8/addmul_1.asm Thu Sep 02 21:57:13 2021 +0200
@@ -40,50 +40,32 @@
ASM_START()
PROLOGUE(mpn_addmul_1)
+ ld [%o1+0],%o4
+ andcc %o2,1,%g0
+ be L(bx0)
+ andcc %o2,2,%g0
+L(bx1): be L(01)
orcc %g0,%g0,%g2
- ld [%o1+0],%o4 C 1
+L(b11): add %o0,-8,%o0
+ b L(11)
+ add %o1,-8,%o1
+L(bx0): be L(b00)
+ orcc %g0,%g0,%g2
+L(b10): add %o0,-12,%o0
+ b L(10)
+ add %o1,4,%o1
+L(b00): add %o0,-4,%o0
+ b L(00)
+ add %o1,-4,%o1
- sll %o2,4,%g1
- and %g1,(4-1)<<4,%g1
-ifdef(`PIC',
-` mov %o7,%g4 C Save return address register
-0: call 1f
- add %o7,L(1)-0b,%g3
-1: mov %g4,%o7 C Restore return address register
-',
-` sethi %hi(L(1)),%g3
- or %g3,%lo(L(1)),%g3
-')
- jmp %g3+%g1
- nop
-L(1):
-L(L00): add %o0,-4,%o0
- b L(loop00) C 4, 8, 12, ...
- add %o1,-4,%o1
- nop
-L(L01): b L(loop01) C 1, 5, 9, ...
- nop
- nop
- nop
-L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
- b L(loop10)
- add %o1,4,%o1
- nop
-L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
- b L(loop11)
- add %o1,-8,%o1
- nop
-
-L(loop):
- addcc %g3,%g2,%g3 C 1
+L(top): addcc %g3,%g2,%g3 C 1
ld [%o1+4],%o4 C 2
rd %y,%g2 C 1
addx %g0,%g2,%g2
ld [%o0+0],%g1 C 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] C 1
-L(loop00):
- umul %o4,%o3,%g3 C 2
+L(00): umul %o4,%o3,%g3 C 2
ld [%o0+4],%g1 C 2
addxcc %g3,%g2,%g3 C 2
ld [%o1+8],%o4 C 3
@@ -92,8 +74,7 @@
nop
addcc %g1,%g3,%g3
st %g3,[%o0+4] C 2
-L(loop11):
- umul %o4,%o3,%g3 C 3
+L(11): umul %o4,%o3,%g3 C 3
addxcc %g3,%g2,%g3 C 3
ld [%o1+12],%o4 C 4
rd %y,%g2 C 3
@@ -102,8 +83,7 @@
ld [%o0+8],%g1 C 2
addcc %g1,%g3,%g3
st %g3,[%o0+8] C 3
-L(loop10):
- umul %o4,%o3,%g3 C 4
+L(10): umul %o4,%o3,%g3 C 4
addxcc %g3,%g2,%g3 C 4
ld [%o1+0],%o4 C 1
rd %y,%g2 C 4
@@ -113,9 +93,8 @@
st %g3,[%o0+12] C 4
add %o0,16,%o0
addx %g0,%g2,%g2
-L(loop01):
- addcc %o2,-4,%o2
- bg L(loop)
+L(01): addcc %o2,-4,%o2
+ bg L(top)
umul %o4,%o3,%g3 C 1
addcc %g3,%g2,%g3 C 4
@@ -124,8 +103,7 @@
ld [%o0+0],%g1 C 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] C 4
- addx %g0,%g2,%o0
retl
- nop
+ addx %g0,%g2,%o0
EPILOGUE(mpn_addmul_1)
diff -r 8cd6cd6cdd8d -r 3be14764fc52 mpn/sparc32/v8/mul_1.asm
--- a/mpn/sparc32/v8/mul_1.asm Sat Aug 21 18:17:23 2021 +0200
+++ b/mpn/sparc32/v8/mul_1.asm Thu Sep 02 21:57:13 2021 +0200
@@ -40,67 +40,48 @@
ASM_START()
PROLOGUE(mpn_mul_1)
- sll %o2,4,%g1
- and %g1,(4-1)<<4,%g1
-ifdef(`PIC',
-` mov %o7,%g4 C Save return address register
-0: call 1f
- add %o7,L(1)-0b,%g3
-1: mov %g4,%o7 C Restore return address register
-',
-` sethi %hi(L(1)),%g3
- or %g3,%lo(L(1)),%g3
-')
- jmp %g3+%g1
- ld [%o1+0],%o4 C 1
-L(1):
-L(L00): add %o0,-4,%o0
+ ld [%o1+0],%o4
+ andcc %o2,1,%g0
+ be L(bx0)
+ andcc %o2,2,%g0
+L(bx1): be L(01)
+ orcc %g0,%g0,%g2
+L(b11): add %o0,-8,%o0
+ b L(11)
+ add %o1,-8,%o1
+L(bx0): be L(b00)
+ orcc %g0,%g0,%g2
+L(b10): add %o0,-12,%o0
+ b L(10)
+ add %o1,4,%o1
+L(b00): add %o0,-4,%o0
+ b L(00)
add %o1,-4,%o1
- b L(loop00) C 4, 8, 12, ...
- orcc %g0,%g0,%g2
-L(L01): b L(loop01) C 1, 5, 9, ...
- orcc %g0,%g0,%g2
- nop
- nop
-L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
- add %o1,4,%o1
- b L(loop10)
- orcc %g0,%g0,%g2
- nop
-L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
- add %o1,-8,%o1
- b L(loop11)
- orcc %g0,%g0,%g2
-L(loop):
- addcc %g3,%g2,%g3 C 1
+L(top): addcc %g3,%g2,%g3 C 1
ld [%o1+4],%o4 C 2
st %g3,[%o0+0] C 1
rd %y,%g2 C 1
-L(loop00):
- umul %o4,%o3,%g3 C 2
+L(00): umul %o4,%o3,%g3 C 2
addxcc %g3,%g2,%g3 C 2
ld [%o1+8],%o4 C 3
st %g3,[%o0+4] C 2
rd %y,%g2 C 2
-L(loop11):
- umul %o4,%o3,%g3 C 3
+L(11): umul %o4,%o3,%g3 C 3
addxcc %g3,%g2,%g3 C 3
ld [%o1+12],%o4 C 4
add %o1,16,%o1
st %g3,[%o0+8] C 3
rd %y,%g2 C 3
-L(loop10):
- umul %o4,%o3,%g3 C 4
+L(10): umul %o4,%o3,%g3 C 4
addxcc %g3,%g2,%g3 C 4
ld [%o1+0],%o4 C 1
st %g3,[%o0+12] C 4
add %o0,16,%o0
rd %y,%g2 C 4
addx %g0,%g2,%g2
-L(loop01):
- addcc %o2,-4,%o2
- bg L(loop)
+L(01): addcc %o2,-4,%o2
+ bg L(top)
umul %o4,%o3,%g3 C 1
addcc %g3,%g2,%g3 C 4
More information about the gmp-commit mailing list