[Gmp-commit] /var/hg/gmp: Rewrite loongarch mul_1, addmul_1, submul_1.
mercurial at gmplib.org
mercurial at gmplib.org
Thu Jul 20 19:57:37 CEST 2023
details: /var/hg/gmp/rev/f45eae89762d
changeset: 18398:f45eae89762d
user: Torbjorn Granlund <tg at gmplib.org>
date: Thu Jul 20 19:57:28 2023 +0200
description:
Rewrite loongarch mul_1, addmul_1, submul_1.
diffstat:
mpn/loongarch/64/aorsmul_1.asm | 89 ++++++++++++++++++++++++++++++-----------
mpn/loongarch/64/mul_1.asm | 74 +++++++++++++++++++++++++---------
2 files changed, 119 insertions(+), 44 deletions(-)
diffs (200 lines):
diff -r 6741673d1679 -r f45eae89762d mpn/loongarch/64/aorsmul_1.asm
--- a/mpn/loongarch/64/aorsmul_1.asm Thu Jul 20 12:44:33 2023 +0200
+++ b/mpn/loongarch/64/aorsmul_1.asm Thu Jul 20 19:57:28 2023 +0200
@@ -33,12 +33,13 @@
include(`../config.m4')
C INPUT PARAMETERS
-define(`rp_arg',`$r4')
-define(`ap', `$r5')
-define(`n', `$r6')
-define(`b0', `$r7')
+define(`rp', `$a0')
+define(`ap', `$a1')
+define(`n', `$a2')
+define(`b0', `$a3')
-define(`rp', `$r8')
+define(`cy', `$a4')
+define(`i', `$a5')
ifdef(`OPERATION_addmul_1',`
define(`ADDSUB', `add.d')
@@ -51,29 +52,69 @@
define(`func', `mpn_submul_1')
')
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1)
+
+define(`BLOCK', `
+ mul.d $t1, $t2, b0
+ mulh.du $t0, $t2, b0
+ ld.d $t2, ap, $1
+ ADDSUB $t5, $t3, $t1
+ CMPCY( $t4, $t5, $t3)
+ ld.d $t3, rp, $1
+ ADDSUB $t6, $t5, cy
+ add.d $t4, $t4, $t0
+ CMPCY( $t5, $t6, $t5)
+ st.d $t6, rp, eval($1-8)
+ add.d cy, $t4, $t5')
ASM_START()
+
+ifdef(`OPERATION_addmul_1', `
+PROLOGUE(mpn_addmul_1c)
+ srli.d i, n, 2
+ b L(ent)
+EPILOGUE()
+')
+
PROLOGUE(func)
- alsl.d rp, n, rp_arg, 3
- alsl.d ap, n, ap, 3
- sub.d n, $r0, n
- slli.d n, n, 3
- or $r4, $r0, $r0
+ srli.d i, n, 2
+ or cy, $r0, $r0
+L(ent): ld.d $t2, ap, 0
+ ld.d $t3, rp, 0
-L(top): ldx.d $r13, ap, n
- mul.d $r17, $r13, b0
- mulh.du $r13, $r13, b0
- ldx.d $r12, rp, n
- ADDSUB $r17, $r12, $r17
- CMPCY( $r12, $r17, $r12)
- ADDSUB $r4, $r17, $r4 C cycle 0, 3, ...
- add.d $r12, $r12, $r13
- CMPCY( $r17, $r4, $r17) C cycle 1, 4, ...
- stx.d $r4, rp, n
- addi.d n, n, 8 C bookkeeping
- add.d $r4, $r12, $r17 C cycle 2, 5, ...
- bnez n, L(top)
+ andi $t0, n, 1
+ andi $t1, n, 2
+ bnez $t0, L(bx1)
+L(bx0): beqz $t1, L(b0)
+L(b10): addi.d ap, ap, -16
+ addi.d rp, rp, -16
+ b L(b2)
+L(bx1): beqz $t1, L(b01)
+L(b11): addi.d ap, ap, -8
+ addi.d rp, rp, -8
+ b L(b3)
+L(b01): addi.d ap, ap, 8
+ addi.d rp, rp, 8
+ beqz i, L(end)
+L(top):
+L(b1): BLOCK(0)
+L(b0): BLOCK(8)
+ addi.d i, i, -1
+L(b3): BLOCK(16)
+L(b2): BLOCK(24)
+ addi.d ap, ap, 32
+ addi.d rp, rp, 32
+ bnez i, L(top)
+
+L(end): mul.d $t1, $t2, b0
+ mulh.du $t0, $t2, b0
+ ADDSUB $t5, $t3, $t1
+ CMPCY( $t4, $t5, $t3)
+ ADDSUB $t6, $t5, cy
+ add.d $t4, $t4, $t0
+ CMPCY( $t5, $t6, $t5)
+ st.d $t6, rp, -8
+ add.d $a0, $t4, $t5
jr $r1
EPILOGUE()
diff -r 6741673d1679 -r f45eae89762d mpn/loongarch/64/mul_1.asm
--- a/mpn/loongarch/64/mul_1.asm Thu Jul 20 12:44:33 2023 +0200
+++ b/mpn/loongarch/64/mul_1.asm Thu Jul 20 19:57:28 2023 +0200
@@ -33,31 +33,65 @@
include(`../config.m4')
C INPUT PARAMETERS
-define(`rp_arg',`$r4')
-define(`ap', `$r5')
-define(`n', `$r6')
-define(`b0', `$r7')
+define(`rp', `$a0')
+define(`ap', `$a1')
+define(`n', `$a2')
+define(`b0', `$a3')
+
+define(`cy', `$a4')
+define(`i', `$a5')
-define(`rp', `$r8')
-
+define(`BLOCK', `
+ mul.d $t1, $t2, b0
+ mulh.du $t0, $t2, b0
+ ld.d $t2, ap, $1
+ add.d $t6, $t1, cy
+ sltu $t5, $t6, $t1
+ st.d $t6, rp, eval($1-8)
+ add.d cy, $t0, $t5')
ASM_START()
+
+PROLOGUE(mpn_mul_1c)
+ srli.d i, n, 2
+ b L(ent)
+EPILOGUE()
+
PROLOGUE(mpn_mul_1)
- alsl.d rp, n, rp_arg, 3
- alsl.d ap, n, ap, 3
- sub.d n, $r0, n
- slli.d n, n, 3
- or $r4, $r0, $r0
+ srli.d i, n, 2
+ or cy, $r0, $r0
+L(ent): ld.d $t2, ap, 0
-L(top): ldx.d $r13, ap, n
- mul.d $r17, $r13, b0
- mulh.du $r13, $r13, b0
- add.d $r17, $r17, $r4
- sltu $r4, $r17, $r4
- stx.d $r17, rp, n
- addi.d n, n, 8 C bookkeeping
- add.d $r4, $r13, $r4
- bnez n, L(top)
+ andi $t0, n, 1
+ andi $t1, n, 2
+ bnez $t0, L(bx1)
+L(bx0): beqz $t1, L(b0)
+L(b10): addi.d ap, ap, -16
+ addi.d rp, rp, -16
+ b L(b2)
+L(bx1): beqz $t1, L(b01)
+L(b11): addi.d ap, ap, -8
+ addi.d rp, rp, -8
+ b L(b3)
+L(b01): addi.d ap, ap, 8
+ addi.d rp, rp, 8
+ beqz i, L(end)
+L(top):
+L(b1): BLOCK(0)
+L(b0): BLOCK(8)
+ addi.d i, i, -1
+L(b3): BLOCK(16)
+L(b2): BLOCK(24)
+ addi.d ap, ap, 32
+ addi.d rp, rp, 32
+ bnez i, L(top)
+
+L(end): mul.d $t1, $t2, b0
+ mulh.du $t0, $t2, b0
+ add.d $t6, $t1, cy
+ sltu $t5, $t6, $t1
+ st.d $t6, rp, -8
+ add.d $a0, $t0, $t5
jr $r1
EPILOGUE()
More information about the gmp-commit mailing list