[Gmp-commit] /var/hg/gmp: Rewrite loongarch mul_1, addmul_1, submul_1.

mercurial at gmplib.org mercurial at gmplib.org
Thu Jul 20 19:57:37 CEST 2023


details:   /var/hg/gmp/rev/f45eae89762d
changeset: 18398:f45eae89762d
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Thu Jul 20 19:57:28 2023 +0200
description:
Rewrite loongarch mul_1, addmul_1, submul_1 as 4-way unrolled loops, and add
carry-in entry points mpn_mul_1c and mpn_addmul_1c.

diffstat:

 mpn/loongarch/64/aorsmul_1.asm |  89 ++++++++++++++++++++++++++++++-----------
 mpn/loongarch/64/mul_1.asm     |  74 +++++++++++++++++++++++++---------
 2 files changed, 119 insertions(+), 44 deletions(-)

diffs (200 lines):

diff -r 6741673d1679 -r f45eae89762d mpn/loongarch/64/aorsmul_1.asm
--- a/mpn/loongarch/64/aorsmul_1.asm	Thu Jul 20 12:44:33 2023 +0200
+++ b/mpn/loongarch/64/aorsmul_1.asm	Thu Jul 20 19:57:28 2023 +0200
@@ -33,12 +33,13 @@
 include(`../config.m4')
 
 C INPUT PARAMETERS
-define(`rp_arg',`$r4')
-define(`ap',	`$r5')
-define(`n',	`$r6')
-define(`b0',	`$r7')
+define(`rp',	`$a0')
+define(`ap',	`$a1')
+define(`n',	`$a2')
+define(`b0',	`$a3')
 
-define(`rp',	`$r8')
+define(`cy',	`$a4')
+define(`i',	`$a5')
 
 ifdef(`OPERATION_addmul_1',`
     define(`ADDSUB',	`add.d')
@@ -51,29 +52,69 @@
     define(`func',	`mpn_submul_1')
 ')
 
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1)
+
+define(`BLOCK', `
+	mul.d	$t1, $t2, b0
+	mulh.du	$t0, $t2, b0
+	ld.d	$t2, ap, $1
+	ADDSUB	$t5, $t3, $t1
+	CMPCY(	$t4, $t5, $t3)
+	ld.d	$t3, rp, $1
+	ADDSUB	$t6, $t5, cy
+	add.d	$t4, $t4, $t0
+	CMPCY(	$t5, $t6, $t5)
+	st.d	$t6, rp, eval($1-8)
+	add.d	cy, $t4, $t5')
 
 ASM_START()
+
+ifdef(`OPERATION_addmul_1', `
+PROLOGUE(mpn_addmul_1c)
+	srli.d	i, n, 2
+	b	L(ent)
+EPILOGUE()
+')
+
 PROLOGUE(func)
-	alsl.d	rp, n, rp_arg, 3
-	alsl.d	ap, n, ap, 3
-	sub.d	n, $r0, n
-	slli.d	n, n, 3
-	or	$r4, $r0, $r0
+	srli.d	i, n, 2
+	or	cy, $r0, $r0
+L(ent):	ld.d	$t2, ap, 0
+	ld.d	$t3, rp, 0
 
-L(top):	ldx.d	$r13, ap, n
-	mul.d	$r17, $r13, b0
-	mulh.du	$r13, $r13, b0
-	ldx.d	$r12, rp, n
-	ADDSUB	$r17, $r12, $r17
-	CMPCY(	$r12, $r17, $r12)
-	ADDSUB	$r4, $r17, $r4		C cycle 0, 3, ...
-	add.d	$r12, $r12, $r13
-	CMPCY(	$r17, $r4, $r17)	C cycle 1, 4, ...
-	stx.d	$r4, rp, n
-	addi.d	n, n, 8			C bookkeeping
-	add.d	$r4, $r12, $r17		C cycle 2, 5, ...
-	bnez	n, L(top)
+	andi	$t0, n, 1
+	andi	$t1, n, 2
+	bnez	$t0, L(bx1)
+L(bx0):	beqz	$t1, L(b0)
+L(b10):	addi.d	ap, ap, -16
+	addi.d	rp, rp, -16
+	b	L(b2)
+L(bx1):	beqz	$t1, L(b01)
+L(b11):	addi.d	ap, ap, -8
+	addi.d	rp, rp, -8
+	b	L(b3)
+L(b01):	addi.d	ap, ap, 8
+	addi.d	rp, rp, 8
+	beqz	i, L(end)
 
+L(top):
+L(b1):	BLOCK(0)
+L(b0):	BLOCK(8)
+	addi.d	i, i, -1
+L(b3):	BLOCK(16)
+L(b2):	BLOCK(24)
+	addi.d	ap, ap, 32
+	addi.d	rp, rp, 32
+	bnez	i, L(top)
+
+L(end):	mul.d	$t1, $t2, b0
+	mulh.du	$t0, $t2, b0
+	ADDSUB	$t5, $t3, $t1
+	CMPCY(	$t4, $t5, $t3)
+	ADDSUB	$t6, $t5, cy
+	add.d	$t4, $t4, $t0
+	CMPCY(	$t5, $t6, $t5)
+	st.d	$t6, rp, -8
+	add.d	$a0, $t4, $t5
 	jr	$r1
 EPILOGUE()
diff -r 6741673d1679 -r f45eae89762d mpn/loongarch/64/mul_1.asm
--- a/mpn/loongarch/64/mul_1.asm	Thu Jul 20 12:44:33 2023 +0200
+++ b/mpn/loongarch/64/mul_1.asm	Thu Jul 20 19:57:28 2023 +0200
@@ -33,31 +33,65 @@
 include(`../config.m4')
 
 C INPUT PARAMETERS
-define(`rp_arg',`$r4')
-define(`ap',	`$r5')
-define(`n',	`$r6')
-define(`b0',	`$r7')
+define(`rp',	`$a0')
+define(`ap',	`$a1')
+define(`n',	`$a2')
+define(`b0',	`$a3')
+
+define(`cy',	`$a4')
+define(`i',	`$a5')
 
-define(`rp',	`$r8')
-
+define(`BLOCK', `
+	mul.d	$t1, $t2, b0
+	mulh.du	$t0, $t2, b0
+	ld.d	$t2, ap, $1
+	add.d	$t6, $t1, cy
+	sltu	$t5, $t6, $t1
+	st.d	$t6, rp, eval($1-8)
+	add.d	cy, $t0, $t5')
 
 ASM_START()
+
+PROLOGUE(mpn_mul_1c)
+	srli.d	i, n, 2
+	b	L(ent)
+EPILOGUE()
+
 PROLOGUE(mpn_mul_1)
-	alsl.d	rp, n, rp_arg, 3
-	alsl.d	ap, n, ap, 3
-	sub.d	n, $r0, n
-	slli.d	n, n, 3
-	or	$r4, $r0, $r0
+	srli.d	i, n, 2
+	or	cy, $r0, $r0
+L(ent):	ld.d	$t2, ap, 0
 
-L(top):	ldx.d	$r13, ap, n
-	mul.d	$r17, $r13, b0
-	mulh.du	$r13, $r13, b0
-	add.d	$r17, $r17, $r4
-	sltu	$r4, $r17, $r4
-	stx.d	$r17, rp, n
-	addi.d	n, n, 8			C bookkeeping
-	add.d	$r4, $r13, $r4
-	bnez	n, L(top)
+	andi	$t0, n, 1
+	andi	$t1, n, 2
+	bnez	$t0, L(bx1)
+L(bx0):	beqz	$t1, L(b0)
+L(b10):	addi.d	ap, ap, -16
+	addi.d	rp, rp, -16
+	b	L(b2)
+L(bx1):	beqz	$t1, L(b01)
+L(b11):	addi.d	ap, ap, -8
+	addi.d	rp, rp, -8
+	b	L(b3)
+L(b01):	addi.d	ap, ap, 8
+	addi.d	rp, rp, 8
+	beqz	i, L(end)
 
+L(top):
+L(b1):	BLOCK(0)
+L(b0):	BLOCK(8)
+	addi.d	i, i, -1
+L(b3):	BLOCK(16)
+L(b2):	BLOCK(24)
+	addi.d	ap, ap, 32
+	addi.d	rp, rp, 32
+	bnez	i, L(top)
+
+L(end):	mul.d	$t1, $t2, b0
+	mulh.du	$t0, $t2, b0
+	add.d	$t6, $t1, cy
+	sltu	$t5, $t6, $t1
+	st.d	$t6, rp, -8
+	add.d	$a0, $t0, $t5
 	jr	$r1
 EPILOGUE()


More information about the gmp-commit mailing list