[PATCH 3/3] Rewrite the legacy MIPS code in MIPS r6 style
YunQiang Su
syq at debian.org
Thu May 23 04:03:12 UTC 2019
From: Luyou Peng <lpeng at wavecomp.com>
The r6-style code is much shorter and performs the same.
To make it easier to maintain, we rewrite the code in the new style:
use a single loop per function.
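
For reference, here is a rough C sketch (not part of this patch) of the
single-loop, carry-propagating structure each rewritten routine now follows,
shown for the add_n case; the register names from the MIPS32 loop appear in
the comments, and limb_t/add_n_sketch are illustrative names only, not GMP
identifiers.

typedef unsigned int limb_t;            /* 32-bit limb, as in mpn/mips32 */

limb_t
add_n_sketch (limb_t *rp, const limb_t *up, const limb_t *vp, long n)
{
  limb_t cy = 0;                        /* carry, kept in $2 in the asm */
  while (n-- > 0)
    {
      limb_t u = *up++;                 /* lw   $9,0($5)   */
      limb_t v = *vp++;                 /* lw   $10,0($6)  */
      limb_t t = u + cy;                /* addu $9,$2,$9   */
      limb_t c1 = t < cy;               /* sltu $2,$9,$2   */
      limb_t s = t + v;                 /* addu $10,$9,$10 */
      limb_t c2 = s < t;                /* sltu $9,$10,$9  */
      *rp++ = s;                        /* sw   $10,0($4)  */
      cy = c1 + c2;                     /* addu $2,$2,$9 (branch delay slot) */
    }
  return cy;                            /* carry out, returned in $2 */
}

The mul_1/addmul_1/submul_1 and sqr_diagonal loops have the same shape, with
the multiply and its carry bookkeeping in place of the second addition.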
---
 mpn/mips32/add_n.asm        |  95 ++++++++---------------------------------
 mpn/mips32/addmul_1.asm     |  61 +++++++--------------------
 mpn/mips32/lshift.asm       |   8 ++--
 mpn/mips32/mul_1.asm        |  54 +++++++-----------------
 mpn/mips32/rshift.asm       |   8 ++--
 mpn/mips32/sqr_diagonal.asm |  60 ++++++++++++++++++++++++++
 mpn/mips32/sub_n.asm        |  95 ++++++++---------------------------------
 mpn/mips32/submul_1.asm     |  65 ++++++++--------------------
 mpn/mips32/umul.asm         |   6 +--
 mpn/mips64/add_n.asm        |  98 ++++++++-----------------------------------
 mpn/mips64/addmul_1.asm     |  57 ++++++-------------------
 mpn/mips64/lshift.asm       |   8 ++--
 mpn/mips64/mul_1.asm        |  55 +++++++-----------------
 mpn/mips64/rshift.asm       |   8 ++--
 mpn/mips64/sqr_diagonal.asm |  49 +++++++---------------
 mpn/mips64/sub_n.asm        | 100 ++++++++------------------------------------
 mpn/mips64/submul_1.asm     |  61 +++++++--------------------
 17 files changed, 254 insertions(+), 634 deletions(-)
create mode 100644 mpn/mips32/sqr_diagonal.asm
diff --git a/mpn/mips32/add_n.asm b/mpn/mips32/add_n.asm
index e7d4c48..20909c4 100644
--- a/mpn/mips32/add_n.asm
+++ b/mpn/mips32/add_n.asm
@@ -1,7 +1,7 @@
-dnl MIPS32 mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl MIPS64 mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl sum in a third limb vector.
-dnl Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
+dnl Copyright 1995, 2000-2002, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -40,85 +40,26 @@ C size $7
ASM_START()
PROLOGUE(mpn_add_n)
- lw $10,0($5)
- lw $11,0($6)
+ move $2,$0
+Loop:
+ lw $9,0($5)
+ lw $10,0($6)
+ addiu $7,$7,-1 C decrement loop counter
- addiu $7,$7,-1
- and $9,$7,4-1 C number of limbs in first loop
- beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- move $2,$0
+ addu $9,$2,$9
+ sltu $2,$9,$2
+ addu $10,$9,$10
+ sltu $9,$10,$9
- subu $7,$7,$9
-
-.Loop0: addiu $9,$9,-1
- lw $12,4($5)
- addu $11,$11,$2
- lw $13,4($6)
- sltu $8,$11,$2
- addu $11,$10,$11
- sltu $2,$11,$10
- sw $11,0($4)
- or $2,$2,$8
+ sw $10,0($4)
addiu $5,$5,4
addiu $6,$6,4
- move $10,$12
- move $11,$13
- bne $9,$0,.Loop0
- addiu $4,$4,4
-
-.L0: beq $7,$0,.Lend
- nop
-
-.Loop: addiu $7,$7,-4
-
- lw $12,4($5)
- addu $11,$11,$2
- lw $13,4($6)
- sltu $8,$11,$2
- addu $11,$10,$11
- sltu $2,$11,$10
- sw $11,0($4)
- or $2,$2,$8
-
- lw $10,8($5)
- addu $13,$13,$2
- lw $11,8($6)
- sltu $8,$13,$2
- addu $13,$12,$13
- sltu $2,$13,$12
- sw $13,4($4)
- or $2,$2,$8
-
- lw $12,12($5)
- addu $11,$11,$2
- lw $13,12($6)
- sltu $8,$11,$2
- addu $11,$10,$11
- sltu $2,$11,$10
- sw $11,8($4)
- or $2,$2,$8
-
- lw $10,16($5)
- addu $13,$13,$2
- lw $11,16($6)
- sltu $8,$13,$2
- addu $13,$12,$13
- sltu $2,$13,$12
- sw $13,12($4)
- or $2,$2,$8
-
- addiu $5,$5,16
- addiu $6,$6,16
-
- bne $7,$0,.Loop
- addiu $4,$4,16
+ addiu $4,$4,4
-.Lend: addu $11,$11,$2
- sltu $8,$11,$2
- addu $11,$10,$11
- sltu $2,$11,$10
- sw $11,0($4)
+ bgtz $7,Loop
+ addu $2,$2,$9
+Lend:
j $31
- or $2,$2,$8
-EPILOGUE(mpn_add_n)
+ nop
+EPILOGUE()
diff --git a/mpn/mips32/addmul_1.asm b/mpn/mips32/addmul_1.asm
index 9aa9e16..be5a7cd 100644
--- a/mpn/mips32/addmul_1.asm
+++ b/mpn/mips32/addmul_1.asm
@@ -1,7 +1,7 @@
-dnl MIPS32 mpn_addmul_1 -- Multiply a limb vector with a single limb and add
+dnl MIPS64 mpn_addmul_1 -- Multiply a limb vector with a single limb and add
dnl the product to a second limb vector.
-dnl Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
+dnl Copyright 1992, 1994, 1995, 2000-2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -40,62 +40,31 @@ C s2_limb $7
ASM_START()
PROLOGUE(mpn_addmul_1)
-C feed-in phase 0
- lw $8,0($5)
+ move $2,$0 C zero cy2
-C feed-in phase 1
- addiu $5,$5,4
- multu $8,$7
-
- addiu $6,$6,-1
- beq $6,$0,$LC0
- move $2,$0 C zero cy2
-
- addiu $6,$6,-1
- beq $6,$0,$LC1
- lw $8,0($5) C load new s1 limb as early as possible
+Loop:
+ lw $8,0($5)
+ lw $10,0($4)
+ addiu $6,$6,-1 C decrement loop counter
-Loop: lw $10,0($4)
+ multu $7,$8
mflo $3
- mfhi $9
- addiu $5,$5,4
+
addu $3,$3,$2 C add old carry limb to low product limb
- multu $8,$7
- lw $8,0($5) C load new s1 limb as early as possible
- addiu $6,$6,-1 C decrement loop counter
sltu $2,$3,$2 C carry from previous addition -> $2
addu $3,$10,$3
sltu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
- addiu $4,$4,4
- bne $6,$0,Loop
- addu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 1
-$LC1: lw $10,0($4)
- mflo $3
mfhi $9
- addu $3,$3,$2
- sltu $2,$3,$2
- multu $8,$7
- addu $3,$10,$3
- sltu $10,$3,$10
- addu $2,$2,$10
- sw $3,0($4)
+
+ addiu $5,$5,4
addiu $4,$4,4
- addu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 0
-$LC0: lw $10,0($4)
- mflo $3
- mfhi $9
- addu $3,$3,$2
- sltu $2,$3,$2
- addu $3,$10,$3
- sltu $10,$3,$10
- addu $2,$2,$10
- sw $3,0($4)
- j $31
+ bgtz $6,Loop
addu $2,$9,$2 C add high product limb and carry from addition
+Lend:
+ j $31
+ nop
EPILOGUE(mpn_addmul_1)
diff --git a/mpn/mips32/lshift.asm b/mpn/mips32/lshift.asm
index 6a58bb4..f5cefc3 100644
--- a/mpn/mips32/lshift.asm
+++ b/mpn/mips32/lshift.asm
@@ -46,7 +46,7 @@ PROLOGUE(mpn_lshift)
addiu $6,$6,-1
and $9,$6,4-1 C number of limbs in first loop
beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- srl $2,$10,$13 C compute function result
+ srl $2,$10,$13 C compute function result
subu $6,$6,$9
@@ -59,10 +59,10 @@ PROLOGUE(mpn_lshift)
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
- sw $8,0($4)
+ sw $8,0($4)
.L0: beq $6,$0,.Lend
- nop
+ nop
.Loop: lw $3,-8($5)
addiu $4,$4,-16
@@ -91,7 +91,7 @@ PROLOGUE(mpn_lshift)
addiu $5,$5,-16
or $8,$14,$9
bgtz $6,.Loop
- sw $8,0($4)
+ sw $8,0($4)
.Lend: sll $8,$10,$7
j $31
diff --git a/mpn/mips32/mul_1.asm b/mpn/mips32/mul_1.asm
index 4337bc2..fd465d7 100644
--- a/mpn/mips32/mul_1.asm
+++ b/mpn/mips32/mul_1.asm
@@ -1,7 +1,7 @@
-dnl MIPS32 mpn_mul_1 -- Multiply a limb vector with a single limb and store
+dnl MIPS64 mpn_mul_1 -- Multiply a limb vector with a single limb and store
dnl the product in a second limb vector.
-dnl Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
+dnl Copyright 1992, 1994, 1995, 2000-2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -40,50 +40,26 @@ C s2_limb $7
ASM_START()
PROLOGUE(mpn_mul_1)
-C feed-in phase 0
- lw $8,0($5)
-
-C feed-in phase 1
- addiu $5,$5,4
- multu $8,$7
+ move $2,$0 C zero cy2
+Loop:
+ lw $8,0($5)
+ addiu $6,$6,-1 C decrement loop counter
- addiu $6,$6,-1
- beq $6,$0,$LC0
- move $2,$0 C zero cy2
+ multu $7,$8
+ mflo $3
- addiu $6,$6,-1
- beq $6,$0,$LC1
- lw $8,0($5) C load new s1 limb as early as possible
+ addu $3,$3,$2 C add old carry limb to low product limb
+ sltu $2,$3,$2 C carry from previous addition -> $2
+ sw $3,0($4)
-Loop: mflo $10
mfhi $9
+
addiu $5,$5,4
- addu $10,$10,$2 C add old carry limb to low product limb
- multu $8,$7
- lw $8,0($5) C load new s1 limb as early as possible
- addiu $6,$6,-1 C decrement loop counter
- sltu $2,$10,$2 C carry from previous addition -> $2
- sw $10,0($4)
addiu $4,$4,4
- bne $6,$0,Loop
- addu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 1
-$LC1: mflo $10
- mfhi $9
- addu $10,$10,$2
- sltu $2,$10,$2
- multu $8,$7
- sw $10,0($4)
- addiu $4,$4,4
+ bgtz $6,Loop
addu $2,$9,$2 C add high product limb and carry from addition
-
-C wind-down phase 0
-$LC0: mflo $10
- mfhi $9
- addu $10,$10,$2
- sltu $2,$10,$2
- sw $10,0($4)
+Lend:
j $31
- addu $2,$9,$2 C add high product limb and carry from addition
+ nop
EPILOGUE(mpn_mul_1)
diff --git a/mpn/mips32/rshift.asm b/mpn/mips32/rshift.asm
index 4b54510..8a630ea 100644
--- a/mpn/mips32/rshift.asm
+++ b/mpn/mips32/rshift.asm
@@ -43,7 +43,7 @@ PROLOGUE(mpn_rshift)
addiu $6,$6,-1
and $9,$6,4-1 C number of limbs in first loop
beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- sll $2,$10,$13 C compute function result
+ sll $2,$10,$13 C compute function result
subu $6,$6,$9
@@ -56,10 +56,10 @@ PROLOGUE(mpn_rshift)
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
- sw $8,-4($4)
+ sw $8,-4($4)
.L0: beq $6,$0,.Lend
- nop
+ nop
.Loop: lw $3,4($5)
addiu $4,$4,16
@@ -88,7 +88,7 @@ PROLOGUE(mpn_rshift)
addiu $5,$5,16
or $8,$14,$9
bgtz $6,.Loop
- sw $8,-4($4)
+ sw $8,-4($4)
.Lend: srl $8,$10,$7
j $31
diff --git a/mpn/mips32/sqr_diagonal.asm b/mpn/mips32/sqr_diagonal.asm
new file mode 100644
index 0000000..58cb91a
--- /dev/null
+++ b/mpn/mips32/sqr_diagonal.asm
@@ -0,0 +1,60 @@
+dnl MIPS32 mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl INPUT PARAMETERS
+dnl rp $4
+dnl up $5
+dnl n $6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+
+Loop:
+ lw $8,0($5)
+ addiu $6,$6,-1
+
+ multu $8,$8
+ mflo $10
+ mfhi $9
+
+ sw $10,0($4)
+ sw $9,4($4)
+
+ addiu $5,$5,4
+
+ bgtz $6,Loop
+ addiu $4,$4,8
+Lend:
+ j $31
+ nop
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/mips32/sub_n.asm b/mpn/mips32/sub_n.asm
index a962ce1..65c5dda 100644
--- a/mpn/mips32/sub_n.asm
+++ b/mpn/mips32/sub_n.asm
@@ -1,7 +1,7 @@
-dnl MIPS32 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl MIPS64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl store difference in a third limb vector.
-dnl Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
+dnl Copyright 1995, 2000-2002, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -39,85 +39,26 @@ C size $7
ASM_START()
PROLOGUE(mpn_sub_n)
- lw $10,0($5)
- lw $11,0($6)
+ move $2,$0
+Loop:
+ lw $9,0($5)
+ lw $10,0($6)
+ addiu $7,$7,-1 C decrement loop counter
- addiu $7,$7,-1
- and $9,$7,4-1 C number of limbs in first loop
- beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- move $2,$0
+ addu $10,$2,$10
+ sltu $2,$10,$2
+ subu $10,$9,$10
+ sltu $9,$9,$10
- subu $7,$7,$9
-
-.Loop0: addiu $9,$9,-1
- lw $12,4($5)
- addu $11,$11,$2
- lw $13,4($6)
- sltu $8,$11,$2
- subu $11,$10,$11
- sltu $2,$10,$11
- sw $11,0($4)
- or $2,$2,$8
+ sw $10,0($4)
addiu $5,$5,4
addiu $6,$6,4
- move $10,$12
- move $11,$13
- bne $9,$0,.Loop0
- addiu $4,$4,4
-
-.L0: beq $7,$0,.Lend
- nop
-
-.Loop: addiu $7,$7,-4
-
- lw $12,4($5)
- addu $11,$11,$2
- lw $13,4($6)
- sltu $8,$11,$2
- subu $11,$10,$11
- sltu $2,$10,$11
- sw $11,0($4)
- or $2,$2,$8
-
- lw $10,8($5)
- addu $13,$13,$2
- lw $11,8($6)
- sltu $8,$13,$2
- subu $13,$12,$13
- sltu $2,$12,$13
- sw $13,4($4)
- or $2,$2,$8
-
- lw $12,12($5)
- addu $11,$11,$2
- lw $13,12($6)
- sltu $8,$11,$2
- subu $11,$10,$11
- sltu $2,$10,$11
- sw $11,8($4)
- or $2,$2,$8
-
- lw $10,16($5)
- addu $13,$13,$2
- lw $11,16($6)
- sltu $8,$13,$2
- subu $13,$12,$13
- sltu $2,$12,$13
- sw $13,12($4)
- or $2,$2,$8
-
- addiu $5,$5,16
- addiu $6,$6,16
-
- bne $7,$0,.Loop
- addiu $4,$4,16
+ addiu $4,$4,4
-.Lend: addu $11,$11,$2
- sltu $8,$11,$2
- subu $11,$10,$11
- sltu $2,$10,$11
- sw $11,0($4)
+ bgtz $7,Loop
+ addu $2,$2,$9
+Lend:
j $31
- or $2,$2,$8
-EPILOGUE(mpn_sub_n)
+ nop
+EPILOGUE()
diff --git a/mpn/mips32/submul_1.asm b/mpn/mips32/submul_1.asm
index 335722b..2832384 100644
--- a/mpn/mips32/submul_1.asm
+++ b/mpn/mips32/submul_1.asm
@@ -1,7 +1,7 @@
-dnl MIPS32 mpn_submul_1 -- Multiply a limb vector with a single limb and
+dnl MIPS64 mpn_submul_1 -- Multiply a limb vector with a single limb and
dnl subtract the product from a second limb vector.
-dnl Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
+dnl Copyright 1992, 1994, 1995, 2000-2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -40,62 +40,31 @@ C s2_limb $7
ASM_START()
PROLOGUE(mpn_submul_1)
-C feed-in phase 0
- lw $8,0($5)
-
-C feed-in phase 1
- addiu $5,$5,4
- multu $8,$7
-
- addiu $6,$6,-1
- beq $6,$0,$LC0
- move $2,$0 C zero cy2
-
- addiu $6,$6,-1
- beq $6,$0,$LC1
- lw $8,0($5) C load new s1 limb as early as possible
+ move $2,$0 C zero cy2
+Loop:
+ lw $8,0($5)
+ lw $10,0($4)
+ addiu $6,$6,-1 C decrement loop counter
-Loop: lw $10,0($4)
+ multu $7,$8
mflo $3
- mfhi $9
- addiu $5,$5,4
+
addu $3,$3,$2 C add old carry limb to low product limb
- multu $8,$7
- lw $8,0($5) C load new s1 limb as early as possible
- addiu $6,$6,-1 C decrement loop counter
sltu $2,$3,$2 C carry from previous addition -> $2
subu $3,$10,$3
- sgtu $10,$3,$10
+ sltu $10,$10,$3
addu $2,$2,$10
sw $3,0($4)
- addiu $4,$4,4
- bne $6,$0,Loop
- addu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 1
-$LC1: lw $10,0($4)
- mflo $3
mfhi $9
- addu $3,$3,$2
- sltu $2,$3,$2
- multu $8,$7
- subu $3,$10,$3
- sgtu $10,$3,$10
- addu $2,$2,$10
- sw $3,0($4)
+
+ addiu $5,$5,4
addiu $4,$4,4
- addu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 0
-$LC0: lw $10,0($4)
- mflo $3
- mfhi $9
- addu $3,$3,$2
- sltu $2,$3,$2
- subu $3,$10,$3
- sgtu $10,$3,$10
- addu $2,$2,$10
- sw $3,0($4)
- j $31
+ bgtz $6,Loop
addu $2,$9,$2 C add high product limb and carry from addition
+Lend:
+ j $31
+ nop
+
EPILOGUE(mpn_submul_1)
diff --git a/mpn/mips32/umul.asm b/mpn/mips32/umul.asm
index 1ced0eb..bcf42d6 100644
--- a/mpn/mips32/umul.asm
+++ b/mpn/mips32/umul.asm
@@ -1,6 +1,6 @@
-dnl MIPS32 umul_ppmm -- longlong.h support.
+dnl MIPS64 umul_ppmm -- longlong.h support.
-dnl Copyright 1999, 2002 Free Software Foundation, Inc.
+dnl Copyright 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -41,5 +41,5 @@ PROLOGUE(mpn_umul_ppmm)
mflo $3
mfhi $2
j $31
- sw $3,0($4)
+ sd $3,0($4)
EPILOGUE(mpn_umul_ppmm)
diff --git a/mpn/mips64/add_n.asm b/mpn/mips64/add_n.asm
index 6856407..c9b593b 100644
--- a/mpn/mips64/add_n.asm
+++ b/mpn/mips64/add_n.asm
@@ -39,96 +39,32 @@ C size $7
ASM_START()
PROLOGUE(mpn_add_nc)
- ld $10,0($5)
- ld $11,0($6)
- daddiu $7,$7,-1
- and $9,$7,4-1 C number of limbs in first loop
- beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- move $2,$8
- b .Loop0
- dsubu $7,$7,$9
+ move $2,$8
+ b Loop
EPILOGUE()
PROLOGUE(mpn_add_n)
- ld $10,0($5)
- ld $11,0($6)
- daddiu $7,$7,-1
- and $9,$7,4-1 C number of limbs in first loop
- beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- move $2,$0
+ move $2,$0
+Loop:
+ ld $9,0($5)
+ ld $10,0($6)
+ daddiu $7,$7,-1 C decrement loop counter
- dsubu $7,$7,$9
+ daddu $9,$2,$9
+ sltu $2,$9,$2
+ daddu $10,$9,$10
+ sltu $9,$10,$9
-.Loop0: daddiu $9,$9,-1
- ld $12,8($5)
- daddu $11,$11,$2
- ld $13,8($6)
- sltu $8,$11,$2
- daddu $11,$10,$11
- sltu $2,$11,$10
- sd $11,0($4)
- or $2,$2,$8
+ sd $10,0($4)
daddiu $5,$5,8
daddiu $6,$6,8
- move $10,$12
- move $11,$13
- bne $9,$0,.Loop0
- daddiu $4,$4,8
+ daddiu $4,$4,8
-.L0: beq $7,$0,.Lend
- nop
-
-.Loop: daddiu $7,$7,-4
-
- ld $12,8($5)
- daddu $11,$11,$10
- ld $13,8($6)
- sltu $8,$11,$10
- daddu $11,$11,$2
- sltu $2,$11,$2
- sd $11,0($4)
- or $2,$2,$8
-
- ld $10,16($5)
- daddu $13,$13,$12
- ld $11,16($6)
- sltu $8,$13,$12
- daddu $13,$13,$2
- sltu $2,$13,$2
- sd $13,8($4)
- or $2,$2,$8
-
- ld $12,24($5)
- daddu $11,$11,$10
- ld $13,24($6)
- sltu $8,$11,$10
- daddu $11,$11,$2
- sltu $2,$11,$2
- sd $11,16($4)
- or $2,$2,$8
-
- ld $10,32($5)
- daddu $13,$13,$12
- ld $11,32($6)
- sltu $8,$13,$12
- daddu $13,$13,$2
- sltu $2,$13,$2
- sd $13,24($4)
- or $2,$2,$8
-
- daddiu $5,$5,32
- daddiu $6,$6,32
-
- bne $7,$0,.Loop
- daddiu $4,$4,32
-
-.Lend: daddu $11,$11,$2
- sltu $8,$11,$2
- daddu $11,$10,$11
- sltu $2,$11,$10
- sd $11,0($4)
+ bgtz $7,Loop
+ daddu $2,$2,$9
+Lend:
j $31
- or $2,$2,$8
+ nop
EPILOGUE()
diff --git a/mpn/mips64/addmul_1.asm b/mpn/mips64/addmul_1.asm
index 8ff0976..e372143 100644
--- a/mpn/mips64/addmul_1.asm
+++ b/mpn/mips64/addmul_1.asm
@@ -40,62 +40,31 @@ C s2_limb $7
ASM_START()
PROLOGUE(mpn_addmul_1)
-C feed-in phase 0
- ld $8,0($5)
+ move $2,$0 C zero cy2
-C feed-in phase 1
- daddiu $5,$5,8
- dmultu $8,$7
-
- daddiu $6,$6,-1
- beq $6,$0,$LC0
- move $2,$0 C zero cy2
-
- daddiu $6,$6,-1
- beq $6,$0,$LC1
- ld $8,0($5) C load new s1 limb as early as possible
+Loop:
+ ld $8,0($5)
+ ld $10,0($4)
+ daddiu $6,$6,-1 C decrement loop counter
-Loop: ld $10,0($4)
+ dmultu $7,$8
mflo $3
- mfhi $9
- daddiu $5,$5,8
+
daddu $3,$3,$2 C add old carry limb to low product limb
- dmultu $8,$7
- ld $8,0($5) C load new s1 limb as early as possible
- daddiu $6,$6,-1 C decrement loop counter
sltu $2,$3,$2 C carry from previous addition -> $2
daddu $3,$10,$3
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
- daddiu $4,$4,8
- bne $6,$0,Loop
- daddu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 1
-$LC1: ld $10,0($4)
- mflo $3
mfhi $9
- daddu $3,$3,$2
- sltu $2,$3,$2
- dmultu $8,$7
- daddu $3,$10,$3
- sltu $10,$3,$10
- daddu $2,$2,$10
- sd $3,0($4)
+
+ daddiu $5,$5,8
daddiu $4,$4,8
- daddu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 0
-$LC0: ld $10,0($4)
- mflo $3
- mfhi $9
- daddu $3,$3,$2
- sltu $2,$3,$2
- daddu $3,$10,$3
- sltu $10,$3,$10
- daddu $2,$2,$10
- sd $3,0($4)
- j $31
+ bgtz $6,Loop
daddu $2,$9,$2 C add high product limb and carry from addition
+Lend:
+ j $31
+ nop
EPILOGUE(mpn_addmul_1)
diff --git a/mpn/mips64/lshift.asm b/mpn/mips64/lshift.asm
index 3440eaf..a232fcf 100644
--- a/mpn/mips64/lshift.asm
+++ b/mpn/mips64/lshift.asm
@@ -46,7 +46,7 @@ PROLOGUE(mpn_lshift)
daddiu $6,$6,-1
and $9,$6,4-1 C number of limbs in first loop
beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- dsrl $2,$10,$13 C compute function result
+ dsrl $2,$10,$13 C compute function result
dsubu $6,$6,$9
@@ -59,10 +59,10 @@ PROLOGUE(mpn_lshift)
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
- sd $8,0($4)
+ sd $8,0($4)
.L0: beq $6,$0,.Lend
- nop
+ nop
.Loop: ld $3,-16($5)
daddiu $4,$4,-32
@@ -91,7 +91,7 @@ PROLOGUE(mpn_lshift)
daddiu $5,$5,-32
or $8,$14,$9
bgtz $6,.Loop
- sd $8,0($4)
+ sd $8,0($4)
.Lend: dsll $8,$10,$7
j $31
diff --git a/mpn/mips64/mul_1.asm b/mpn/mips64/mul_1.asm
index 77acf0a..72ef70f 100644
--- a/mpn/mips64/mul_1.asm
+++ b/mpn/mips64/mul_1.asm
@@ -40,53 +40,26 @@ C s2_limb $7
ASM_START()
PROLOGUE(mpn_mul_1)
-C feed-in phase 0
- ld $8,0($5)
-
-C feed-in phase 1
- daddiu $5,$5,8
- dmultu $8,$7
+ move $2,$0 C zero cy2
+Loop:
+ ld $8,0($5)
+ daddiu $6,$6,-1 C decrement loop counter
- daddiu $6,$6,-1
- beq $6,$0,$LC0
- move $2,$0 C zero cy2
+ dmultu $7,$8
+ mflo $3
- daddiu $6,$6,-1
- beq $6,$0,$LC1
- ld $8,0($5) C load new s1 limb as early as possible
+ daddu $3,$3,$2 C add old carry limb to low product limb
+ sltu $2,$3,$2 C carry from previous addition -> $2
+ sd $3,0($4)
-Loop: nop
- mflo $10
mfhi $9
- daddiu $5,$5,8
- daddu $10,$10,$2 C add old carry limb to low product limb
- dmultu $8,$7
- ld $8,0($5) C load new s1 limb as early as possible
- daddiu $6,$6,-1 C decrement loop counter
- sltu $2,$10,$2 C carry from previous addition -> $2
- nop
- nop
- sd $10,0($4)
- daddiu $4,$4,8
- bne $6,$0,Loop
- daddu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 1
-$LC1: mflo $10
- mfhi $9
- daddu $10,$10,$2
- sltu $2,$10,$2
- dmultu $8,$7
- sd $10,0($4)
+ daddiu $5,$5,8
daddiu $4,$4,8
- daddu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 0
-$LC0: mflo $10
- mfhi $9
- daddu $10,$10,$2
- sltu $2,$10,$2
- sd $10,0($4)
- j $31
+ bgtz $6,Loop
daddu $2,$9,$2 C add high product limb and carry from addition
+Lend:
+ j $31
+ nop
EPILOGUE(mpn_mul_1)
diff --git a/mpn/mips64/rshift.asm b/mpn/mips64/rshift.asm
index 9253cb5..a43aa90 100644
--- a/mpn/mips64/rshift.asm
+++ b/mpn/mips64/rshift.asm
@@ -43,7 +43,7 @@ PROLOGUE(mpn_rshift)
daddiu $6,$6,-1
and $9,$6,4-1 C number of limbs in first loop
beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- dsll $2,$10,$13 C compute function result
+ dsll $2,$10,$13 C compute function result
dsubu $6,$6,$9
@@ -56,10 +56,10 @@ PROLOGUE(mpn_rshift)
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
- sd $8,-8($4)
+ sd $8,-8($4)
.L0: beq $6,$0,.Lend
- nop
+ nop
.Loop: ld $3,8($5)
daddiu $4,$4,32
@@ -88,7 +88,7 @@ PROLOGUE(mpn_rshift)
daddiu $5,$5,32
or $8,$14,$9
bgtz $6,.Loop
- sd $8,-8($4)
+ sd $8,-8($4)
.Lend: dsrl $8,$10,$7
j $31
diff --git a/mpn/mips64/sqr_diagonal.asm b/mpn/mips64/sqr_diagonal.asm
index dcb87dc..73d501e 100644
--- a/mpn/mips64/sqr_diagonal.asm
+++ b/mpn/mips64/sqr_diagonal.asm
@@ -38,40 +38,23 @@ include(`../config.m4')
ASM_START()
PROLOGUE(mpn_sqr_diagonal)
- ld r8,0(r5)
- daddiu r6,r6,-2
- dmultu r8,r8
- bltz r6,$Lend1
- nop
- ld r8,8(r5)
- beq r6,r0,$Lend2
- nop
-$Loop: mflo r10
- mfhi r9
- daddiu r6,r6,-1
- sd r10,0(r4)
- sd r9,8(r4)
- dmultu r8,r8
- ld r8,16(r5)
- daddiu r5,r5,8
- bne r6,r0,$Loop
- daddiu r4,r4,16
+Loop:
+ ld $8,0($5)
+ daddiu $6,$6,-1
+
+ dmultu $8,$8
+ mflo $10
+ mfhi $9
-$Lend2: mflo r10
- mfhi r9
- sd r10,0(r4)
- sd r9,8(r4)
- dmultu r8,r8
- mflo r10
- mfhi r9
- sd r10,16(r4)
- j r31
- sd r9,24(r4)
+ sd $10,0($4)
+ sd $9,8($4)
-$Lend1: mflo r10
- mfhi r9
- sd r10,0(r4)
- j r31
- sd r9,8(r4)
+ daddiu $5,$5,8
+
+ bgtz $6,Loop
+ daddiu $4,$4,16
+Lend:
+ j $31
+ nop
EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/mips64/sub_n.asm b/mpn/mips64/sub_n.asm
index 6a69897..b8b4e78 100644
--- a/mpn/mips64/sub_n.asm
+++ b/mpn/mips64/sub_n.asm
@@ -39,96 +39,30 @@ C size $7
ASM_START()
PROLOGUE(mpn_sub_nc)
- ld $10,0($5)
- ld $11,0($6)
-
- daddiu $7,$7,-1
- and $9,$7,4-1 C number of limbs in first loop
- beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- move $2,$8
- b .Loop0
- dsubu $7,$7,$9
+ move $2,$8
+ b Loop
EPILOGUE()
PROLOGUE(mpn_sub_n)
- ld $10,0($5)
- ld $11,0($6)
-
- daddiu $7,$7,-1
- and $9,$7,4-1 C number of limbs in first loop
- beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
- move $2,$0
+ move $2,$0
+Loop:
+ ld $9,0($5)
+ ld $10,0($6)
+ daddiu $7,$7,-1 C decrement loop counter
- dsubu $7,$7,$9
+ daddu $10,$2,$10
+ sltu $2,$10,$2
+ dsubu $10,$9,$10
+ sltu $9,$9,$10
-.Loop0: daddiu $9,$9,-1
- ld $12,8($5)
- daddu $11,$11,$2
- ld $13,8($6)
- sltu $8,$11,$2
- dsubu $11,$10,$11
- sltu $2,$10,$11
- sd $11,0($4)
- or $2,$2,$8
+ sd $10,0($4)
daddiu $5,$5,8
daddiu $6,$6,8
- move $10,$12
- move $11,$13
- bne $9,$0,.Loop0
- daddiu $4,$4,8
-
-.L0: beq $7,$0,.Lend
- nop
-
-.Loop: daddiu $7,$7,-4
-
- ld $12,8($5)
- dsubu $11,$10,$11
- ld $13,8($6)
- sltu $8,$10,$11
- dsubu $14,$11,$2
- sltu $2,$11,$14
- sd $14,0($4)
- or $2,$2,$8
-
- ld $10,16($5)
- dsubu $13,$12,$13
- ld $11,16($6)
- sltu $8,$12,$13
- dsubu $14,$13,$2
- sltu $2,$13,$14
- sd $14,8($4)
- or $2,$2,$8
-
- ld $12,24($5)
- dsubu $11,$10,$11
- ld $13,24($6)
- sltu $8,$10,$11
- dsubu $14,$11,$2
- sltu $2,$11,$14
- sd $14,16($4)
- or $2,$2,$8
-
- ld $10,32($5)
- dsubu $13,$12,$13
- ld $11,32($6)
- sltu $8,$12,$13
- dsubu $14,$13,$2
- sltu $2,$13,$14
- sd $14,24($4)
- or $2,$2,$8
-
- daddiu $5,$5,32
- daddiu $6,$6,32
-
- bne $7,$0,.Loop
- daddiu $4,$4,32
+ daddiu $4,$4,8
-.Lend: daddu $11,$11,$2
- sltu $8,$11,$2
- dsubu $11,$10,$11
- sltu $2,$10,$11
- sd $11,0($4)
+ bgtz $7,Loop
+ daddu $2,$2,$9
+Lend:
j $31
- or $2,$2,$8
+ nop
EPILOGUE()
diff --git a/mpn/mips64/submul_1.asm b/mpn/mips64/submul_1.asm
index 089589c..ea38f2f 100644
--- a/mpn/mips64/submul_1.asm
+++ b/mpn/mips64/submul_1.asm
@@ -40,62 +40,31 @@ C s2_limb $7
ASM_START()
PROLOGUE(mpn_submul_1)
-C feed-in phase 0
- ld $8,0($5)
-
-C feed-in phase 1
- daddiu $5,$5,8
- dmultu $8,$7
-
- daddiu $6,$6,-1
- beq $6,$0,$LC0
- move $2,$0 C zero cy2
-
- daddiu $6,$6,-1
- beq $6,$0,$LC1
- ld $8,0($5) C load new s1 limb as early as possible
+ move $2,$0 C zero cy2
+Loop:
+ ld $8,0($5)
+ ld $10,0($4)
+ daddiu $6,$6,-1 C decrement loop counter
-Loop: ld $10,0($4)
+ dmultu $7,$8
mflo $3
- mfhi $9
- daddiu $5,$5,8
+
daddu $3,$3,$2 C add old carry limb to low product limb
- dmultu $8,$7
- ld $8,0($5) C load new s1 limb as early as possible
- daddiu $6,$6,-1 C decrement loop counter
sltu $2,$3,$2 C carry from previous addition -> $2
dsubu $3,$10,$3
- sgtu $10,$3,$10
+ sltu $10,$10,$3
daddu $2,$2,$10
sd $3,0($4)
- daddiu $4,$4,8
- bne $6,$0,Loop
- daddu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 1
-$LC1: ld $10,0($4)
- mflo $3
mfhi $9
- daddu $3,$3,$2
- sltu $2,$3,$2
- dmultu $8,$7
- dsubu $3,$10,$3
- sgtu $10,$3,$10
- daddu $2,$2,$10
- sd $3,0($4)
+
+ daddiu $5,$5,8
daddiu $4,$4,8
- daddu $2,$9,$2 C add high product limb and carry from addition
-C wind-down phase 0
-$LC0: ld $10,0($4)
- mflo $3
- mfhi $9
- daddu $3,$3,$2
- sltu $2,$3,$2
- dsubu $3,$10,$3
- sgtu $10,$3,$10
- daddu $2,$2,$10
- sd $3,0($4)
- j $31
+ bgtz $6,Loop
daddu $2,$9,$2 C add high product limb and carry from addition
+Lend:
+ j $31
+ nop
+
EPILOGUE(mpn_submul_1)
--
2.11.0