[Gmp-commit] /var/hg/gmp: Improve new submul_1.asm code.
mercurial at gmplib.org
mercurial at gmplib.org
Wed Mar 9 23:39:14 CET 2022
details: /var/hg/gmp/rev/72f950c9d01e
changeset: 18323:72f950c9d01e
user: Torbjorn Granlund <tg at gmplib.org>
date: Wed Mar 09 23:39:02 2022 +0100
description:
Improve new submul_1.asm code.
diffstat:
mpn/x86_64/alderlake/submul_1.asm | 78 +++++++++++++++++++++++++-------------
1 files changed, 51 insertions(+), 27 deletions(-)
diffs (110 lines):
diff -r 1e999ea14207 -r 72f950c9d01e mpn/x86_64/alderlake/submul_1.asm
--- a/mpn/x86_64/alderlake/submul_1.asm Wed Mar 09 00:32:43 2022 +0100
+++ b/mpn/x86_64/alderlake/submul_1.asm Wed Mar 09 23:39:02 2022 +0100
@@ -41,7 +41,7 @@
C AMD bd4 -
C AMD zn1 ?
C AMD zn2 ?
-C AMD zn3 ?
+C AMD zn3 2.0
C AMD bt1 -
C AMD bt2 -
C Intel P4 -
@@ -54,7 +54,7 @@
C Intel HWL -
C Intel BWL ?
C Intel SKL ?
-C Intel RKL ?
+C Intel RKL 2.0
C Intel ALD 1.53
C Intel atom -
C Intel SLM -
@@ -78,39 +78,63 @@
mov v0_param, v0
mov %rax, n
test $1, R8(n)
- mov $-1, %rax
- adox( %rax, %rax) C Set OF
- jz L(b0)
+ jz L(bx0)
+
+L(bx1): mulx( (up), %r9, %rax)
+ test $2, R8(n)
+ stc
+ jz L(b01)
-L(b1): mov $0, R32(%r8)
- lea -8(up), up
- lea -8(rp), rp
- lea 1(n), n
+L(b11): lea 1(n), n
+ lea 16(up), up
+ lea 16(rp), rp
+ jmp L(lo3)
+
+L(b01): lea 3(n), n
jmp L(lo1)
-L(b0): mov $0, R32(%r10)
+L(bx0): mulx( (up), %r9, %r8)
+ test $2, R8(n)
+ stc
+ jz L(b00)
+
+L(b10): lea 8(up), up
+ lea 8(rp), rp
+ lea 2(n), n
+ jmp L(lo2)
+
+L(b00): lea 24(up), up
+ lea 24(rp), rp
+ jmp L(lo0)
-L(top): mulx( (up), %r9, %r8)
- adcx( %r10, %r9)
- not %r9
- adox( (rp), %r9)
+L(top): lea 32(up), up
+ lea 32(rp), rp
+ mulx( -24,(up), %r9, %r8)
+ adox( %rax, %r9)
+L(lo0): not %r9
+ adcx( -24,(rp), %r9)
+ mov %r9, -24(rp)
+ mulx( -16,(up), %r9, %rax)
+ adox( %r8, %r9)
+L(lo3): not %r9
+ adcx( -16,(rp), %r9)
+ mov %r9, -16(rp)
+ mulx( -8,(up), %r9, %r8)
+ adox( %rax, %r9)
+L(lo2): not %r9
+ adcx( -8,(rp), %r9)
+ mov %r9, -8(rp)
+ mulx( (up), %r9, %rax)
+ adox( %r8, %r9)
+L(lo1): not %r9
+ adcx( (rp), %r9)
mov %r9, (rp)
-L(lo1): mulx( 8,(up), %r11, %r10)
- adcx( %r8, %r11)
- not %r11
- adox( 8,(rp), %r11)
- mov %r11, 8(rp)
- lea 16(up), up
- lea 16(rp), rp
- lea -2(n), n
+ lea -4(n), n
jrcxz L(end)
jmp L(top)
-L(end): adcx( %rcx, %r10)
- not %r10
- adox( %rcx, %r10)
- mov %r10, %rax
- neg %rax
+L(end): adox( %rcx, %rax)
+ sbb $-1, %rax
ret
EPILOGUE()
ASM_END()
More information about the gmp-commit mailing list