[Gmp-commit] /var/hg/gmp-6.1: Adhere to DOS64 xmm callee-saves rules.
mercurial at gmplib.org
Sat Dec 16 21:32:31 UTC 2017
details: /var/hg/gmp-6.1/rev/bce8e07b51a6
changeset: 16973:bce8e07b51a6
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Dec 16 22:32:27 2017 +0100
description:
Adhere to DOS64 xmm callee-saves rules.
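For background: DOS64 is GMP's shorthand for the Windows x64 calling convention, under which %xmm6 through %xmm15 are callee-saved (non-volatile) while %xmm0-%xmm5 may be clobbered freely, so any routine that touches %xmm6 or above must preserve it. A minimal sketch of the save/restore pattern this changeset applies, written in GMP's m4 assembly style around a hypothetical mpn_example routine:

	PROLOGUE(mpn_example)
		FUNC_ENTRY(3)
	IFDOS(`	add	$-56, %rsp	')	C make room, re-align %rsp to 16 bytes
	IFDOS(`	movdqa	%xmm6, (%rsp)	')	C save the callee-saved xmm registers
	IFDOS(`	movdqa	%xmm7, 16(%rsp)	')
		...				C body may now clobber %xmm6 and %xmm7
	IFDOS(`	movdqa	(%rsp), %xmm6	')	C restore them before returning
	IFDOS(`	movdqa	16(%rsp), %xmm7	')
	IFDOS(`	add	$56, %rsp	')
		FUNC_EXIT()
		ret
	EPILOGUE()

FUNC_ENTRY and FUNC_EXIT already cover the integer argument-register differences between the two ABIs; the IFDOS lines added by this changeset layer the xmm preservation on top.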
diffstat:
ChangeLog | 9 +++++
mpn/x86_64/fastsse/com-palignr.asm | 63 ++++++++++++++++++-----------------
mpn/x86_64/fastsse/com.asm | 10 +++++-
mpn/x86_64/fastsse/copyd.asm | 12 +++++-
mpn/x86_64/fastsse/copyi.asm | 12 +++++-
mpn/x86_64/fastsse/lshiftc.asm | 16 ++++----
mpn/x86_64/fastsse/sec_tabselect.asm | 38 ++++++++++++++-------
7 files changed, 103 insertions(+), 57 deletions(-)
diffs (truncated from 454 to 300 lines):
diff -r ed66cd73af51 -r bce8e07b51a6 ChangeLog
--- a/ChangeLog Fri Sep 08 23:38:03 2017 +0200
+++ b/ChangeLog Sat Dec 16 22:32:27 2017 +0100
@@ -1,3 +1,12 @@
+2017-12-16 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/fastsse/com.asm: Adhere to DOS64 xmm callee-saves rules.
+ * mpn/x86_64/fastsse/com-palignr.asm: Likewise.
+ * mpn/x86_64/fastsse/copyd.asm: Likewise.
+ * mpn/x86_64/fastsse/copyi.asm: Likewise.
+ * mpn/x86_64/fastsse/lshiftc.asm: Likewise.
+ * mpn/x86_64/fastsse/sec_tabselect.asm: Likewise.
+
2017-09-08 Torbjörn Granlund <tg at gmplib.org>
* configure.ac: Set GMP_NONSTD_ABI protecting against dots in the abi.
diff -r ed66cd73af51 -r bce8e07b51a6 mpn/x86_64/fastsse/com-palignr.asm
--- a/mpn/x86_64/fastsse/com-palignr.asm Fri Sep 08 23:38:03 2017 +0200
+++ b/mpn/x86_64/fastsse/com-palignr.asm Sat Dec 16 22:32:27 2017 +0100
@@ -36,19 +36,20 @@
C aligned unaligned best seen for cpu?
C AMD K8,K9 2.0 illop 1.0/1.0 N
C AMD K10 0.85 illop Y/N
-C AMD bull 1.39 ? 1.45 Y/N
-C AMD pile 0.8-1.4 0.7-1.4 Y
-C AMD steam
-C AMD excavator
+C AMD bd1 1.39 ? 1.45 Y/N
+C AMD bd2 0.8-1.4 0.7-1.4 Y
+C AMD bd3
+C AMD bd4
C AMD bobcat 1.97 ? 8.17 1.5/1.5 N
C AMD jaguar 1.02 1.02 0.91/0.91 N
C Intel P4 2.26 illop Y/N
-C Intel core 0.52 0.95 opt/0.74 Y
-C Intel NHM 0.52 0.65 opt/opt Y
+C Intel core 0.58 0.87 opt/0.74 Y
+C Intel NHM 0.64 1.14 opt/bad Y
C Intel SBR 0.51 0.65 opt/opt Y
C Intel IBR 0.50 0.64 opt/0.57 Y
C Intel HWL 0.51 0.58 opt/opt Y
-C Intel BWL 0.57 0.69 opt/0.65 Y
+C Intel BWL 0.52 0.64 opt/opt Y
+C Intel SKL 0.51 0.63 opt/opt Y
C Intel atom 1.16 1.70 opt/opt Y
C Intel SLM 1.02 1.52 N
C VIA nano 1.09 1.10 opt/opt Y
@@ -81,7 +82,7 @@
cmp $COM_SSE_THRESHOLD, n
jbe L(bc)
- pcmpeqb %xmm7, %xmm7 C set to 111...111
+ pcmpeqb %xmm5, %xmm5 C set to 111...111
test $8, R8(rp) C is rp 16-byte aligned?
jz L(rp_aligned) C jump if rp aligned
@@ -107,10 +108,10 @@
movdqa 32(up), %xmm2
movdqa 48(up), %xmm3
lea 64(up), up
- pxor %xmm7, %xmm0
- pxor %xmm7, %xmm1
- pxor %xmm7, %xmm2
- pxor %xmm7, %xmm3
+ pxor %xmm5, %xmm0
+ pxor %xmm5, %xmm1
+ pxor %xmm5, %xmm2
+ pxor %xmm5, %xmm3
movdqa %xmm0, (rp)
movdqa %xmm1, 16(rp)
movdqa %xmm2, 32(rp)
@@ -124,8 +125,8 @@
movdqa (up), %xmm0
movdqa 16(up), %xmm1
lea 32(up), up
- pxor %xmm7, %xmm0
- pxor %xmm7, %xmm1
+ pxor %xmm5, %xmm0
+ pxor %xmm5, %xmm1
movdqa %xmm0, (rp)
movdqa %xmm1, 16(rp)
lea 32(rp), rp
@@ -134,7 +135,7 @@
jz 1f
movdqa (up), %xmm0
lea 16(up), up
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, (rp)
lea 16(rp), rp
@@ -167,44 +168,44 @@
ALIGN(16)
L(utop):movdqa 120(up), %xmm3
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, -128(rp)
sub $16, n
L(um): movdqa 104(up), %xmm2
palignr($8, %xmm2, %xmm3)
movdqa 88(up), %xmm1
- pxor %xmm7, %xmm3
+ pxor %xmm5, %xmm3
movdqa %xmm3, 112(rp)
palignr($8, %xmm1, %xmm2)
movdqa 72(up), %xmm0
- pxor %xmm7, %xmm2
+ pxor %xmm5, %xmm2
movdqa %xmm2, 96(rp)
palignr($8, %xmm0, %xmm1)
movdqa 56(up), %xmm3
- pxor %xmm7, %xmm1
+ pxor %xmm5, %xmm1
movdqa %xmm1, 80(rp)
palignr($8, %xmm3, %xmm0)
movdqa 40(up), %xmm2
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, 64(rp)
palignr($8, %xmm2, %xmm3)
movdqa 24(up), %xmm1
- pxor %xmm7, %xmm3
+ pxor %xmm5, %xmm3
movdqa %xmm3, 48(rp)
palignr($8, %xmm1, %xmm2)
movdqa 8(up), %xmm0
- pxor %xmm7, %xmm2
+ pxor %xmm5, %xmm2
movdqa %xmm2, 32(rp)
palignr($8, %xmm0, %xmm1)
movdqa -8(up), %xmm3
- pxor %xmm7, %xmm1
+ pxor %xmm5, %xmm1
movdqa %xmm1, 16(rp)
palignr($8, %xmm3, %xmm0)
lea 128(up), up
lea 128(rp), rp
jnc L(utop)
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, -128(rp)
L(uend):test $8, R8(n)
@@ -213,19 +214,19 @@
movdqa 40(up), %xmm2
palignr($8, %xmm2, %xmm3)
movdqa 24(up), %xmm1
- pxor %xmm7, %xmm3
+ pxor %xmm5, %xmm3
movdqa %xmm3, 48(rp)
palignr($8, %xmm1, %xmm2)
movdqa 8(up), %xmm0
- pxor %xmm7, %xmm2
+ pxor %xmm5, %xmm2
movdqa %xmm2, 32(rp)
palignr($8, %xmm0, %xmm1)
movdqa -8(up), %xmm3
- pxor %xmm7, %xmm1
+ pxor %xmm5, %xmm1
movdqa %xmm1, 16(rp)
palignr($8, %xmm3, %xmm0)
lea 64(up), up
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, (rp)
lea 64(rp), rp
@@ -235,11 +236,11 @@
movdqa 8(up), %xmm0
palignr($8, %xmm0, %xmm1)
movdqa -8(up), %xmm3
- pxor %xmm7, %xmm1
+ pxor %xmm5, %xmm1
movdqa %xmm1, 16(rp)
palignr($8, %xmm3, %xmm0)
lea 32(up), up
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, (rp)
lea 32(rp), rp
@@ -249,7 +250,7 @@
movdqa -8(up), %xmm3
palignr($8, %xmm3, %xmm0)
lea 16(up), up
- pxor %xmm7, %xmm0
+ pxor %xmm5, %xmm0
movdqa %xmm0, (rp)
lea 16(rp), rp
diff -r ed66cd73af51 -r bce8e07b51a6 mpn/x86_64/fastsse/com.asm
--- a/mpn/x86_64/fastsse/com.asm Fri Sep 08 23:38:03 2017 +0200
+++ b/mpn/x86_64/fastsse/com.asm Sat Dec 16 22:32:27 2017 +0100
@@ -78,6 +78,10 @@
PROLOGUE(mpn_com)
FUNC_ENTRY(3)
+IFDOS(` add $-56, %rsp ')
+IFDOS(` movdqa %xmm6, (%rsp) ')
+IFDOS(` movdqa %xmm7, 16(%rsp) ')
+
pcmpeqb %xmm7, %xmm7 C set to 111...111
test $8, R8(rp) C is rp 16-byte aligned?
@@ -162,6 +166,10 @@
not %rax
mov %rax, (rp)
1:
-L(don): FUNC_EXIT()
+L(don):
+IFDOS(` movdqa (%rsp), %xmm6 ')
+IFDOS(` movdqa 16(%rsp), %xmm7 ')
+IFDOS(` add $56, %rsp ')
+ FUNC_EXIT()
ret
EPILOGUE()
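The 56-byte adjustment above is chosen for stack alignment: movdqa faults on addresses that are not 16-byte aligned, and on entry %rsp is congruent to 8 mod 16 because the caller's call pushed an 8-byte return address onto a 16-byte aligned stack. Checking the numbers used in the hunk:

	%rsp at entry              =  8 (mod 16)		C return address just pushed
	%rsp after add $-56, %rsp  =  8 - 56 = -48 = 0 (mod 16)	C movdqa-safe alignment
	56 bytes                   >= 2 * 16			C room for the %xmm6/%xmm7 saves

The matching add $56, %rsp before FUNC_EXIT() undoes the adjustment exactly, so the return address is back at (%rsp) when ret executes.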
diff -r ed66cd73af51 -r bce8e07b51a6 mpn/x86_64/fastsse/copyd.asm
--- a/mpn/x86_64/fastsse/copyd.asm Fri Sep 08 23:38:03 2017 +0200
+++ b/mpn/x86_64/fastsse/copyd.asm Sat Dec 16 22:32:27 2017 +0100
@@ -94,9 +94,13 @@
lea -8(rp), rp
dec n
- sub $16, n
+L(ali): sub $16, n
jc L(sma)
+IFDOS(` add $-56, %rsp ')
+IFDOS(` movdqa %xmm6, (%rsp) ')
+IFDOS(` movdqa %xmm7, 16(%rsp) ')
+
ALIGN(16)
L(top): movdqu (up), %xmm0
movdqu -16(up), %xmm1
@@ -116,9 +120,13 @@
movdqa %xmm6, -96(rp)
movdqa %xmm7, -112(rp)
lea -128(rp), rp
-L(ali): sub $16, n
+ sub $16, n
jnc L(top)
+IFDOS(` movdqa (%rsp), %xmm6 ')
+IFDOS(` movdqa 16(%rsp), %xmm7 ')
+IFDOS(` add $56, %rsp ')
+
L(sma): test $8, R8(n)
jz 1f
movdqu (up), %xmm0
diff -r ed66cd73af51 -r bce8e07b51a6 mpn/x86_64/fastsse/copyi.asm
--- a/mpn/x86_64/fastsse/copyi.asm Fri Sep 08 23:38:03 2017 +0200
+++ b/mpn/x86_64/fastsse/copyi.asm Sat Dec 16 22:32:27 2017 +0100
@@ -88,9 +88,13 @@
movsq C copy single limb
dec n
- sub $16, n
+L(ali): sub $16, n
jc L(sma)
+IFDOS(` add $-56, %rsp ')
+IFDOS(` movdqa %xmm6, (%rsp) ')
+IFDOS(` movdqa %xmm7, 16(%rsp) ')
+
ALIGN(16)
L(top): movdqu (up), %xmm0
movdqu 16(up), %xmm1
@@ -110,9 +114,13 @@
movdqa %xmm6, 96(rp)
movdqa %xmm7, 112(rp)
lea 128(rp), rp
-L(ali): sub $16, n
+ sub $16, n
jnc L(top)
+IFDOS(` movdqa (%rsp), %xmm6 ')
+IFDOS(` movdqa 16(%rsp), %xmm7 ')
+IFDOS(` add $56, %rsp ')
+
L(sma): test $8, R8(n)
jz 1f
movdqu (up), %xmm0
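copyd.asm and copyi.asm take a cheaper route than com.asm: instead of saving at function entry, the L(ali) label is moved so that the IFDOS block runs only when the 16-limb unrolled loop, the only code in these routines that uses %xmm6/%xmm7, is actually entered; short operands branch to L(sma) and never pay for the saves. Schematically (a sketch of the control flow, not the verbatim file):

	L(ali):	sub	$16, n			C fewer than 16 limbs left?
		jc	L(sma)			C yes: tail code, no %xmm6/%xmm7 use
	IFDOS(`	add	$-56, %rsp	')	C save only on the unrolled-loop path
	IFDOS(`	movdqa	%xmm6, (%rsp)	')
	IFDOS(`	movdqa	%xmm7, 16(%rsp)	')
	L(top):	...				C unrolled copy using %xmm0-%xmm7
		sub	$16, n
		jnc	L(top)
	IFDOS(`	movdqa	(%rsp), %xmm6	')	C restore before falling into the tail
	IFDOS(`	movdqa	16(%rsp), %xmm7	')
	IFDOS(`	add	$56, %rsp	')
	L(sma):	...				C handles the remaining < 16 limbs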
diff -r ed66cd73af51 -r bce8e07b51a6 mpn/x86_64/fastsse/lshiftc.asm
--- a/mpn/x86_64/fastsse/lshiftc.asm Fri Sep 08 23:38:03 2017 +0200
+++ b/mpn/x86_64/fastsse/lshiftc.asm Sat Dec 16 22:32:27 2017 +0100
@@ -79,7 +79,7 @@
mov -8(ap,n,8), %rax
shr R8(%rcx), %rax
- pcmpeqb %xmm7, %xmm7 C set to 111...111
+ pcmpeqb %xmm2, %xmm2 C set to 111...111
cmp $2, n
jle L(le2)
@@ -94,7 +94,7 @@
psllq %xmm4, %xmm0
psrlq %xmm5, %xmm1
por %xmm1, %xmm0
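com-palignr.asm and lshiftc.asm follow a third approach: the all-ones mask that used to live in %xmm7 is renumbered into a volatile register (%xmm5 and %xmm2, respectively), so, at least in the hunks shown, only caller-saved registers remain in use and no save/restore code is needed at all. The essence of that approach, using instructions taken from the com-palignr.asm hunk above:

	C keep the constant mask in a volatile register; %xmm0-%xmm5 need not
	C be preserved under DOS64
	pcmpeqb	%xmm5, %xmm5		C set the mask register to 111...111
	...
	pxor	%xmm5, %xmm0		C complement data by xoring with the mask

Renumbering costs nothing at run time, so it is the natural choice whenever a routine's working set fits into %xmm0-%xmm5.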