[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Nov 17 21:26:35 UTC 2019
details: /var/hg/gmp/rev/b09d3f0c080a
changeset: 17967:b09d3f0c080a
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Nov 17 19:46:23 2019 +0100
description:
Increase alignment; update x/l table.
details: /var/hg/gmp/rev/c2c0aef2546c
changeset: 17968:c2c0aef2546c
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Nov 17 22:05:10 2019 +0100
description:
Rewrite.
details: /var/hg/gmp/rev/5d3ad50a7156
changeset: 17969:5d3ad50a7156
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Nov 17 22:06:51 2019 +0100
description:
Rewrite.
diffstat:
mpn/arm/v6t2/gcd_11.asm | 23 +--
mpn/x86_64/bt1/aorsmul_1.asm | 157 ++++++++++++------------
mpn/x86_64/bt1/mul_1.asm | 271 +++++++++++++++++++++++++-----------------
3 files changed, 246 insertions(+), 205 deletions(-)
diffs (truncated from 568 to 300 lines):
diff -r d296f15b0eec -r 5d3ad50a7156 mpn/arm/v6t2/gcd_11.asm
--- a/mpn/arm/v6t2/gcd_11.asm Sun Nov 17 18:34:30 2019 +0100
+++ b/mpn/arm/v6t2/gcd_11.asm Sun Nov 17 22:06:51 2019 +0100
@@ -1,9 +1,7 @@
dnl ARM v6t2 mpn_gcd_11.
-dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjörn
-dnl Granlund.
-
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2019 Free Software Foundation,
+dnl Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -36,21 +34,20 @@
C cycles/bit (approx)
C StrongARM -
C XScale -
-C Cortex-A5 5.75 obsolete
-C Cortex-A7 6.38 obsolete
-C Cortex-A8 5.0 obsolete
-C Cortex-A9 5.3 obsolete
-C Cortex-A15 2.92 obsolete
-C Cortex-A17 5.63 obsolete
-C Cortex-A53 4.25 obsolete
-C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+C Cortex-A5 5.2
+C Cortex-A7 5.04
+C Cortex-A8 3.59
+C Cortex-A9 9.5
+C Cortex-A15 3.2
+C Cortex-A17 5.25
+C Cortex-A53 3.57
define(`u0', `r0')
define(`v0', `r1')
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
PROLOGUE(mpn_gcd_11)
subs r3, u0, v0 C 0
beq L(end) C
diff -r d296f15b0eec -r 5d3ad50a7156 mpn/x86_64/bt1/aorsmul_1.asm
--- a/mpn/x86_64/bt1/aorsmul_1.asm Sun Nov 17 18:34:30 2019 +0100
+++ b/mpn/x86_64/bt1/aorsmul_1.asm Sun Nov 17 22:06:51 2019 +0100
@@ -1,6 +1,7 @@
-dnl AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.
+dnl AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bt1/bt2.
-dnl Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2003-2005, 2007, 2008, 2011, 2012, 2018-2019 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -31,25 +32,29 @@
include(`../config.m4')
C cycles/limb
-C AMD K8,K9 4.52
-C AMD K10 4.51
-C AMD bull 4.66
-C AMD pile 4.57
-C AMD steam
-C AMD excavator
-C AMD bobcat 5.05
-C AMD jaguar 5.22
-C Intel P4 16.8 18.6
-C Intel core2 5.59
-C Intel NHM 5.39
-C Intel SBR 3.93
-C Intel IBR 3.59
-C Intel HWL 3.61
-C Intel BWL 2.76
-C Intel SKL 2.77
-C Intel atom 23
-C Intel SLM 8
-C VIA nano 5.63
+C AMD K8,K9 4.52 old measurement
+C AMD K10 4.51 old measurement
+C AMD bd1 4.66 old measurement
+C AMD bd2 4.57 old measurement
+C AMD bd3 ?
+C AMD bd4 ?
+C AMD zen ?
+C AMD bt1 5.04
+C AMD bt2 5.07
+C Intel P4 16.8 18.6 old measurement
+C Intel PNR 5.59 old measurement
+C Intel NHM 5.39 old measurement
+C Intel SBR 3.93 old measurement
+C Intel IBR 3.59 old measurement
+C Intel HWL 3.61 old measurement
+C Intel BWL 2.76 old measurement
+C Intel SKL 2.77 old measurement
+C Intel atom 23 old measurement
+C Intel SLM 8 old measurement
+C Intel GLM ?
+C VIA nano 5.63 old measurement
+
+C The ALIGNments here might look completely ad-hoc. They are not.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)
@@ -91,7 +96,7 @@
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
PROLOGUE(func)
IFDOS(` push %rsi ')
IFDOS(` push %rdi ')
@@ -100,91 +105,85 @@
push %rbx
mov (up), %rax
- lea -16(rp,n_param,8), rp
- lea -16(up,n_param,8), up
-
+ lea (rp,n_param,8), rp
+ lea (up,n_param,8), up
mov n_param, n
- and $3, R32(n_param)
- jz L(b0)
- cmp $2, R32(n_param)
- ja L(b3)
- jz L(b2)
+
+ test $1, R8(n_param)
+ jne L(bx1)
-L(b1): mul v0
- cmp $1, n
- jz L(n1)
- mov %rax, w2
- mov %rdx, w3
+L(bx0): mul v0
neg n
- add $3, n
- jmp L(L1)
-L(n1): ADDSUB %rax, 8(rp)
- adc $0, %rdx
- mov %rdx, %rax
- pop %rbx
-IFDOS(` pop %rdi ')
-IFDOS(` pop %rsi ')
- ret
+ mov %rax, w0
+ mov %rdx, w1
+ test $2, R8(n)
+ jne L(L2)
-L(b3): mul v0
- mov %rax, w2
+L(b00): add $2, n
+ jmp L(L0)
+
+ ALIGN(16)
+L(bx1): mul v0
+ test $2, R8(n)
+ je L(b01)
+
+L(b11): mov %rax, w2
mov %rdx, w3
neg n
inc n
jmp L(L3)
-L(b0): mul v0
+ ALIGN(16)
+L(b01): sub $3, n
+ jc L(n1)
+ mov %rax, w2
+ mov %rdx, w3
+ neg n
+
+ ALIGN(16)
+L(top): mov -16(up,n,8), %rax
+ mul v0
mov %rax, w0
mov %rdx, w1
- neg n
- add $2, n
- jmp L(L0)
-
-L(b2): mul v0
- mov %rax, w0
- mov %rdx, w1
- neg n
- jmp L(L2)
-
- ALIGN(16)
-L(top): ADDSUB w0, -16(rp,n,8)
+ ADDSUB w2, -24(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(L0): mov -8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ ADDSUB w0, -16(rp,n,8)
adc w1, w2
adc $0, w3
-L(L1): mov 0(up,n,8), %rax
+L(L3): mov (up,n,8), %rax
mul v0
mov %rax, w0
mov %rdx, w1
ADDSUB w2, -8(rp,n,8)
adc w3, w0
adc $0, w1
-L(L0): mov 8(up,n,8), %rax
+L(L2): mov 8(up,n,8), %rax
mul v0
mov %rax, w2
mov %rdx, w3
- ADDSUB w0, 0(rp,n,8)
+ ADDSUB w0, (rp,n,8)
adc w1, w2
adc $0, w3
-L(L3): mov 16(up,n,8), %rax
- mul v0
- mov %rax, w0
- mov %rdx, w1
- ADDSUB w2, 8(rp,n,8)
- adc w3, w0
- adc $0, w1
-L(L2): mov 24(up,n,8), %rax
- mul v0
- mov %rax, w2
- mov %rdx, w3
add $4, n
js L(top)
-L(end): ADDSUB w0, (rp)
- adc w1, w2
- adc $0, w3
- ADDSUB w2, 8(rp)
- adc $0, w3
- mov w3, %rax
+L(end): xor R32(%rax), R32(%rax)
+ ADDSUB w2, -8(rp)
+ adc w3, %rax
+ pop %rbx
+IFDOS(` pop %rdi ')
+IFDOS(` pop %rsi ')
+ ret
+ ALIGN(32)
+L(n1): ADDSUB %rax, -8(rp)
+ mov $0, R32(%rax)
+ adc %rdx, %rax
pop %rbx
IFDOS(` pop %rdi ')
IFDOS(` pop %rsi ')
diff -r d296f15b0eec -r 5d3ad50a7156 mpn/x86_64/bt1/mul_1.asm
--- a/mpn/x86_64/bt1/mul_1.asm Sun Nov 17 18:34:30 2019 +0100
+++ b/mpn/x86_64/bt1/mul_1.asm Sun Nov 17 22:06:51 2019 +0100
@@ -1,6 +1,7 @@
-dnl AMD64 mpn_mul_1 optimised for AMD bobcat.
+dnl AMD64 mpn_mul_1 optimised for AMD bt1/bt2.
-dnl Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2003-2005, 2007, 2008, 2011, 2012, 2019 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -31,25 +32,27 @@
include(`../config.m4')
C cycles/limb
-C AMD K8,K9 4.53
-C AMD K10 4.53
-C AMD bull 4.56
-C AMD pile 4.47
-C AMD steam
-C AMD excavator
-C AMD bobcat 5.07
-C AMD jaguar 5.23 5.82
-C Intel P4 12.6
-C Intel core2 4.53
-C Intel NHM 4.36
-C Intel SBR 3.0
-C Intel IBR 2.55
-C Intel HWL 2.28
-C Intel BWL 2.36
-C Intel SKL 2.39
-C Intel atom 21.0
-C Intel SLM 9
-C VIA nano
+C AMD K8,K9 4.53 old measurement
+C AMD K10 4.53 old measurement
+C AMD bd1 4.56 old measurement
+C AMD bd2 4.47 old measurement
+C AMD bd3 ?
More information about the gmp-commit
mailing list