[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
Sun May 21 22:24:18 UTC 2017
details: /var/hg/gmp/rev/765815cc0f79
changeset: 17394:765815cc0f79
user: Torbjorn Granlund <tg at gmplib.org>
date: Mon May 22 00:19:54 2017 +0200
description:
Rewrite.
details: /var/hg/gmp/rev/7c843e23d272
changeset: 17395:7c843e23d272
user: Torbjorn Granlund <tg at gmplib.org>
date: Mon May 22 00:23:00 2017 +0200
description:
Tweak header comment.
details: /var/hg/gmp/rev/069ff6facc35
changeset: 17396:069ff6facc35
user: Torbjorn Granlund <tg at gmplib.org>
date: Mon May 22 00:23:32 2017 +0200
description:
New grabber file.
details: /var/hg/gmp/rev/020ab7920eab
changeset: 17397:020ab7920eab
user: Torbjorn Granlund <tg at gmplib.org>
date: Mon May 22 00:24:15 2017 +0200
description:
ChangeLog
diffstat:
ChangeLog | 8 ++
mpn/x86_64/core2/com.asm | 37 ++++++++++
mpn/x86_64/core2/copyd.asm | 2 +-
mpn/x86_64/core2/copyi.asm | 2 +-
mpn/x86_64/core2/lshift.asm | 134 ++++++++++++++++++-------------------
mpn/x86_64/core2/lshiftc.asm | 152 +++++++++++++++++++++---------------------
mpn/x86_64/core2/rshift.asm | 136 ++++++++++++++++++-------------------
7 files changed, 254 insertions(+), 217 deletions(-)
diffs (truncated from 649 to 300 lines):
diff -r cd7b647bdabe -r 020ab7920eab ChangeLog
--- a/ChangeLog Sat May 20 16:03:50 2017 +0200
+++ b/ChangeLog Mon May 22 00:24:15 2017 +0200
@@ -1,3 +1,11 @@
+2017-05-22 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/core2/com.asm: New grabber file.
+
+ * mpn/x86_64/core2/lshift.asm: Rewrite.
+ * mpn/x86_64/core2/rshift.asm: Rewrite.
+ * mpn/x86_64/core2/lshiftc.asm: Rewrite.
+
2017-05-16 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/divis.c (mpn_divisible_p): Updated the divisibility
diff -r cd7b647bdabe -r 020ab7920eab mpn/x86_64/core2/com.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/core2/com.asm Mon May 22 00:24:15 2017 +0200
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_com.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_com)
+include_mpn(`x86_64/fastsse/com-palignr.asm')
diff -r cd7b647bdabe -r 020ab7920eab mpn/x86_64/core2/copyd.asm
--- a/mpn/x86_64/core2/copyd.asm Sat May 20 16:03:50 2017 +0200
+++ b/mpn/x86_64/core2/copyd.asm Mon May 22 00:24:15 2017 +0200
@@ -1,4 +1,4 @@
-dnl X86-64 mpn_copyd optimised for Intel Sandy Bridge.
+dnl X86-64 mpn_copyd.
dnl Copyright 2012 Free Software Foundation, Inc.
diff -r cd7b647bdabe -r 020ab7920eab mpn/x86_64/core2/copyi.asm
--- a/mpn/x86_64/core2/copyi.asm Sat May 20 16:03:50 2017 +0200
+++ b/mpn/x86_64/core2/copyi.asm Mon May 22 00:24:15 2017 +0200
@@ -1,4 +1,4 @@
-dnl X86-64 mpn_copyi optimised for Intel Sandy Bridge.
+dnl X86-64 mpn_copyi.
dnl Copyright 2012 Free Software Foundation, Inc.
diff -r cd7b647bdabe -r 020ab7920eab mpn/x86_64/core2/lshift.asm
--- a/mpn/x86_64/core2/lshift.asm Sat May 20 16:03:50 2017 +0200
+++ b/mpn/x86_64/core2/lshift.asm Mon May 22 00:24:15 2017 +0200
@@ -1,6 +1,6 @@
-dnl x86-64 mpn_lshift optimized for "Core 2".
+dnl x86-64 mpn_lshift optimised for Conroe/Penryn and Nehalem.
-dnl Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2007, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -30,17 +30,27 @@
include(`../config.m4')
-
C cycles/limb
-C AMD K8,K9 4.25
-C AMD K10 4.25
-C Intel P4 14.7
-C Intel core2 1.27
-C Intel NHM 1.375 (up to about n = 260, then 1.5)
-C Intel SBR 1.87
-C Intel atom ?
-C VIA nano ?
-
+C AMD K8,K9
+C AMD K10
+C AMD bd1
+C AMD bd2
+C AMD bd3
+C AMD bd4
+C AMD zen
+C AMD bobcat
+C AMD jaguar
+C Intel P4
+C Intel core2 1.32
+C Intel NHM 1.30 (drops to 2.5 for n > 256)
+C Intel SBR
+C Intel IBR
+C Intel HWL
+C Intel BWL
+C Intel SKL
+C Intel atom
+C Intel SLM
+C VIA nano
C INPUT PARAMETERS
define(`rp', `%rdi')
@@ -56,69 +66,55 @@
ALIGN(16)
PROLOGUE(mpn_lshift)
FUNC_ENTRY(4)
- lea -8(rp,n,8), rp
- lea -8(up,n,8), up
- mov R32(%rdx), R32(%rax)
- and $3, R32(%rax)
- jne L(nb00)
-L(b00): C n = 4, 8, 12, ...
+ xor R32(%rax), R32(%rax)
+
+ test $1, R8(n)
+ jnz L(bx1)
+L(bx0): test $2, R8(n)
+ jnz L(b10)
+
+L(b00): lea -8(up,n,8), up
+ lea 16(rp,n,8), rp
mov (up), %r10
mov -8(up), %r11
- xor R32(%rax), R32(%rax)
shld R8(cnt), %r10, %rax
mov -16(up), %r8
- lea 24(rp), rp
- sub $4, n
+ shr $2, n
jmp L(00)
-L(nb00):C n = 1, 5, 9, ...
- cmp $2, R32(%rax)
- jae L(nb01)
-L(b01): mov (up), %r9
- xor R32(%rax), R32(%rax)
- shld R8(cnt), %r9, %rax
- sub $2, n
- jb L(le1)
- mov -8(up), %r10
- mov -16(up), %r11
- lea -8(up), up
- lea 16(rp), rp
- jmp L(01)
-L(le1): shl R8(cnt), %r9
- mov %r9, (rp)
- FUNC_EXIT()
- ret
+L(bx1): test $2, R8(n)
+ jnz L(b11)
-L(nb01):C n = 2, 6, 10, ...
- jne L(b11)
-L(b10): mov (up), %r8
- mov -8(up), %r9
- xor R32(%rax), R32(%rax)
+L(b01): lea -16(up,n,8), up
+ lea 8(rp,n,8), rp
+ mov 8(up), %r9
+ shld R8(cnt), %r9, %rax
+ shr $2, n
+ jz L(1)
+ mov (up), %r10
+ mov -8(up), %r11
+ jmp L(01)
+
+L(b10): lea -24(up,n,8), up
+ lea (rp,n,8), rp
+ mov 16(up), %r8
+ mov 8(up), %r9
shld R8(cnt), %r8, %rax
- sub $3, n
- jb L(le2)
- mov -16(up), %r10
- lea -16(up), up
- lea 8(rp), rp
+ shr $2, n
+ jz L(2)
+ mov (up), %r10
jmp L(10)
-L(le2): shld R8(cnt), %r9, %r8
- mov %r8, (rp)
- shl R8(cnt), %r9
- mov %r9, -8(rp)
- FUNC_EXIT()
- ret
- ALIGN(16) C performance critical!
-L(b11): C n = 3, 7, 11, ...
- mov (up), %r11
- mov -8(up), %r8
- xor R32(%rax), R32(%rax)
+ ALIGN(16)
+L(b11): lea -32(up,n,8), up
+ lea -8(rp,n,8), rp
+ mov 24(up), %r11
+ mov 16(up), %r8
+ mov 8(up), %r9
shld R8(cnt), %r11, %rax
- mov -16(up), %r9
- lea -24(up), up
- sub $4, n
- jb L(end)
+ shr $2, n
+ jz L(end)
ALIGN(16)
L(top): shld R8(cnt), %r8, %r11
@@ -132,17 +128,17 @@
mov %r9, -16(rp)
L(00): shld R8(cnt), %r11, %r10
mov -24(up), %r9
- mov %r10, -24(rp)
add $-32, up
- lea -32(rp), rp
- sub $4, n
- jnc L(top)
+ mov %r10, -24(rp)
+ add $-32, rp
+ dec n
+ jnz L(top)
L(end): shld R8(cnt), %r8, %r11
mov %r11, (rp)
- shld R8(cnt), %r9, %r8
+L(2): shld R8(cnt), %r9, %r8
mov %r8, -8(rp)
- shl R8(cnt), %r9
+L(1): shl R8(cnt), %r9
mov %r9, -16(rp)
FUNC_EXIT()
ret
diff -r cd7b647bdabe -r 020ab7920eab mpn/x86_64/core2/lshiftc.asm
--- a/mpn/x86_64/core2/lshiftc.asm Sat May 20 16:03:50 2017 +0200
+++ b/mpn/x86_64/core2/lshiftc.asm Mon May 22 00:24:15 2017 +0200
@@ -1,6 +1,6 @@
-dnl x86-64 mpn_lshiftc optimized for "Core 2".
+dnl x86-64 mpn_lshiftc optimised for Conroe/Penryn and Nehalem.
-dnl Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2007, 2009, 2011, 2012, 2017 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -30,17 +30,27 @@
include(`../config.m4')
-
C cycles/limb
-C AMD K8,K9 ?
-C AMD K10 ?
-C Intel P4 ?
-C Intel core2 1.5
-C Intel NHM 2.25 (up to about n = 260, then 1.875)
-C Intel SBR 2.25
-C Intel atom ?
-C VIA nano ?
-
+C AMD K8,K9
+C AMD K10
+C AMD bd1
+C AMD bd2
+C AMD bd3
+C AMD bd4
+C AMD zen
+C AMD bobcat
+C AMD jaguar
+C Intel P4
+C Intel core2 1.52
+C Intel NHM 1.78 (just 2.15 for n < 256)
+C Intel SBR
+C Intel IBR
+C Intel HWL
+C Intel BWL
+C Intel SKL
+C Intel atom
More information about the gmp-commit mailing list