[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
Mon Jun 5 22:36:16 UTC 2017
details: /var/hg/gmp/rev/c998a5a02f88
changeset: 17430:c998a5a02f88
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jun 06 00:30:49 2017 +0200
description:
Use 8-bit imm operands for "test".
details: /var/hg/gmp/rev/8e8d31c61355
changeset: 17431:8e8d31c61355
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jun 06 00:32:35 2017 +0200
description:
Provide several add/sub-and-shift functions for Zen.
details: /var/hg/gmp/rev/26f8ffe7c35c
changeset: 17432:26f8ffe7c35c
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jun 06 00:33:32 2017 +0200
description:
(c4_helper): New macro.
(mulx, shlx, shrx): Use c4_helper.
details: /var/hg/gmp/rev/a3233882f420
changeset: 17433:a3233882f420
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jun 06 00:34:41 2017 +0200
description:
Update c/l table.
details: /var/hg/gmp/rev/d7c2f8c0a6cc
changeset: 17434:d7c2f8c0a6cc
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jun 06 00:35:19 2017 +0200
description:
Update c/l table.
details: /var/hg/gmp/rev/11dd1fc6b879
changeset: 17435:11dd1fc6b879
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jun 06 00:35:52 2017 +0200
description:
Update c/l table.
diffstat:
mpn/x86_64/aorrlshC_n.asm | 20 ++-
mpn/x86_64/coreibwl/addmul_1.asm | 35 ++--
mpn/x86_64/coreibwl/sqr_basecase.asm | 18 +-
mpn/x86_64/x86_64-defs.m4 | 91 +++++++++++--
mpn/x86_64/zen/aorrlsh1_n.asm | 37 +++++
mpn/x86_64/zen/aorrlsh_n.asm | 227 +++++++++++++++++++++++++++++++++++
mpn/x86_64/zen/mul_basecase.asm | 12 +-
mpn/x86_64/zen/sublsh1_n.asm | 37 +++++
8 files changed, 423 insertions(+), 54 deletions(-)
diffs (truncated from 583 to 300 lines):
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/aorrlshC_n.asm
--- a/mpn/x86_64/aorrlshC_n.asm Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/aorrlshC_n.asm Tue Jun 06 00:35:52 2017 +0200
@@ -31,13 +31,25 @@
C cycles/limb
-C AMD K8,K9 2
-C AMD K10 2
+C AMD K8,K9 2.1
+C AMD K10 2.0
+C AMD bd1 ~2.7
+C AMD bd2 ~2.7
+C AMD bd3 ?
+C AMD bd4 ?
+C AMD zen 2.0
+C AMD bt1 3.3
+C AMD bt2 3.0
C Intel P4 ?
-C Intel core2 3
+C Intel PNR 3.0
C Intel NHM 2.75
C Intel SBR 2.55
-C Intel atom ?
+C Intel IBR 2.49
+C Intel HWL 2.25
+C Intel BWL 1.89
+C Intel SKL 1.90
+C Intel atom 8.4
+C Intel SLM 4.0
C VIA nano ?
C INPUT PARAMETERS
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/coreibwl/addmul_1.asm
--- a/mpn/x86_64/coreibwl/addmul_1.asm Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/coreibwl/addmul_1.asm Tue Jun 06 00:35:52 2017 +0200
@@ -31,25 +31,26 @@
include(`../config.m4')
C cycles/limb
-C AMD K8,K9 -
-C AMD K10 -
-C AMD bull -
-C AMD pile -
-C AMD steam -
-C AMD excavator -
-C AMD bobcat -
-C AMD jaguar -
-C Intel P4 -
-C Intel core2 -
-C Intel NHM -
-C Intel SBR -
-C Intel IBR -
-C Intel HWL -
+C AMD K8,K9 n/a
+C AMD K10 n/a
+C AMD bd1 n/a
+C AMD bd2 n/a
+C AMD bd3 n/a
+C AMD bd4 ?
+C AMD zen ?
+C AMD bt1 n/a
+C AMD bt2 n/a
+C Intel P4 n/a
+C Intel PNR n/a
+C Intel NHM n/a
+C Intel SBR n/a
+C Intel IBR n/a
+C Intel HWL n/a
C Intel BWL 1.67 1.74
C Intel SKL 1.63 1.71
-C Intel atom -
-C Intel SLM -
-C VIA nano -
+C Intel atom n/a
+C Intel SLM n/a
+C VIA nano n/a
C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/coreibwl/sqr_basecase.asm
--- a/mpn/x86_64/coreibwl/sqr_basecase.asm Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/coreibwl/sqr_basecase.asm Tue Jun 06 00:35:52 2017 +0200
@@ -33,19 +33,21 @@
C cycles/limb mul_1 addmul_1
C AMD K8,K9 n/a n/a
C AMD K10 n/a n/a
-C AMD bull n/a n/a
-C AMD pile n/a n/a
-C AMD steam n/a n/a
-C AMD excavator ? ?
-C AMD bobcat n/a n/a
-C AMD jaguar n/a n/a
+C AMD bd1 n/a n/a
+C AMD bd2 n/a n/a
+C AMD bd3 n/a n/a
+C AMD bd4 ? ?
+C AMD zen ? ?
+C AMD bt1 n/a n/a
+C AMD bt2 n/a n/a
C Intel P4 n/a n/a
-C Intel core2 n/a n/a
+C Intel PNR n/a n/a
C Intel NHM n/a n/a
C Intel SBR n/a n/a
C Intel IBR n/a n/a
C Intel HWL 1.68 n/a
-C Intel BWL 1.69 1.8-1.9
+C Intel BWL 1.51 1.67-1.74
+C Intel SKL 1.52 1.63-1.71
C Intel atom n/a n/a
C Intel SLM n/a n/a
C VIA nano n/a n/a
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/x86_64-defs.m4
--- a/mpn/x86_64/x86_64-defs.m4 Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/x86_64-defs.m4 Tue Jun 06 00:35:52 2017 +0200
@@ -314,6 +314,44 @@
`(%r8)', 24, `(%r9)', 25, `(%r10)',26, `(%r11)',27,
`(%r12)',28, `(%r13)',29, `(%r14)',30, `(%r15)',31')
+dnl Usage (by mulx, shlx, shrx)
+dnl
+dnl reg1,reg2,reg3,opc1,opc2
+dnl
+dnl or
+dnl
+dnl (reg1),reg2,reg3,opc1,opc2
+dnl
+dnl where reg1 is any register but rsp,rbp,r12,r13,
+dnl
+dnl or
+dnl
+dnl off,(reg1),reg2,reg3,opc1,opc2
+dnl
+dnl where reg1 is any register but rsp,r12.
+dnl
+dnl The exceptions are due to special coding needed for some registers; rsp
+dnl and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
+dnl offset-less form.
+dnl
+dnl Other addressing forms are not handled. Invalid forms are not properly
+dnl detected. Offsets that don't fit one byte are not handled correctly.
+
+define(`c4_helper',`dnl
+.byte 0xc4`'dnl
+ifelse(`$#',5,`dnl
+,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
+,eval(0x$4-8*regnum($2))`'dnl
+,0x$5`'dnl
+,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
+',`$#',6,`dnl
+,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
+,eval(0x$5-8*regnum($3))`'dnl
+,0x$6`'dnl
+,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
+,eval(($1 + 256) % 256)`'dnl
+')')
+
dnl Usage
dnl
@@ -328,28 +366,43 @@
dnl mulx(off,(reg1),reg2,reg3)
dnl
dnl where reg1 is any register but rsp,r12.
-dnl
-dnl The exceptions are due to special coding needed for some registers; rsp
-dnl and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
-dnl offset-less form.
-dnl
-dnl Other addressing forms are not handled. Invalid forms are not properly
-dnl detected. Offsets that don't fit one byte are not handled correctly.
define(`mulx',`dnl
-.byte 0xc4`'dnl
ifelse(`$#',3,`dnl
-,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
-,eval(0xfb-8*regnum($2))`'dnl
-,0xf6`'dnl
-,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
-',`$#',4,`dnl
-,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
-,eval(0xfb-8*regnum($3))`'dnl
-,0xf6`'dnl
-,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
-,eval(($1 + 256) % 256)`'dnl
-')')
+c4_helper($1,$2,$3,fb,f6)',`dnl format 1,2
+c4_helper($1,$2,$3,$4,fb,f6)',dnl format 3
+)')
+
+
+dnl Usage
+dnl
+dnl shlx(reg1,reg2,reg3)
+dnl shrx(reg1,reg2,reg3)
+dnl
+dnl or
+dnl
+dnl shlx(reg1,(reg2),reg3)
+dnl shrx(reg1,(reg2),reg3)
+dnl
+dnl where reg2 is any register but rsp,rbp,r12,r13, or
+dnl
+dnl shlx(reg1,off,(reg2),reg3)
+dnl shrx(reg1,off,(reg2),reg3)
+dnl
+dnl where reg2 is any register but rsp,r12.
+
+define(`shlx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,f9,f7)',`dnl format 1,2
+c4_helper($1,$3,$2,$4,f9,f7)',dnl format 3
+)')
+
+define(`shrx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,fb,f7)',`dnl format 1,2
+c4_helper($1,$3,$2,$4,fb,f7)',dnl format 3
+)')
+
dnl Usage
dnl
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/zen/aorrlsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/zen/aorrlsh1_n.asm Tue Jun 06 00:35:52 2017 +0200
@@ -0,0 +1,37 @@
+dnl X86-64 mpn_addlsh1_n, mpn_addlsh1_nc, mpn_rsblsh1_n, mpn_rsblsh1_nc.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+include_mpn(`x86_64/atom/aorrlsh1_n.asm')
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/zen/aorrlsh_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/zen/aorrlsh_n.asm Tue Jun 06 00:35:52 2017 +0200
@@ -0,0 +1,227 @@
+dnl AMD64 mpn_addlsh_n, mpn_rsblsh_n.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
More information about the gmp-commit mailing list