[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Jun 5 22:36:16 UTC 2017


details:   /var/hg/gmp/rev/c998a5a02f88
changeset: 17430:c998a5a02f88
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jun 06 00:30:49 2017 +0200
description:
Use 8-bit imm operands for "test".

details:   /var/hg/gmp/rev/8e8d31c61355
changeset: 17431:8e8d31c61355
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jun 06 00:32:35 2017 +0200
description:
Provide several add/sub-and-shift functions for Zen.

details:   /var/hg/gmp/rev/26f8ffe7c35c
changeset: 17432:26f8ffe7c35c
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jun 06 00:33:32 2017 +0200
description:
(c4_helper): New macro.
(mulx, shlx, shrx): Use c4_helper.

details:   /var/hg/gmp/rev/a3233882f420
changeset: 17433:a3233882f420
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jun 06 00:34:41 2017 +0200
description:
Update c/l table.

details:   /var/hg/gmp/rev/d7c2f8c0a6cc
changeset: 17434:d7c2f8c0a6cc
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jun 06 00:35:19 2017 +0200
description:
Update c/l table.

details:   /var/hg/gmp/rev/11dd1fc6b879
changeset: 17435:11dd1fc6b879
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jun 06 00:35:52 2017 +0200
description:
Update c/l table.

diffstat:

 mpn/x86_64/aorrlshC_n.asm            |   20 ++-
 mpn/x86_64/coreibwl/addmul_1.asm     |   35 ++--
 mpn/x86_64/coreibwl/sqr_basecase.asm |   18 +-
 mpn/x86_64/x86_64-defs.m4            |   91 +++++++++++--
 mpn/x86_64/zen/aorrlsh1_n.asm        |   37 +++++
 mpn/x86_64/zen/aorrlsh_n.asm         |  227 +++++++++++++++++++++++++++++++++++
 mpn/x86_64/zen/mul_basecase.asm      |   12 +-
 mpn/x86_64/zen/sublsh1_n.asm         |   37 +++++
 8 files changed, 423 insertions(+), 54 deletions(-)

diffs (truncated from 583 to 300 lines):

diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/aorrlshC_n.asm
--- a/mpn/x86_64/aorrlshC_n.asm	Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/aorrlshC_n.asm	Tue Jun 06 00:35:52 2017 +0200
@@ -31,13 +31,25 @@
 
 
 C	     cycles/limb
-C AMD K8,K9	 2
-C AMD K10	 2
+C AMD K8,K9	 2.1
+C AMD K10	 2.0
+C AMD bd1	~2.7
+C AMD bd2	~2.7
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD zen	 2.0
+C AMD bt1	 3.3
+C AMD bt2	 3.0
 C Intel P4	 ?
-C Intel core2	 3
+C Intel PNR	 3.0
 C Intel NHM	 2.75
 C Intel SBR	 2.55
-C Intel atom	 ?
+C Intel IBR	 2.49
+C Intel HWL	 2.25
+C Intel BWL	 1.89
+C Intel SKL	 1.90
+C Intel atom	 8.4
+C Intel SLM	 4.0
 C VIA nano	 ?
 
 C INPUT PARAMETERS
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/coreibwl/addmul_1.asm
--- a/mpn/x86_64/coreibwl/addmul_1.asm	Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/coreibwl/addmul_1.asm	Tue Jun 06 00:35:52 2017 +0200
@@ -31,25 +31,26 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 -
-C AMD K10	 -
-C AMD bull	 -
-C AMD pile	 -
-C AMD steam	 -
-C AMD excavator	 -
-C AMD bobcat	 -
-C AMD jaguar	 -
-C Intel P4	 -
-C Intel core2	 -
-C Intel NHM	 -
-C Intel SBR	 -
-C Intel IBR	 -
-C Intel HWL	 -
+C AMD K8,K9	n/a
+C AMD K10	n/a
+C AMD bd1	n/a
+C AMD bd2	n/a
+C AMD bd3	n/a
+C AMD bd4	 ?
+C AMD zen	 ?
+C AMD bt1	n/a
+C AMD bt2	n/a
+C Intel P4	n/a
+C Intel PNR	n/a
+C Intel NHM	n/a
+C Intel SBR	n/a
+C Intel IBR	n/a
+C Intel HWL	n/a
 C Intel BWL	 1.67	 1.74
 C Intel SKL	 1.63	 1.71
-C Intel atom	 -
-C Intel SLM	 -
-C VIA nano	 -
+C Intel atom	n/a
+C Intel SLM	n/a
+C VIA nano	n/a
 
 C The loop of this code is the result of running a code generation and
 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/coreibwl/sqr_basecase.asm
--- a/mpn/x86_64/coreibwl/sqr_basecase.asm	Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/coreibwl/sqr_basecase.asm	Tue Jun 06 00:35:52 2017 +0200
@@ -33,19 +33,21 @@
 C cycles/limb	mul_1		addmul_1
 C AMD K8,K9	n/a		n/a
 C AMD K10	n/a		n/a
-C AMD bull	n/a		n/a
-C AMD pile	n/a		n/a
-C AMD steam	n/a		n/a
-C AMD excavator	 ?		 ?
-C AMD bobcat	n/a		n/a
-C AMD jaguar	n/a		n/a
+C AMD bd1	n/a		n/a
+C AMD bd2	n/a		n/a
+C AMD bd3	n/a		n/a
+C AMD bd4	 ?		 ?
+C AMD zen	 ?		 ?
+C AMD bt1	n/a		n/a
+C AMD bt2	n/a		n/a
 C Intel P4	n/a		n/a
-C Intel core2	n/a		n/a
+C Intel PNR	n/a		n/a
 C Intel NHM	n/a		n/a
 C Intel SBR	n/a		n/a
 C Intel IBR	n/a		n/a
 C Intel HWL	 1.68		n/a
-C Intel BWL	 1.69	      1.8-1.9
+C Intel BWL	 1.51	      1.67-1.74
+C Intel SKL	 1.52	      1.63-1.71
 C Intel atom	n/a		n/a
 C Intel SLM	n/a		n/a
 C VIA nano	n/a		n/a
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/x86_64-defs.m4
--- a/mpn/x86_64/x86_64-defs.m4	Sun Jun 04 17:15:46 2017 +0200
+++ b/mpn/x86_64/x86_64-defs.m4	Tue Jun 06 00:35:52 2017 +0200
@@ -314,6 +314,44 @@
  `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
  `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')
 
+dnl  Usage (by mulx, shlx, shrx)
+dnl
+dnl     reg1,reg2,reg3,opc1,opc2
+dnl
+dnl  or
+dnl
+dnl     (reg1),reg2,reg3,opc1,opc2
+dnl
+dnl  where reg1 is any register but rsp,rbp,r12,r13
+dnl
+dnl  or
+dnl
+dnl     off,(reg1),reg2,reg3,opc1,opc2
+dnl
+dnl  where reg1 is any register but rsp,r12.
+dnl
+dnl  The exceptions are due to special coding needed for some registers; rsp
+dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
+dnl  offset-less form.
+dnl
+dnl  Other addressing forms are not handled.  Invalid forms are not properly
+dnl  detected.  Offsets that don't fit one byte are not handled correctly.
+
+define(`c4_helper',`dnl
+.byte	0xc4`'dnl
+ifelse(`$#',5,`dnl
+,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
+,eval(0x$4-8*regnum($2))`'dnl
+,0x$5`'dnl
+,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
+',`$#',6,`dnl
+,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
+,eval(0x$5-8*regnum($3))`'dnl
+,0x$6`'dnl
+,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
+,eval(($1 + 256) % 256)`'dnl
+')')
+
 
 dnl  Usage
 dnl
@@ -328,28 +366,43 @@
 dnl     mulx(off,(reg1),reg2,reg3)
 dnl
 dnl  where reg1 is any register but rsp,r12.
-dnl
-dnl  The exceptions are due to special coding needed for some registers; rsp
-dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
-dnl  offset-less form.
-dnl
-dnl  Other addressing forms are not handled.  Invalid forms are not properly
-dnl  detected.  Offsets that don't fit one byte are not handled correctly.
 
 define(`mulx',`dnl
-.byte	0xc4`'dnl
 ifelse(`$#',3,`dnl
-,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
-,eval(0xfb-8*regnum($2))`'dnl
-,0xf6`'dnl
-,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
-',`$#',4,`dnl
-,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
-,eval(0xfb-8*regnum($3))`'dnl
-,0xf6`'dnl
-,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
-,eval(($1 + 256) % 256)`'dnl
-')')
+c4_helper($1,$2,$3,fb,f6)',`dnl         format 1,2
+c4_helper($1,$2,$3,$4,fb,f6)',dnl	format 3
+)')
+
+
+dnl  Usage
+dnl
+dnl     shlx(reg1,reg2,reg3)
+dnl     shrx(reg1,reg2,reg3)
+dnl
+dnl  or
+dnl
+dnl     shlx(reg1,(reg2),reg3)
+dnl     shrx(reg1,(reg2),reg3)
+dnl
+dnl  where reg2 is any register but rsp,rbp,r12,r13, or
+dnl
+dnl     shlx(reg1,off,(reg2),reg3)
+dnl     shrx(reg1,off,(reg2),reg3)
+dnl
+dnl  where reg2 is any register but rsp,r12.
+
+define(`shlx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,f9,f7)',`dnl         format 1,2
+c4_helper($1,$3,$2,$4,f9,f7)',dnl       format 3
+)')
+
+define(`shrx',`dnl
+ifelse(`$#',3,`dnl
+c4_helper($2,$1,$3,fb,f7)',`dnl         format 1,2
+c4_helper($1,$3,$2,$4,fb,f7)',dnl       format 3
+)')
+
 
 dnl  Usage
 dnl
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/zen/aorrlsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/zen/aorrlsh1_n.asm	Tue Jun 06 00:35:52 2017 +0200
@@ -0,0 +1,37 @@
+dnl  X86-64 mpn_addlsh1_n, mpn_addlsh1_nc, mpn_rsblsh1_n, mpn_rsblsh1_nc.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+include_mpn(`x86_64/atom/aorrlsh1_n.asm')
diff -r ca09ccdb1191 -r 11dd1fc6b879 mpn/x86_64/zen/aorrlsh_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/zen/aorrlsh_n.asm	Tue Jun 06 00:35:52 2017 +0200
@@ -0,0 +1,227 @@
+dnl  AMD64 mpn_addlsh_n, mpn_rsblsh_n.
+
+dnl  Copyright 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')


More information about the gmp-commit mailing list