[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Aug 21 21:28:04 UTC 2019


details:   /var/hg/gmp/rev/dfdcf95fc1ac
changeset: 17832:dfdcf95fc1ac
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Aug 21 23:03:14 2019 +0200
description:
Provide several x86-64 gcd_22 files.

details:   /var/hg/gmp/rev/195b9608e5e4
changeset: 17833:195b9608e5e4
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Wed Aug 21 23:03:40 2019 +0200
description:
ChangeLog

diffstat:

 ChangeLog                      |   42 +++++++++-
 mpn/x86_64/atom/gcd_22.asm     |   37 ++++++++
 mpn/x86_64/bd4/gcd_22.asm      |   37 ++++++++
 mpn/x86_64/bt1/gcd_22.asm      |  171 +++++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/core2/gcd_22.asm    |  141 +++++++++++++++++++++++++++++++++
 mpn/x86_64/coreihwl/gcd_22.asm |  145 ++++++++++++++++++++++++++++++++++
 mpn/x86_64/k10/gcd_22.asm      |  146 +++++++++++++++++++++++++++++++++++
 mpn/x86_64/k8/gcd_22.asm       |  169 ++++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/zen/gcd_22.asm      |   37 ++++++++
 9 files changed, 924 insertions(+), 1 deletions(-)

diffs (truncated from 985 to 300 lines):

diff -r fdc9d35fb5c8 -r 195b9608e5e4 ChangeLog
--- a/ChangeLog	Thu Aug 08 23:48:03 2019 +1000
+++ b/ChangeLog	Wed Aug 21 23:03:40 2019 +0200
@@ -1,3 +1,38 @@
+2019-08-21  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/core2/gcd_22.asm: New file.
+	* mpn/x86_64/k8/gcd_22.asm: New file.
+	* mpn/x86_64/k10/gcd_22.asm: New file.
+	* mpn/x86_64/coreihwl/gcd_22.asm: New file.
+	* mpn/x86_64/bt1/gcd_22.asm: New file.
+	* mpn/x86_64/bd4/gcd_22.asm: New grabber.
+	* mpn/x86_64/zen/gcd_22.asm: New grabber.
+	* mpn/x86_64/atom/gcd_22.asm: New grabber.
+
+2019-08-19  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Check for ELFv1 ABI on PowerPC.
+
+2019-08-18  Torbjörn Granlund  <tg at gmplib.org>
+
+	* longlong.h (arm32 sub_ddmmss): Define separately for thumb and
+	non-thumb as rsc instruction is missing for thumb.
+
+	* mpn/powerpc64/mode64/p7/gcd_22.asm: New file.
+	* mpn/powerpc64/mode64/p9/gcd_22.asm: New file.
+
+2019-08-17  Torbjörn Granlund  <tg at gmplib.org>
+
+	* demos/expr/t-expr.c: #include gmp-impl.h as it includes tests.h.
+
+	* mpn/asm-defs.m4: Add gcd_22.
+
+	* tests/refmpn.c (refmpn_gcd_22): New function.
+	* tests/tests.h: Declare it.
+
+	* tests/t-constants.c: #include gmp-impl.h.
+	* tests/mpf/t-get_d.c: Likewise.
+
 2019-08-17  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/gcd_22.c (mpn_gcd_22): New implementation with less
@@ -26,6 +61,10 @@
 	* tests/mpn/Makefile.am (check_PROGRAMS): Add t-gcd_22.
 	* tests/refmpz.c (refmpz_gcd): New function (plain binary gcd).
 
+2019-08-15  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/zen/gcd_11.asm: Use bd2 instead of bd4 code.
+
 2019-08-13  Torbjörn Granlund  <tg at gmplib.org>
 
 	* mpn/x86_64: Add more gcd_11 variants of x86_64 gcd_11.asm and
@@ -70,6 +109,7 @@
 	* mpn/x86_64/bd2/gcd_11.asm: Likewise.
 	* mpn/x86_64/core2/gcd_11.asm: Likewise.
 	* mpn/x86_64/gcd_11.asm: Likewise.
+	* mpn/asm-defs.m4: Add gcd_11.
 
 2019-08-06  Niels Möller  <nisse at lysator.liu.se>
 
@@ -910,7 +950,7 @@
 2017-02-24  Torbjörn Granlund  <tg at gmplib.org>
 
 	* longlong.h (arm32/arm64 add_ssaaaa): Use "subs" for some immediates.
-	* longlong.h (arm32/arm64 sub_ssaaaa): Use "adds" for some immediates.
+	* longlong.h (arm32/arm64 sub_ddmmss): Use "adds" for some immediates.
 
 	* mpn/arm64/copyi.asm: Avoid branching on flags.
 	* mpn/arm64/copyd.asm: Likewise.
diff -r fdc9d35fb5c8 -r 195b9608e5e4 mpn/x86_64/atom/gcd_22.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/atom/gcd_22.asm	Wed Aug 21 23:03:40 2019 +0200
@@ -0,0 +1,37 @@
+dnl  AMD64 mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_22)
+include_mpn(`x86_64/k8/gcd_22.asm')
diff -r fdc9d35fb5c8 -r 195b9608e5e4 mpn/x86_64/bd4/gcd_22.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bd4/gcd_22.asm	Wed Aug 21 23:03:40 2019 +0200
@@ -0,0 +1,37 @@
+dnl  AMD64 mpn_gcd_22.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_22)
+include_mpn(`x86_64/coreihwl/gcd_22.asm')
diff -r fdc9d35fb5c8 -r 195b9608e5e4 mpn/x86_64/bt1/gcd_22.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bt1/gcd_22.asm	Wed Aug 21 23:03:40 2019 +0200
@@ -0,0 +1,171 @@
+dnl  AMD64 mpn_gcd_22.  Assumes useless bsf, useless shrd, tzcnt, no shlx.
+
+dnl  Copyright 2019 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/bit
+C AMD K8,K9	 ?
+C AMD K10	 ?
+C AMD bd1	 9.83
+C AMD bd2	 7.81
+C AMD bd3	 ?
+C AMD bd4	 ?
+C AMD bt1	 9.0
+C AMD bt2	 9.2
+C AMD zn1	 ?
+C AMD zn2	 ?
+C Intel P4	 ?
+C Intel CNR	 ?
+C Intel PNR	 ?
+C Intel NHM	 ?
+C Intel WSM	 ?
+C Intel SBR	 ?
+C Intel IBR	 ?
+C Intel HWL	 ?
+C Intel BWL	 ?
+C Intel SKL	 ?
+C Intel atom	 ?
+C Intel SLM	 ?
+C Intel GLM	 ?
+C Intel GLM+	 ?
+C VIA nano	 ?
+
+
+C ctz_table[n] is the number of trailing zero bits in n, or MAXSHIFT if n==0.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+PROTECT(`ctz_table')
+
+DEF_OBJECT(ctz_table,64)
+	.byte	MAXSHIFT
+forloop(i,1,MASK,
+`	.byte	m4_count_trailing_zeros(i)
+')
+END_OBJECT(ctz_table)
+
+define(`u1',    `%rdi')
+define(`u0',    `%rsi')
+define(`v1',    `%rdx')
+define(`v0_param', `%rcx')
+
+define(`v0',    `%rax')
+define(`cnt',   `%rcx')
+
+define(`s0',    `%r8')
+define(`s1',    `%r9')
+define(`t0',    `%r10')
+define(`t1',    `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_22)
+	FUNC_ENTRY(2)
+	push	%r12
+	mov	v0_param, v0
+
+	LEA(	ctz_table, %r12)
+
+	ALIGN(16)
+L(top):	mov	v0, t0
+	sub	u0, t0
+	jz	L(lowz)		C	jump when low limb result = 0
+	mov	v1, t1
+	sbb	u1, t1
+
+	mov	u0, s0
+	mov	u1, s1
+
+	sub	v0, u0
+	sbb	v1, u1
+
+L(bck):	cmovc	t0, u0		C u = |u - v|
+	cmovnc	u1, t1		C u = |u - v|
+	cmovc	s0, v0		C v = min(u,v)
+	cmovc	s1, v1		C v = min(u,v)
+
+	and	$MASK, R32(t0)
+	movzbl	(%r12,t0), R32(%rcx)
+	jz	L(count_better)
+C Rightshift (t1,,u0) into (u1,,u0)
+L(shr):	shr	R8(cnt), u0
+	mov	t1, u1
+	shr	R8(cnt), u1
+	neg	cnt
+	shl	R8(cnt), t1
+	or	t1, u0
+
+	test	u1, u1
+	jnz	L(top)
+	test	v1, v1
+	jnz	L(top)
+
+L(gcd_11):
+	mov	v0, %rdi
+	mov	u0, %rsi
+	xor	R32(%rdx), R32(%rdx)
+	pop	%r12
+	jmp	mpn_gcd_11
+
+L(count_better):
+	rep;bsf	u0, cnt		C tzcnt!


More information about the gmp-commit mailing list