[Gmp-commit] /var/hg/gmp: Slightly tweak, then update c/l tables.

mercurial at gmplib.org mercurial at gmplib.org
Sat May 20 14:05:14 UTC 2017


details:   /var/hg/gmp/rev/cd7b647bdabe
changeset: 17393:cd7b647bdabe
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat May 20 16:03:50 2017 +0200
description:
Slightly tweak, then update c/l tables.

diffstat:

 mpn/x86_64/k10/hamdist.asm        |  42 ++++++++++++++++++++++----------------
 mpn/x86_64/silvermont/hamdist.asm |  36 ++++++++++++++++----------------
 2 files changed, 42 insertions(+), 36 deletions(-)

diffs (122 lines):

diff -r c4df4efff4eb -r cd7b647bdabe mpn/x86_64/k10/hamdist.asm
--- a/mpn/x86_64/k10/hamdist.asm	Tue May 16 04:04:40 2017 +0200
+++ b/mpn/x86_64/k10/hamdist.asm	Sat May 20 16:03:50 2017 +0200
@@ -1,6 +1,6 @@
 dnl  AMD64 mpn_hamdist -- hamming distance.
 
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2010-2012, 2017 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -30,20 +30,26 @@
 
 include(`../config.m4')
 
-C		    cycles/limb
-C AMD K8,K9		 n/a
-C AMD K10		 2
-C Intel P4		 n/a
-C Intel core2		 n/a
-C Intel corei		 2.05
-C Intel atom		 n/a
-C VIA nano		 n/a
-
-C This is very straightforward 2-way unrolled code.
-
-C TODO
-C  * Write something less basic.  It should not be hard to reach 1.5 c/l with
-C    4-way unrolling.
+C	     cycles/limb
+C AMD K8,K9	 -
+C AMD K10	 2.0		=
+C AMD bd1	~4.4		=
+C AMD bd2	~4.4		=
+C AMD bd3
+C AMD bd4
+C AMD bobcat	 7.55		=
+C AMD jaguar	 2.52		-
+C Intel P4	 -
+C Intel core2	 -
+C Intel NHM	 2.03		+
+C Intel SBR	 2.01		+
+C Intel IBR	 1.96		+
+C Intel HWL	 1.64		=
+C Intel BWL	 1.56		-
+C Intel SKL	 1.52		=
+C Intel atom
+C Intel SLM	 3.0		-
+C VIA nano
 
 define(`ap',		`%rdi')
 define(`bp',		`%rsi')
@@ -64,12 +70,12 @@
 	lea	(bp,n,8), bp			C point at B operand end
 	neg	n
 
-	bt	$0, R32(n)
-	jnc	L(2)
+	test	$1, R8(n)
+	jz	L(2)
 
 L(1):	.byte	0xf3,0x49,0x0f,0xb8,0xc0	C popcnt %r8, %rax
 	xor	R32(%r10), R32(%r10)
-	add	$1, n
+	inc	n
 	js	L(top)
 	FUNC_EXIT()
 	ret
diff -r c4df4efff4eb -r cd7b647bdabe mpn/x86_64/silvermont/hamdist.asm
--- a/mpn/x86_64/silvermont/hamdist.asm	Tue May 16 04:04:40 2017 +0200
+++ b/mpn/x86_64/silvermont/hamdist.asm	Sat May 20 16:03:50 2017 +0200
@@ -31,24 +31,24 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD excavator
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core2
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel SKL
+C AMD K8,K9	 -
+C AMD K10	 2.0		=
+C AMD bd1	~4.4		=
+C AMD bd2	~4.4		=
+C AMD bd3
+C AMD bd4
+C AMD bobcat	 7.55		=
+C AMD jaguar	 2.32		+
+C Intel P4	 -
+C Intel core2	 -
+C Intel NHM	 2.16		-
+C Intel SBR	 2.30		-
+C Intel IBR	 2.22		-
+C Intel HWL	 1.64		=
+C Intel BWL	 1.51		+
+C Intel SKL	 1.52		=
 C Intel atom
-C Intel SLM	 2.7
+C Intel SLM	 2.7		+
 C VIA nano
 
 define(`ap',		`%rdi')
@@ -75,7 +75,7 @@
 
 L(1):	.byte	0xf3,0x49,0x0f,0xb8,0xc0	C popcnt %r8, %rax
 	xor	R32(%r10), R32(%r10)
-	add	$1, n
+	inc	n
 	js	L(top)
 	FUNC_EXIT()
 	ret


More information about the gmp-commit mailing list