[Gmp-commit] /var/hg/gmp: 5 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Oct 24 16:15:35 CEST 2013


details:   /var/hg/gmp/rev/4dd00926640f
changeset: 16071:4dd00926640f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 16:00:23 2013 +0200
description:
mpn/x86/fat/fat.c (fake_cpuid_table): Add Haswell.

details:   /var/hg/gmp/rev/3daa3a7e1345
changeset: 16072:3daa3a7e1345
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 16:07:05 2013 +0200
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/72f64126e566
changeset: 16073:72f64126e566
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 16:09:12 2013 +0200
description:
Add larger c/l table.

details:   /var/hg/gmp/rev/f88ca0884394
changeset: 16074:f88ca0884394
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 16:09:37 2013 +0200
description:
Add larger c/l table.

details:   /var/hg/gmp/rev/7ff4cca045e3
changeset: 16075:7ff4cca045e3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Oct 24 16:15:30 2013 +0200
description:
Update ChangeLog.

diffstat:

 ChangeLog                       |   4 ++++
 mpn/generic/div_qr_1n_pi1.c     |   4 ++--
 mpn/x86/fat/fat.c               |   1 +
 mpn/x86_64/div_qr_1n_pi1.asm    |  31 +++++++++++++++++++++----------
 mpn/x86_64/k8/div_qr_1n_pi1.asm |  30 +++++++++++++++++++++++-------
 5 files changed, 51 insertions(+), 19 deletions(-)

diffs (200 lines):

diff -r 971c85d53a3e -r 7ff4cca045e3 ChangeLog
--- a/ChangeLog	Wed Oct 23 23:20:24 2013 +0200
+++ b/ChangeLog	Thu Oct 24 16:15:30 2013 +0200
@@ -1,3 +1,7 @@
+2013-10-24  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86/fat/fat.c (fake_cpuid_table): Add Haswell.
+
 2013-10-23  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/x86_64-defs.m4 (oplist): New define, data from `regnum'.
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/generic/div_qr_1n_pi1.c
--- a/mpn/generic/div_qr_1n_pi1.c	Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/generic/div_qr_1n_pi1.c	Thu Oct 24 16:15:30 2013 +0200
@@ -194,7 +194,7 @@
       udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
       return u1;
     }
-  
+
   /* FIXME: Could be precomputed */
   B2 = -d*dinv;
 
@@ -251,7 +251,7 @@
   t = (u1 >= d);
   q1 += t;
   u1 -= (-t) & d;
-    
+
   udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
   add_ssaaaa (q1, q0, q1, q0, 0, t);
 
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/x86/fat/fat.c
--- a/mpn/x86/fat/fat.c	Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/x86/fat/fat.c	Thu Oct 24 16:15:30 2013 +0200
@@ -68,6 +68,7 @@
   { "coreinhm",   "GenuineIntel", MAKE_FMS (6, 0x1a) },
   { "coreiwsm",   "GenuineIntel", MAKE_FMS (6, 0x25) },
   { "coreisbr",   "GenuineIntel", MAKE_FMS (6, 0x2a) },
+  { "coreihwl",   "GenuineIntel", MAKE_FMS (6, 0x3c) },
   { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
 
   { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/x86_64/div_qr_1n_pi1.asm
--- a/mpn/x86_64/div_qr_1n_pi1.asm	Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/x86_64/div_qr_1n_pi1.asm	Thu Oct 24 16:15:30 2013 +0200
@@ -25,12 +25,23 @@
 
 
 C		c/l
-C AMD K8	13
+C AMD K8,K9	13
 C AMD K10	13
-C Intel core2	19
-C Intel sbr	14.5-15
-C Intel nehalem	18
+C AMD bull	16.5
+C AMD pile	15
+C AMD steam	 ?
+C AMD bobcat	16
+C AMD jaguar	 ?
+C Intel P4	47	poor
+C Intel core	19.25
+C Intel NHM	18
+C Intel SBR	15	poor
+C Intel IBR	13
+C Intel HWL	11.7
+C Intel BWL	 ?
+C Intel atom	52	very poor
 C VIA nano	19
+
 	
 C INPUT Parameters
 define(`QP', `%rdi')
@@ -101,7 +112,7 @@
 	neg	B2
 	mov	B2, B2md
 	sub	D, B2md
-	
+
 	C D not needed until final reduction
 	push	D
 	mov	UN_INPUT, UN	C Clobbers D
@@ -122,7 +133,7 @@
 	dec	UN
 	mov	U1, %rax
 	jz	L(final)
-	
+
 	ALIGN(16)
 
 	C Loop is 28 instructions, 30 decoder slots, should run in 10 cycles.
@@ -134,7 +145,7 @@
 	mov	U2, Q2
 	and	U2, Q1
 	neg	Q2
-	mul	DINV 
+	mul	DINV
 	add	%rdx, Q1
 	adc	$0, Q2
 	add	Q0, Q1
@@ -152,7 +163,7 @@
 	C {QP+UN, ...} <-- {QP+UN, ...} + {Q2, Q1} + U1 + c
 	adc	U1, Q1
 	mov	-8(UP, UN, 8), U0
-	adc	Q2,8(QP, UN, 8)
+	adc	Q2, 8(QP, UN, 8)
 	jc	L(q_incr)
 L(q_incr_done):
 	add	%rax, U0
@@ -161,7 +172,7 @@
 	mov	Q1, (QP, UN, 8)
 	sbb 	U2, U2
 	dec	UN
-	mov	%rax, U1 
+	mov	%rax, U1
 	jnz	L(loop)
 
 L(final):
@@ -192,7 +203,7 @@
 	jc	L(div_done)
 	sub	D, %rax
 	add	$1, T
-L(div_done):	
+L(div_done):
 	add	T, Q0
 	mov	Q0, (QP)
 	adc	Q1, 8(QP)
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/x86_64/k8/div_qr_1n_pi1.asm
--- a/mpn/x86_64/k8/div_qr_1n_pi1.asm	Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/x86_64/k8/div_qr_1n_pi1.asm	Thu Oct 24 16:15:30 2013 +0200
@@ -25,7 +25,23 @@
 
 
 C		c/l
-C AMD K8,K10	11
+C AMD K8,K9	11
+C AMD K10	11
+C AMD bull	16
+C AMD pile	14.25
+C AMD steam	 ?
+C AMD bobcat	16
+C AMD jaguar	 ?
+C Intel P4	47.5	poor
+C Intel core	28.5	very poor
+C Intel NHM	29	very poor
+C Intel SBR	16	poor
+C Intel IBR	13.5
+C Intel HWL	12
+C Intel BWL	 ?
+C Intel atom	53	very poor
+C VIA nano	19
+
 
 C INPUT Parameters
 define(`QP', `%rdi')
@@ -96,7 +112,7 @@
 	neg	B2
 	mov	B2, B2md
 	sub	D, B2md
-	
+
 	C D not needed until final reduction
 	push	D
 	mov	UN_INPUT, UN	C Clobbers D
@@ -130,11 +146,11 @@
 	cmovc	DINV, Q1
 	mov	U2, Q2
 	neg	Q2
-	mul	DINV 
+	mul	DINV
 	add	%rdx, Q1
 	adc	$0, Q2
 	add	Q0, Q1
-	mov 	%rax, Q0
+	mov	%rax, Q0
 	mov	B2, %rax
 	lea	(B2md, U0), T
 	adc	$0, Q2
@@ -156,9 +172,9 @@
 	adc	%rdx, %rax
 	mov	Q1, (QP, UN, 8)
 	mov	$0, R32(Q1)
-	sbb 	U2, U2
+	sbb	U2, U2
 	dec	UN
-	mov	%rax, U1 
+	mov	%rax, U1
 	jnz	L(loop)
 
 L(final):
@@ -189,7 +205,7 @@
 	jc	L(div_done)
 	sub	D, %rax
 	add	$1, T
-L(div_done):	
+L(div_done):
 	add	T, Q0
 	mov	Q0, (QP)
 	adc	Q1, 8(QP)


More information about the gmp-commit mailing list