[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Oct 24 16:15:35 CEST 2013
details: /var/hg/gmp/rev/4dd00926640f
changeset: 16071:4dd00926640f
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Oct 24 16:00:23 2013 +0200
description:
(fake_cpuid_table): Add Haswell.
details: /var/hg/gmp/rev/3daa3a7e1345
changeset: 16072:3daa3a7e1345
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Oct 24 16:07:05 2013 +0200
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/72f64126e566
changeset: 16073:72f64126e566
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Oct 24 16:09:12 2013 +0200
description:
Add larger c/l table.
details: /var/hg/gmp/rev/f88ca0884394
changeset: 16074:f88ca0884394
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Oct 24 16:09:37 2013 +0200
description:
Add larger c/l table.
details: /var/hg/gmp/rev/7ff4cca045e3
changeset: 16075:7ff4cca045e3
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Oct 24 16:15:30 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 4 ++++
mpn/generic/div_qr_1n_pi1.c | 4 ++--
mpn/x86/fat/fat.c | 1 +
mpn/x86_64/div_qr_1n_pi1.asm | 31 +++++++++++++++++++++----------
mpn/x86_64/k8/div_qr_1n_pi1.asm | 30 +++++++++++++++++++++++-------
5 files changed, 51 insertions(+), 19 deletions(-)
diffs (200 lines):
diff -r 971c85d53a3e -r 7ff4cca045e3 ChangeLog
--- a/ChangeLog Wed Oct 23 23:20:24 2013 +0200
+++ b/ChangeLog Thu Oct 24 16:15:30 2013 +0200
@@ -1,3 +1,7 @@
+2013-10-24 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/fat/fat.c (fake_cpuid_table): Add Haswell.
+
2013-10-23 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/x86_64-defs.m4 (oplist): New define, data from `regnum'.
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/generic/div_qr_1n_pi1.c
--- a/mpn/generic/div_qr_1n_pi1.c Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/generic/div_qr_1n_pi1.c Thu Oct 24 16:15:30 2013 +0200
@@ -194,7 +194,7 @@
udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
return u1;
}
-
+
/* FIXME: Could be precomputed */
B2 = -d*dinv;
@@ -251,7 +251,7 @@
t = (u1 >= d);
q1 += t;
u1 -= (-t) & d;
-
+
udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
add_ssaaaa (q1, q0, q1, q0, 0, t);
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/x86/fat/fat.c
--- a/mpn/x86/fat/fat.c Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/x86/fat/fat.c Thu Oct 24 16:15:30 2013 +0200
@@ -68,6 +68,7 @@
{ "coreinhm", "GenuineIntel", MAKE_FMS (6, 0x1a) },
{ "coreiwsm", "GenuineIntel", MAKE_FMS (6, 0x25) },
{ "coreisbr", "GenuineIntel", MAKE_FMS (6, 0x2a) },
+ { "coreihwl", "GenuineIntel", MAKE_FMS (6, 0x3c) },
{ "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) },
{ "k5", "AuthenticAMD", MAKE_FMS (5, 0) },
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/x86_64/div_qr_1n_pi1.asm
--- a/mpn/x86_64/div_qr_1n_pi1.asm Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/x86_64/div_qr_1n_pi1.asm Thu Oct 24 16:15:30 2013 +0200
@@ -25,12 +25,23 @@
C c/l
-C AMD K8 13
+C AMD K8,K9 13
C AMD K10 13
-C Intel core2 19
-C Intel sbr 14.5-15
-C Intel nehalem 18
+C AMD bull 16.5
+C AMD pile 15
+C AMD steam ?
+C AMD bobcat 16
+C AMD jaguar ?
+C Intel P4 47 poor
+C Intel core 19.25
+C Intel NHM 18
+C Intel SBR 15 poor
+C Intel IBR 13
+C Intel HWL 11.7
+C Intel BWL ?
+C Intel atom 52 very poor
C VIA nano 19
+
C INPUT Parameters
define(`QP', `%rdi')
@@ -101,7 +112,7 @@
neg B2
mov B2, B2md
sub D, B2md
-
+
C D not needed until final reduction
push D
mov UN_INPUT, UN C Clobbers D
@@ -122,7 +133,7 @@
dec UN
mov U1, %rax
jz L(final)
-
+
ALIGN(16)
C Loop is 28 instructions, 30 decoder slots, should run in 10 cycles.
@@ -134,7 +145,7 @@
mov U2, Q2
and U2, Q1
neg Q2
- mul DINV
+ mul DINV
add %rdx, Q1
adc $0, Q2
add Q0, Q1
@@ -152,7 +163,7 @@
C {QP+UN, ...} <-- {QP+UN, ...} + {Q2, Q1} + U1 + c
adc U1, Q1
mov -8(UP, UN, 8), U0
- adc Q2,8(QP, UN, 8)
+ adc Q2, 8(QP, UN, 8)
jc L(q_incr)
L(q_incr_done):
add %rax, U0
@@ -161,7 +172,7 @@
mov Q1, (QP, UN, 8)
sbb U2, U2
dec UN
- mov %rax, U1
+ mov %rax, U1
jnz L(loop)
L(final):
@@ -192,7 +203,7 @@
jc L(div_done)
sub D, %rax
add $1, T
-L(div_done):
+L(div_done):
add T, Q0
mov Q0, (QP)
adc Q1, 8(QP)
diff -r 971c85d53a3e -r 7ff4cca045e3 mpn/x86_64/k8/div_qr_1n_pi1.asm
--- a/mpn/x86_64/k8/div_qr_1n_pi1.asm Wed Oct 23 23:20:24 2013 +0200
+++ b/mpn/x86_64/k8/div_qr_1n_pi1.asm Thu Oct 24 16:15:30 2013 +0200
@@ -25,7 +25,23 @@
C c/l
-C AMD K8,K10 11
+C AMD K8,K9 11
+C AMD K10 11
+C AMD bull 16
+C AMD pile 14.25
+C AMD steam ?
+C AMD bobcat 16
+C AMD jaguar ?
+C Intel P4 47.5 poor
+C Intel core 28.5 very poor
+C Intel NHM 29 very poor
+C Intel SBR 16 poor
+C Intel IBR 13.5
+C Intel HWL 12
+C Intel BWL ?
+C Intel atom 53 very poor
+C VIA nano 19
+
C INPUT Parameters
define(`QP', `%rdi')
@@ -96,7 +112,7 @@
neg B2
mov B2, B2md
sub D, B2md
-
+
C D not needed until final reduction
push D
mov UN_INPUT, UN C Clobbers D
@@ -130,11 +146,11 @@
cmovc DINV, Q1
mov U2, Q2
neg Q2
- mul DINV
+ mul DINV
add %rdx, Q1
adc $0, Q2
add Q0, Q1
- mov %rax, Q0
+ mov %rax, Q0
mov B2, %rax
lea (B2md, U0), T
adc $0, Q2
@@ -156,9 +172,9 @@
adc %rdx, %rax
mov Q1, (QP, UN, 8)
mov $0, R32(Q1)
- sbb U2, U2
+ sbb U2, U2
dec UN
- mov %rax, U1
+ mov %rax, U1
jnz L(loop)
L(final):
@@ -189,7 +205,7 @@
jc L(div_done)
sub D, %rax
add $1, T
-L(div_done):
+L(div_done):
add T, Q0
mov Q0, (QP)
adc Q1, 8(QP)
More information about the gmp-commit mailing list