[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Oct 24 19:20:32 CEST 2013
details: /var/hg/gmp/rev/bea4c0f98ad2
changeset: 16076:bea4c0f98ad2
user: Niels Möller <nisse at lysator.liu.se>
date: Thu Oct 24 19:04:42 2013 +0200
description:
Bugfixes to C mpn_div_qr_1n_pi1.
details: /var/hg/gmp/rev/69a493d7a8b5
changeset: 16077:69a493d7a8b5
user: Niels Möller <nisse at lysator.liu.se>
date: Thu Oct 24 19:20:19 2013 +0200
description:
Bugfixes to x86_64 mpn_div_qr_1n_pi1.
diffstat:
ChangeLog | 8 ++++++++
mpn/generic/div_qr_1n_pi1.c | 7 ++++---
mpn/x86_64/div_qr_1n_pi1.asm | 6 ++++--
3 files changed, 16 insertions(+), 5 deletions(-)
diffs (83 lines):
diff -r 7ff4cca045e3 -r 69a493d7a8b5 ChangeLog
--- a/ChangeLog Thu Oct 24 16:15:30 2013 +0200
+++ b/ChangeLog Thu Oct 24 19:20:19 2013 +0200
@@ -1,3 +1,11 @@
+2013-10-24 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/x86_64/div_qr_1n_pi1.asm: Bugfixes, for case n == 1 and
+ in-place operation.
+
+ * mpn/generic/div_qr_1n_pi1.c (mpn_div_qr_1n_pi1): Bug fixes,
+ off-by-one MPN_INCR_U, and support for in-place operation.
+
2013-10-24 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86/fat/fat.c (fake_cpuid_table): Add Haswell.
diff -r 7ff4cca045e3 -r 69a493d7a8b5 mpn/generic/div_qr_1n_pi1.c
--- a/mpn/generic/div_qr_1n_pi1.c Thu Oct 24 16:15:30 2013 +0200
+++ b/mpn/generic/div_qr_1n_pi1.c Thu Oct 24 19:20:19 2013 +0200
@@ -202,9 +202,10 @@
umul_ppmm (p1, p0, B2, u1);
q1 += u1;
ASSERT (q1 >= u1);
+ u0 = up[n-1]; /* Early read, to allow qp == up. */
qp[n-1] = q1;
- add_mssaaaa (u2, u1, u0, up[n-1], up[n-2], p1, p0);
+ add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
/* FIXME: Keep q1 in a variable between iterations, to reduce number
of memory accesses. */
@@ -240,7 +241,7 @@
/* Final q update */
add_ssaaaa (q2, q1, q2, q1, 0, cy);
qp[j+1] = q1;
- MPN_INCR_U (qp+j+2, n-j-3, q2);
+ MPN_INCR_U (qp+j+2, n-j-2, q2);
add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
}
@@ -255,7 +256,7 @@
udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
add_ssaaaa (q1, q0, q1, q0, 0, t);
- MPN_INCR_U (qp + 1, n-2, q1);
+ MPN_INCR_U (qp+1, n-1, q1);
qp[0] = q0;
return u0;
diff -r 7ff4cca045e3 -r 69a493d7a8b5 mpn/x86_64/div_qr_1n_pi1.asm
--- a/mpn/x86_64/div_qr_1n_pi1.asm Thu Oct 24 16:15:30 2013 +0200
+++ b/mpn/x86_64/div_qr_1n_pi1.asm Thu Oct 24 19:20:19 2013 +0200
@@ -42,7 +42,7 @@
C Intel atom 52 very poor
C VIA nano 19
-
+
C INPUT Parameters
define(`QP', `%rdi')
define(`UP', `%rsi')
@@ -84,6 +84,7 @@
mov (UP), U0
add U0, %rax
adc T, %rdx
+ mov %rdx, T
imul D, %rdx
sub %rdx, U0
cmp U0, %rax
@@ -121,12 +122,13 @@
mul U1
mov %rax, Q0
add U1, %rdx
- mov %rdx, (QP, UN, 8)
+ mov %rdx, T
mov B2, %rax
mul U1
mov -8(UP, UN, 8), U0
mov (UP, UN, 8), U1
+ mov T, (QP, UN, 8)
add %rax, U0
adc %rdx, U1
sbb U2, U2
More information about the gmp-commit
mailing list