[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Oct 24 19:20:32 CEST 2013


details:   /var/hg/gmp/rev/bea4c0f98ad2
changeset: 16076:bea4c0f98ad2
user:      Niels Möller <nisse at lysator.liu.se>
date:      Thu Oct 24 19:04:42 2013 +0200
description:
Bugfixes to C mpn_div_qr_1n_pi1.

details:   /var/hg/gmp/rev/69a493d7a8b5
changeset: 16077:69a493d7a8b5
user:      Niels Möller <nisse at lysator.liu.se>
date:      Thu Oct 24 19:20:19 2013 +0200
description:
Bugfixes to x86_64 mpn_div_qr_1n_pi1.

diffstat:

 ChangeLog                    |  8 ++++++++
 mpn/generic/div_qr_1n_pi1.c  |  7 ++++---
 mpn/x86_64/div_qr_1n_pi1.asm |  6 ++++--
 3 files changed, 16 insertions(+), 5 deletions(-)

diffs (83 lines):

diff -r 7ff4cca045e3 -r 69a493d7a8b5 ChangeLog
--- a/ChangeLog	Thu Oct 24 16:15:30 2013 +0200
+++ b/ChangeLog	Thu Oct 24 19:20:19 2013 +0200
@@ -1,3 +1,11 @@
+2013-10-24  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/x86_64/div_qr_1n_pi1.asm: Bugfixes, for case n == 1 and
+	in-place operation.
+
+	* mpn/generic/div_qr_1n_pi1.c (mpn_div_qr_1n_pi1): Bug fixes,
+	off-by-one MPN_INCR_U, and support for in-place operation.
+
 2013-10-24  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86/fat/fat.c (fake_cpuid_table): Add Haswell.
diff -r 7ff4cca045e3 -r 69a493d7a8b5 mpn/generic/div_qr_1n_pi1.c
--- a/mpn/generic/div_qr_1n_pi1.c	Thu Oct 24 16:15:30 2013 +0200
+++ b/mpn/generic/div_qr_1n_pi1.c	Thu Oct 24 19:20:19 2013 +0200
@@ -202,9 +202,10 @@
   umul_ppmm (p1, p0, B2, u1);
   q1 += u1;
   ASSERT (q1 >= u1);
+  u0 = up[n-1];	/* Early read, to allow qp == up. */
   qp[n-1] = q1;
 
-  add_mssaaaa (u2, u1, u0, up[n-1], up[n-2], p1, p0);
+  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
 
   /* FIXME: Keep q1 in a variable between iterations, to reduce number
      of memory accesses. */
@@ -240,7 +241,7 @@
       /* Final q update */
       add_ssaaaa (q2, q1, q2, q1, 0, cy);
       qp[j+1] = q1;
-      MPN_INCR_U (qp+j+2, n-j-3, q2);
+      MPN_INCR_U (qp+j+2, n-j-2, q2);
 
       add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
     }
@@ -255,7 +256,7 @@
   udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
   add_ssaaaa (q1, q0, q1, q0, 0, t);
 
-  MPN_INCR_U (qp + 1, n-2, q1);
+  MPN_INCR_U (qp+1, n-1, q1);
 
   qp[0] = q0;
   return u0;
diff -r 7ff4cca045e3 -r 69a493d7a8b5 mpn/x86_64/div_qr_1n_pi1.asm
--- a/mpn/x86_64/div_qr_1n_pi1.asm	Thu Oct 24 16:15:30 2013 +0200
+++ b/mpn/x86_64/div_qr_1n_pi1.asm	Thu Oct 24 19:20:19 2013 +0200
@@ -42,7 +42,7 @@
 C Intel atom	52	very poor
 C VIA nano	19
 
-	
+
 C INPUT Parameters
 define(`QP', `%rdi')
 define(`UP', `%rsi')
@@ -84,6 +84,7 @@
 	mov	(UP), U0
 	add	U0, %rax
 	adc	T, %rdx
+	mov	%rdx, T
 	imul	D, %rdx
 	sub	%rdx, U0
 	cmp	U0, %rax
@@ -121,12 +122,13 @@
 	mul	U1
 	mov	%rax, Q0
 	add	U1, %rdx
-	mov	%rdx, (QP, UN, 8)
+	mov	%rdx, T
 
 	mov	B2, %rax
 	mul	U1
 	mov	-8(UP, UN, 8), U0
 	mov	(UP, UN, 8), U1
+	mov	T, (QP, UN, 8)
 	add	%rax, U0
 	adc	%rdx, U1
 	sbb	U2, U2


More information about the gmp-commit mailing list