[Gmp-commit] /var/hg/gmp: 4 new changesets

Mon Feb 28 16:36:36 CET 2011

details:   /var/hg/gmp/rev/4d0ad229b9c9
changeset: 13930:4d0ad229b9c9
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Feb 28 14:07:42 2011 +0100
description:
Remove file no longer used.

details:   /var/hg/gmp/rev/2a0487ea817f
changeset: 13931:2a0487ea817f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Feb 28 16:35:48 2011 +0100
description:
Shorten software pipeline.

details:   /var/hg/gmp/rev/93333ff73967
changeset: 13932:93333ff73967
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Feb 28 16:35:52 2011 +0100
description:
*** empty log message ***

details:   /var/hg/gmp/rev/ab657cfe1a12
changeset: 13933:ab657cfe1a12
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Feb 28 16:36:33 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                       |   14 ++++
 gmp-impl.h                      |   64 ++++++++++++---------
 mpn/x86/atom/mul_basecase.asm   |   23 --------
 mpn/x86/atom/sse2/aorsmul_1.asm |  114 ++++++++++++++++++---------------------
 tests/mpn/t-divrem_1.c          |    5 +
 5 files changed, 108 insertions(+), 112 deletions(-)

diffs (truncated from 313 to 300 lines):

diff -r be51c74c1ad9 -r ab657cfe1a12 ChangeLog

--- a/ChangeLog	Mon Feb 28 13:37:39 2011 +0100
+++ b/ChangeLog	Mon Feb 28 16:36:33 2011 +0100
@@ -1,5 +1,19 @@
+2011-02-28  Niels Möller  <nisse at lysator.liu.se>
+
+	* gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
+	using masking logic. Further optimization of the nl == constant 0
+	case, similar to udiv_rnd_preinv.
+	(udiv_rnnd_preinv): Likewise.
+
+	* tests/mpn/t-divrem_1.c (check_data): Added testcase to exercise
+	the nl == constant 0 special case in udiv_qrnnd_preinv3.
+
 2011-02-28  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/x86/atom/sse2/aorsmul_1.asm: Shorten software pipeline.
+
+	* mpn/x86/atom/mul_basecase.asm: Remove file no longer used.
+
 	* mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
 	allocations until they are known to be needed.
 
diff -r be51c74c1ad9 -r ab657cfe1a12 gmp-impl.h
--- a/gmp-impl.h	Mon Feb 28 13:37:39 2011 +0100
+++ b/gmp-impl.h	Mon Feb 28 16:36:33 2011 +0100
@@ -2771,51 +2771,59 @@
 */
 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)				\
   do {									\
-    mp_limb_t _qh, _ql, _r;						\
+    mp_limb_t _qh, _ql, _r, _mask;					\
     umul_ppmm (_qh, _ql, (nh), (di));					\
     if (__builtin_constant_p (nl) && (nl) == 0)				\
-      _qh += (nh) + 1;							\
+      {									\
+	_qh += (nh) + 1;						\
+	_r = - _qh * (d);						\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_qh += _mask;							\
+	_r += _mask & (d);						\
+      }									\
     else								\
-      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));			\
-    _r = (nl) - _qh * (d);						\
-    if (_r > _ql)	/* both > and >= should be OK */		\
       {									\
-	_r += (d);							\
-	_qh--;								\
-      }									\
-    if (UNLIKELY (_r >= (d)))						\
-      {									\
-	_r -= (d);							\
-	_qh++;								\
+	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+	_r = (nl) - _qh * (d);						\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_qh += _mask;							\
+	_r += _mask & (d);						\
+	if (UNLIKELY (_r >= (d)))					\
+	  {								\
+	    _r -= (d);							\
+	    _qh++;							\
+	  }								\
       }									\
     (r) = _r;								\
     (q) = _qh;								\
   } while (0)
 
-/* Unlike udiv_qrnnd_preinv, works also for nh == d.
-
-   FIXME: The special case for nl = constant 0 could be simplified
-   further, like in udiv_rnd_preinv below. Note that with nl = 0, the
-   case _r >= d can't happen. Also applies to udiv_qrnnd_preinv
-   above.
-
-   FIXME: Use mask for adjustment? */
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+   udiv_qrnnd_preinv, works also for the case NH == D, where the
+   quotient doesn't quite fit in a single limb. */
 #define udiv_rnnd_preinv(r, nh, nl, d, di)				\
   do {									\
-    mp_limb_t _qh, _ql, _r;						\
+    mp_limb_t _qh, _ql, _r, _mask;					\
     umul_ppmm (_qh, _ql, (nh), (di));					\
     if (__builtin_constant_p (nl) && (nl) == 0)				\
-      _qh += (nh) + 1;							\
+      {									\
+	_r = ~(_qh + (nh)) * (d);					\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_r += _mask & (d);						\
+      }									\
     else								\
-      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));			\
-    _r = (nl) - _qh * (d);						\
-    if (_r > _ql)	/* both > and >= should be OK */		\
-      _r += (d);							\
-    if (UNLIKELY (_r >= (d)))						\
-      _r -= (d);							\
+      {									\
+	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+	_r = (nl) - _qh * (d);						\
+	_mask = -(_r > _ql);	/* both > and >= should be OK */	\
+	_r += _mask & (d);						\
+	if (UNLIKELY (_r >= (d)))					\
+	  _r -= (d);							\
+      }									\
     (r) = _r;								\
   } while (0)
 
+/* FIXME: Obsolete? Use udiv_rnnd_preinv(r, nh, 0, d, di) instead. */
 /* Compute r = nh*B mod d, where di is the inverse of d.  */
 #define udiv_rnd_preinv(r, nh, d, di)					\
   do {									\
diff -r be51c74c1ad9 -r ab657cfe1a12 mpn/x86/atom/mul_basecase.asm
--- a/mpn/x86/atom/mul_basecase.asm	Mon Feb 28 13:37:39 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-dnl  Intel Atom mpn_mul_basecase -- multiply two mpn numbers.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mul_basecase)
-include_mpn(`x86/p6/mul_basecase.asm')
diff -r be51c74c1ad9 -r ab657cfe1a12 mpn/x86/atom/sse2/aorsmul_1.asm
--- a/mpn/x86/atom/sse2/aorsmul_1.asm	Mon Feb 28 13:37:39 2011 +0100
+++ b/mpn/x86/atom/sse2/aorsmul_1.asm	Mon Feb 28 16:36:33 2011 +0100
@@ -67,100 +67,92 @@
 	mov	%eax, n
 	and	$1, %eax
 	jz	L(fi0or2)
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	shr	$2, n
+	jnc	L(fi1)
+
+L(fi3):	lea	-8(up), up
+	lea	-8(rp), rp
+	movd	12(up), %mm1
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	add	$1, n			C increment and clear carry
+	jmp	L(lo3)
+
+L(fi1):	movd	%mm0, %ebx
+	jz	L(wd1)
+	movd	4(up), %mm1
+	pmuludq	%mm7, %mm1
+	jmp	L(lo1)
+
+L(fi0or2):
 	movd	(up), %mm1
 	pmuludq	%mm7, %mm1
 	shr	$2, n
-	jnc	L(fi1)
-
-L(fi3):	lea	4(up), up
-	lea	-12(rp), rp
-	movd	%mm1, %ebx
-	add	$1, n			C increment and clear carry
-	movd	(up), %mm0
-	jmp	L(lo3)
-
-L(fi1):	lea	-4(rp), rp
-	movd	%mm1, %ebx
-	jz	L(wd1)
 	movd	4(up), %mm0
+	jc	L(fi2)
 	lea	-4(up), up
+	lea	-4(rp), rp
+	movd	%mm1, %eax
 	pmuludq	%mm7, %mm0
-	jmp	L(lo1)
-
-L(fi0or2):
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	shr	$2, n
-	movd	4(up), %mm1
-	jc	L(fi2)
-	lea	-8(up), up
-	lea	-8(rp), rp
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
 	jmp	L(lo0)
 
-L(fi2):	test	n, n			C clear carry
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	jnz	L(lo2)
-	jmp	L(wd2)
+L(fi2):	lea	4(up), up
+	add	$1, n			C increment and clear carry
+	movd	%mm1, %eax
+	lea	-12(rp), rp
+	jmp	L(lo2)
 
 C	ALIGN(16)			C alignment seems irrelevant
-L(top):	adc	$0, %edx
-	ADDSUB	%ebx, 12(rp)
-	movd	%mm0, %eax
+L(top):	movd	4(up), %mm1
+	adc	$0, %edx
+	ADDSUB	%eax, 12(rp)
+	movd	%mm0, %ebx
 	pmuludq	%mm7, %mm1
 	lea	16(rp), rp
-L(lo2):	psrlq	$32, %mm0
-	adc	%edx, %eax
+L(lo1):	psrlq	$32, %mm0
+	adc	%edx, %ebx
 	movd	%mm0, %edx
-	movd	%mm1, %ebx
+	movd	%mm1, %eax
 	movd	8(up), %mm0
 	pmuludq	%mm7, %mm0
 	adc	$0, %edx
-	ADDSUB	%eax, (rp)
-L(lo1):	psrlq	$32, %mm1
-	adc	%edx, %ebx
+	ADDSUB	%ebx, (rp)
+L(lo0):	psrlq	$32, %mm1
+	adc	%edx, %eax
 	movd	%mm1, %edx
-	movd	%mm0, %eax
+	movd	%mm0, %ebx
 	movd	12(up), %mm1
 	pmuludq	%mm7, %mm1
 	adc	$0, %edx
-	ADDSUB	%ebx, 4(rp)
-L(lo0):	psrlq	$32, %mm0
-	adc	%edx, %eax
+	ADDSUB	%eax, 4(rp)
+L(lo3):	psrlq	$32, %mm0
+	adc	%edx, %ebx
 	movd	%mm0, %edx
-	movd	%mm1, %ebx
+	movd	%mm1, %eax
 	lea	16(up), up
 	movd	(up), %mm0
 	adc	$0, %edx
-	ADDSUB	%eax, 8(rp)
-L(lo3):	psrlq	$32, %mm1
-	adc	%edx, %ebx
+	ADDSUB	%ebx, 8(rp)
+L(lo2):	psrlq	$32, %mm1
+	adc	%edx, %eax
 	movd	%mm1, %edx
 	pmuludq	%mm7, %mm0
 	dec	n
-	movd	4(up), %mm1
 	jnz	L(top)
 
 L(end):	adc	n, %edx			C n is zero here
-	ADDSUB	%ebx, 12(rp)
+	ADDSUB	%eax, 12(rp)
+	movd	%mm0, %ebx
+	lea	16(rp), rp
+L(wd1):	psrlq	$32, %mm0
+	adc	%edx, %ebx
 	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-L(wd2):	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	adc	n, %edx
-	ADDSUB	%eax, (rp)
-L(wd1):	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %eax
 	adc	n, %eax
-	ADDSUB	%ebx, 4(rp)
+	ADDSUB	%ebx, (rp)
+	emms
 	adc	n, %eax
-	emms
 	pop	%ebx
 	pop	%esi
 	pop	%edi
diff -r be51c74c1ad9 -r ab657cfe1a12 tests/mpn/t-divrem_1.c
--- a/tests/mpn/t-divrem_1.c	Mon Feb 28 13:37:39 2011 +0100