[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Feb 28 16:36:36 CET 2011
details: /var/hg/gmp/rev/4d0ad229b9c9
changeset: 13930:4d0ad229b9c9
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Feb 28 14:07:42 2011 +0100
description:
Remove file no longer used.
details: /var/hg/gmp/rev/2a0487ea817f
changeset: 13931:2a0487ea817f
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Feb 28 16:35:48 2011 +0100
description:
Shorten software pipeline.
details: /var/hg/gmp/rev/93333ff73967
changeset: 13932:93333ff73967
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Feb 28 16:35:52 2011 +0100
description:
*** empty log message ***
details: /var/hg/gmp/rev/ab657cfe1a12
changeset: 13933:ab657cfe1a12
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Feb 28 16:36:33 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 14 ++++
gmp-impl.h | 64 ++++++++++++---------
mpn/x86/atom/mul_basecase.asm | 23 --------
mpn/x86/atom/sse2/aorsmul_1.asm | 114 ++++++++++++++++++---------------------
tests/mpn/t-divrem_1.c | 5 +
5 files changed, 108 insertions(+), 112 deletions(-)
diffs (truncated from 313 to 300 lines):
diff -r be51c74c1ad9 -r ab657cfe1a12 ChangeLog
--- a/ChangeLog Mon Feb 28 13:37:39 2011 +0100
+++ b/ChangeLog Mon Feb 28 16:36:33 2011 +0100
@@ -1,5 +1,19 @@
+2011-02-28 Niels Möller <nisse at lysator.liu.se>
+
+ * gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
+ using masking logic. Further optimization of the nl == constant 0
+ case, similar to udiv_rnd_preinv.
+ (udiv_rnnd_preinv): Likewise.
+
+ * tests/mpn/t-divrem_1.c (check_data): Added testcase to exercise
+ the nl == constant 0 special case in udiv_qrnnd_preinv3.
+
2011-02-28 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86/atom/sse2/aorsmul_1.asm: Shorten software pipeline.
+
+ * mpn/x86/atom/mul_basecase.asm: Remove file no longer used.
+
* mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
allocations until they are known to be needed.
diff -r be51c74c1ad9 -r ab657cfe1a12 gmp-impl.h
--- a/gmp-impl.h Mon Feb 28 13:37:39 2011 +0100
+++ b/gmp-impl.h Mon Feb 28 16:36:33 2011 +0100
@@ -2771,51 +2771,59 @@
*/
#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
do { \
- mp_limb_t _qh, _ql, _r; \
+ mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
if (__builtin_constant_p (nl) && (nl) == 0) \
- _qh += (nh) + 1; \
+ { \
+ _qh += (nh) + 1; \
+ _r = - _qh * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _qh += _mask; \
+ _r += _mask & (d); \
+ } \
else \
- add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - _qh * (d); \
- if (_r > _ql) /* both > and >= should be OK */ \
{ \
- _r += (d); \
- _qh--; \
- } \
- if (UNLIKELY (_r >= (d))) \
- { \
- _r -= (d); \
- _qh++; \
+ add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
+ _r = (nl) - _qh * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _qh += _mask; \
+ _r += _mask & (d); \
+ if (UNLIKELY (_r >= (d))) \
+ { \
+ _r -= (d); \
+ _qh++; \
+ } \
} \
(r) = _r; \
(q) = _qh; \
} while (0)
-/* Unlike udiv_qrnnd_preinv, works also for nh == d.
-
- FIXME: The special case for nl = constant 0 could be simplified
- further, like in udiv_rnd_preinv below. Note that with nl = 0, the
- case _r >= d can't happen. Also applies to udiv_qrnnd_preinv
- above.
-
- FIXME: Use mask for adjustment? */
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+ udiv_qrnnd_preinv, works also for the case NH == D, where the
+ quotient doesn't quite fit in a single limb. */
#define udiv_rnnd_preinv(r, nh, nl, d, di) \
do { \
- mp_limb_t _qh, _ql, _r; \
+ mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
if (__builtin_constant_p (nl) && (nl) == 0) \
- _qh += (nh) + 1; \
+ { \
+ _r = ~(_qh + (nh)) * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _r += _mask & (d); \
+ } \
else \
- add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - _qh * (d); \
- if (_r > _ql) /* both > and >= should be OK */ \
- _r += (d); \
- if (UNLIKELY (_r >= (d))) \
- _r -= (d); \
+ { \
+ add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
+ _r = (nl) - _qh * (d); \
+ _mask = -(_r > _ql); /* both > and >= should be OK */ \
+ _r += _mask & (d); \
+ if (UNLIKELY (_r >= (d))) \
+ _r -= (d); \
+ } \
(r) = _r; \
} while (0)
+/* FIXME: Obsolete? Use udiv_rnnd_preinv(r, nh, 0, d, di) instead. */
/* Compute r = nh*B mod d, where di is the inverse of d. */
#define udiv_rnd_preinv(r, nh, d, di) \
do { \
diff -r be51c74c1ad9 -r ab657cfe1a12 mpn/x86/atom/mul_basecase.asm
--- a/mpn/x86/atom/mul_basecase.asm Mon Feb 28 13:37:39 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-dnl Intel Atom mpn_mul_basecase -- multiply two mpn numbers.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mul_basecase)
-include_mpn(`x86/p6/mul_basecase.asm')
diff -r be51c74c1ad9 -r ab657cfe1a12 mpn/x86/atom/sse2/aorsmul_1.asm
--- a/mpn/x86/atom/sse2/aorsmul_1.asm Mon Feb 28 13:37:39 2011 +0100
+++ b/mpn/x86/atom/sse2/aorsmul_1.asm Mon Feb 28 16:36:33 2011 +0100
@@ -67,100 +67,92 @@
mov %eax, n
and $1, %eax
jz L(fi0or2)
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ shr $2, n
+ jnc L(fi1)
+
+L(fi3): lea -8(up), up
+ lea -8(rp), rp
+ movd 12(up), %mm1
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ add $1, n C increment and clear carry
+ jmp L(lo3)
+
+L(fi1): movd %mm0, %ebx
+ jz L(wd1)
+ movd 4(up), %mm1
+ pmuludq %mm7, %mm1
+ jmp L(lo1)
+
+L(fi0or2):
movd (up), %mm1
pmuludq %mm7, %mm1
shr $2, n
- jnc L(fi1)
-
-L(fi3): lea 4(up), up
- lea -12(rp), rp
- movd %mm1, %ebx
- add $1, n C increment and clear carry
- movd (up), %mm0
- jmp L(lo3)
-
-L(fi1): lea -4(rp), rp
- movd %mm1, %ebx
- jz L(wd1)
movd 4(up), %mm0
+ jc L(fi2)
lea -4(up), up
+ lea -4(rp), rp
+ movd %mm1, %eax
pmuludq %mm7, %mm0
- jmp L(lo1)
-
-L(fi0or2):
- movd (up), %mm0
- pmuludq %mm7, %mm0
- shr $2, n
- movd 4(up), %mm1
- jc L(fi2)
- lea -8(up), up
- lea -8(rp), rp
- movd %mm0, %eax
- pmuludq %mm7, %mm1
jmp L(lo0)
-L(fi2): test n, n C clear carry
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- jnz L(lo2)
- jmp L(wd2)
+L(fi2): lea 4(up), up
+ add $1, n C increment and clear carry
+ movd %mm1, %eax
+ lea -12(rp), rp
+ jmp L(lo2)
C ALIGN(16) C alignment seems irrelevant
-L(top): adc $0, %edx
- ADDSUB %ebx, 12(rp)
- movd %mm0, %eax
+L(top): movd 4(up), %mm1
+ adc $0, %edx
+ ADDSUB %eax, 12(rp)
+ movd %mm0, %ebx
pmuludq %mm7, %mm1
lea 16(rp), rp
-L(lo2): psrlq $32, %mm0
- adc %edx, %eax
+L(lo1): psrlq $32, %mm0
+ adc %edx, %ebx
movd %mm0, %edx
- movd %mm1, %ebx
+ movd %mm1, %eax
movd 8(up), %mm0
pmuludq %mm7, %mm0
adc $0, %edx
- ADDSUB %eax, (rp)
-L(lo1): psrlq $32, %mm1
- adc %edx, %ebx
+ ADDSUB %ebx, (rp)
+L(lo0): psrlq $32, %mm1
+ adc %edx, %eax
movd %mm1, %edx
- movd %mm0, %eax
+ movd %mm0, %ebx
movd 12(up), %mm1
pmuludq %mm7, %mm1
adc $0, %edx
- ADDSUB %ebx, 4(rp)
-L(lo0): psrlq $32, %mm0
- adc %edx, %eax
+ ADDSUB %eax, 4(rp)
+L(lo3): psrlq $32, %mm0
+ adc %edx, %ebx
movd %mm0, %edx
- movd %mm1, %ebx
+ movd %mm1, %eax
lea 16(up), up
movd (up), %mm0
adc $0, %edx
- ADDSUB %eax, 8(rp)
-L(lo3): psrlq $32, %mm1
- adc %edx, %ebx
+ ADDSUB %ebx, 8(rp)
+L(lo2): psrlq $32, %mm1
+ adc %edx, %eax
movd %mm1, %edx
pmuludq %mm7, %mm0
dec n
- movd 4(up), %mm1
jnz L(top)
L(end): adc n, %edx C n is zero here
- ADDSUB %ebx, 12(rp)
+ ADDSUB %eax, 12(rp)
+ movd %mm0, %ebx
+ lea 16(rp), rp
+L(wd1): psrlq $32, %mm0
+ adc %edx, %ebx
movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
-L(wd2): psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- adc n, %edx
- ADDSUB %eax, (rp)
-L(wd1): psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %eax
adc n, %eax
- ADDSUB %ebx, 4(rp)
+ ADDSUB %ebx, (rp)
+ emms
adc n, %eax
- emms
pop %ebx
pop %esi
pop %edi
diff -r be51c74c1ad9 -r ab657cfe1a12 tests/mpn/t-divrem_1.c
--- a/tests/mpn/t-divrem_1.c Mon Feb 28 13:37:39 2011 +0100
More information about the gmp-commit mailing list