[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Feb 28 16:46:40 CET 2011
details: /var/hg/gmp/rev/455b3b13f11b
changeset: 13935:455b3b13f11b
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Feb 28 16:19:07 2011 +0100
description:
Use udiv_rnnd_preinv instead of udiv_qrnnd_preinv.
details: /var/hg/gmp/rev/e59e8baec625
changeset: 13936:e59e8baec625
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Feb 28 16:23:29 2011 +0100
description:
Use udiv_rnnd_preinv.
details: /var/hg/gmp/rev/a55a42c2d350
changeset: 13937:a55a42c2d350
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Feb 28 16:27:41 2011 +0100
description:
Use udiv_rnnd_preinv.
details: /var/hg/gmp/rev/fed7457c4430
changeset: 13938:fed7457c4430
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Feb 28 16:42:53 2011 +0100
description:
Use udiv_rnnd_preinv.
details: /var/hg/gmp/rev/ff7cd9c674da
changeset: 13939:ff7cd9c674da
user: Niels Möller <nisse at lysator.liu.se>
date: Mon Feb 28 16:46:37 2011 +0100
description:
Trivial merge
diffstat:
ChangeLog | 28 +++++++++
mpn/generic/mod_1.c | 6 +-
mpn/generic/mod_1_1.c | 6 +-
mpn/generic/mod_1_2.c | 10 +-
mpn/generic/mod_1_3.c | 10 +-
mpn/generic/mod_1_4.c | 12 +-
mpn/x86/atom/mul_basecase.asm | 23 -------
mpn/x86/atom/sse2/aorsmul_1.asm | 114 +++++++++++++++++-------------------
mpn/x86/atom/sse2/mul_basecase.asm | 6 +-
9 files changed, 107 insertions(+), 108 deletions(-)
diffs (truncated from 424 to 300 lines):
diff -r 00cf5f4b2e9f -r ff7cd9c674da ChangeLog
--- a/ChangeLog Mon Feb 28 15:10:48 2011 +0100
+++ b/ChangeLog Mon Feb 28 16:46:37 2011 +0100
@@ -1,5 +1,29 @@
2011-02-28 Niels Möller <nisse at lysator.liu.se>
+ * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Converted to use
+ udiv_rnnd_preinv rather than udiv_rnd_preinv.
+ (mpn_mod_1_1p): Use udiv_rnnd_preinv rather than
+ udiv_qrnnd_preinv.
+
+ * mpn/generic/mod_1_4.c (mpn_mod_1s_4p_cps): Converted to use
+ udiv_rnnd_preinv rather than udiv_rnd_preinv.
+ (mpn_mod_1s_4p): Use udiv_rnnd_preinv rather than
+ udiv_qrnnd_preinv.
+
+ * mpn/generic/mod_1_3.c (mpn_mod_1s_3p_cps): Converted to use
+ udiv_rnnd_preinv rather than udiv_rnd_preinv.
+ (mpn_mod_1s_3p): Use udiv_rnnd_preinv rather than
+ udiv_qrnnd_preinv.
+
+ * mpn/generic/mod_1_2.c (mpn_mod_1s_2p_cps): Converted to use
+ udiv_rnnd_preinv rather than udiv_rnd_preinv.
+ (mpn_mod_1s_2p): Use udiv_rnnd_preinv rather than
+ udiv_qrnnd_preinv.
+
+ * mpn/generic/mod_1.c (mpn_mod_1_unnorm): Use udiv_rnnd_preinv
+ rather than udiv_qrnnd_preinv.
+ (mpn_mod_1_norm): Likewise.
+
* gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
using masking logic. Further optimization of the nl == constant 0
case, similar to udiv_rnd_preinv.
@@ -10,6 +34,10 @@
2011-02-28 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86/atom/sse2/aorsmul_1.asm: Shorten software pipeline.
+
+ * mpn/x86/atom/mul_basecase.asm: Remove file no longer used.
+
* mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
allocations until they are known to be needed.
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/generic/mod_1.c
--- a/mpn/generic/mod_1.c Mon Feb 28 15:10:48 2011 +0100
+++ b/mpn/generic/mod_1.c Mon Feb 28 16:46:37 2011 +0100
@@ -140,11 +140,11 @@
{
n0 = up[i] << GMP_NAIL_BITS;
nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
- udiv_qrnnd_preinv (dummy, r, r, nshift, d, inv);
+ udiv_rnnd_preinv (r, r, nshift, d, inv);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
- udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
+ udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
r >>= GMP_NAIL_BITS;
return r >> cnt;
}
@@ -190,7 +190,7 @@
for (i = un - 1; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
+ udiv_rnnd_preinv (r, r, n0, d, inv);
r >>= GMP_NAIL_BITS;
}
return r;
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c Mon Feb 28 15:10:48 2011 +0100
+++ b/mpn/generic/mod_1_1.c Mon Feb 28 16:46:37 2011 +0100
@@ -55,14 +55,14 @@
* B2modb = - b * bi;
* ASSERT (B2modb <= b); // NB: equality iff b = B/2
*/
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
}
mp_limb_t
mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
{
- mp_limb_t rh, rl, bi, q, ph, pl, r;
+ mp_limb_t rh, rl, bi, ph, pl, r;
mp_limb_t B1modb, B2modb;
mp_size_t i;
int cnt;
@@ -99,7 +99,7 @@
mask = -(mp_limb_t) (rh >= b);
rh -= mask & b;
- udiv_qrnnd_preinv (q, r, rh, rl << cnt, b, bi);
+ udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
return r >> cnt;
}
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/generic/mod_1_2.c
--- a/mpn/generic/mod_1_2.c Mon Feb 28 15:10:48 2011 +0100
+++ b/mpn/generic/mod_1_2.c Mon Feb 28 16:46:37 2011 +0100
@@ -51,10 +51,10 @@
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
- udiv_rnd_preinv (B3modb, B2modb, b, bi);
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
cps[4] = B3modb >> cnt;
#if WANT_ASSERT
@@ -73,7 +73,7 @@
mp_limb_t
mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
{
- mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb;
mp_size_t i;
int cnt;
@@ -91,7 +91,7 @@
rl = ap[n - 1];
bi = cps[0];
cnt = cps[1];
- udiv_qrnnd_preinv (q, r, rl >> (GMP_LIMB_BITS - cnt),
+ udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
rl << cnt, b, bi);
return r >> cnt;
}
@@ -132,7 +132,7 @@
bi = cps[0];
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
- udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
return r >> cnt;
}
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/generic/mod_1_3.c
--- a/mpn/generic/mod_1_3.c Mon Feb 28 15:10:48 2011 +0100
+++ b/mpn/generic/mod_1_3.c Mon Feb 28 16:46:37 2011 +0100
@@ -46,9 +46,9 @@
B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
- udiv_rnd_preinv (B3modb, B2modb, b, bi);
- udiv_rnd_preinv (B4modb, B3modb, b, bi);
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
+ udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
cps[0] = bi;
cps[1] = cnt;
@@ -73,7 +73,7 @@
mp_limb_t
mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
{
- mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb, B4modb;
mp_size_t i;
int cnt;
@@ -137,7 +137,7 @@
bi = cps[0];
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
- udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
return r >> cnt;
}
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/generic/mod_1_4.c
--- a/mpn/generic/mod_1_4.c Mon Feb 28 15:10:48 2011 +0100
+++ b/mpn/generic/mod_1_4.c Mon Feb 28 16:46:37 2011 +0100
@@ -51,16 +51,16 @@
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
- udiv_rnd_preinv (B3modb, B2modb, b, bi);
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
cps[4] = B3modb >> cnt;
- udiv_rnd_preinv (B4modb, B3modb, b, bi);
+ udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
cps[5] = B4modb >> cnt;
- udiv_rnd_preinv (B5modb, B4modb, b, bi);
+ udiv_rnnd_preinv (B5modb, B4modb, 0, b, bi);
cps[6] = B5modb >> cnt;
#if WANT_ASSERT
@@ -79,7 +79,7 @@
mp_limb_t
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
{
- mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
mp_size_t i;
int cnt;
@@ -154,7 +154,7 @@
bi = cps[0];
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
- udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
return r >> cnt;
}
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/x86/atom/mul_basecase.asm
--- a/mpn/x86/atom/mul_basecase.asm Mon Feb 28 15:10:48 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-dnl Intel Atom mpn_mul_basecase -- multiply two mpn numbers.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mul_basecase)
-include_mpn(`x86/p6/mul_basecase.asm')
diff -r 00cf5f4b2e9f -r ff7cd9c674da mpn/x86/atom/sse2/aorsmul_1.asm
--- a/mpn/x86/atom/sse2/aorsmul_1.asm Mon Feb 28 15:10:48 2011 +0100
+++ b/mpn/x86/atom/sse2/aorsmul_1.asm Mon Feb 28 16:46:37 2011 +0100
@@ -67,100 +67,92 @@
mov %eax, n
and $1, %eax
jz L(fi0or2)
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ shr $2, n
+ jnc L(fi1)
+
+L(fi3): lea -8(up), up
+ lea -8(rp), rp
+ movd 12(up), %mm1
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ add $1, n C increment and clear carry
+ jmp L(lo3)
+
+L(fi1): movd %mm0, %ebx
+ jz L(wd1)
+ movd 4(up), %mm1
+ pmuludq %mm7, %mm1
+ jmp L(lo1)
+
+L(fi0or2):
movd (up), %mm1
pmuludq %mm7, %mm1
shr $2, n
- jnc L(fi1)
-
-L(fi3): lea 4(up), up
- lea -12(rp), rp
- movd %mm1, %ebx
- add $1, n C increment and clear carry
- movd (up), %mm0
- jmp L(lo3)
-
-L(fi1): lea -4(rp), rp
- movd %mm1, %ebx
- jz L(wd1)
movd 4(up), %mm0
+ jc L(fi2)
lea -4(up), up
+ lea -4(rp), rp
+ movd %mm1, %eax
pmuludq %mm7, %mm0
- jmp L(lo1)
-
-L(fi0or2):
- movd (up), %mm0
- pmuludq %mm7, %mm0
- shr $2, n
- movd 4(up), %mm1
More information about the gmp-commit
mailing list