[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
Sun May 16 09:14:12 CEST 2010
details:   /home/hgfiles/gmp/rev/ebdf8aa713d5
changeset: 13637:ebdf8aa713d5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 08:45:51 2010 +0200
description:
Completely finish MOD_1_N tuning before tuning MOD_1U_TO_MOD_1_1_THRESHOLD.

details:   /home/hgfiles/gmp/rev/569a84988892
changeset: 13638:569a84988892
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 09:06:56 2010 +0200
description:
Do sizeof tests for 32-bit and 64-bit ABI.

details:   /home/hgfiles/gmp/rev/f06e8533c7b3
changeset: 13639:f06e8533c7b3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 09:10:18 2010 +0200
description:
Minor cleanup.

details:   /home/hgfiles/gmp/rev/8211734e5ef1
changeset: 13640:8211734e5ef1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 16 09:14:05 2010 +0200
description:
Major overhaul of sparc64 mod_1.
diffstat:
 ChangeLog              |  18 +++
 configure.in           |   2 +
 mpn/generic/divrem_1.c |  33 +++---
 mpn/generic/mod_1.c    |  13 +-
 mpn/generic/mod_1_1.c  |  12 +-
 mpn/generic/mod_1_2.c  |  16 +--
 mpn/generic/mod_1_3.c  |  14 +--
 mpn/generic/mod_1_4.c  |  16 +--
 mpn/sparc64/mod_1.c    |  59 ++++++++++++-
 mpn/sparc64/mod_1_4.c  | 221 +++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/sparc64/sparc64.h  |  18 +++
 tune/tuneup.c          |  12 +-
 12 files changed, 359 insertions(+), 75 deletions(-)
diffs (truncated from 669 to 300 lines):
diff -r beaa098c547e -r 8211734e5ef1 ChangeLog
--- a/ChangeLog Fri May 14 11:40:58 2010 +0200
+++ b/ChangeLog Sun May 16 09:14:05 2010 +0200
@@ -1,3 +1,21 @@
+2010-05-16 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/sparc64/mod_1.c: Rewrite.
+ * mpn/sparc64/sparc64.h (umul_ppmm_s): New macro.
+ * mpn/sparc64/mod_1_4.c: New file.
+
+ * mpn/generic/divrem_1.c: Minor cleanup.
+ * mpn/generic/mod_1.c: Likewise.
+ * mpn/generic/mod_1_1.c: Likewise.
+ * mpn/generic/mod_1_2.c: Likewise.
+ * mpn/generic/mod_1_3.c: Likewise.
+ * mpn/generic/mod_1_4.c: Likewise.
+
+ * configure.in (ia64-hpux): Do sizeof tests for 32-bit and 64-bit ABI.
+
+ * tune/tuneup.c (tune_mod_1): Completely finish MOD_1_N tuning before
+ tuning MOD_1U_TO_MOD_1_1_THRESHOLD.
+
2010-05-14 Torbjorn Granlund <tege at gmplib.org>
* mpn/generic/redc_2.c: Use asm code just for GNU C.
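
The tune/tuneup.c hunk itself falls outside the truncated diff, but the ChangeLog entry above records an ordering constraint: measuring MOD_1U_TO_MOD_1_1_THRESHOLD runs mpn_mod_1, whose speed presumably depends on the already-chosen MOD_1_N thresholds, so those must be final before the crossover is measured. A minimal runnable sketch of just that ordering, with invented function names standing in for the real tuneup machinery:

#include <stdio.h>

/* Hypothetical stand-ins for the real tune_mod_1 steps (names invented). */
static void
tune_mod_1_n (void)
{
  /* The real code would measure and fix MOD_1_1..MOD_1_4 thresholds here. */
  puts ("MOD_1_N thresholds fixed");
}

static void
tune_mod_1u_crossover (void)
{
  /* Measured only now, against the already-tuned mod_1 code paths. */
  puts ("MOD_1U_TO_MOD_1_1_THRESHOLD measured");
}

int
main (void)
{
  tune_mod_1_n ();          /* completely finish MOD_1_N tuning first */
  tune_mod_1u_crossover (); /* then tune the dependent crossover */
  return 0;
}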
diff -r beaa098c547e -r 8211734e5ef1 configure.in
--- a/configure.in Fri May 14 11:40:58 2010 +0200
+++ b/configure.in Sun May 16 09:14:05 2010 +0200
@@ -683,6 +683,7 @@
abilist="64"
GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
SPEED_CYCLECOUNTER_OBJ=ia64.lo
+ any_testlist="sizeof-long-4"
case $host_cpu in
itanium) path="ia64/itanium ia64" ;;
@@ -716,6 +717,7 @@
# let us use whatever seems to work.
#
abilist="32 64"
+ any_64_testlist="sizeof-long-8"
cclist_32="gcc cc"
path_32="ia64"
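
The new any_testlist / any_64_testlist entries name compile-time size probes ("sizeof-long-4", "sizeof-long-8") used to sanity-check the chosen ABI on ia64-hpux. A minimal sketch of what such a probe can look like, assuming a conftest-style program (the array name here is invented): the array size becomes negative, so compilation fails, unless long has the expected width.

/* Hypothetical body for a "sizeof-long-4" style probe; a
   "sizeof-long-8" probe would compare against 8 instead. */
int long_is_4_bytes[sizeof (long) == 4 ? 1 : -1];

int
main (void)
{
  return 0;
}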
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/divrem_1.c
--- a/mpn/generic/divrem_1.c Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/divrem_1.c Sun May 16 09:14:05 2010 +0200
@@ -156,7 +156,7 @@
else
{
/* Most significant bit of divisor == 0. */
- int norm;
+ int cnt;
/* Skip a division if high < divisor (high quotient 0). Testing here
before normalizing will still skip as often as possible. */
@@ -178,28 +178,28 @@
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
goto plain;
- count_leading_zeros (norm, d);
- d <<= norm;
- r <<= norm;
+ count_leading_zeros (cnt, d);
+ d <<= cnt;
+ r <<= cnt;
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
{
+ mp_limb_t nshift;
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
- r |= (n1 >> (GMP_LIMB_BITS - norm));
+ r |= (n1 >> (GMP_LIMB_BITS - cnt));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd (*qp, r, r,
- (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
- d);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd (*qp, r, r, nshift, d);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
- udiv_qrnnd (*qp, r, r, n1 << norm, d);
+ udiv_qrnnd (*qp, r, r, n1 << cnt, d);
r >>= GMP_NAIL_BITS;
qp--;
}
@@ -209,27 +209,26 @@
r >>= GMP_NAIL_BITS;
qp--;
}
- return r >> norm;
+ return r >> cnt;
}
else
{
- mp_limb_t dinv;
+ mp_limb_t dinv, nshift;
invert_limb (dinv, d);
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
- r |= (n1 >> (GMP_LIMB_BITS - norm));
+ r |= (n1 >> (GMP_LIMB_BITS - cnt));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd_preinv (*qp, r, r,
- ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
- d, dinv);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
- udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
+ udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
@@ -239,7 +238,7 @@
r >>= GMP_NAIL_BITS;
qp--;
}
- return r >> norm;
+ return r >> cnt;
}
}
}
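
The divrem_1.c hunks (and the matching mod_1.c hunks below) are pure cleanup: the repeated (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)) expression gets a named temporary, nshift, and the counter is renamed from norm to cnt for consistency with the mod_1 files. A standalone illustration of the shift itself, with a 32-bit unsigned int standing in for mp_limb_t (a sketch; it assumes cnt != 0, which holds on this branch since the divisor's top bit is clear):

#include <stdio.h>

typedef unsigned int limb;   /* stand-in for mp_limb_t */
#define LIMB_BITS 32

/* Portable stand-in for count_leading_zeros; x must be nonzero. */
static int
clz (limb x)
{
  int c = 0;
  while ((x & ((limb) 1 << (LIMB_BITS - 1))) == 0)
    {
      x <<= 1;
      c++;
    }
  return c;
}

int
main (void)
{
  limb n1 = 0x12345678, n0 = 0x9abcdef0;  /* adjacent dividend limbs */
  limb d = 0x000fffff;                    /* divisor, top bit clear */
  int cnt = clz (d);                      /* cnt = 12, nonzero */

  /* The named temporary from the diff: the high dividend limb after
     shifting the whole dividend left by cnt bits. */
  limb nshift = (n1 << cnt) | (n0 >> (LIMB_BITS - cnt));

  printf ("d<<cnt = %#x, nshift = %#x\n", d << cnt, nshift);
  return 0;
}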
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1.c
--- a/mpn/generic/mod_1.c Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1.c Sun May 16 09:14:05 2010 +0200
@@ -118,12 +118,12 @@
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
{
+ mp_limb_t nshift;
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd (dummy, r, r,
- (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
- d);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd (dummy, r, r, nshift, d);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
@@ -133,15 +133,14 @@
}
else
{
- mp_limb_t inv;
+ mp_limb_t inv, nshift;
invert_limb (inv, d);
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd_preinv (dummy, r, r,
- (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
- d, inv);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd_preinv (dummy, r, r, nshift, d, inv);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_1.c Sun May 16 09:14:05 2010 +0200
@@ -67,15 +67,9 @@
B1modb = bmodb[2];
B2modb = bmodb[3];
-#if 1
- umul_ppmm (ph, pl, ap[n - 1], B1modb);
+ rl = ap[n - 1];
+ umul_ppmm (ph, pl, rl, B1modb);
add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
-#else
- /* FIXME: We could avoid the above multiply when n > 2, i.e., we're about to
- enter the loop. But the post loop code assumes rh is reduced. */
- rh = ap[n - 1];
- rl = ap[n - 2];
-#endif
for (i = n - 3; i >= 0; i -= 1)
{
@@ -90,8 +84,8 @@
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
+ cnt = bmodb[1];
bi = bmodb[0];
- cnt = bmodb[1];
if (LIKELY (cnt != 0))
rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
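
The mod_1_1.c hunk drops the disabled #else path (and its FIXME about skipping the entry multiply) so the reduced entry step is unconditional, and reorders the cnt/bi loads to match the other mod_1 files. The idea behind B1modb and B2modb, namely that precomputing B mod b and B^2 mod b turns the per-limb reduction into multiplies, can be checked in plain C with 16-bit limbs so no product overflows 64-bit host arithmetic (a sketch of the congruences, not GMP's code):

#include <stdio.h>
#include <stdint.h>

#define B 65536u   /* 16-bit "limb" base */

int
main (void)
{
  uint64_t a[3] = { 0x9abc, 0x5678, 0x1234 };  /* least significant limb first */
  uint64_t b = 60013;                          /* divisor, b < B */
  uint64_t B1modb = B % b;                     /* B   mod b */
  uint64_t B2modb = ((uint64_t) B * B) % b;    /* B^2 mod b */

  /* Entry step, as after the diff: a two-limb rr congruent to
     a[2]*B + a[1] (mod b). */
  uint64_t rr = a[2] * B1modb + a[1];

  /* Loop step (a single iteration here, i = 0): the new rr =
     rl*B1modb + rh*B2modb + a[i] is congruent to rr*B + a[i] (mod b),
     mirroring the umul_ppmm/add_ssaaaa pairs in the loop above. */
  uint64_t rh = rr >> 16, rl = rr & 0xffff;
  rr = rl * B1modb + rh * B2modb + a[0];

  /* Check against direct reduction of the full 48-bit value. */
  uint64_t v = (a[2] << 32) | (a[1] << 16) | a[0];
  printf ("folded %llu, direct %llu\n",
          (unsigned long long) (rr % b),
          (unsigned long long) (v % b));
  return 0;
}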
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_2.c
--- a/mpn/generic/mod_1_2.c Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_2.c Sun May 16 09:14:05 2010 +0200
@@ -9,7 +9,7 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -123,19 +123,13 @@
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = cps[0];
- cnt = cps[1];
-
-#if 1
umul_ppmm (rh, cl, rh, B1modb);
add_ssaaaa (rh, rl, rh, rl, 0, cl);
+
+ cnt = cps[1];
+ bi = cps[0];
+
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
- udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
- (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
- ASSERT (q <= 2); /* optimize for small quotient? */
-#endif
-
udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
return r >> cnt;
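
mod_1_2.c, mod_1_3.c, and mod_1_4.c all receive the same edit: the dead #else branch (a udiv_qrnnd_preinv alternative guarded by a small-quotient ASSERT) is deleted, and the cps loads move below the fold so the live path reads straight through. The fold itself relies on rh*B + rl being congruent to rh*B1modb + rl (mod b), which shrinks the pending value enough that the single udiv_qrnnd_preinv at the end can finish the reduction. The congruence is easy to verify in plain C (same 16-bit-limb convention as the sketch above):

#include <stdio.h>
#include <stdint.h>

#define B 65536u   /* 16-bit "limb" base */

int
main (void)
{
  uint64_t b = 60013, B1modb = B % b;
  uint64_t rh = 0xfffe, rl = 0x1234;  /* pending two-limb value rh*B + rl */

  /* The fold from the hunks: replace the rh*B contribution by
     rh*B1modb, preserving the residue mod b. */
  uint64_t folded = rh * B1modb + rl;

  printf ("before %llu, after %llu\n",
          (unsigned long long) ((rh * (uint64_t) B + rl) % b),
          (unsigned long long) (folded % b));
  return 0;
}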
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_3.c
--- a/mpn/generic/mod_1_3.c Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_3.c Sun May 16 09:14:05 2010 +0200
@@ -130,19 +130,13 @@
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = cps[0];
- cnt = cps[1];
-
-#if 1
umul_ppmm (rh, cl, rh, B1modb);
add_ssaaaa (rh, rl, rh, rl, 0, cl);
+
+ cnt = cps[1];
+ bi = cps[0];
+
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
- udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
- (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
- ASSERT (q <= 3); /* optimize for small quotient? */
-#endif
-
udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
return r >> cnt;
diff -r beaa098c547e -r 8211734e5ef1 mpn/generic/mod_1_4.c
--- a/mpn/generic/mod_1_4.c Fri May 14 11:40:58 2010 +0200
+++ b/mpn/generic/mod_1_4.c Sun May 16 09:14:05 2010 +0200
@@ -1,4 +1,4 @@
-/* mpn_mod_1s_3p (ap, n, b, cps)
+/* mpn_mod_1s_4p (ap, n, b, cps)
Divide (ap,,n) by b. Return the single-limb remainder.
Requires that d < B / 4.
@@ -143,19 +143,13 @@
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = cps[0];
- cnt = cps[1];
-
-#if 1
umul_ppmm (rh, cl, rh, B1modb);
add_ssaaaa (rh, rl, rh, rl, 0, cl);
+
+ cnt = cps[1];
+ bi = cps[0];
+
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
- udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
- (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
- ASSERT (q <= 4); /* optimize for small quotient? */
-#endif
-
udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);