[Gmp-commit] /home/hgfiles/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Dec 22 01:48:03 CET 2009
details: /home/hgfiles/gmp/rev/49c36e32d2ae
changeset: 13171:49c36e32d2ae
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Dec 22 01:42:14 2009 +0100
description:
Simplify, trim allocation.
details: /home/hgfiles/gmp/rev/9c995e3f6f20
changeset: 13172:9c995e3f6f20
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Dec 22 01:46:02 2009 +0100
description:
Rewrite resp simplify itch functions.
diffstat:
ChangeLog | 8 ++++++++
mpn/generic/bdiv_qr.c | 21 +++++----------------
mpn/generic/mu_bdiv_q.c | 46 ++++++++++++++++++++++++++++++++++------------
mpn/generic/mu_bdiv_qr.c | 39 ++++++++++++++++++---------------------
4 files changed, 65 insertions(+), 49 deletions(-)
diffs (219 lines):
diff -r b75c1e49113f -r 9c995e3f6f20 ChangeLog
--- a/ChangeLog Mon Dec 21 22:09:09 2009 +0100
+++ b/ChangeLog Tue Dec 22 01:46:02 2009 +0100
@@ -1,3 +1,11 @@
+2009-12-22 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/mu_bdiv_q.c (mpn_mu_bdiv_q_itch): Rewrite.
+ * mpn/generic/mu_bdiv_qr.c (mpn_mu_bdiv_qr_itch): Simplify.
+
+ * mpn/generic/bdiv_qr.c (mpn_bdiv_qr): Simplify, don't allocate.
+ (mpn_bdiv_qr_itch): Conditionalise on MU_BDIV_QR_THRESHOLD.
+
2009-12-21 Torbjorn Granlund <tege at gmplib.org>
* mpn/generic/sbpi1_div_q.c: Fix fixup code for to work for qn = 0.
diff -r b75c1e49113f -r 9c995e3f6f20 mpn/generic/bdiv_qr.c
--- a/mpn/generic/bdiv_qr.c Mon Dec 21 22:09:09 2009 +0100
+++ b/mpn/generic/bdiv_qr.c Tue Dec 22 01:46:02 2009 +0100
@@ -44,7 +44,6 @@
BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
{
MPN_COPY (tp, np, nn);
- tp[nn] = 0;
binvert_limb (di, dp[0]); di = -di;
rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
MPN_COPY (rp, tp + nn - dn, dn);
@@ -52,23 +51,13 @@
else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
{
MPN_COPY (tp, np, nn);
- tp[nn] = 0;
binvert_limb (di, dp[0]); di = -di;
rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
MPN_COPY (rp, tp + nn - dn, dn);
}
else
{
- mp_size_t itch;
- mp_ptr scratch_out;
- TMP_DECL;
- TMP_MARK;
- itch = mpn_mu_bdiv_qr_itch (nn, dn);
- scratch_out = TMP_BALLOC_LIMBS (itch);
- MPN_COPY (tp, np, nn);
- tp[nn] = 0;
- rh = mpn_mu_bdiv_qr (qp, rp, tp, nn, dp, dn, scratch_out);
- TMP_FREE;
+ rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
}
return rh;
@@ -77,8 +66,8 @@
mp_size_t
mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
{
- mp_size_t itch;
- itch = mpn_mu_bdiv_qr_itch (nn, dn);
- itch = MAX (itch, nn + 1);
- return itch;
+ if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
+ return nn;
+ else
+ return mpn_mu_bdiv_qr_itch (nn, dn);
}
diff -r b75c1e49113f -r 9c995e3f6f20 mpn/generic/mu_bdiv_q.c
--- a/mpn/generic/mu_bdiv_q.c Mon Dec 21 22:09:09 2009 +0100
+++ b/mpn/generic/mu_bdiv_q.c Tue Dec 22 01:46:02 2009 +0100
@@ -101,12 +101,11 @@
should reduce itch to perhaps 3dn.
*/
- ip = scratch; /* in limbs */
- rp = scratch + in; /* dn limbs */
- tp = scratch + in + dn; /* dn + in limbs FIXME: mpn_fft_next_size */
- scratch += in; /* Roughly 2in+1 limbs */
+ ip = scratch; /* in limbs */
+ rp = scratch + in; /* MAX(binvert_itch(in),dn) limbs */
+ tp = scratch + in + dn; /* MAX(next_size(dn,k),dn+in) limbs */
- mpn_binvert (ip, dp, in, scratch);
+ mpn_binvert (ip, dp, in, rp);
cy = 0;
@@ -209,11 +208,10 @@
/* Compute half-sized inverse. */
in = qn - (qn >> 1);
- ip = scratch; /* ceil(qn/2) limbs */
- rp = scratch + in; /* ceil(qn/2)+qn limbs */
- scratch += in; /* 2*ceil(qn/2)+2 */
+ ip = scratch; /* in limbs */
+ rp = scratch + in; /* MAX(binvert_itch(in),next_size(qn,k),qn+in) limbs */
- mpn_binvert (ip, dp, in, scratch);
+ mpn_binvert (ip, dp, in, rp);
mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */
#if WANT_FFT
@@ -240,16 +238,40 @@
mp_size_t
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
{
- mp_size_t qn;
+ mp_size_t qn, in, m, itch_invert;
+ mp_size_t b;
qn = nn;
if (qn > dn)
{
- return 4 * dn; /* FIXME FIXME FIXME need mpn_fft_next_size */
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
+ {
+ m = mpn_fft_next_size (dn, mpn_fft_best_k (dn, 0));
+ m = MAX (dn + in, m);
+ }
+ else
+#endif
+ m = dn + in;
+ itch_invert = mpn_binvert_itch (in);
+ return in + MAX (itch_invert, dn + m);
}
else
{
- return 2 * qn + 1 + 2; /* FIXME FIXME FIXME need mpn_fft_next_size */
+ in = qn - (qn >> 1);
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
+ {
+ m = mpn_fft_next_size (qn, mpn_fft_best_k (qn, 0));
+ m = MAX (qn + in, m);
+ }
+ else
+#endif
+ m = qn + in;
+ itch_invert = mpn_binvert_itch (in);
+ return in + MAX (itch_invert, m);
}
}
diff -r b75c1e49113f -r 9c995e3f6f20 mpn/generic/mu_bdiv_qr.c
--- a/mpn/generic/mu_bdiv_qr.c Mon Dec 21 22:09:09 2009 +0100
+++ b/mpn/generic/mu_bdiv_qr.c Tue Dec 22 01:46:02 2009 +0100
@@ -100,11 +100,10 @@
should reduce itch to perhaps 3dn.
*/
- ip = scratch; /* in limbs */
- tp = scratch + in; /* dn + in limbs FIXME: mpn_fft_next_size */
- scratch += in; /* Roughly 2in+1 limbs */
+ ip = scratch; /* in limbs */
+ tp = scratch + in; /* MAX(binvert_itch(in),next_size(dn,k),dn+in) limbs */
- mpn_binvert (ip, dp, in, scratch);
+ mpn_binvert (ip, dp, in, tp);
MPN_COPY (rp, np, dn);
np += dn;
@@ -205,11 +204,10 @@
/* Compute half-sized inverse. */
in = qn - (qn >> 1);
- ip = scratch; /* ceil(qn/2) limbs */
- tp = scratch + in; /* dn + in limbs FIXME: mpn_fft_next_size */
- scratch += in; /* 2*ceil(qn/2)+2 */
+ ip = scratch; /* in limbs */
+ tp = scratch + in; /* MAX(binvert_itch(in),next_size(dn,k),dn+in) limbs */
- mpn_binvert (ip, dp, in, scratch);
+ mpn_binvert (ip, dp, in, tp);
mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */
#if WANT_FFT
@@ -269,7 +267,7 @@
mp_size_t
mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
{
- mp_size_t qn, in, m, itch_invert, itch;
+ mp_size_t qn, in, m, itch_invert;
mp_size_t b;
qn = nn - dn;
@@ -278,22 +276,21 @@
{
b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
- return in + mpn_binvert_itch (in);
}
else
{
in = qn - (qn >> 1);
+ }
+
#if WANT_FFT
- if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
- {
- m = mpn_fft_next_size (dn, mpn_fft_best_k (dn, 0));
- m = MAX (dn + in, m);
- }
- else
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
+ {
+ m = mpn_fft_next_size (dn, mpn_fft_best_k (dn, 0));
+ m = MAX (dn + in, m);
+ }
+ else
#endif
- m = dn + in;
- itch_invert = mpn_binvert_itch (in);
- itch = MAX (itch_invert, m);
- return in + itch;
- }
+ m = dn + in;
+ itch_invert = mpn_binvert_itch (in);
+ return in + MAX (itch_invert, m);
}
More information about the gmp-commit
mailing list