[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
Wed Jan 6 05:07:21 CET 2010
details:   /home/hgfiles/gmp/rev/1da421deae4b
changeset: 13325:1da421deae4b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jan 06 04:56:43 2010 +0100
description:
Actually make dividend constant.

details:   /home/hgfiles/gmp/rev/0e958868b7b2
changeset: 13326:0e958868b7b2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jan 06 04:57:47 2010 +0100
description:
Align comments.

details:   /home/hgfiles/gmp/rev/28c191d22b52
changeset: 13327:28c191d22b52
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jan 06 05:06:44 2010 +0100
description:
New function mpn_div_q.

details:   /home/hgfiles/gmp/rev/b3b1ce7d08fb
changeset: 13328:b3b1ce7d08fb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jan 06 05:07:16 2010 +0100
description:
Test mpn_div_q.
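
The central addition is the internal entry point mpn_div_q, declared in the
gmp-impl.h hunk below as

  void mpn_div_q (mp_ptr qp, mp_srcptr np, mp_size_t nn,
                  mp_srcptr dp, mp_size_t dn, mp_ptr scratch);

A minimal caller sketch, following the operand requirements spelled out in
div_q.c's header comment; the wrapper function itself is hypothetical and not
part of the commit:

  #include "gmp.h"
  #include "gmp-impl.h"

  /* Hypothetical caller: compute Q = floor(N/D) for N = {np,nn} and
     D = {dp,dn}, with nn >= dn > 0 and dp[dn-1] != 0.  Per div_q.c,
     {qp,nn-dn+1} receives the quotient and the scratch area needs
     nn+1 limbs; np itself may be passed as scratch when N need not
     survive the call.  */
  static void
  div_q_example (mp_ptr qp, mp_srcptr np, mp_size_t nn,
                 mp_srcptr dp, mp_size_t dn)
  {
    mp_ptr scratch;
    TMP_DECL;
    TMP_MARK;
    scratch = TMP_ALLOC_LIMBS (nn + 1);
    mpn_div_q (qp, np, nn, dp, dn, scratch);
    TMP_FREE;
  }

Passing np for the scratch argument avoids this extra allocation and the
internal copy, at the cost of clobbering {np,nn+1}.
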
diffstat:
 ChangeLog              |   10 +
 configure.in           |    2 +-
 gmp-impl.h             |    5 +-
 mpn/generic/div_q.c    |  276 +++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/generic/mu_div_q.c |    2 +-
 mpn/generic/tdiv_qr.c  |    2 +-
 tests/mpn/t-div.c      |   45 +++++++-

 7 files changed, 334 insertions(+), 8 deletions(-)
diffs (truncated from 444 to 300 lines):
diff -r 1bc916e4e393 -r b3b1ce7d08fb ChangeLog
--- a/ChangeLog Tue Jan 05 03:05:51 2010 +0100
+++ b/ChangeLog Wed Jan 06 05:07:16 2010 +0100
@@ -1,3 +1,13 @@
+2010-01-06 Torbjorn Granlund <tege at gmplib.org>
+
+ * tests/mpn/t-div.c: Test mpn_div_q.
+ (SIZE_LOG): Up to 17.
+
+ * mpn/generic/div_q.c: New file.
+ * configure.in (gmp_mpn_functions): Add div_q.
+
+ * mpn/generic/mu_div_q.c: Actually make dividend constant.
+
2010-01-04 Torbjorn Granlund <tege at gmplib.org>

* tune/tuneup.c (fft): Separate tuning of modf and full products.
diff -r 1bc916e4e393 -r b3b1ce7d08fb configure.in
--- a/configure.in Tue Jan 05 03:05:51 2010 +0100
+++ b/configure.in Wed Jan 06 05:07:16 2010 +0100
@@ -2501,7 +2501,7 @@
perfsqr perfpow \
gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step \
gcdext_lehmer gcdext_subdiv_step \
- tdiv_qr jacbase get_d \
+ div_q tdiv_qr jacbase get_d \
matrix22_mul hgcd2 hgcd mullo_n mullo_basecase \
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
toom33_mul toom43_mul toom53_mul toom63_mul \
diff -r 1bc916e4e393 -r b3b1ce7d08fb gmp-impl.h
--- a/gmp-impl.h Tue Jan 05 03:05:51 2010 +0100
+++ b/gmp-impl.h Wed Jan 06 05:07:16 2010 +0100
@@ -1199,10 +1199,13 @@
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_mu_div_q __MPN(mu_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));

+#define mpn_div_q __MPN(div_q)
+__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invert_itch(n) mpn_invertappr_itch(n)
diff -r 1bc916e4e393 -r b3b1ce7d08fb mpn/generic/div_q.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_q.c Wed Jan 06 05:07:16 2010 +0100
@@ -0,0 +1,276 @@
+/* mpn_div_q -- division for arbitrary size operands.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Compute Q = N/D with truncation.
+ N = {np,nn}
+ D = {dp,dn}
+ Q = {qp,nn-dn+1}
+ T = {scratch,nn+1} is scratch space
+ N and D are both untouched by the computation.
+ N and T may overlap; pass the same space if N is irrelevant after the call,
+ but note that tp needs an extra limb.
+
+ Operand requirements:
+ N >= D > 0
+ dp[dn-1] != 0
+ No overlap between the N, D, and Q areas.
+
+ This division function does not clobber its input operands, since it is
+ intended to support average-O(qn) division, and for that to be effective, it
+ cannot put requirements on callers to copy an O(nn) operand.
+
+ If a caller does not care about the value of {np,nn+1} after calling this
+ function, it should pass np also for the scratch argument. This function
+ will then save some time and space by avoiding allocation and copying.
+ (FIXME: Is this a good design?)
+
+ We write nn-dn+1 limbs for the quotient, but return void. Why not return
+ the most significant quotient limb? Look at the 4 main code blocks below
+ (consisting of an outer if-else where each arm contains an if-else). It is
+ tricky for the first code block, since the mpn_*_div_q calls will typically
+ generate all nn-dn+1 limbs and return 0. I don't see how to fix that unless we
+ generate the most significant quotient limb here, before calling
+ mpn_*_div_q.
+*/
+
+/* FUDGE determines when to try getting an approximate quotient from the upper
+ parts of the dividend and divisor, then adjust. N.B. FUDGE must be >= 2
+ for the code to be correct. */
+#define FUDGE 2
+
+#define DC_DIV_Q_THRESHOLD DC_DIVAPPR_Q_THRESHOLD
+#define MU_DIV_Q_THRESHOLD MU_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIV_Q_THRESHOLD MUPI_DIVAPPR_Q_THRESHOLD
+#ifndef MUPI_DIVAPPR_Q_THRESHOLD
+#define MUPI_DIVAPPR_Q_THRESHOLD MUPI_DIV_QR_THRESHOLD
+#endif
+
+void
+mpn_div_q (mp_ptr qp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
+{
+ mp_ptr new_dp, new_np, tp, rp;
+ mp_limb_t cy, dh, qh;
+ mp_size_t new_nn, qn;
+ gmp_pi1_t dinv;
+ int cnt;
+ TMP_DECL;
+ TMP_MARK;
+
+ ASSERT (nn >= dn);
+ ASSERT (dn > 0);
+ ASSERT (dp[dn - 1] != 0);
+ ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
+
+ ASSERT_ALWAYS (FUDGE >= 2);
+
+ if (dn == 1)
+ {
+ mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
+ return;
+ }
+
+ qn = nn - dn + 1; /* Quotient size, high limb might be zero */
+
+ if (qn + FUDGE >= dn)
+ {
+ /* |________________________|
+ |_______| */
+ dh = dp[dn - 1];
+ if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+ {
+ count_leading_zeros (cnt, dh);
+
+ new_np = scratch;
+ new_dp = TMP_ALLOC_LIMBS (dn);
+
+ cy = mpn_lshift (new_np, np, nn, cnt);
+ new_np[nn] = cy;
+ new_nn = nn + (cy != 0);
+
+ mpn_lshift (new_dp, dp, dn, cnt);
+
+ if (dn == 2)
+ {
+ qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
+ }
+ else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD))
+ {
+ invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+ qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
+ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
+ {
+ invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
+ qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
+ }
+ if (cy == 0)
+ qp[qn - 1] = qh;
+ }
+ else /* divisor is already normalised */
+ {
+ new_np = scratch;
+ if (new_np != np)
+ MPN_COPY (new_np, np, nn);
+
+ if (dn == 2)
+ {
+ qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
+ }
+ else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD))
+ {
+ invert_pi1 (dinv, dh, dp[dn - 2]);
+ qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
+ BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
+ (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
+ + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
+ {
+ invert_pi1 (dinv, dh, dp[dn - 2]);
+ qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+ }
+ qp[nn - dn] = qh;
+ }
+ }
+ else
+ {
+ /* |________________________|
+ |_________________| */
+ tp = TMP_ALLOC_LIMBS (qn + 1);
+
+ dh = dp[dn - 1];
+ if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
+ {
+ count_leading_zeros (cnt, dh);
+
+ new_np = scratch;
+ new_dp = TMP_ALLOC_LIMBS (qn + 1);
+ new_nn = 2 * qn + 1;
+
+ cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
+ new_np[new_nn] = cy;
+ new_nn += (cy != 0);
+
+ mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
+ new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
+
+ if (qn + 1 == 2)
+ {
+ qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+ }
+ else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+ qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
+ qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
+ }
+ else
+ {
+ mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
+ }
+ if (cy == 0)
+ tp[qn] = qh;
+ }
+ else /* divisor is already normalised */
+ {
+ new_nn = 2 * qn + 1;
+ new_np = scratch;
+ if (new_np != np)
+ MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
+ else
+ new_np += nn - new_nn;
+
+ new_dp = (mp_ptr) dp + dn - (qn + 1);
+
+ if (qn == 2 - 1)
+ {
+ qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
+ }
+ else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, dh, new_dp[qn - 1]);
+ qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
+ }
+ else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
+ {
+ invert_pi1 (dinv, dh, new_dp[qn - 1]);
+ qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
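
Both main cases of mpn_div_q begin by normalizing an un-normalized divisor,
shifting D (and N) left until D's most significant bit is set, since the
sbpi1/dcpi1 quotient routines require a normalized divisor.  A standalone
sketch of that step on single 64-bit words, using GCC/Clang's
__builtin_clzll as a stand-in for count_leading_zeros; the snippet is
illustrative only, not GMP code:

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    uint64_t d = 0x0000123456789abcULL;  /* divisor, high bit clear */
    uint64_t n = 0x1111222233334444ULL;  /* dividend */

    int cnt = __builtin_clzll (d);       /* stand-in for count_leading_zeros */
    uint64_t d_norm = d << cnt;          /* normalized divisor: top bit set */

    /* Shifting N and D by the same amount leaves the quotient unchanged:
       floor((N << cnt) / (D << cnt)) == floor(N / D).  In div_q.c the bits
       shifted out of N go into the carry limb cy, and the dividend is
       extended by one limb when cy is nonzero.  */
    printf ("cnt = %d, d_norm = 0x%016llx\n",
            cnt, (unsigned long long) d_norm);
    printf ("floor(n/d) = %llu\n", (unsigned long long) (n / d));
    return 0;
  }
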