[Gmp-commit] /home/hgfiles/gmp: Tweaks to udiv_qr_3by2. Use it in sbpi1_div_q.
mercurial at gmplib.org
mercurial at gmplib.org
Tue Dec 22 11:43:53 CET 2009
details: /home/hgfiles/gmp/rev/38624b666c6c
changeset: 13179:38624b666c6c
user: Niels Möller <nisse at lysator.liu.se>
date: Tue Dec 22 11:43:50 2009 +0100
description:
Tweaks to udiv_qr_3by2. Use it in sbpi1_div_q.
diffstat:
ChangeLog | 14 +++++++
gmp-impl.h | 36 ++++++++++--------
mpn/generic/sbpi1_div_q.c | 80 ++----------------------------------------
mpn/generic/sbpi1_divappr_q.c | 7 +++-
4 files changed, 45 insertions(+), 92 deletions(-)
diffs (210 lines):
diff -r af80158a951d -r 38624b666c6c ChangeLog
--- a/ChangeLog Tue Dec 22 04:40:56 2009 +0100
+++ b/ChangeLog Tue Dec 22 11:43:50 2009 +0100
@@ -1,3 +1,17 @@
+2009-12-22 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/sbpi1_div_q.c (mpn_sbpi1_div_q): Use udiv_qr_3by2.
+ Intended to change nothing after preprocessing.
+
+ * mpn/generic/sbpi1_divappr_q.c (mpn_sbpi1_divappr_q): For the
+ last call to udiv_qr_3by2, avoid using memory locations as output
+ parameters, and revert to explicitly copying n1 and n0 to memory.
+
+ * gmp-impl.h (udiv_qr_3by2): Tweaked to expand to precisely the
+ same code as was used before the introduction of this macro.
+ Eliminated some local variables, instead do multiple updates to
+ the output parameters.
+
2009-12-22 Torbjorn Granlund <tege at gmplib.org>
* tests/mpn/t-bdiv.c: Get itch function calls right.
diff -r af80158a951d -r 38624b666c6c gmp-impl.h
--- a/gmp-impl.h Tue Dec 22 04:40:56 2009 +0100
+++ b/gmp-impl.h Tue Dec 22 11:43:50 2009 +0100
@@ -2726,33 +2726,37 @@
>= B^2 / 2 and n < d B. di is the inverse
floor ((B^3 - 1) / (d0 + d1 B)) - B.
+
+
+ NOTE: Output variables are updated multiple times. Only some inputs
+ and outputs may overlap.
*/
#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
do { \
- mp_limb_t _q1, _q0, _r1, _r0, _t1, _t0, _mask; \
- umul_ppmm (_q1, _q0, (n2), (dinv)); \
- add_ssaaaa (_q1, _q0, _q1, _q0, (n2), (n1)); \
+ mp_limb_t _q0, _t1, _t0, _mask; \
+ umul_ppmm ((q), _q0, (n2), (dinv)); \
+ add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
\
/* Compute the two most significant limbs of n - q'd */ \
- _r1 = (n1) - _q1 * (d1); \
- sub_ddmmss (_r1, _r0, _r1, (n0), (d1), (d0)); \
- umul_ppmm (_t1, _t0, _q1, (d0)); \
- sub_ddmmss (_r1, _r0, _r1, _r0, _t1, _t0); \
- _q1++; \
+ (r1) = (n1) - (d1) * (q); \
+ (r0) = (n0); \
+ sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
+ umul_ppmm (_t1, _t0, (d0), (q)); \
+ sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
+ (q)++; \
\
/* Conditionally adjust q and the remainders */ \
- _mask = - (mp_limb_t) (_r1 >= _q0); \
- _q1 += _mask; \
- add_ssaaaa (_r1, _r0, _r1, _r0, _mask & (d1), _mask & (d0)); \
- if (UNLIKELY (_r1 >= (d1))) \
+ _mask = - (mp_limb_t) ((r1) >= _q0); \
+ (q) += _mask; \
+ add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
+ if (UNLIKELY ((r1) >= (d1))) \
{ \
- if (_r1 > (d1) || _r0 >= (d0)) \
+ if ((r1) > (d1) || (r0) >= (d0)) \
{ \
- _q1++; \
- sub_ddmmss (_r1, _r0, _r1, _r0, (d1), (d0)); \
+ (q)++; \
+ sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
} \
} \
- (q) = _q1; (r1) = _r1; (r0) = _r0; \
} while (0)
#ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
diff -r af80158a951d -r 38624b666c6c mpn/generic/sbpi1_div_q.c
--- a/mpn/generic/sbpi1_div_q.c Tue Dec 22 04:40:56 2009 +0100
+++ b/mpn/generic/sbpi1_div_q.c Tue Dec 22 11:43:50 2009 +0100
@@ -40,9 +40,8 @@
mp_limb_t n1, n0;
mp_limb_t d1, d0;
mp_limb_t cy, cy1;
- mp_limb_t q, q0;
- mp_limb_t t1, t0;
- mp_limb_t mask, flag;
+ mp_limb_t q;
+ mp_limb_t flag;
mp_size_t dn_orig = dn;
mp_srcptr dp_orig = dp;
@@ -87,30 +86,7 @@
}
else
{
- umul_ppmm (q, q0, n1, dinv);
- add_ssaaaa (q, q0, q, q0, n1, np[1]);
-
- /* Compute the two most significant limbs of n - q'd */
- n1 = np[1] - d1 * q;
- n0 = np[0];
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- umul_ppmm (t1, t0, d0, q);
- sub_ddmmss (n1, n0, n1, n0, t1, t0);
- q++;
-
- /* Conditionally adjust q and the remainders */
- mask = - (mp_limb_t) (n1 >= q0);
- q += mask;
- add_ssaaaa (n1, n0, n1, n0, mask & d1, mask & d0);
-
- if (UNLIKELY (n1 >= d1))
- {
- if (n1 > d1 || n0 >= d0)
- {
- q++;
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- }
- }
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
cy = mpn_submul_1 (np - dn, dp, dn, q);
@@ -156,30 +132,7 @@
}
else
{
- umul_ppmm (q, q0, n1, dinv);
- add_ssaaaa (q, q0, q, q0, n1, np[1]);
-
- /* Compute the two most significant limbs of n - q'd */
- n1 = np[1] - d1 * q;
- n0 = np[0];
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- umul_ppmm (t1, t0, d0, q);
- sub_ddmmss (n1, n0, n1, n0, t1, t0);
- q++;
-
- /* Conditionally adjust q and the remainders */
- mask = - (mp_limb_t) (n1 >= q0);
- q += mask;
- add_ssaaaa (n1, n0, n1, n0, mask & d1, mask & d0);
-
- if (UNLIKELY (n1 >= d1))
- {
- if (n1 > d1 || n0 >= d0)
- {
- q++;
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- }
- }
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
cy = mpn_submul_1 (np - dn, dp, dn, q);
@@ -223,30 +176,7 @@
}
else
{
- umul_ppmm (q, q0, n1, dinv);
- add_ssaaaa (q, q0, q, q0, n1, np[1]);
-
- /* Compute the two most significant limbs of n - q'd */
- n1 = np[1] - d1 * q;
- n0 = np[0];
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- umul_ppmm (t1, t0, d0, q);
- sub_ddmmss (n1, n0, n1, n0, t1, t0);
- q++;
-
- /* Conditionally adjust q and the remainders */
- mask = - (mp_limb_t) (n1 >= q0);
- q += mask;
- add_ssaaaa (n1, n0, n1, n0, mask & d1, mask & d0);
-
- if (UNLIKELY (n1 >= d1))
- {
- if (n1 > d1 || n0 >= d0)
- {
- q++;
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- }
- }
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
np[0] = n0;
np[1] = n1;
diff -r af80158a951d -r 38624b666c6c mpn/generic/sbpi1_divappr_q.c
--- a/mpn/generic/sbpi1_divappr_q.c Tue Dec 22 04:40:56 2009 +0100
+++ b/mpn/generic/sbpi1_divappr_q.c Tue Dec 22 11:43:50 2009 +0100
@@ -174,11 +174,16 @@
}
else
{
- udiv_qr_3by2 (q, np[1], np[0], n1, np[1], np[0], d1, d0, dinv);
+ udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
+
+ np[1] = n1;
+ np[0] = n0;
}
*--qp = q;
}
+ ASSERT_ALWAYS (np[1] == n1);
+
return qh;
}
More information about the gmp-commit
mailing list