[Gmp-commit] /home/hgfiles/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Jan 23 23:37:38 CET 2010
details: /home/hgfiles/gmp/rev/c9563dc13f78
changeset: 13392:c9563dc13f78
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 23 16:16:54 2010 +0100
description:
Rewrite to use mpn_div_q.
details: /home/hgfiles/gmp/rev/6134217773ba
changeset: 13393:6134217773ba
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 23 23:15:40 2010 +0100
description:
(__GNU_MP_VERSION_PATCHLEVEL): Bump. (__GMP_MP_RELEASE): New macro.
details: /home/hgfiles/gmp/rev/b82342aa5abb
changeset: 13394:b82342aa5abb
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 23 23:17:48 2010 +0100
description:
Whitespace cleanup.
details: /home/hgfiles/gmp/rev/a42852f22ebd
changeset: 13395:a42852f22ebd
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 23 23:37:29 2010 +0100
description:
Add FFT_TABLE3 tables for more machines.
diffstat:
ChangeLog | 7 +
gmp-h.in | 3 +-
mpf/div.c | 76 ++++-------
mpn/alpha/ev6/gmp-mparam.h | 94 +++++++++++++-
mpn/generic/mulmod_bnm1.c | 4 +-
mpn/generic/sqrmod_bnm1.c | 2 +-
mpn/pa32/hppa2_0/gmp-mparam.h | 71 ++++++++++-
mpn/powerpc32/gmp-mparam.h | 91 ++++++++++++++-
mpn/powerpc64/mode64/p5/gmp-mparam.h | 114 +++++++++++++++++-
mpn/x86/k6/gmp-mparam.h | 69 ++++++++++-
mpn/x86/p6/sse2/gmp-mparam.h | 88 ++++++++++++-
mpn/x86/pentium/mmx/gmp-mparam.h | 93 +++++++++++---
mpn/x86_64/gmp-mparam.h | 8 +-
mpn/x86_64/pentium4/gmp-mparam.h | 216 ++++++++++++++++++++++++++--------
tests/mpz/t-gcd.c | 2 +-
15 files changed, 760 insertions(+), 178 deletions(-)
diffs (truncated from 1245 to 300 lines):
diff -r fde439f60722 -r a42852f22ebd ChangeLog
--- a/ChangeLog Thu Jan 21 21:55:35 2010 +0100
+++ b/ChangeLog Sat Jan 23 23:37:29 2010 +0100
@@ -1,3 +1,10 @@
+2010-01-23 Torbjorn Granlund <tege at gmplib.org>
+
+ * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+ (__GMP_MP_RELEASE): New macro.
+
+ * mpf/div.c: Rewrite to use mpn_div_q.
+
2010-01-21 Torbjorn Granlund <tege at gmplib.org>
* Add FFT_TABLE3 tables for a basic set of machines.
diff -r fde439f60722 -r a42852f22ebd gmp-h.in
--- a/gmp-h.in Thu Jan 21 21:55:35 2010 +0100
+++ b/gmp-h.in Sat Jan 23 23:37:29 2010 +0100
@@ -2273,7 +2273,8 @@
/* Major version number is the value of __GNU_MP__ too, above and in mp.h. */
#define __GNU_MP_VERSION 5
#define __GNU_MP_VERSION_MINOR 0
-#define __GNU_MP_VERSION_PATCHLEVEL 0
+#define __GNU_MP_VERSION_PATCHLEVEL 1
+#define __GMP_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL)
#define __GMP_H__
#endif /* __GMP_H__ */
diff -r fde439f60722 -r a42852f22ebd mpf/div.c
--- a/mpf/div.c Thu Jan 21 21:55:35 2010 +0100
+++ b/mpf/div.c Sat Jan 23 23:37:29 2010 +0100
@@ -1,6 +1,6 @@
/* mpf_div -- Divide two floats.
-Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
@@ -18,10 +18,8 @@
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
-#include "longlong.h"
/* Not done:
@@ -42,105 +40,87 @@
overlap between quotient and dividend in mpn_tdiv_qr, then we can avoid
copying up,usize. This would only arise from a prec reduced with
mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if
- it could be worked into the copy_u decision cleanly.
-
- Future:
-
- If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of
- padding u with zeros in temporary space.
-
- If/when a quotient-only division exists it can be used here immediately.
- remp is only to satisfy mpn_tdiv_qr, the remainder is not used. */
+ it could be worked into the copy_u decision cleanly. */
void
mpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
{
mp_srcptr up, vp;
- mp_ptr rp, remp, tp, new_vp;
- mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros, copy_v_size;
+ mp_ptr rp, tp, new_vp;
+ mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros;
mp_size_t sign_quotient, prec, high_zero, chop;
mp_exp_t rexp;
int copy_u;
TMP_DECL;
- usize = u->_mp_size;
- vsize = v->_mp_size;
+ usize = SIZ(u);
+ vsize = SIZ(v);
sign_quotient = usize ^ vsize;
usize = ABS (usize);
vsize = ABS (vsize);
- prec = r->_mp_prec;
+ prec = PREC(r);
if (vsize == 0)
DIVIDE_BY_ZERO;
if (usize == 0)
{
- r->_mp_size = 0;
- r->_mp_exp = 0;
+ SIZ(r) = 0;
+ EXP(r) = 0;
return;
}
TMP_MARK;
- rexp = u->_mp_exp - v->_mp_exp + 1;
+ rexp = EXP(u) - EXP(v) + 1;
- rp = r->_mp_d;
- up = u->_mp_d;
- vp = v->_mp_d;
+ rp = PTR(r);
+ up = PTR(u);
+ vp = PTR(v);
prospective_rsize = usize - vsize + 1; /* quot from using given u,v sizes */
- rsize = prec + 1; /* desired quot */
+ rsize = prec + 1; /* desired quot */
- zeros = rsize - prospective_rsize; /* padding u to give rsize */
- copy_u = (zeros > 0 || rp == up); /* copy u if overlap or padding */
+ zeros = rsize - prospective_rsize; /* padding u to give rsize */
+ copy_u = (zeros > 0 || rp == up); /* copy u if overlap or padding */
- chop = MAX (-zeros, 0); /* negative zeros means shorten u */
+ chop = MAX (-zeros, 0); /* negative zeros means shorten u */
up += chop;
usize -= chop;
- zeros += chop; /* now zeros >= 0 */
+ zeros += chop; /* now zeros >= 0 */
- tsize = usize + zeros; /* size for possible copy of u */
-
- if (WANT_TMP_DEBUG)
- {
- /* separate blocks, for malloc debugging */
- remp = TMP_ALLOC_LIMBS (vsize);
- tp = (copy_u ? TMP_ALLOC_LIMBS (tsize) : NULL);
- new_vp = (rp == vp ? TMP_ALLOC_LIMBS (vsize) : NULL);
- }
- else
- {
- /* one block with conditionalized size, for efficiency */
- copy_v_size = (rp == vp ? vsize : 0);
- remp = TMP_ALLOC_LIMBS (vsize + copy_v_size + (copy_u ? tsize : 0));
- new_vp = remp + vsize;
- tp = new_vp + copy_v_size;
- }
+ tsize = usize + zeros; /* size for possible copy of u */
/* copy and possibly extend u if necessary */
if (copy_u)
{
+ tp = TMP_ALLOC_LIMBS (tsize + 1); /* +1 for mpn_div_q's scratch needs */
MPN_ZERO (tp, zeros);
MPN_COPY (tp+zeros, up, usize);
up = tp;
usize = tsize;
}
+ else
+ {
+ tp = TMP_ALLOC_LIMBS (usize + 1);
+ }
/* ensure divisor doesn't overlap quotient */
if (rp == vp)
{
+ new_vp = TMP_ALLOC_LIMBS (vsize);
MPN_COPY (new_vp, vp, vsize);
vp = new_vp;
}
ASSERT (usize-vsize+1 == rsize);
- mpn_tdiv_qr (rp, remp, (mp_size_t) 0, up, usize, vp, vsize);
+ mpn_div_q (rp, up, usize, vp, vsize, tp);
/* strip possible zero high limb */
high_zero = (rp[rsize-1] == 0);
rsize -= high_zero;
rexp -= high_zero;
- r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;
- r->_mp_exp = rexp;
+ SIZ(r) = sign_quotient >= 0 ? rsize : -rsize;
+ EXP(r) = rexp;
TMP_FREE;
}
diff -r fde439f60722 -r a42852f22ebd mpn/alpha/ev6/gmp-mparam.h
--- a/mpn/alpha/ev6/gmp-mparam.h Thu Jan 21 21:55:35 2010 +0100
+++ b/mpn/alpha/ev6/gmp-mparam.h Sat Jan 23 23:37:29 2010 +0100
@@ -61,13 +61,91 @@
#define MULMOD_BNM1_THRESHOLD 20
#define SQRMOD_BNM1_THRESHOLD 23
-#define MUL_FFT_TABLE { 368, 864, 1984, 3840, 9216, 20480, 81920, 327680, 0 }
-#define MUL_FFT_MODF_THRESHOLD 464
-#define MUL_FFT_THRESHOLD 7808
+#define MUL_FFT_MODF_THRESHOLD 480 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 480, 5}, { 18, 6}, { 10, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 12, 5}, { 25, 6}, \
+ { 19, 7}, { 10, 6}, { 25, 7}, { 13, 6}, \
+ { 27, 7}, { 14, 6}, { 29, 7}, { 25, 8}, \
+ { 13, 7}, { 29, 8}, { 15, 7}, { 32, 8}, \
+ { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
+ { 29, 9}, { 15, 8}, { 37, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 55, 9}, { 31, 8}, { 63, 9}, { 35, 8}, \
+ { 71, 9}, { 39,10}, { 23, 9}, { 55,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 83,10}, \
+ { 47, 9}, { 99,10}, { 55,11}, { 31,10}, \
+ { 79,11}, { 47,10}, { 103,12}, { 31,11}, \
+ { 63,10}, { 135,11}, { 79,10}, { 167,11}, \
+ { 95,10}, { 191,11}, { 111,12}, { 63,11}, \
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319,12}, { 95,11}, \
+ { 191,10}, { 383,11}, { 207,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,11}, { 447,13}, { 127,12}, { 255,11}, \
+ { 543,12}, { 287,11}, { 575,10}, { 1151,12}, \
+ { 319,11}, { 639,12}, { 351,11}, { 703,13}, \
+ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
+ { 831,12}, { 447,11}, { 895,14}, { 127,13}, \
+ { 255,12}, { 543,11}, { 1087,12}, { 575,11}, \
+ { 1151,12}, { 607,13}, { 319,12}, { 671,11}, \
+ { 1343,12}, { 703,13}, { 383,12}, { 831,13}, \
+ { 447,12}, { 927,14}, { 255,13}, { 511,12}, \
+ { 1087,13}, { 575,12}, { 1151,13}, { 639,12}, \
+ { 1279,13}, { 703,12}, { 1407,14}, { 383,13}, \
+ { 767,15}, { 255,14}, { 511,13}, { 1215,14}, \
+ { 639,13}, { 1407,14}, { 767,13}, { 1663,14}, \
+ { 895,13}, { 1791,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 151
+#define MUL_FFT_THRESHOLD 7552
-#define SQR_FFT_TABLE { 432, 864, 1856, 4864, 9216, 20480, 81920, 327680, 0 }
-#define SQR_FFT_MODF_THRESHOLD 408
-#define SQR_FFT_THRESHOLD 4736
+#define SQR_FFT_MODF_THRESHOLD 476 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 476, 5}, { 19, 6}, { 10, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 27, 7}, { 14, 6}, \
+ { 29, 7}, { 28, 8}, { 15, 7}, { 31, 8}, \
+ { 29, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
+ { 15, 9}, { 35, 8}, { 71, 9}, { 39,10}, \
+ { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
+ { 95,10}, { 55,11}, { 31,10}, { 79,11}, \
+ { 47,10}, { 103,12}, { 31,11}, { 63,10}, \
+ { 135,11}, { 79,10}, { 159, 9}, { 319,11}, \
+ { 95,10}, { 191, 9}, { 383,11}, { 111,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271,11}, { 143,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319,12}, { 95,11}, \
+ { 191,10}, { 383, 9}, { 767,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
+ { 159,11}, { 319,10}, { 639,11}, { 335,10}, \
+ { 671,11}, { 351,10}, { 703,11}, { 367,10}, \
+ { 735,12}, { 191,11}, { 383,10}, { 767,11}, \
+ { 415,10}, { 831,11}, { 447,10}, { 895,13}, \
+ { 127,12}, { 255,11}, { 543,12}, { 287,11}, \
+ { 575,10}, { 1151,11}, { 607,12}, { 319,11}, \
+ { 671,12}, { 351,11}, { 735,13}, { 191,12}, \
+ { 383,11}, { 767,12}, { 415,11}, { 831,12}, \
+ { 447,11}, { 895,12}, { 479,14}, { 127,13}, \
+ { 255,12}, { 575,11}, { 1151,12}, { 607,13}, \
+ { 319,12}, { 735,13}, { 383,12}, { 831,13}, \
+ { 447,12}, { 959,14}, { 255,13}, { 511,12}, \
+ { 1023,13}, { 575,12}, { 1215,13}, { 639,12}, \
+ { 1279,13}, { 703,12}, { 1407,14}, { 383,13}, \
+ { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \
+ { 895,12}, { 1791,15}, { 255,14}, { 511,13}, \
+ { 1215,14}, { 639,13}, { 1407,14}, { 767,13}, \
+ { 1663,14}, { 895,13}, { 1791,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 152
+#define SQR_FFT_THRESHOLD 5248
#define MULLO_BASECASE_THRESHOLD 0 /* always */
#define MULLO_DC_THRESHOLD 130
@@ -101,7 +179,3 @@
#define GET_STR_PRECOMPUTE_THRESHOLD 19
#define SET_STR_DC_THRESHOLD 3754
#define SET_STR_PRECOMPUTE_THRESHOLD 8097
-
-#define MUL_FFT_TABLE2 {{1,4}, {273,5}, {609,6}, {1729,7}, {4097,8}, {8961,9}, {11777,8}, {12545,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {26113,10}, {31745,9}, {35841,10}, {39937,9}, {42497,10}, {48129,9}, {50689,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {105473,12}, {126977,11}, {129025,10}, {143361,11}, {161793,10}, {171009,11}, {227329,12}, {258049,11}, {292865,10}, {310273,11}, {326657,12}, {389121,11}, {391169,10}, {394241,11}, {397313,10}, {408577,11}, {423937,13}, {516097,12}, {520193,11}, {620545,12}, {651265,11}, {751617,12}, {782337,11}, {915457,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,11}, {1439745,13}, {1564673,12}, {1568769,11}, {1636353,12}, {1961985,14}, {MP_SIZE_T_MAX, 0}}
-
-#define SQR_FFT_TABLE2 {{1,4}, {305,5}, {801,6}, {1729,7}, {3713,8}, {3841,7}, {4097,8}, {10497,9}, {11777,8}, {12545,9}, {20481,10}, {23553,9}, {26113,11}, {30721,10}, {31745,9}, {34305,10}, {48641,9}, {50177,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {105473,12}, {126977,11}, {129025,10}, {140289,11}, {227329,12}, {258049,11}, {260097,10}, {261121,11}, {266241,10}, {275457,11}, {292865,10}, {310785,11}, {325633,10}, {326657,12}, {389121,11}, {391169,10}, {408577,11}, {409601,13}, {516097,12}, {520193,11}, {621569,12}, {651265,11}, {751617,12}, {913409,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1384449,11}, {1388545,12}, {1425409,11}, {1429505,12}, {1437697,11}, {1505281,13}, {1564673,12}, {1568769,11}, {1636353,12}, {1961985,14}, {MP_SIZE_T_MAX, 0}}
diff -r fde439f60722 -r a42852f22ebd mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c Thu Jan 21 21:55:35 2010 +0100
+++ b/mpn/generic/mulmod_bnm1.c Sat Jan 23 23:37:29 2010 +0100
@@ -125,7 +125,7 @@
the code slightly simpler. If desired, we could avoid this
restriction by initially halving rn as long as rn is even and
an + bn <= rn/2. */
-
+
More information about the gmp-commit mailing list