[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Jan 23 23:37:38 CET 2010


details:   /home/hgfiles/gmp/rev/c9563dc13f78
changeset: 13392:c9563dc13f78
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Jan 23 16:16:54 2010 +0100
description:
Rewrite to use mpn_div_q.

details:   /home/hgfiles/gmp/rev/6134217773ba
changeset: 13393:6134217773ba
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Jan 23 23:15:40 2010 +0100
description:
(__GNU_MP_VERSION_PATCHLEVEL): Bump.  (__GMP_MP_RELEASE): New macro.

details:   /home/hgfiles/gmp/rev/b82342aa5abb
changeset: 13394:b82342aa5abb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Jan 23 23:17:48 2010 +0100
description:
Whitespace cleanup.

details:   /home/hgfiles/gmp/rev/a42852f22ebd
changeset: 13395:a42852f22ebd
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Jan 23 23:37:29 2010 +0100
description:
Add FFT_TABLE3 tables for more machines.

diffstat:

 ChangeLog                            |    7 +
 gmp-h.in                             |    3 +-
 mpf/div.c                            |   76 ++++-------
 mpn/alpha/ev6/gmp-mparam.h           |   94 +++++++++++++-
 mpn/generic/mulmod_bnm1.c            |    4 +-
 mpn/generic/sqrmod_bnm1.c            |    2 +-
 mpn/pa32/hppa2_0/gmp-mparam.h        |   71 ++++++++++-
 mpn/powerpc32/gmp-mparam.h           |   91 ++++++++++++++-
 mpn/powerpc64/mode64/p5/gmp-mparam.h |  114 +++++++++++++++++-
 mpn/x86/k6/gmp-mparam.h              |   69 ++++++++++-
 mpn/x86/p6/sse2/gmp-mparam.h         |   88 ++++++++++++-
 mpn/x86/pentium/mmx/gmp-mparam.h     |   93 +++++++++++---
 mpn/x86_64/gmp-mparam.h              |    8 +-
 mpn/x86_64/pentium4/gmp-mparam.h     |  216 ++++++++++++++++++++++++++--------
 tests/mpz/t-gcd.c                    |    2 +-
 15 files changed, 760 insertions(+), 178 deletions(-)

diffs (truncated from 1245 to 300 lines):

diff -r fde439f60722 -r a42852f22ebd ChangeLog
--- a/ChangeLog	Thu Jan 21 21:55:35 2010 +0100
+++ b/ChangeLog	Sat Jan 23 23:37:29 2010 +0100
@@ -1,3 +1,10 @@
+2010-01-23  Torbjorn Granlund  <tege at gmplib.org>
+
+	* gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+	(__GMP_MP_RELEASE): New macro.
+
+	* mpf/div.c: Rewrite to use mpn_div_q.
+
 2010-01-21  Torbjorn Granlund  <tege at gmplib.org>
 
 	* Add FFT_TABLE3 tables for a basic set of machines.
diff -r fde439f60722 -r a42852f22ebd gmp-h.in
--- a/gmp-h.in	Thu Jan 21 21:55:35 2010 +0100
+++ b/gmp-h.in	Sat Jan 23 23:37:29 2010 +0100
@@ -2273,7 +2273,8 @@
 /* Major version number is the value of __GNU_MP__ too, above and in mp.h. */
 #define __GNU_MP_VERSION 5
 #define __GNU_MP_VERSION_MINOR 0
-#define __GNU_MP_VERSION_PATCHLEVEL 0
+#define __GNU_MP_VERSION_PATCHLEVEL 1
+#define __GMP_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL)
 
 #define __GMP_H__
 #endif /* __GMP_H__ */
diff -r fde439f60722 -r a42852f22ebd mpf/div.c
--- a/mpf/div.c	Thu Jan 21 21:55:35 2010 +0100
+++ b/mpf/div.c	Sat Jan 23 23:37:29 2010 +0100
@@ -1,6 +1,6 @@
 /* mpf_div -- Divide two floats.
 
-Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
@@ -18,10 +18,8 @@
 You should have received a copy of the GNU Lesser General Public License
 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#include <stdio.h>  /* for NULL */
 #include "gmp.h"
 #include "gmp-impl.h"
-#include "longlong.h"
 
 
 /* Not done:
@@ -42,105 +40,87 @@
    overlap between quotient and dividend in mpn_tdiv_qr, then we can avoid
    copying up,usize.  This would only arise from a prec reduced with
    mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if
-   it could be worked into the copy_u decision cleanly.
-
-   Future:
-
-   If/when mpn_tdiv_qr supports its qxn parameter we can use that instead of
-   padding u with zeros in temporary space.
-
-   If/when a quotient-only division exists it can be used here immediately.
-   remp is only to satisfy mpn_tdiv_qr, the remainder is not used.  */
+   it could be worked into the copy_u decision cleanly.  */
 
 void
 mpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
 {
   mp_srcptr up, vp;
-  mp_ptr rp, remp, tp, new_vp;
-  mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros, copy_v_size;
+  mp_ptr rp, tp, new_vp;
+  mp_size_t usize, vsize, rsize, prospective_rsize, tsize, zeros;
   mp_size_t sign_quotient, prec, high_zero, chop;
   mp_exp_t rexp;
   int copy_u;
   TMP_DECL;
 
-  usize = u->_mp_size;
-  vsize = v->_mp_size;
+  usize = SIZ(u);
+  vsize = SIZ(v);
   sign_quotient = usize ^ vsize;
   usize = ABS (usize);
   vsize = ABS (vsize);
-  prec = r->_mp_prec;
+  prec = PREC(r);
 
   if (vsize == 0)
     DIVIDE_BY_ZERO;
 
   if (usize == 0)
     {
-      r->_mp_size = 0;
-      r->_mp_exp = 0;
+      SIZ(r) = 0;
+      EXP(r) = 0;
       return;
     }
 
   TMP_MARK;
-  rexp = u->_mp_exp - v->_mp_exp + 1;
+  rexp = EXP(u) - EXP(v) + 1;
 
-  rp = r->_mp_d;
-  up = u->_mp_d;
-  vp = v->_mp_d;
+  rp = PTR(r);
+  up = PTR(u);
+  vp = PTR(v);
 
   prospective_rsize = usize - vsize + 1; /* quot from using given u,v sizes */
-  rsize = prec + 1;                      /* desired quot */
+  rsize = prec + 1;			 /* desired quot */
 
-  zeros = rsize - prospective_rsize;     /* padding u to give rsize */
-  copy_u = (zeros > 0 || rp == up);      /* copy u if overlap or padding */
+  zeros = rsize - prospective_rsize;	 /* padding u to give rsize */
+  copy_u = (zeros > 0 || rp == up);	 /* copy u if overlap or padding */
 
-  chop = MAX (-zeros, 0);                /* negative zeros means shorten u */
+  chop = MAX (-zeros, 0);		 /* negative zeros means shorten u */
   up += chop;
   usize -= chop;
-  zeros += chop;                         /* now zeros >= 0 */
+  zeros += chop;			 /* now zeros >= 0 */
 
-  tsize = usize + zeros;                 /* size for possible copy of u */
-
-  if (WANT_TMP_DEBUG)
-    {
-      /* separate blocks, for malloc debugging */
-      remp = TMP_ALLOC_LIMBS (vsize);
-      tp = (copy_u ? TMP_ALLOC_LIMBS (tsize) : NULL);
-      new_vp = (rp == vp ? TMP_ALLOC_LIMBS (vsize) : NULL);
-    }
-  else
-    {
-      /* one block with conditionalized size, for efficiency */
-      copy_v_size = (rp == vp ? vsize : 0);
-      remp = TMP_ALLOC_LIMBS (vsize + copy_v_size + (copy_u ? tsize : 0));
-      new_vp = remp + vsize;
-      tp = new_vp + copy_v_size;
-    }
+  tsize = usize + zeros;		 /* size for possible copy of u */
 
   /* copy and possibly extend u if necessary */
   if (copy_u)
     {
+      tp = TMP_ALLOC_LIMBS (tsize + 1);	/* +1 for mpn_div_q's scratch needs */
       MPN_ZERO (tp, zeros);
       MPN_COPY (tp+zeros, up, usize);
       up = tp;
       usize = tsize;
     }
+  else
+    {
+      tp = TMP_ALLOC_LIMBS (usize + 1);
+    }
 
   /* ensure divisor doesn't overlap quotient */
   if (rp == vp)
     {
+      new_vp = TMP_ALLOC_LIMBS (vsize);
       MPN_COPY (new_vp, vp, vsize);
       vp = new_vp;
     }
 
   ASSERT (usize-vsize+1 == rsize);
-  mpn_tdiv_qr (rp, remp, (mp_size_t) 0, up, usize, vp, vsize);
+  mpn_div_q (rp, up, usize, vp, vsize, tp);
 
   /* strip possible zero high limb */
   high_zero = (rp[rsize-1] == 0);
   rsize -= high_zero;
   rexp -= high_zero;
 
-  r->_mp_size = sign_quotient >= 0 ? rsize : -rsize;
-  r->_mp_exp = rexp;
+  SIZ(r) = sign_quotient >= 0 ? rsize : -rsize;
+  EXP(r) = rexp;
   TMP_FREE;
 }
diff -r fde439f60722 -r a42852f22ebd mpn/alpha/ev6/gmp-mparam.h
--- a/mpn/alpha/ev6/gmp-mparam.h	Thu Jan 21 21:55:35 2010 +0100
+++ b/mpn/alpha/ev6/gmp-mparam.h	Sat Jan 23 23:37:29 2010 +0100
@@ -61,13 +61,91 @@
 #define MULMOD_BNM1_THRESHOLD               20
 #define SQRMOD_BNM1_THRESHOLD               23
 
-#define MUL_FFT_TABLE  { 368, 864, 1984, 3840, 9216, 20480, 81920, 327680, 0 }
-#define MUL_FFT_MODF_THRESHOLD             464
-#define MUL_FFT_THRESHOLD                 7808
+#define MUL_FFT_MODF_THRESHOLD             480  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    480, 5}, {     18, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
+    {     19, 7}, {     10, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     14, 6}, {     29, 7}, {     25, 8}, \
+    {     13, 7}, {     29, 8}, {     15, 7}, {     32, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     55, 9}, {     31, 8}, {     63, 9}, {     35, 8}, \
+    {     71, 9}, {     39,10}, {     23, 9}, {     55,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     83,10}, \
+    {     47, 9}, {     99,10}, {     55,11}, {     31,10}, \
+    {     79,11}, {     47,10}, {    103,12}, {     31,11}, \
+    {     63,10}, {    135,11}, {     79,10}, {    167,11}, \
+    {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
+    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,12}, \
+    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,11}, {    895,14}, {    127,13}, \
+    {    255,12}, {    543,11}, {   1087,12}, {    575,11}, \
+    {   1151,12}, {    607,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    703,13}, {    383,12}, {    831,13}, \
+    {    447,12}, {    927,14}, {    255,13}, {    511,12}, \
+    {   1087,13}, {    575,12}, {   1151,13}, {    639,12}, \
+    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
+    {    767,15}, {    255,14}, {    511,13}, {   1215,14}, \
+    {    639,13}, {   1407,14}, {    767,13}, {   1663,14}, \
+    {    895,13}, {   1791,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 151
+#define MUL_FFT_THRESHOLD                 7552
 
-#define SQR_FFT_TABLE  { 432, 864, 1856, 4864, 9216, 20480, 81920, 327680, 0 }
-#define SQR_FFT_MODF_THRESHOLD             408
-#define SQR_FFT_THRESHOLD                 4736
+#define SQR_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    476, 5}, {     19, 6}, {     10, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
+    {     29, 7}, {     28, 8}, {     15, 7}, {     31, 8}, \
+    {     29, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
+    {     15, 9}, {     35, 8}, {     71, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {    103,12}, {     31,11}, {     63,10}, \
+    {    135,11}, {     79,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    335,10}, \
+    {    671,11}, {    351,10}, {    703,11}, {    367,10}, \
+    {    735,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,10}, {    831,11}, {    447,10}, {    895,13}, \
+    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
+    {    575,10}, {   1151,11}, {    607,12}, {    319,11}, \
+    {    671,12}, {    351,11}, {    735,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
+    {    447,11}, {    895,12}, {    479,14}, {    127,13}, \
+    {    255,12}, {    575,11}, {   1151,12}, {    607,13}, \
+    {    319,12}, {    735,13}, {    383,12}, {    831,13}, \
+    {    447,12}, {    959,14}, {    255,13}, {    511,12}, \
+    {   1023,13}, {    575,12}, {   1215,13}, {    639,12}, \
+    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
+    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
+    {    895,12}, {   1791,15}, {    255,14}, {    511,13}, \
+    {   1215,14}, {    639,13}, {   1407,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1791,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 152
+#define SQR_FFT_THRESHOLD                 5248
 
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
 #define MULLO_DC_THRESHOLD                 130
@@ -101,7 +179,3 @@
 #define GET_STR_PRECOMPUTE_THRESHOLD        19
 #define SET_STR_DC_THRESHOLD              3754
 #define SET_STR_PRECOMPUTE_THRESHOLD      8097
-
-#define MUL_FFT_TABLE2 {{1,4}, {273,5}, {609,6}, {1729,7}, {4097,8}, {8961,9}, {11777,8}, {12545,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {26113,10}, {31745,9}, {35841,10}, {39937,9}, {42497,10}, {48129,9}, {50689,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {105473,12}, {126977,11}, {129025,10}, {143361,11}, {161793,10}, {171009,11}, {227329,12}, {258049,11}, {292865,10}, {310273,11}, {326657,12}, {389121,11}, {391169,10}, {394241,11}, {397313,10}, {408577,11}, {423937,13}, {516097,12}, {520193,11}, {620545,12}, {651265,11}, {751617,12}, {782337,11}, {915457,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,11}, {1439745,13}, {1564673,12}, {1568769,11}, {1636353,12}, {1961985,14}, {MP_SIZE_T_MAX, 0}}
-
-#define SQR_FFT_TABLE2 {{1,4}, {305,5}, {801,6}, {1729,7}, {3713,8}, {3841,7}, {4097,8}, {10497,9}, {11777,8}, {12545,9}, {20481,10}, {23553,9}, {26113,11}, {30721,10}, {31745,9}, {34305,10}, {48641,9}, {50177,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {105473,12}, {126977,11}, {129025,10}, {140289,11}, {227329,12}, {258049,11}, {260097,10}, {261121,11}, {266241,10}, {275457,11}, {292865,10}, {310785,11}, {325633,10}, {326657,12}, {389121,11}, {391169,10}, {408577,11}, {409601,13}, {516097,12}, {520193,11}, {621569,12}, {651265,11}, {751617,12}, {913409,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1384449,11}, {1388545,12}, {1425409,11}, {1429505,12}, {1437697,11}, {1505281,13}, {1564673,12}, {1568769,11}, {1636353,12}, {1961985,14}, {MP_SIZE_T_MAX, 0}}
diff -r fde439f60722 -r a42852f22ebd mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c	Thu Jan 21 21:55:35 2010 +0100
+++ b/mpn/generic/mulmod_bnm1.c	Sat Jan 23 23:37:29 2010 +0100
@@ -125,7 +125,7 @@
	 the code slightly simpler. If desired, we could avoid this
 	 restriction by initially halving rn as long as rn is even and
 	 an + bn <= rn/2. */
-      
+


More information about the gmp-commit mailing list