[Gmp-commit] /home/hgfiles/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Dec 22 01:41:07 CET 2009


details:   /home/hgfiles/gmp/rev/8c87e5485c71
changeset: 13169:8c87e5485c71
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 21 22:02:52 2009 +0100
description:
Fix fixup code to work for qn = 0.

details:   /home/hgfiles/gmp/rev/b75c1e49113f
changeset: 13170:b75c1e49113f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 21 22:09:09 2009 +0100
description:
Merge: Fix fixup code to work for qn = 0.

diffstat:

 ChangeLog                 |   9 +++++++++
 gmp-impl.h                |  27 +++++++++++++++++++++++++--
 mpn/generic/sbpi1_div_q.c |  12 ++++++------
 mpn/generic/toom6h_mul.c  |  22 ++++------------------
 tune/common.c             |  10 ++++++++++
 tune/speed.c              |   2 ++
 tune/speed.h              |  13 +++++++++++++
 7 files changed, 69 insertions(+), 26 deletions(-)

diffs (252 lines):

diff -r fb0247ca2316 -r b75c1e49113f ChangeLog
--- a/ChangeLog	Mon Dec 21 21:57:23 2009 +0100
+++ b/ChangeLog	Mon Dec 21 22:09:09 2009 +0100
@@ -1,5 +1,7 @@
 2009-12-21  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/generic/sbpi1_div_q.c: Fix fixup code for to work for qn = 0.
+
 	* mpn/generic/dcpi1_divappr_q.c: Handle qn = 1 and qn = 2 for initial
 	quotient block (code block copied from dcpi1_div_qr.c).
 
@@ -31,6 +33,13 @@
 
 2009-12-21  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
+	* gmp-impl.h (mpn_toom6h_mul_itch): New inline function.
+	(MUL_TOOM6H_THRESHOLD): Default value.
+	(SQR_TOOM6_THRESHOLD): Default value.
+	* mpn/generic/toom6h_mul.c: Remove definitions moved to gmp-impl.h.
+	* tune/common.c, tune/speed.c, tune/speed.h: Support for measuring
+	mpn_toom6h_mul and mpn_toom6_sqr speed.
+
 	* mpn/generic/toom63_mul.c: Remove unused TMP_*.
 
 	* mpn/generic/toom_eval_pm2rexp.c: New file.
diff -r fb0247ca2316 -r b75c1e49113f gmp-impl.h
--- a/gmp-impl.h	Mon Dec 21 21:57:23 2009 +0100
+++ b/gmp-impl.h	Mon Dec 21 22:09:09 2009 +0100
@@ -1048,6 +1048,9 @@
 #define MPN_TOOM44_MUL_MINSIZE   30
 #define MPN_TOOM4_SQR_MINSIZE    30
 
+#define MPN_TOOM6H_MUL_MINSIZE   46
+#define MPN_TOOM6_SQR_MINSIZE    46
+
 #define MPN_TOOM32_MUL_MINSIZE   10
 #define MPN_TOOM42_MUL_MINSIZE   10
 #define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
@@ -1137,8 +1140,8 @@
 #define   mpn_toom6h_mul __MPN(toom6h_mul)
 __GMP_DECLSPEC void      mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
 
-#define   mpn_toom6h_sqr __MPN(toom6h_sqr)
-__GMP_DECLSPEC void      mpn_toom6h_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_toom6_sqr __MPN(toom6_sqr)
+__GMP_DECLSPEC void      mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 
 #define   mpn_fft_best_k __MPN(fft_best_k)
 __GMP_DECLSPEC int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
@@ -1669,6 +1672,14 @@
 #define MUL_TOOM44_THRESHOLD            300
 #endif
 
+#ifndef MUL_TOOM6H_THRESHOLD
+#define MUL_TOOM6H_THRESHOLD            350
+#endif
+
+#ifndef SQR_TOOM6_THRESHOLD
+#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
+#endif
+
 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
 #endif
@@ -4439,6 +4450,18 @@
 #define mpn_toom4_sqr_itch(an) \
   (3 * (an) + GMP_NUMB_BITS)
 
+#define mpn_toom6_sqr_itch(n)					\
+( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
+   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
+       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
+  mp_size_t estimatedN;
+  estimatedN = (an + bn) / (size_t) 10 + 1;
+  return mpn_toom6_sqr_itch( estimatedN * 6 );
+}
+
 static inline mp_size_t
 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
 {
diff -r fb0247ca2316 -r b75c1e49113f mpn/generic/sbpi1_div_q.c
--- a/mpn/generic/sbpi1_div_q.c	Mon Dec 21 21:57:23 2009 +0100
+++ b/mpn/generic/sbpi1_div_q.c	Mon Dec 21 22:09:09 2009 +0100
@@ -320,9 +320,6 @@
 	{
 	  /* Compensate for ignored dividend and divisor tails.  */
 
-	  if (qn == 0)
-	    return qh;
-
 	  dp = dp_orig;
 	  np = np_orig;
 
@@ -333,13 +330,17 @@
 		{
 		  if (x == 0)
 		    {
-		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      if (qn != 0)
+			cy = mpn_sub_1 (qp, qp, qn, 1);
 		      return qh - cy;
 		    }
 		  x--;
 		}
 	    }
 
+	  if (qn == 0)
+	    return qh;
+
 	  for (i = dn - qn - 2; i >= 0; i--)
 	    {
 	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
@@ -349,8 +350,7 @@
 		  if (x == 0)
 		    {
 		      cy = mpn_sub_1 (qp, qp, qn, 1);
-		      ASSERT_ALWAYS (cy == 0);
-		      return qh - cy;
+		      return qh;
 		    }
 		  x--;
 		}
diff -r fb0247ca2316 -r b75c1e49113f mpn/generic/toom6h_mul.c
--- a/mpn/generic/toom6h_mul.c	Mon Dec 21 21:57:23 2009 +0100
+++ b/mpn/generic/toom6h_mul.c	Mon Dec 21 22:09:09 2009 +0100
@@ -76,27 +76,16 @@
   do {	mpn_mul (p, a, na, b, nb);			\
   } while (0)
 
-/* S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
-   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
- */
-#define mpn_toom6h_mul_n_itch(n)					\
-( ((n) - MUL_TOOM6H_THRESHOLD)*2 +					\
-   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,			\
-       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
-
-mp_size_t
-mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
-  mp_size_t estimatedN;
-  estimatedN = (an + bn) / (size_t) 10 + 1;
-  return mpn_toom6h_mul_n_itch( estimatedN * 6 );
-}
-
 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
    With: an >= bn >= 46, an*6 <  bn * 17.
    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
 
    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
 */
+/* Estimate on needed scratch:
+   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
+   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
+ */
 
 void
 mpn_toom6h_mul   (mp_ptr pp,
@@ -250,9 +239,6 @@
 
 
 
-#ifndef SQR_TOOM6_THRESHOLD
-#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
-#endif
 
 #ifdef  SQR_TOOM8_THRESHOLD
 #define SQR_TOOM6_MAX (SQR_TOOM8_THRESHOLD+6*2-1)
diff -r fb0247ca2316 -r b75c1e49113f tune/common.c
--- a/tune/common.c	Mon Dec 21 21:57:23 2009 +0100
+++ b/tune/common.c	Mon Dec 21 22:09:09 2009 +0100
@@ -1009,6 +1009,11 @@
   SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
 }
 double
+speed_mpn_toom6_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
+}
+double
 speed_mpn_toom22_mul (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
@@ -1023,6 +1028,11 @@
 {
   SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
 }
+double
+speed_mpn_toom6h_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
+}
 
 double
 speed_mpn_toom32_mul (struct speed_params *s)
diff -r fb0247ca2316 -r b75c1e49113f tune/speed.c
--- a/tune/speed.c	Mon Dec 21 21:57:23 2009 +0100
+++ b/tune/speed.c	Mon Dec 21 22:09:09 2009 +0100
@@ -294,9 +294,11 @@
   { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
   { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
   { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
+  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
   { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
   { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
   { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
+  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
   { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
   { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
   { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
diff -r fb0247ca2316 -r b75c1e49113f tune/speed.h
--- a/tune/speed.h	Mon Dec 21 21:57:23 2009 +0100
+++ b/tune/speed.h	Mon Dec 21 22:09:09 2009 +0100
@@ -276,9 +276,11 @@
 double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
@@ -1122,6 +1124,12 @@
      mpn_toom44_mul_itch (s->size, s->size),				\
      MPN_TOOM44_MUL_MINSIZE)
 
+#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function)			\
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
+    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
+     mpn_toom6h_mul_itch (s->size, s->size),				\
+     MPN_TOOM6H_MUL_MINSIZE)
+
 #define SPEED_ROUTINE_MPN_TOOM32_MUL(function)				\
   SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
     (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace),		\
@@ -1254,6 +1262,11 @@
 				mpn_toom4_sqr_itch (s->size),		\
 				MPN_TOOM4_SQR_MINSIZE)
 
+#define SPEED_ROUTINE_MPN_TOOM6_SQR(function)				\
+  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
+				mpn_toom6_sqr_itch (s->size),		\
+				MPN_TOOM6_SQR_MINSIZE)
+
 #define SPEED_ROUTINE_MPN_MOD_CALL(call)				\
   {									\
     unsigned   i;							\


More information about the gmp-commit mailing list