[Gmp-commit] /home/hgfiles/gmp: 3 new changesets

Wed Dec 23 17:32:21 CET 2009

details:   /home/hgfiles/gmp/rev/3537e485a55b
changeset: 13202:3537e485a55b
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Dec 23 15:42:02 2009 +0100
description:
Nailify CRT code mod_bnm1.

details:   /home/hgfiles/gmp/rev/e64503807a24
changeset: 13203:e64503807a24
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Dec 23 16:06:33 2009 +0100
description:
Split toom6h_mul and toom6_sqr.

details:   /home/hgfiles/gmp/rev/61fe62428855
changeset: 13204:61fe62428855
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Dec 23 17:31:50 2009 +0100
description:
New Toom-8.5, with test for _mul.

diffstat:

 ChangeLog                            |   21 +-
 configure.in                         |    4 +-
 gmp-impl.h                           |   34 ++-
 mpn/Makefile.am                      |    4 +-
 mpn/generic/mulmod_bnm1.c            |    9 +-
 mpn/generic/sqrmod_bnm1.c            |    9 +-
 mpn/generic/toom6_sqr.c              |  169 +++++++++++
 mpn/generic/toom6h_mul.c             |  146 +---------
 mpn/generic/toom8_sqr.c              |  206 ++++++++++++++
 mpn/generic/toom8h_mul.c             |  287 +++++++++++++++++++
 mpn/generic/toom_interpolate_16pts.c |  512 +++++++++++++++++++++++++++++++++++
 tests/mpn/Makefile.am                |    2 +-
 tests/mpn/t-toom8h.c                 |   21 +
 13 files changed, 1266 insertions(+), 158 deletions(-)

diffs (truncated from 1594 to 300 lines):

diff -r a5b28ede335f -r 61fe62428855 ChangeLog

--- a/ChangeLog	Wed Dec 23 15:00:21 2009 +0100
+++ b/ChangeLog	Wed Dec 23 17:31:50 2009 +0100
@@ -1,17 +1,32 @@
 2009-12-23  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
-	* mpn/generic/mullo_n.c: Split dc_mullo_n function; ALLOC memory at once.
+	* mpn/generic/toom8h_mul.c: New file.
+	* mpn/generic/toom8_sqr.c: New file.
+	* mpn/generic/toom_interpolate_16pts.c: New file.
+	* gmp-impl.h: Provide corresponding declarations.
+	* configure.in (gmp_mpn_functions): List toom_interpolate_16pts,
+	toom8h_mul, and toom8h_sqr.
+	* tests/mpn/t-toom8h.c: New test program.
+
+	* mpn/generic/toom6_sqr.c: New file, was part of toom6h_mul.
+	* mpn/generic/toom6h_mul.c: Removed _sqr.
+
+	* mpn/generic/mulmod_bnm1.c: Nailify CRT.
+	* mpn/generic/sqrmod_bnm1.c: Likewise.
+
+	* mpn/generic/mullo_n.c: Split dc_mullo_n function;
+	ALLOC memory at once.
 
 	* mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Update.
 
 	* mpn/generic/toom6h_mul.c: Add prefix to toom_interpolate_12pts.
 	* mpn/generic/toom_interpolate_12pts.c: Likewise.
 
-	* mpn/generic/invertappr.c (mpn_bc_invertappr): If n=2, mpn_divrem_2.
+	* mpn/generic/invertappr.c (mpn_bc_invertappr): Use mpn_divrem_2.
 	* mpn/generic/invert.c: Faster basecase, use mpn_sbpi1_div_q.
 
 	* mpn/generic/toom_eval_pm2exp.c: Assert support for degree 3.
-	* mpn/generic/toom6h_mul.c: Do not use obsolete _itch function any more.
+	* mpn/generic/toom6h_mul.c: Avoid obsolete _itch function.
 
 2009-12-23  Torbjorn Granlund  <tege at gmplib.org>
 
diff -r a5b28ede335f -r 61fe62428855 configure.in
--- a/configure.in	Wed Dec 23 15:00:21 2009 +0100
+++ b/configure.in	Wed Dec 23 17:31:50 2009 +0100
@@ -2506,13 +2506,13 @@
   toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul		   \
   toom33_mul toom43_mul toom53_mul toom63_mul				   \
   toom44_mul								   \
-  toom6h_mul								   \
+  toom6h_mul toom6_sqr toom8h_mul toom8_sqr				   \
   toom_couple_handling							   \
   toom2_sqr toom3_sqr toom4_sqr						   \
   toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 				   \
   toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp	   \
   toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts	   \
-  toom_interpolate_8pts toom_interpolate_12pts				   \
+  toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts	   \
   invertappr invert binvert mulmod_bnm1 sqrmod_bnm1			   \
   sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q				   \
   dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q				   \
diff -r a5b28ede335f -r 61fe62428855 gmp-impl.h
--- a/gmp-impl.h	Wed Dec 23 15:00:21 2009 +0100
+++ b/gmp-impl.h	Wed Dec 23 17:31:50 2009 +0100
@@ -1051,6 +1051,9 @@
 #define MPN_TOOM6H_MUL_MINSIZE   46
 #define MPN_TOOM6_SQR_MINSIZE    46
 
+#define MPN_TOOM8H_MUL_MINSIZE   86
+#define MPN_TOOM8_SQR_MINSIZE    86
+
 #define MPN_TOOM32_MUL_MINSIZE   10
 #define MPN_TOOM42_MUL_MINSIZE   10
 #define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
@@ -1077,6 +1080,9 @@
 #define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
 __GMP_DECLSPEC void      mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
 
+#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
+__GMP_DECLSPEC void      mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+
 #define   mpn_toom_couple_handling __MPN(toom_couple_handling)
 __GMP_DECLSPEC void      toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));
 
@@ -1143,6 +1149,12 @@
 #define   mpn_toom6_sqr __MPN(toom6_sqr)
 __GMP_DECLSPEC void      mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 
+#define   mpn_toom8h_mul __MPN(toom8h_mul)
+__GMP_DECLSPEC void      mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define   mpn_toom8_sqr __MPN(toom8_sqr)
+__GMP_DECLSPEC void      mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
 #define   mpn_fft_best_k __MPN(fft_best_k)
 __GMP_DECLSPEC int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
 
@@ -1684,6 +1696,14 @@
 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
 #endif
 
+#ifndef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD            450
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
 #endif
@@ -4467,7 +4487,19 @@
 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
   mp_size_t estimatedN;
   estimatedN = (an + bn) / (size_t) 10 + 1;
-  return mpn_toom6_sqr_itch( estimatedN * 6 );
+  return mpn_toom6_sqr_itch (estimatedN * 6);
+}
+
+#define mpn_toom8_sqr_itch(n)						\
+( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) +			\
+   MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,		\
+       mpn_toom6_sqr_itch(MUL_TOOM8H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
+  mp_size_t estimatedN;
+  estimatedN = (an + bn) / (size_t) 14 + 1;
+  return mpn_toom8_sqr_itch (estimatedN * 8);
 }
 
 static inline mp_size_t
diff -r a5b28ede335f -r 61fe62428855 mpn/Makefile.am
--- a/mpn/Makefile.am	Wed Dec 23 15:00:21 2009 +0100
+++ b/mpn/Makefile.am	Wed Dec 23 17:31:50 2009 +0100
@@ -48,13 +48,13 @@
   toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c	    \
   toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c			    \
   toom44_mul.c								    \
-  toom6h_mul.c								    \
+  toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c			    \
   toom_couple_handling.c						    \
   sqr_toom2.c sqr_toom3.c sqr_toom4.c					    \
   toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c				    \
   toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c    \
   toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c   \
-  toom_interpolate_8pts.c toom_interpolate_12pts.c			    \
+  toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
   invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c		    \
   mullo_n.c mullo_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c	    \
   popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
diff -r a5b28ede335f -r 61fe62428855 mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c	Wed Dec 23 15:00:21 2009 +0100
+++ b/mpn/generic/mulmod_bnm1.c	Wed Dec 23 17:31:50 2009 +0100
@@ -246,8 +246,8 @@
       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
       hi = cy << (GMP_NUMB_BITS - 1);
       cy = 0;
-      /* next add_ssaaaa will set cy = 1 only if rp[n-1]+=hi overflows,
-	 i.e. a further increment will not overflow again. */
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+	 overflows, i.e. a further increment will not overflow again. */
 #else /* ! _nc */
       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
@@ -255,7 +255,12 @@
       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
 	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
 #endif
+#if GMP_NAIL_BITS == 0
       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
 #if HAVE_NATIVE_mpn_add_nc
       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
diff -r a5b28ede335f -r 61fe62428855 mpn/generic/sqrmod_bnm1.c
--- a/mpn/generic/sqrmod_bnm1.c	Wed Dec 23 15:00:21 2009 +0100
+++ b/mpn/generic/sqrmod_bnm1.c	Wed Dec 23 17:31:50 2009 +0100
@@ -201,8 +201,8 @@
       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
       hi = cy << (GMP_NUMB_BITS - 1);
       cy = 0;
-      /* next add_ssaaaa will set cy = 1 only if rp[n-1]+=hi overflows,
-	 i.e. a further increment will not overflow again. */
+      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+	 overflows, i.e. a further increment will not overflow again. */
 #else /* ! _nc */
       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
@@ -210,7 +210,12 @@
       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
 	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
 #endif
+#if GMP_NAIL_BITS == 0
       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+      rp[n-1] ^= hi;
+#endif
 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
 #if HAVE_NATIVE_mpn_add_nc
       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
diff -r a5b28ede335f -r 61fe62428855 mpn/generic/toom6_sqr.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/toom6_sqr.c	Wed Dec 23 17:31:50 2009 +0100
@@ -0,0 +1,169 @@
+/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+
+#ifdef  SQR_TOOM8_THRESHOLD
+#define SQR_TOOM6_MAX (SQR_TOOM8_THRESHOLD+6*2-1)
+#else
+#define SQR_TOOM6_MAX (SQR_FFT_THRESHOLD+6*2-1)
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase   1
+#define MAYBE_sqr_toom2   1
+#define MAYBE_sqr_above_toom2   1
+#define MAYBE_sqr_toom3   1
+#define MAYBE_sqr_above_toom3   1
+#define MAYBE_sqr_above_toom4   1
+#else
+#define MAYBE_sqr_basecase					\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase				\
+  (SQR_TOOM6_MAX >= 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2						\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2					\
+  (SQR_TOOM6_MAX >= 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3						\
+  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3					\
+  (SQR_TOOM6_MAX >= 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom4					\
+  (SQR_TOOM6_MAX >= 6 * SQR_TOOM6_THRESHOLD)
+#endif
+
+#define TOOM6_SQR_REC(p, a, n, ws)					\
+  do {									\
+    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
+	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
+      mpn_sqr_basecase (p, a, n);					\
+    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
+      mpn_toom2_sqr (p, a, n, ws);					\
+    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
+	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
+      mpn_toom3_sqr (p, a, n, ws);					\
+    else if (! MAYBE_sqr_above_toom4					\
+	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
+      mpn_toom4_sqr (p, a, n, ws);					\
+    else								\
+      mpn_toom6_sqr (p, a, n, ws);					\
+  } while (0)
+
+void
+mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+  mp_size_t n, s;
+
+  /***************************** decomposition *******************************/
+
+  ASSERT( an >= 18 );
+