[Gmp-commit] /home/hgfiles/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Dec 23 17:32:21 CET 2009
details: /home/hgfiles/gmp/rev/3537e485a55b
changeset: 13202:3537e485a55b
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Dec 23 15:42:02 2009 +0100
description:
Nailify CRT code mod_bnm1.
details: /home/hgfiles/gmp/rev/e64503807a24
changeset: 13203:e64503807a24
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Dec 23 16:06:33 2009 +0100
description:
Split toom6h_mul and toom6_sqr.
details: /home/hgfiles/gmp/rev/61fe62428855
changeset: 13204:61fe62428855
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Wed Dec 23 17:31:50 2009 +0100
description:
New Toom-8.5, with test for _mul.
diffstat:
ChangeLog | 21 +-
configure.in | 4 +-
gmp-impl.h | 34 ++-
mpn/Makefile.am | 4 +-
mpn/generic/mulmod_bnm1.c | 9 +-
mpn/generic/sqrmod_bnm1.c | 9 +-
mpn/generic/toom6_sqr.c | 169 +++++++++++
mpn/generic/toom6h_mul.c | 146 +---------
mpn/generic/toom8_sqr.c | 206 ++++++++++++++
mpn/generic/toom8h_mul.c | 287 +++++++++++++++++++
mpn/generic/toom_interpolate_16pts.c | 512 +++++++++++++++++++++++++++++++++++
tests/mpn/Makefile.am | 2 +-
tests/mpn/t-toom8h.c | 21 +
13 files changed, 1266 insertions(+), 158 deletions(-)
diffs (truncated from 1594 to 300 lines):
diff -r a5b28ede335f -r 61fe62428855 ChangeLog
--- a/ChangeLog Wed Dec 23 15:00:21 2009 +0100
+++ b/ChangeLog Wed Dec 23 17:31:50 2009 +0100
@@ -1,17 +1,32 @@
2009-12-23 Marco Bodrato <bodrato at mail.dm.unipi.it>
- * mpn/generic/mullo_n.c: Split dc_mullo_n function; ALLOC memory at once.
+ * mpn/generic/toom8h_mul.c: New file.
+ * mpn/generic/toom8_sqr.c: New file.
+ * mpn/generic/toom_interpolate_16pts.c: New file.
+ * gmp-impl.h: Provide corresponding declarations.
+ * configure.in (gmp_mpn_functions): List toom_interpolate_16pts,
+ toom8h_mul, and toom8h_sqr.
+ * tests/mpn/t-toom8h.c: New test program.
+
+ * mpn/generic/toom6_sqr.c: New file, was part of toom6h_mul.
+ * mpn/generic/toom6h_mul.c: Removed _sqr.
+
+ * mpn/generic/mulmod_bnm1.c: Nailify CRT.
+ * mpn/generic/sqrmod_bnm1.c: Likewise.
+
+ * mpn/generic/mullo_n.c: Split dc_mullo_n function;
+ ALLOC memory at once.
* mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Update.
* mpn/generic/toom6h_mul.c: Add prefix to toom_interpolate_12pts.
* mpn/generic/toom_interpolate_12pts.c: Likewise.
- * mpn/generic/invertappr.c (mpn_bc_invertappr): If n=2, mpn_divrem_2.
+ * mpn/generic/invertappr.c (mpn_bc_invertappr): Use mpn_divrem_2.
* mpn/generic/invert.c: Faster basecase, use mpn_sbpi1_div_q.
* mpn/generic/toom_eval_pm2exp.c: Assert support for degree 3.
- * mpn/generic/toom6h_mul.c: Do not use obsolete _itch function any more.
+ * mpn/generic/toom6h_mul.c: Avoid obsolete _itch function.
2009-12-23 Torbjorn Granlund <tege at gmplib.org>
diff -r a5b28ede335f -r 61fe62428855 configure.in
--- a/configure.in Wed Dec 23 15:00:21 2009 +0100
+++ b/configure.in Wed Dec 23 17:31:50 2009 +0100
@@ -2506,13 +2506,13 @@
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
toom33_mul toom43_mul toom53_mul toom63_mul \
toom44_mul \
- toom6h_mul \
+ toom6h_mul toom6_sqr toom8h_mul toom8_sqr \
toom_couple_handling \
toom2_sqr toom3_sqr toom4_sqr \
toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 \
toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp \
toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
- toom_interpolate_8pts toom_interpolate_12pts \
+ toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \
invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \
sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
diff -r a5b28ede335f -r 61fe62428855 gmp-impl.h
--- a/gmp-impl.h Wed Dec 23 15:00:21 2009 +0100
+++ b/gmp-impl.h Wed Dec 23 17:31:50 2009 +0100
@@ -1051,6 +1051,9 @@
#define MPN_TOOM6H_MUL_MINSIZE 46
#define MPN_TOOM6_SQR_MINSIZE 46
+#define MPN_TOOM8H_MUL_MINSIZE 86
+#define MPN_TOOM8_SQR_MINSIZE 86
+
#define MPN_TOOM32_MUL_MINSIZE 10
#define MPN_TOOM42_MUL_MINSIZE 10
#define MPN_TOOM43_MUL_MINSIZE 49 /* ??? */
@@ -1077,6 +1080,9 @@
#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
+__GMP_DECLSPEC void mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+
#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));
@@ -1143,6 +1149,12 @@
#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_toom8h_mul __MPN(toom8h_mul)
+__GMP_DECLSPEC void mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_toom8_sqr __MPN(toom8_sqr)
+__GMP_DECLSPEC void mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
@@ -1684,6 +1696,14 @@
#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
#endif
+#ifndef MUL_TOOM8H_THRESHOLD
+#define MUL_TOOM8H_THRESHOLD 450
+#endif
+
+#ifndef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
+#endif
+
#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
#endif
@@ -4467,7 +4487,19 @@
mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
mp_size_t estimatedN;
estimatedN = (an + bn) / (size_t) 10 + 1;
- return mpn_toom6_sqr_itch( estimatedN * 6 );
+ return mpn_toom6_sqr_itch (estimatedN * 6);
+}
+
+#define mpn_toom8_sqr_itch(n) \
+( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) + \
+ MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
+ mpn_toom6_sqr_itch(MUL_TOOM8H_THRESHOLD)) )
+
+static inline mp_size_t
+mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
+ mp_size_t estimatedN;
+ estimatedN = (an + bn) / (size_t) 14 + 1;
+ return mpn_toom8_sqr_itch (estimatedN * 8);
}
static inline mp_size_t
diff -r a5b28ede335f -r 61fe62428855 mpn/Makefile.am
--- a/mpn/Makefile.am Wed Dec 23 15:00:21 2009 +0100
+++ b/mpn/Makefile.am Wed Dec 23 17:31:50 2009 +0100
@@ -48,13 +48,13 @@
toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c \
toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c \
toom44_mul.c \
- toom6h_mul.c \
+ toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c \
toom_couple_handling.c \
sqr_toom2.c sqr_toom3.c sqr_toom4.c \
toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c \
toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c \
toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c \
- toom_interpolate_8pts.c toom_interpolate_12pts.c \
+ toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c \
mullo_n.c mullo_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c \
popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
diff -r a5b28ede335f -r 61fe62428855 mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c Wed Dec 23 15:00:21 2009 +0100
+++ b/mpn/generic/mulmod_bnm1.c Wed Dec 23 17:31:50 2009 +0100
@@ -246,8 +246,8 @@
cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
hi = cy << (GMP_NUMB_BITS - 1);
cy = 0;
- /* next add_ssaaaa will set cy = 1 only if rp[n-1]+=hi overflows,
- i.e. a further increment will not overflow again. */
+ /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+ overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
@@ -255,7 +255,12 @@
/* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
+#if GMP_NAIL_BITS == 0
add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+ cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+ rp[n-1] ^= hi;
+#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
diff -r a5b28ede335f -r 61fe62428855 mpn/generic/sqrmod_bnm1.c
--- a/mpn/generic/sqrmod_bnm1.c Wed Dec 23 15:00:21 2009 +0100
+++ b/mpn/generic/sqrmod_bnm1.c Wed Dec 23 17:31:50 2009 +0100
@@ -201,8 +201,8 @@
cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
hi = cy << (GMP_NUMB_BITS - 1);
cy = 0;
- /* next add_ssaaaa will set cy = 1 only if rp[n-1]+=hi overflows,
- i.e. a further increment will not overflow again. */
+ /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
+ overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
@@ -210,7 +210,12 @@
/* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
+#if GMP_NAIL_BITS == 0
add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else
+ cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
+ rp[n-1] ^= hi;
+#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
diff -r a5b28ede335f -r 61fe62428855 mpn/generic/toom6_sqr.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/toom6_sqr.c Wed Dec 23 17:31:50 2009 +0100
@@ -0,0 +1,169 @@
+/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NUMB_BITS < 21
+#error Not implemented.
+#endif
+
+
+#ifdef SQR_TOOM8_THRESHOLD
+#define SQR_TOOM6_MAX (SQR_TOOM8_THRESHOLD+6*2-1)
+#else
+#define SQR_TOOM6_MAX (SQR_FFT_THRESHOLD+6*2-1)
+#endif
+
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_sqr_basecase 1
+#define MAYBE_sqr_above_basecase 1
+#define MAYBE_sqr_toom2 1
+#define MAYBE_sqr_above_toom2 1
+#define MAYBE_sqr_toom3 1
+#define MAYBE_sqr_above_toom3 1
+#define MAYBE_sqr_above_toom4 1
+#else
+#define MAYBE_sqr_basecase \
+ (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_above_basecase \
+ (SQR_TOOM6_MAX >= 6 * SQR_TOOM2_THRESHOLD)
+#define MAYBE_sqr_toom2 \
+ (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_above_toom2 \
+ (SQR_TOOM6_MAX >= 6 * SQR_TOOM3_THRESHOLD)
+#define MAYBE_sqr_toom3 \
+ (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom3 \
+ (SQR_TOOM6_MAX >= 6 * SQR_TOOM4_THRESHOLD)
+#define MAYBE_sqr_above_toom4 \
+ (SQR_TOOM6_MAX >= 6 * SQR_TOOM6_THRESHOLD)
+#endif
+
+#define TOOM6_SQR_REC(p, a, n, ws) \
+ do { \
+ if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
+ || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
+ || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \
+ mpn_toom2_sqr (p, a, n, ws); \
+ else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
+ || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \
+ mpn_toom3_sqr (p, a, n, ws); \
+ else if (! MAYBE_sqr_above_toom4 \
+ || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) \
+ mpn_toom4_sqr (p, a, n, ws); \
+ else \
+ mpn_toom6_sqr (p, a, n, ws); \
+ } while (0)
+
+void
+mpn_toom6_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
+{
+ mp_size_t n, s;
+
+ /***************************** decomposition *******************************/
+
+ ASSERT( an >= 18 );
+
More information about the gmp-commit
mailing list