[Gmp-commit] /home/hgfiles/gmp: 7 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Dec 8 17:01:35 CET 2009
details: /home/hgfiles/gmp/rev/44b566283c7f
changeset: 13007:44b566283c7f
user: Niels Möller <nisse at lysator.liu.se>
date: Thu Dec 03 20:43:03 2009 +0100
description:
Reimplemented binary gcdext_1, with proper canonicalization.
details: /home/hgfiles/gmp/rev/790fb23fbb19
changeset: 13008:790fb23fbb19
user: Niels Möller <nisse at lysator.liu.se>
date: Thu Dec 03 22:55:18 2009 +0100
description:
Various masking tricks for binary gcdext_1.
details: /home/hgfiles/gmp/rev/3ed6567668c1
changeset: 13009:3ed6567668c1
user: Niels Möller <nisse at lysator.liu.se>
date: Thu Dec 03 22:56:21 2009 +0100
description:
Trivial merge.
details: /home/hgfiles/gmp/rev/24de3c23543c
changeset: 13010:24de3c23543c
user: Niels Möller <nisse at lysator.liu.se>
date: Tue Dec 08 16:16:02 2009 +0100
description:
Trivial merge.
details: /home/hgfiles/gmp/rev/2518904c4136
changeset: 13011:2518904c4136
user: Niels Möller <nisse at lysator.liu.se>
date: Tue Dec 08 16:52:22 2009 +0100
description:
Use table lookup for count_trailing_zeros.
details: /home/hgfiles/gmp/rev/0b49591e33df
changeset: 13012:0b49591e33df
user: Niels Möller <nisse at lysator.liu.se>
date: Tue Dec 08 16:52:51 2009 +0100
description:
Use divexact to compute second cofactor in mpn_gcdext.
details: /home/hgfiles/gmp/rev/190a9759ee23
changeset: 13013:190a9759ee23
user: Niels Möller <nisse at lysator.liu.se>
date: Tue Dec 08 16:54:55 2009 +0100
description:
Deleted left-over debugging code.
diffstat:
ChangeLog | 143 +++++++++++++++++-
acinclude.m4 | 4 +-
configure.in | 44 +++-
gmp-impl.h | 48 +++--
mpn/Makefile.am | 2 +-
mpn/alpha/add_n.asm | 203 ++++++++++++++----------
mpn/alpha/com_n.asm | 165 ++++++++++++++++++++
mpn/alpha/diveby3.asm | 322 ----------------------------------------
mpn/alpha/ev5/add_n.asm | 146 ------------------
mpn/alpha/ev5/com_n.asm | 165 --------------------
mpn/alpha/ev5/diveby3.asm | 321 ++++++++++++++++++++++++++++++++++++++++
mpn/alpha/ev5/gmp-mparam.h | 6 +-
mpn/alpha/ev5/lshift.asm | 171 ---------------------
mpn/alpha/ev5/rshift.asm | 169 ---------------------
mpn/alpha/ev5/sub_n.asm | 146 ------------------
mpn/alpha/ev6/gmp-mparam.h | 6 +-
mpn/alpha/gmp-mparam.h | 6 +-
mpn/alpha/lshift.asm | 160 ++++++++++++++-----
mpn/alpha/rshift.asm | 160 ++++++++++++++-----
mpn/alpha/sub_n.asm | 207 ++++++++++++++-----------
mpn/arm/gmp-mparam.h | 6 +-
mpn/asm-defs.m4 | 2 +-
mpn/generic/bdiv_q.c | 12 +-
mpn/generic/bdiv_qr.c | 12 +-
mpn/generic/binvert.c | 2 +-
mpn/generic/dcpi1_bdiv_q.c | 4 +-
mpn/generic/gcdext.c | 79 ++++++++-
mpn/generic/gcdext_1.c | 238 +++++++++++++++++++++++++++++
mpn/generic/gcdext_lehmer.c | 2 +-
mpn/generic/gcdext_subdiv_step.c | 2 +-
mpn/generic/mod_1_1.c | 2 +-
mpn/generic/mu_bdiv_q.c | 12 +-
mpn/generic/mu_bdiv_qr.c | 10 +-
mpn/generic/mullo_basecase.c | 41 +++++
mpn/generic/mullo_n.c | 215 ++++++++++++++++++++++++++
mpn/generic/mullow_basecase.c | 41 -----
mpn/generic/mullow_n.c | 215 --------------------------
mpn/generic/mulmod_bnm1.c | 4 +-
mpn/generic/perfpow.c | 4 +-
mpn/generic/powlo.c | 6 +-
mpn/generic/powm.c | 15 +-
mpn/generic/powm_sec.c | 147 +++++++++++++++---
mpn/generic/redc_1.c | 7 +-
mpn/generic/redc_1_sec.c | 45 +++++
mpn/generic/redc_2.c | 1 +
mpn/generic/redc_n.c | 2 +-
mpn/generic/sbpi1_bdiv_q.c | 39 ++--
mpn/generic/toom33_mul.c | 4 +-
mpn/generic/toom3_sqr.c | 2 +-
mpn/generic/toom42_mul.c | 4 +-
mpn/generic/toom43_mul.c | 8 +-
mpn/generic/toom44_mul.c | 20 +-
mpn/generic/toom4_sqr.c | 3 +-
mpn/generic/toom52_mul.c | 72 +--------
mpn/generic/toom53_mul.c | 18 +-
mpn/generic/toom62_mul.c | 15 +-
mpn/generic/toom_eval_dgr3_pm1.c | 3 +-
mpn/generic/toom_eval_dgr3_pm2.c | 3 +-
mpn/generic/toom_eval_pm1.c | 2 +-
mpn/generic/toom_eval_pm2.c | 106 +++++++------
mpn/generic/toom_eval_pm2exp.c | 2 +-
mpn/generic/toom_interpolate_5pts.c | 18 +-
mpn/generic/toom_interpolate_7pts.c | 3 +-
mpn/ia64/gmp-mparam.h | 6 +-
mpn/minithres/gmp-mparam.h | 40 +++-
mpn/pa32/hppa2_0/gmp-mparam.h | 6 +-
mpn/pa64/gmp-mparam.h | 6 +-
mpn/powerpc32/750/gmp-mparam.h | 6 +-
mpn/powerpc32/gmp-mparam.h | 6 +-
mpn/powerpc64/mode64/diveby3.asm | 83 ----------
mpn/powerpc64/mode64/gmp-mparam.h | 6 +-
mpn/powerpc64/mode64/p3/gmp-mparam.h | 6 +-
mpn/powerpc64/mode64/p4/gmp-mparam.h | 6 +-
mpn/powerpc64/mode64/p5/gmp-mparam.h | 6 +-
mpn/sparc32/v9/gmp-mparam.h | 6 +-
mpn/sparc64/gmp-mparam.h | 6 +-
mpn/x86/atom/gmp-mparam.h | 6 +-
mpn/x86/k6/gmp-mparam.h | 6 +-
mpn/x86/k7/gmp-mparam.h | 6 +-
mpn/x86/p6/mmx/gmp-mparam.h | 6 +-
mpn/x86/p6/sse2/gmp-mparam.h | 6 +-
mpn/x86/pentium/mmx/gmp-mparam.h | 6 +-
mpn/x86/pentium4/sse2/gmp-mparam.h | 6 +-
mpn/x86_64/atom/gmp-mparam.h | 6 +-
mpn/x86_64/core2/gmp-mparam.h | 6 +-
mpn/x86_64/corei/gmp-mparam.h | 6 +-
mpn/x86_64/gmp-mparam.h | 6 +-
mpn/x86_64/nano/gmp-mparam.h | 6 +-
mpn/x86_64/pentium4/gmp-mparam.h | 6 +-
mpz/powm.c | 4 +-
tests/devel/try.c | 47 +++++-
tests/mpn/Makefile.am | 2 +-
tests/mpn/t-bdiv.c | 167 ++++++++++++++++++++
tests/mpn/t-mullo.c | 12 +-
tests/refmpn.c | 56 ++++--
tests/tests.h | 10 +-
tune/Makefile.am | 4 +-
tune/common.c | 27 +--
tune/speed.c | 12 +-
tune/speed.h | 54 +++++-
tune/tuneup.c | 81 +++++----
101 files changed, 2532 insertions(+), 2253 deletions(-)
diffs (truncated from 6734 to 300 lines):
diff -r f48329882a7b -r 190a9759ee23 ChangeLog
--- a/ChangeLog Thu Dec 03 19:13:46 2009 +0100
+++ b/ChangeLog Tue Dec 08 16:54:55 2009 +0100
@@ -1,5 +1,134 @@
+2009-12-08 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]: Use
+ table lookup for count_trailing_zeros. Binary algorithm still
+ disabled by default.
+
+ * mpn/generic/gcdext.c (divexact): Local definition of divexact,
+ using mpn_bdiv_q.
+ (compute_v): Use it.
+
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-bdiv.
+
+ * tests/mpn/t-bdiv.c: New file.
+
+ * mpn/generic/bdiv_q.c (mpn_bdiv_q): Fixed bad quotient length,
+ should have qn == nn.
+
+ * mpn/generic/bdiv_qr.c (mpn_bdiv_qr): Pass correct nn length to
+ the lower-level functions.
+
+2009-12-08 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/speed.c (routine): Add mpn_invert.
+
+ * tune/tuneup.c (tune_invert): New function.
+ * tune/speed.h (SPEED_ROUTINE_MPN_INVERT): New macro.
+ * tune/common.c (speed_mpn_invert): New function.
+ * gmp-impl.h: Provide declarations for corresponding threshold var.
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add invert.c.
+
+2009-12-08 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * tests/devel/try.c: Test mpn_addlsh2_n and mpn_{add,sub}lsh_n;
+ mpn_rsblsh_n now tests all shift values.
+ * tests/refmpn.c (refmpn_addlsh_n, refmpn_sublsh_n): New functions.
+ (refmpn_addlsh1_n): Use generic refmpn_addlsh_n.
+ (refmpn_sublsh1_n): Use generic refmpn_sublsh_n.
+ (refmpn_addlsh2_n): New function.
+ * tests/tests.h: Declare new functions.
+
+2009-12-06 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/tuneup.c (tune_mulmod_bnm1): Up min_size to 12.
+
+ * Globally: Rename *mullow* to *mullo*, *MULLOW* to *MULLO*.
+
+ * configure.in: Don't include ev5 directory for ev6* and ev7. Misc
+ alpha path cleanups.
+ * mpn/alpha/add_n.asm: Replaced by mpn/alpha/ev5/add_n.asm.
+ * mpn/alpha/sub_n.asm: Replaced by mpn/alpha/ev5/sub_n.asm.
+ * mpn/alpha/lshift.asm: Replaced by mpn/alpha/ev5/lshift.asm.
+ * mpn/alpha/rshift.asm: Replaced by mpn/alpha/ev5/rshift.asm.
+ * mpn/alpha/com_n.asm: New, moved from mpn/alpha/ev5/rshift.asm.
+ * mpn/alpha/ev5/diveby3.asm: New, moved from mpn/alpha/diveby3.asm.
+
+ * mpn/powerpc64/mode64/diveby3.asm: Remove, it is slower than
+ mpn_bdiv_dbm1c on all hardware.
+
+ * mpn/generic/powm_sec.c: Rework logic for mpn_sqr_basecase size limit.
+
+ * gmp-impl.h (mpn_redc_1_sec): Declare.
+ * configure.in (gmp_mpn_functions): Add redc_1_sec.
+
+2009-12-06 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * tests/devel/try.c(try_one): DATA_SRC0_HIGHBIT sets the high bit.
+
+2009-12-05 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/generic/toom_eval_dgr3_pm1.c: Change return value: 0 or ~0.
+ * mpn/generic/toom_eval_dgr3_pm2.c: Likewise.
+ * mpn/generic/toom_eval_pm1.c: Likewise.
+ * mpn/generic/toom_eval_pm2exp.c: Likewise.
+ * mpn/generic/toom_eval_pm2.c: Rewrite to use mpn_addlsh2_n.
+
+ * mpn/generic/toom_interpolate_5pts.c: Param sa is a flag, not a sign.
+
+ * mpn/generic/toom33_mul.c: Adapt to changes above.
+ * mpn/generic/toom3_sqr.c: Likewise.
+ * mpn/generic/toom42_mul.c: Likewise.
+ * mpn/generic/toom43_mul.c: Reduce branches.
+ * mpn/generic/toom44_mul.c: Likewise.
+ * mpn/generic/toom53_mul.c: Likewise.
+ * mpn/generic/toom62_mul.c: Likewise.
+
+ * mpn/generic/toom52_mul.c: Use toom_eval_ functions.
+
+ * mpn/generic/toom4_sqr.c: Avoid C99 construct.
+ * mpn/generic/toom_interpolate_7pts.c: Likewise.
+
+2009-12-05 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/redc_1_sec.c: New file.
+ * mpn/generic/powm_sec.c: Use redc_1_sec. Use dummy full subtract
+ instead of mpn_cmp since the latter leaks to the side channel.
+ (mpn_local_sqr_n): New function, with associated macros.
+ (mpn_powm_sec): Use mpn_local_sqr_n.
+
+ * configure.in (HAVE_NATIVE): Add missing functions, then sort.
+
+2009-12-04 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/tuneup.c (tune_dc_div): Up min_size to 6.
+ (tune_mod_1): Set MOD_1_1_THRESHOLD min_size to 2.
+
+ * tune/speed.h: Negate "binvert"-type inverses, as required.
+
+ * mpn/generic/redc_1.c: Add ASSERTs.
+ * mpn/generic/redc_2.c: Likewise.
+
+ * mpn/generic/sbpi1_bdiv_q.c: Simplify loops, indexing.
+
+2009-12-03 Yann Droneaud <yann at droneaud.fr>
+
+ * acinclude.m4 ([long long reliability test 1]): Add a "static" for C99
+ inline semantics compatibility.
+
+2009-12-03 Torbjorn Granlund <tege at gmplib.org>
+
+ * configure.in: Move intptr_t test into common AC_CHECK_TYPES.
+
+ * mpn/generic/gcdext.c: Add a TMP_FREE.
+
2009-12-03 Niels Möller <nisse at lysator.liu.se>
+ * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]:
+ Added various masking tricks.
+
+ * mpn/generic/gcdext_1.c (mpn_gcdext_1) [GCDEXT_1_USE_BINARY]:
+ Reimplemented binary gcdext, with proper canonicalization.
+
* mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Handle v == 0
from mpn_gcdext_1.
* mpn/generic/gcdext_1.c (mpn_gcdext_1): Allow inputs with a < b,
@@ -21,9 +150,9 @@
2009-12-02 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tests/devel/try.c: Test mpn_rsblsh2_n and mpn_rsblsh_n.
- * tests/refmpn.c (refmpn_rsblsh_n, refmpn_rsblsh2_n): New functions.
+ * tests/refmpn.c (refmpn_rsblsh_n, refmpn_rsblsh2_n): New functions.
(refmpn_rsblsh1_n): Use generic refmpn_rsblsh_n.
- * tests/tests.h: Declare new functions.
+ * tests/tests.h: Declare new functions.
2009-12-03 Niels Möller <nisse at lysator.liu.se>
@@ -53,7 +182,7 @@
(tune_binvert): New function.
* tune/speed.h (SPEED_ROUTINE_MPN_BINVERT): New macro.
* tune/common.c (speed_mpn_binvert): New function.
- * gmp-impl.h: Provide declarations for corresponding threshold vars.
+ * gmp-impl.h: Provide declarations for corresponding threshold var.
* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add binvert.c.
* tune/tuneup.c: Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD.
@@ -121,8 +250,8 @@
2009-11-28 Marco Bodrato <bodrato at mail.dm.unipi.it>
- * mpn/generic/mulmod_bnm1.c: Basecases made simpler,
- this also corrects a bug affecting previous version.
+ * mpn/generic/mulmod_bnm1.c: Basecases made simpler, this also corrects
+ a bug affecting previous version.
2009-11-28 Torbjorn Granlund <tege at gmplib.org>
@@ -209,7 +338,7 @@
2009-11-21 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/generic/mullow_n.c: Disable Mulders' trick for small operands,
- use fft for bigger ones.
+ use fft for bigger ones.
* tests/mpn/t-mullo.c: New test file.
2009-11-22 Torbjorn Granlund <tege at gmplib.org>
@@ -349,7 +478,7 @@
* mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): New, copied from
mpn/x86/x86-defs.m4.
* configure.in: Move down x86 fat setup code until after ABI has been
- determined; generalize to handle x86_64.
+ determined; generalise to handle x86_64.
2009-11-09 Torbjorn Granlund <tege at gmplib.org>
diff -r f48329882a7b -r 190a9759ee23 acinclude.m4
--- a/acinclude.m4 Thu Dec 03 19:13:46 2009 +0100
+++ b/acinclude.m4 Tue Dec 08 16:54:55 2009 +0100
@@ -112,7 +112,7 @@
dnl instead of gmp.h, since that file isn't generated until the end of the
dnl configure.
dnl
-dnl Dummy values for GMP_LIMB_BITS and GMP_LIMB_BITS are enough
+dnl Dummy value for GMP_LIMB_BITS is enough
dnl for all current configure-time uses of gmp.h.
define(GMP_INCLUDE_GMP_H,
@@ -591,7 +591,7 @@
#if defined (__GNUC__) && ! defined (__cplusplus)
typedef unsigned long long t1;typedef t1*t2;
-__inline__ t1 e(t2 rp,t2 up,int n,t1 v0)
+static __inline__ t1 e(t2 rp,t2 up,int n,t1 v0)
{t1 c,x,r;int i;if(v0){c=1;for(i=1;i<n;i++){x=up[i];r=x+1;rp[i]=r;}}return c;}
f(){static const struct{t1 n;t1 src[9];t1 want[9];}d[]={{1,{0},{1}},};t1 got[9];int i;
for(i=0;i<1;i++){if(e(got,got,9,d[i].n)==0)h();g(i,d[i].src,d[i].n,got,d[i].want,9);if(d[i].n)h();}}
diff -r f48329882a7b -r 190a9759ee23 configure.in
--- a/configure.in Thu Dec 03 19:13:46 2009 +0100
+++ b/configure.in Tue Dec 08 16:54:55 2009 +0100
@@ -407,11 +407,14 @@
alpha*-*-*)
AC_DEFINE(HAVE_HOST_CPU_FAMILY_alpha)
case $host_cpu in
- alphaev5* | alphapca5*) path="alpha/ev5 alpha" ;;
+ alphaev5* | alphapca5*)
+ path="alpha/ev5 alpha" ;;
alphaev67 | alphaev68 | alphaev7*)
- path="alpha/ev67 alpha/ev6 alpha/ev5 alpha" ;;
- alphaev6* | alphaev7*) path="alpha/ev6 alpha/ev5 alpha" ;;
- *) path="alpha" ;;
+ path="alpha/ev67 alpha/ev6 alpha" ;;
+ alphaev6)
+ path="alpha/ev6 alpha" ;;
+ *)
+ path="alpha" ;;
esac
extra_functions="cntlz"
gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
@@ -2336,7 +2339,8 @@
#
# the default includes are sufficient for all these types
#
-AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t, uint_least32_t])
+AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t,
+ uint_least32_t, intptr_t])
AC_C_STRINGIZE
@@ -2475,7 +2479,7 @@
bdivmod gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step \
gcdext_lehmer gcdext_subdiv_step \
tdiv_qr jacbase get_d \
- matrix22_mul hgcd2 hgcd mullow_n mullow_basecase \
+ matrix22_mul hgcd2 hgcd mullo_n mullo_basecase \
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
toom33_mul toom43_mul toom53_mul \
toom44_mul \
@@ -2493,7 +2497,7 @@
mu_bdiv_q mu_bdiv_qr \
bdiv_q bdiv_qr \
divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n \
- trialdiv remove \
+ redc_1_sec trialdiv remove \
and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \
copyi copyd zero \
$gmp_mpn_functions_optional"
@@ -2948,10 +2952,12 @@
[/* Define to 1 each of the following for which a native (ie. CPU specific)
implementation of the corresponding routine exists. */
#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_n_sub_n
#undef HAVE_NATIVE_mpn_add_nc
-#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addaddmul_1msb0
#undef HAVE_NATIVE_mpn_addlsh1_n
#undef HAVE_NATIVE_mpn_addlsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
@@ -2960,8 +2966,6 @@
#undef HAVE_NATIVE_mpn_addmul_6
#undef HAVE_NATIVE_mpn_addmul_7
#undef HAVE_NATIVE_mpn_addmul_8
-#undef HAVE_NATIVE_mpn_add_n_sub_n
-#undef HAVE_NATIVE_mpn_addaddmul_1msb0
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdiv_dbm1c
@@ -2976,41 +2980,52 @@
#undef HAVE_NATIVE_mpn_divrem_1c
#undef HAVE_NATIVE_mpn_divrem_2
#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_hamdist
#undef HAVE_NATIVE_mpn_invert_limb
#undef HAVE_NATIVE_mpn_ior_n
#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
#undef HAVE_NATIVE_mpn_lshiftc
+#undef HAVE_NATIVE_mpn_lshsub_n
#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1_1p
#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mod_1s_2p
+#undef HAVE_NATIVE_mpn_mod_1s_4p
+#undef HAVE_NATIVE_mpn_mod_34lsub1
#undef HAVE_NATIVE_mpn_modexact_1_odd
More information about the gmp-commit
mailing list