[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Nov 15 20:37:21 CET 2011
details: /var/hg/gmp/rev/75043735b2eb
changeset: 14445:75043735b2eb
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Nov 15 20:36:09 2011 +0100
description:
Rewrite mpn/generic/powm_sec.c.
details: /var/hg/gmp/rev/ca545ea91b07
changeset: 14446:ca545ea91b07
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Nov 15 20:37:14 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 32 ++++++++++++++++++++++
mpn/generic/powm_sec.c | 69 +++++++++++++++++++++++++++++++-----------------
tune/Makefile.am | 2 +-
tune/common.c | 8 ++++-
tune/hgcd_appr_lehmer.c | 29 ++++++++++++++++++++
tune/speed.c | 1 +
tune/speed.h | 7 ++++-
tune/tuneup.c | 2 +
8 files changed, 122 insertions(+), 28 deletions(-)
diffs (truncated from 308 to 300 lines):
diff -r ea546be31e2a -r ca545ea91b07 ChangeLog
--- a/ChangeLog Tue Nov 15 01:33:25 2011 +0100
+++ b/ChangeLog Tue Nov 15 20:37:14 2011 +0100
@@ -1,5 +1,37 @@
+2011-11-15 Niels Möller <nisse at lysator.liu.se>
+
+ * tune/speed.h (speed_mpn_hgcd_appr_lehmer): New prototype.
+ (mpn_hgcd_lehmer_itch): Likewise.
+ (mpn_hgcd_appr_lehmer): Likewise.
+ (mpn_hgcd_appr_lehmer_itch): Likewise.
+ (MPN_HGCD_LEHMER_ITCH): Deleted macro.
+
+ * tune/speed.c (routine): Added mpn_hgcd_appr_lehmer.
+
+ * tune/common.c (speed_mpn_hgcd_lehmer): Use mpn_hgcd_lehmer_itch
+ rather than similarly named macro.
+ (speed_mpn_hgcd_appr_lehmer): New function.
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added
+ hgcd_appr_lehmer.c.
+
+ * tune/hgcd_appr_lehmer.c: New file.
+
+ * tune/tuneup.c (tune_hgcd_appr): Increased min_size to 50; some
+ machines got small thresholds which appear to be bogus.
+
2011-11-15 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
+ (redcify): Likewise.
+ (mpn_powm_sec): Likewise.
+
+ * mpn/generic/powm_sec.c (mpn_powm_sec): Rework scratch usage
+ (mpn_powm_sec_itch): Rewrite.
+
+ * mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
+ initialisation.
+
* configure.in: Amend 2011-11-03 gcc_cflags change.
* mpn/powerpc64/tabselect.asm: New file.
diff -r ea546be31e2a -r ca545ea91b07 mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c Tue Nov 15 01:33:25 2011 +0100
+++ b/mpn/generic/powm_sec.c Tue Nov 15 20:37:14 2011 +0100
@@ -133,8 +133,6 @@
if (n > 1)
{
mp_limb_t cy;
- TMP_DECL;
- TMP_MARK;
cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
tp[n - 1] = cy;
@@ -156,8 +154,6 @@
#endif
rp[2 * n - 1] += cy;
}
-
- TMP_FREE;
}
}
#endif
@@ -211,26 +207,24 @@
}
#endif
-/* Convert U to REDC form, U_r = B^n * U mod M */
+/* Convert U to REDC form, U_r = B^n * U mod M.
+ Uses scratch space at tp of size 2un + n + 1. */
static void
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
mp_ptr qp;
- TMP_DECL;
- TMP_MARK;
- qp = tp + un + n;
+ qp = tp + un + n; /* un + n - n + 1 = un + 1 limbs */
MPN_ZERO (tp, n);
MPN_COPY (tp + n, up, un);
mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
- TMP_FREE;
}
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
- Requires that mp[n-1..0] is odd. FIXME: is this true?
- Requires that ep[en-1..0] is > 1.
- Uses scratch space at tp of 3n+1 limbs. */
+ Requires that mp[n-1..0] is odd.
+ Requires that ep[en-1..0] > 1.
+ Uses scratch space at tp as defined by mpn_powm_sec_itch. */
void
mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
mp_srcptr ep, mp_size_t en,
@@ -244,13 +238,10 @@
mp_ptr pp, this_pp;
long i;
int cnd;
- TMP_DECL;
ASSERT (en > 1 || (en == 1 && ep[0] > 0));
ASSERT (n >= 1 && ((mp[0] & 1) != 0));
- TMP_MARK;
-
count_leading_zeros (cnt, ep[en - 1]);
ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
@@ -259,15 +250,27 @@
binvert_limb (minv, mp[0]);
minv = -minv;
- pp = tp + 4 * n;
+ pp = tp;
+ tp += (n << windowsize); /* put tp after power table */
+ /* Compute pp[0] table entry */
+ /* scratch: | n | 1 | n+2 | */
+ /* | pp[0] | 1 | redcify | */
this_pp = pp;
this_pp[n] = 1;
- redcify (this_pp, this_pp + n, 1, mp, n, tp + 6 * n);
+ redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
this_pp += n;
- redcify (this_pp, bp, bn, mp, n, tp + 6 * n);
+
+ /* Compute pp[1] table entry. To avoid excessive scratch usage in the
+ degenerate situation where B >> M, we let redcify use scratch space which
+ will later be used by the pp table (element 2 and up). */
+ /* scratch: | n | n | bn + n + 1 | */
+ /* | pp[0] | pp[1] | redcify | */
+ redcify (this_pp, bp, bn, mp, n, this_pp + n);
/* Precompute powers of b and put them in the temporary area at pp. */
+ /* scratch: | n | n | ... | | 2n | */
+ /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | product | */
for (i = (1 << windowsize) - 2; i > 0; i--)
{
mpn_mul_basecase (tp, this_pp, n, pp + n, n);
@@ -281,8 +284,15 @@
else
ebi -= windowsize;
+#if WANT_CACHE_SECURITY
+ mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
+#else
MPN_COPY (rp, pp + n * expbits, n);
+#endif
+ /* Main exponentiation loop. */
+ /* scratch: | n | n | ... | | 3n-4n | */
+ /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | loop scratch | */
while (ebi != 0)
{
expbits = getbits (ep, ebi, windowsize);
@@ -317,7 +327,6 @@
MPN_REDC_1_SEC (rp, tp, mp, n, minv);
cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
mpn_subcnd_n (rp, rp, mp, n, !cnd);
- TMP_FREE;
}
mp_size_t
@@ -326,10 +335,20 @@
int windowsize;
mp_size_t redcify_itch, itch;
- windowsize = win_size (en * GMP_NUMB_BITS); /* slight over-estimate of exp */
- itch = 4 * n + (n << windowsize);
- redcify_itch = 2 * bn + n + 1;
- /* The 6n is due to the placement of reduce scratch 6n into the start of the
- scratch area. */
- return MAX (itch, redcify_itch + 6 * n);
+ /* The top scratch usage will either be when reducing B in the 2nd redcify
+ call, or more typically n*2^windowsize + 3n or 4n, in the main loop. (It
+ is 3n or 4n depending on if we use mpn_local_sqr or a native
+ mpn_sqr_basecase. We assume 4n always for now.) */
+
+ windowsize = win_size (en * GMP_LIMB_BITS); /* slight over-estimate of exp */
+
+ /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+ the 2bn + n + 1 term is due to redcify's own usage. */
+ redcify_itch = (2 * n) + (2 * bn + n + 1);
+
+ /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+ scratch needs of squaring/multiplication in the exponentiation loop. */
+ itch = (n << windowsize) + (4 * n);
+
+ return MAX (itch, redcify_itch);
}
diff -r ea546be31e2a -r ca545ea91b07 tune/Makefile.am
--- a/tune/Makefile.am Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/Makefile.am Tue Nov 15 20:37:14 2011 +0100
@@ -43,7 +43,7 @@
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
diff -r ea546be31e2a -r ca545ea91b07 tune/common.c
--- a/tune/common.c Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/common.c Tue Nov 15 20:37:14 2011 +0100
@@ -1529,7 +1529,7 @@
double
speed_mpn_hgcd_lehmer (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, MPN_HGCD_LEHMER_ITCH);
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
}
double
@@ -1539,6 +1539,12 @@
}
double
+speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+}
+
+double
speed_mpn_hgcd_reduce (struct speed_params *s)
{
SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
diff -r ea546be31e2a -r ca545ea91b07 tune/hgcd_appr_lehmer.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd_appr_lehmer.c Tue Nov 15 20:37:14 2011 +0100
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_appr mpn_hgcd_appr_lehmer
+#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
+
+#include "../mpn/generic/hgcd_appr.c"
diff -r ea546be31e2a -r ca545ea91b07 tune/speed.c
--- a/tune/speed.c Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/speed.c Tue Nov 15 20:37:14 2011 +0100
@@ -278,6 +278,7 @@
{ "mpn_hgcd", speed_mpn_hgcd },
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
{ "mpn_hgcd_appr", speed_mpn_hgcd_appr },
+ { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
{ "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
{ "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
diff -r ea546be31e2a -r ca545ea91b07 tune/speed.h
--- a/tune/speed.h Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/speed.h Tue Nov 15 20:37:14 2011 +0100
@@ -198,6 +198,7 @@
double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_appr_lehmer __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s));
double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s));
@@ -489,7 +490,11 @@
__GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
mp_size_t mpn_hgcd_lehmer
__GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
-#define MPN_HGCD_LEHMER_ITCH(n) (n)
+mp_size_t mpn_hgcd_lehmer_itch __GMP_PROTO ((mp_size_t));
+
+mp_size_t mpn_hgcd_appr_lehmer
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+mp_size_t mpn_hgcd_appr_lehmer_itch __GMP_PROTO ((mp_size_t));
mp_size_t mpn_hgcd_reduce_1
__GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
diff -r ea546be31e2a -r ca545ea91b07 tune/tuneup.c
--- a/tune/tuneup.c Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/tuneup.c Tue Nov 15 20:37:14 2011 +0100
@@ -1761,6 +1761,8 @@
More information about the gmp-commit
mailing list