[Gmp-commit] /var/hg/gmp: 2 new changesets

Tue Nov 15 20:37:21 CET 2011

details:   /var/hg/gmp/rev/75043735b2eb
changeset: 14445:75043735b2eb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Nov 15 20:36:09 2011 +0100
description:
Rewrite mpn/generic/powm_sec.c.

details:   /var/hg/gmp/rev/ca545ea91b07
changeset: 14446:ca545ea91b07
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Nov 15 20:37:14 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog               |  32 ++++++++++++++++++++++
 mpn/generic/powm_sec.c  |  69 +++++++++++++++++++++++++++++++-----------------
 tune/Makefile.am        |   2 +-
 tune/common.c           |   8 ++++-
 tune/hgcd_appr_lehmer.c |  29 ++++++++++++++++++++
 tune/speed.c            |   1 +
 tune/speed.h            |   7 ++++-
 tune/tuneup.c           |   2 +
 8 files changed, 122 insertions(+), 28 deletions(-)

diffs (truncated from 308 to 300 lines):

diff -r ea546be31e2a -r ca545ea91b07 ChangeLog

--- a/ChangeLog	Tue Nov 15 01:33:25 2011 +0100
+++ b/ChangeLog	Tue Nov 15 20:37:14 2011 +0100
@@ -1,5 +1,37 @@
+2011-11-15  Niels Möller  <nisse at lysator.liu.se>
+
+	* tune/speed.h (speed_mpn_hgcd_appr_lehmer): New prototype.
+	(mpn_hgcd_lehmer_itch): Likewise.
+	(mpn_hgcd_appr_lehmer): Likewise.
+	(mpn_hgcd_appr_lehmer_itch): Likewise.
+	(MPN_HGCD_LEHMER_ITCH): Deleted macro.
+
+	* tune/speed.c (routine): Added mpn_hgcd_appr_lehmer.
+
+	* tune/common.c (speed_mpn_hgcd_lehmer): Use mpn_hgcd_lehmer_itch
+	rather than similarly named macro.
+	(speed_mpn_hgcd_appr_lehmer): New function.
+
+	* tune/Makefile.am (libspeed_la_SOURCES): Added
+	hgcd_appr_lehmer.c.
+
+	* tune/hgcd_appr_lehmer.c: New file.
+
+	* tune/tuneup.c (tune_hgcd_appr): Increased min_size to 50; some
+	machines got small thresholds which appear to be bogus.
+
 2011-11-15  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
+	(redcify): Likewise.
+	(mpn_powm_sec): Likewise.
+
+	* mpn/generic/powm_sec.c (mpn_powm_sec): Rework scratch usage.
+	(mpn_powm_sec_itch): Rewrite.
+
+	* mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
+	initialisation.
+
 	* configure.in: Amend 2011-11-03 gcc_cflags change.
 
 	* mpn/powerpc64/tabselect.asm: New file.
diff -r ea546be31e2a -r ca545ea91b07 mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c	Tue Nov 15 01:33:25 2011 +0100
+++ b/mpn/generic/powm_sec.c	Tue Nov 15 20:37:14 2011 +0100
@@ -133,8 +133,6 @@
   if (n > 1)
     {
       mp_limb_t cy;
-      TMP_DECL;
-      TMP_MARK;
 
       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
       tp[n - 1] = cy;
@@ -156,8 +154,6 @@
 #endif
 	rp[2 * n - 1] += cy;
       }
-
-      TMP_FREE;
     }
 }
 #endif
@@ -211,26 +207,24 @@
 }
 #endif
 
-/* Convert U to REDC form, U_r = B^n * U mod M */
+/* Convert U to REDC form, U_r = B^n * U mod M.
+   Uses scratch space at tp of size 2un + n + 1.  */
 static void
 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
 {
   mp_ptr qp;
-  TMP_DECL;
-  TMP_MARK;
 
-  qp = tp + un + n;
+  qp = tp + un + n;		/* un + n - n + 1 = un + 1 limbs */
 
   MPN_ZERO (tp, n);
   MPN_COPY (tp + n, up, un);
   mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
-  TMP_FREE;
 }
 
 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
-   Requires that mp[n-1..0] is odd.  FIXME: is this true?
-   Requires that ep[en-1..0] is > 1.
-   Uses scratch space at tp of 3n+1 limbs.  */
+   Requires that mp[n-1..0] is odd.
+   Requires that ep[en-1..0] > 1.
+   Uses scratch space at tp as defined by mpn_powm_sec_itch.  */
 void
 mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 	      mp_srcptr ep, mp_size_t en,
@@ -244,13 +238,10 @@
   mp_ptr pp, this_pp;
   long i;
   int cnd;
-  TMP_DECL;
 
   ASSERT (en > 1 || (en == 1 && ep[0] > 0));
   ASSERT (n >= 1 && ((mp[0] & 1) != 0));
 
-  TMP_MARK;
-
   count_leading_zeros (cnt, ep[en - 1]);
   ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
 
@@ -259,15 +250,27 @@
   binvert_limb (minv, mp[0]);
   minv = -minv;
 
-  pp = tp + 4 * n;
+  pp = tp;
+  tp += (n << windowsize);	/* put tp after power table */
 
+  /* Compute pp[0] table entry */
+  /* scratch: |   n   | 1 |   n+3    |  */
+  /*          | pp[0] | 1 | redcify  |  */
   this_pp = pp;
   this_pp[n] = 1;
-  redcify (this_pp, this_pp + n, 1, mp, n, tp + 6 * n);
+  redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
   this_pp += n;
-  redcify (this_pp, bp, bn, mp, n, tp + 6 * n);
+
+  /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
+     degenerate situation where B >> M, we let redcify use scratch space which
+     will later be used by the pp table (element 2 and up).  */
+  /* scratch: |   n   |   n   | 2bn + n + 1  |  */
+  /*          | pp[0] | pp[1] |   redcify    |  */
+  redcify (this_pp, bp, bn, mp, n, this_pp + n);
 
   /* Precompute powers of b and put them in the temporary area at pp.  */
+  /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
   for (i = (1 << windowsize) - 2; i > 0; i--)
     {
       mpn_mul_basecase (tp, this_pp, n, pp + n, n);
@@ -281,8 +284,15 @@
   else
     ebi -= windowsize;
 
+#if WANT_CACHE_SECURITY
+  mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
+#else
   MPN_COPY (rp, pp + n * expbits, n);
+#endif
 
+  /* Main exponentiation loop.  */
+  /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
   while (ebi != 0)
     {
       expbits = getbits (ep, ebi, windowsize);
@@ -317,7 +327,6 @@
   MPN_REDC_1_SEC (rp, tp, mp, n, minv);
   cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
   mpn_subcnd_n (rp, rp, mp, n, !cnd);
-  TMP_FREE;
 }
 
 mp_size_t
@@ -326,10 +335,20 @@
   int windowsize;
   mp_size_t redcify_itch, itch;
 
-  windowsize = win_size (en * GMP_NUMB_BITS); /* slight over-estimate of exp */
-  itch = 4 * n + (n << windowsize);
-  redcify_itch = 2 * bn + n + 1;
-  /* The 6n is due to the placement of reduce scratch 6n into the start of the
-     scratch area.  */
-  return MAX (itch, redcify_itch + 6 * n);
+  /* The top scratch usage will either be when reducing B in the 2nd redcify
+     call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
+     is 3n or 4n depending on whether we use mpn_local_sqr or a native
+     mpn_sqr_basecase.  We assume 4n always for now.) */
+
+  windowsize = win_size (en * GMP_LIMB_BITS); /* slight over-estimate of exp */
+
+  /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+     the 2bn + n + 1 term is due to redcify's own usage.  */
+  redcify_itch = (2 * n) + (2 * bn + n + 1);
+
+  /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+     scratch needs of squaring/multiplication in the exponentiation loop.  */
+  itch = (n << windowsize) + (4 * n);
+
+  return MAX (itch, redcify_itch);
 }
diff -r ea546be31e2a -r ca545ea91b07 tune/Makefile.am
--- a/tune/Makefile.am	Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/Makefile.am	Tue Nov 15 20:37:14 2011 +0100
@@ -43,7 +43,7 @@
   common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c		\
   freq.c								\
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
-  hgcd_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c				\
+  hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c	\
   jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
   mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
diff -r ea546be31e2a -r ca545ea91b07 tune/common.c
--- a/tune/common.c	Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/common.c	Tue Nov 15 20:37:14 2011 +0100
@@ -1529,7 +1529,7 @@
 double
 speed_mpn_hgcd_lehmer (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, MPN_HGCD_LEHMER_ITCH);
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
 }
 
 double
@@ -1539,6 +1539,12 @@
 }
 
 double
+speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+}
+
+double
 speed_mpn_hgcd_reduce (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
diff -r ea546be31e2a -r ca545ea91b07 tune/hgcd_appr_lehmer.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tune/hgcd_appr_lehmer.c	Tue Nov 15 20:37:14 2011 +0100
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_appr  mpn_hgcd_appr_lehmer
+#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
+
+#include "../mpn/generic/hgcd_appr.c"
diff -r ea546be31e2a -r ca545ea91b07 tune/speed.c
--- a/tune/speed.c	Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/speed.c	Tue Nov 15 20:37:14 2011 +0100
@@ -278,6 +278,7 @@
   { "mpn_hgcd",          speed_mpn_hgcd             },
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
+  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
 
   { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
   { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
diff -r ea546be31e2a -r ca545ea91b07 tune/speed.h
--- a/tune/speed.h	Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/speed.h	Tue Nov 15 20:37:14 2011 +0100
@@ -198,6 +198,7 @@
 double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_appr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_appr_lehmer __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_reduce __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_reduce_1 __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_hgcd_reduce_2 __GMP_PROTO ((struct speed_params *s));
@@ -489,7 +490,11 @@
   __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
 mp_size_t mpn_hgcd_lehmer
   __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
-#define MPN_HGCD_LEHMER_ITCH(n) (n)
+mp_size_t mpn_hgcd_lehmer_itch __GMP_PROTO ((mp_size_t));
+
+mp_size_t mpn_hgcd_appr_lehmer
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+mp_size_t mpn_hgcd_appr_lehmer_itch __GMP_PROTO ((mp_size_t));
 
 mp_size_t mpn_hgcd_reduce_1
   __GMP_PROTO ((struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr));
diff -r ea546be31e2a -r ca545ea91b07 tune/tuneup.c
--- a/tune/tuneup.c	Tue Nov 15 01:33:25 2011 +0100
+++ b/tune/tuneup.c	Tue Nov 15 20:37:14 2011 +0100
@@ -1761,6 +1761,8 @@