[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu May 3 21:09:38 CEST 2012


details:   /var/hg/gmp/rev/eae028f4d2a0
changeset: 14937:eae028f4d2a0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed May 02 14:51:30 2012 +0200
description:
Add cycle numbers.

details:   /var/hg/gmp/rev/82526cbe462f
changeset: 14938:82526cbe462f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu May 03 21:09:35 2012 +0200
description:
Use redc_2.

diffstat:

 ChangeLog                          |    6 +
 mpn/generic/powm_sec.c             |  146 +++++++++++++++++++++++++++---------
 mpn/powerpc64/mode64/aorscnd_n.asm |    2 +-
 mpn/x86_64/aorscnd_n.asm           |    2 +
 4 files changed, 118 insertions(+), 38 deletions(-)

diffs (245 lines):

diff -r 97d68bbee018 -r 82526cbe462f ChangeLog
--- a/ChangeLog	Wed May 02 14:33:42 2012 +0200
+++ b/ChangeLog	Thu May 03 21:09:35 2012 +0200
@@ -1,3 +1,9 @@
+2012-05-03  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/powm_sec.c: Use redc_2.
+	(INNERLOOP): Use this mechanism, like plain powm.c.
+	(WANT_CACHE_SECURITY): Remove, feature now unconditional.
+
 2012-05-02  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpz/bin_uiui.c: Make use of CNST_LIMB.
diff -r 97d68bbee018 -r 82526cbe462f mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c	Wed May 02 14:33:42 2012 +0200
+++ b/mpn/generic/powm_sec.c	Thu May 03 21:09:35 2012 +0200
@@ -47,15 +47,17 @@
 
    * Choose window size without looping.  (Superoptimize or think(tm).)
 
-   * Call new division functions, not mpn_tdiv_qr.
+   * Call side-channel silent division function for converting to REDC residue.
+
+   * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+     redc_1 and redc_n.  On such systems, we will switch to redc_2, causing
+     a slowdown.
 */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-#define WANT_CACHE_SECURITY 1
-
 #undef MPN_REDC_1_SEC
 #define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
   do {									\
@@ -64,6 +66,18 @@
     mpn_subcnd_n (rp, rp, mp, n, cy);					\
   } while (0)
 
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip)				\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
+    mpn_subcnd_n (rp, rp, mp, n, cy);					\
+  } while (0)
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
 /* Define our own mpn squaring function.  We do this since we cannot use a
    native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
    SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
@@ -236,7 +250,7 @@
 	      mp_srcptr ep, mp_size_t en,
 	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
 {
-  mp_limb_t minv;
+  mp_limb_t ip[2], *mip;
   int cnt;
   mp_bitcnt_t ebi;
   int windowsize, this_windowsize;
@@ -253,8 +267,25 @@
 
   windowsize = win_size (ebi);
 
-  binvert_limb (minv, mp[0]);
-  minv = -minv;
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+  else
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+    }
+#else
+  mip = ip;
+  binvert_limb (mip[0], mp[0]);
+  mip[0] = -mip[0];
+#endif
+
 
   pp = tp;
   tp += (n << windowsize);	/* put tp after power table */
@@ -281,7 +312,14 @@
     {
       mpn_mul_basecase (tp, this_pp, n, pp + n, n);
       this_pp += n;
-      MPN_REDC_1_SEC (this_pp, tp, mp, n, minv);
+#if WANT_REDC_2
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+      else
+	MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+#else
+      MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+#endif
     }
 
   expbits = getbits (ep, ebi, windowsize);
@@ -290,47 +328,81 @@
   else
     ebi -= windowsize;
 
-#if WANT_CACHE_SECURITY
   mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
-#else
-  MPN_COPY (rp, pp + n * expbits, n);
-#endif
 
   /* Main exponentiation loop.  */
   /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
   /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
-  while (ebi != 0)
+
+#define INNERLOOP							\
+  while (ebi != 0)							\
+    {									\
+      expbits = getbits (ep, ebi, windowsize);				\
+      this_windowsize = windowsize;					\
+      if (ebi < windowsize)						\
+	{								\
+	  this_windowsize -= windowsize - ebi;				\
+	  ebi = 0;							\
+	}								\
+      else								\
+	ebi -= windowsize;						\
+									\
+      do								\
+	{								\
+	  mpn_local_sqr (tp, rp, n, tp + 2 * n);			\
+	  MPN_REDUCE (rp, tp, mp, n, mip);				\
+	  this_windowsize--;						\
+	}								\
+      while (this_windowsize != 0);					\
+									\
+      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);	\
+      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);			\
+									\
+      MPN_REDUCE (rp, tp, mp, n, mip);					\
+    }
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     {
-      expbits = getbits (ep, ebi, windowsize);
-      this_windowsize = windowsize;
-      if (ebi < windowsize)
-	{
-	  this_windowsize -= windowsize - ebi;
-	  ebi = 0;
-	}
-      else
-	ebi -= windowsize;
-
-      do
-	{
-	  mpn_local_sqr (tp, rp, n, tp + 2 * n);
-	  MPN_REDC_1_SEC (rp, tp, mp, n, minv);
-	  this_windowsize--;
-	}
-      while (this_windowsize != 0);
-
-#if WANT_CACHE_SECURITY
-      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
-      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+      INNERLOOP;
+    }
+  else
+    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+      INNERLOOP;
+    }
 #else
-      mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+  INNERLOOP;
 #endif
-      MPN_REDC_1_SEC (rp, tp, mp, n, minv);
-    }
 
   MPN_COPY (tp, rp, n);
   MPN_ZERO (tp + n, n);
-  MPN_REDC_1_SEC (rp, tp, mp, n, minv);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+  else
+    MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+#else
+  MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+#endif
   cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
   mpn_subcnd_n (rp, rp, mp, n, !cnd);
 }
diff -r 97d68bbee018 -r 82526cbe462f mpn/powerpc64/mode64/aorscnd_n.asm
--- a/mpn/powerpc64/mode64/aorscnd_n.asm	Wed May 02 14:33:42 2012 +0200
+++ b/mpn/powerpc64/mode64/aorscnd_n.asm	Thu May 03 21:09:35 2012 +0200
@@ -24,7 +24,7 @@
 C POWER3/PPC630          ?
 C POWER4/PPC970          2.25
 C POWER5                 ?
-C POWER6                 ?
+C POWER6                 3
 C POWER7                 ?
 
 C INPUT PARAMETERS
diff -r 97d68bbee018 -r 82526cbe462f mpn/x86_64/aorscnd_n.asm
--- a/mpn/x86_64/aorscnd_n.asm	Wed May 02 14:33:42 2012 +0200
+++ b/mpn/x86_64/aorscnd_n.asm	Thu May 03 21:09:35 2012 +0200
@@ -22,6 +22,8 @@
 C	     cycles/limb
 C AMD K8,K9	 2.25
 C AMD K10	 2
+C AMD bd1	 3.55
+C AMD bobcat	 2.5
 C Intel P4	13
 C Intel core2	 2.9
 C Intel NHM	 2.9


More information about the gmp-commit mailing list