[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu May 3 21:09:38 CEST 2012
details: /var/hg/gmp/rev/eae028f4d2a0
changeset: 14937:eae028f4d2a0
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed May 02 14:51:30 2012 +0200
description:
Add cycle numbers.
details: /var/hg/gmp/rev/82526cbe462f
changeset: 14938:82526cbe462f
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu May 03 21:09:35 2012 +0200
description:
Use redc_2.
diffstat:
ChangeLog | 6 +
mpn/generic/powm_sec.c | 146 +++++++++++++++++++++++++++---------
mpn/powerpc64/mode64/aorscnd_n.asm | 2 +-
mpn/x86_64/aorscnd_n.asm | 2 +
4 files changed, 118 insertions(+), 38 deletions(-)
diffs (245 lines):
diff -r 97d68bbee018 -r 82526cbe462f ChangeLog
--- a/ChangeLog Wed May 02 14:33:42 2012 +0200
+++ b/ChangeLog Thu May 03 21:09:35 2012 +0200
@@ -1,3 +1,9 @@
+2012-05-03 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/powm_sec.c: Use redc_2.
+ (INNERLOOP): Use this mechanism, like plain powm.c.
+ (WANT_CACHE_SECURITY): Remove, feature now unconditional.
+
2012-05-02 Torbjorn Granlund <tege at gmplib.org>
* mpz/bin_uiui.c: Make use of CNST_LIMB.
diff -r 97d68bbee018 -r 82526cbe462f mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c Wed May 02 14:33:42 2012 +0200
+++ b/mpn/generic/powm_sec.c Thu May 03 21:09:35 2012 +0200
@@ -47,15 +47,17 @@
* Choose window size without looping. (Superoptimize or think(tm).)
- * Call new division functions, not mpn_tdiv_qr.
+ * Call side-channel silent division function for converting to REDC residue.
+
+ * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+ redc_1 and redc_n. On such systems, we will switch to redc_2 causing
+ slowdown.
*/
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#define WANT_CACHE_SECURITY 1
-
#undef MPN_REDC_1_SEC
#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \
do { \
@@ -64,6 +66,18 @@
mpn_subcnd_n (rp, rp, mp, n, cy); \
} while (0)
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_2 (rp, up, mp, n, mip); \
+ mpn_subcnd_n (rp, rp, mp, n, cy); \
+ } while (0)
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
/* Define our own mpn squaring function. We do this since we cannot use a
native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
SQR_TOOM2_THRESHOLD. This is so because of fixed size stack allocations
@@ -236,7 +250,7 @@
mp_srcptr ep, mp_size_t en,
mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
- mp_limb_t minv;
+ mp_limb_t ip[2], *mip;
int cnt;
mp_bitcnt_t ebi;
int windowsize, this_windowsize;
@@ -253,8 +267,25 @@
windowsize = win_size (ebi);
- binvert_limb (minv, mp[0]);
- minv = -minv;
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ }
+ else
+ {
+ mip = ip;
+ mpn_binvert (mip, mp, 2, tp);
+ mip[0] = -mip[0]; mip[1] = ~mip[1];
+ }
+#else
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+#endif
+
pp = tp;
tp += (n << windowsize); /* put tp after power table */
@@ -281,7 +312,14 @@
{
mpn_mul_basecase (tp, this_pp, n, pp + n, n);
this_pp += n;
- MPN_REDC_1_SEC (this_pp, tp, mp, n, minv);
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+ else
+ MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+#else
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+#endif
}
expbits = getbits (ep, ebi, windowsize);
@@ -290,47 +328,81 @@
else
ebi -= windowsize;
-#if WANT_CACHE_SECURITY
mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
-#else
- MPN_COPY (rp, pp + n * expbits, n);
-#endif
/* Main exponentiation loop. */
/* scratch: | n | n | ... | | 3n-4n | */
/* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | loop scratch | */
- while (ebi != 0)
+
+#define INNERLOOP \
+ while (ebi != 0) \
+ { \
+ expbits = getbits (ep, ebi, windowsize); \
+ this_windowsize = windowsize; \
+ if (ebi < windowsize) \
+ { \
+ this_windowsize -= windowsize - ebi; \
+ ebi = 0; \
+ } \
+ else \
+ ebi -= windowsize; \
+ \
+ do \
+ { \
+ mpn_local_sqr (tp, rp, n, tp + 2 * n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ this_windowsize--; \
+ } \
+ while (this_windowsize != 0); \
+ \
+ mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits); \
+ mpn_mul_basecase (tp, rp, n, tp + 2*n, n); \
+ \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ }
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
{
- expbits = getbits (ep, ebi, windowsize);
- this_windowsize = windowsize;
- if (ebi < windowsize)
- {
- this_windowsize -= windowsize - ebi;
- ebi = 0;
- }
- else
- ebi -= windowsize;
-
- do
- {
- mpn_local_sqr (tp, rp, n, tp + 2 * n);
- MPN_REDC_1_SEC (rp, tp, mp, n, minv);
- this_windowsize--;
- }
- while (this_windowsize != 0);
-
-#if WANT_CACHE_SECURITY
- mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
- mpn_mul_basecase (tp, rp, n, tp + 2*n, n);
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
#else
- mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+ INNERLOOP;
#endif
- MPN_REDC_1_SEC (rp, tp, mp, n, minv);
- }
MPN_COPY (tp, rp, n);
MPN_ZERO (tp + n, n);
- MPN_REDC_1_SEC (rp, tp, mp, n, minv);
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+ else
+ MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+#else
+ MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+#endif
cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
mpn_subcnd_n (rp, rp, mp, n, !cnd);
}
diff -r 97d68bbee018 -r 82526cbe462f mpn/powerpc64/mode64/aorscnd_n.asm
--- a/mpn/powerpc64/mode64/aorscnd_n.asm Wed May 02 14:33:42 2012 +0200
+++ b/mpn/powerpc64/mode64/aorscnd_n.asm Thu May 03 21:09:35 2012 +0200
@@ -24,7 +24,7 @@
C POWER3/PPC630 ?
C POWER4/PPC970 2.25
C POWER5 ?
-C POWER6 ?
+C POWER6 3
C POWER7 ?
C INPUT PARAMETERS
diff -r 97d68bbee018 -r 82526cbe462f mpn/x86_64/aorscnd_n.asm
--- a/mpn/x86_64/aorscnd_n.asm Wed May 02 14:33:42 2012 +0200
+++ b/mpn/x86_64/aorscnd_n.asm Thu May 03 21:09:35 2012 +0200
@@ -22,6 +22,8 @@
C cycles/limb
C AMD K8,K9 2.25
C AMD K10 2
+C AMD bd1 3.55
+C AMD bobcat 2.5
C Intel P4 13
C Intel core2 2.9
C Intel NHM 2.9
More information about the gmp-commit mailing list