[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
Fri Jul 12 18:53:26 CEST 2013
details: /var/hg/gmp/rev/55ccd836a8ac
changeset: 15862:55ccd836a8ac
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 11 22:39:24 2013 +0200
description:
Remove spurious ABI_SUPPORT decls.
details: /var/hg/gmp/rev/c30b7c2da2ff
changeset: 15863:c30b7c2da2ff
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Jul 11 23:34:00 2013 +0200
description:
Tweak for better speed on K8, bobcat, bd1, NHM, Atom.
details: /var/hg/gmp/rev/8de68af9fb4b
changeset: 15864:8de68af9fb4b
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Jul 12 12:21:42 2013 +0200
description:
Partial rewrite.
diffstat:
mpn/generic/sbpi1_div_sec.c | 94 +++++++++++++++++++++++++-------------------
mpn/x86/cnd_aors_n.asm | 3 -
mpn/x86_64/cnd_aors_n.asm | 46 +++++++++++-----------
3 files changed, 77 insertions(+), 66 deletions(-)
diffs (282 lines):
diff -r 45c38e4c1de2 -r 8de68af9fb4b mpn/generic/sbpi1_div_sec.c
--- a/mpn/generic/sbpi1_div_sec.c Fri Jul 05 14:45:17 2013 +0200
+++ b/mpn/generic/sbpi1_div_sec.c Fri Jul 12 12:21:42 2013 +0200
@@ -8,7 +8,7 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2011, 2012 Free Software Foundation, Inc.
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -29,6 +29,28 @@
#include "gmp-impl.h"
#include "longlong.h"
+/* This side-channel silent division algorithm reduces the partial remainder by
+ GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
+ division algorithm. We do not insist on reducing by exactly
+ GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
+ too large (B is the limb base, D is the divisor, and i is the induction
+ variable); the subsequent step will handle the extra partial remainder bits.
+
+ With that partial remainder reduction, each step generates a quotient "half
+ limb". The outer loop generates two quotient half limbs, an upper (q1h) and
+ a lower (q0h), which are stored sparsely in separate limb arrays. These
+ arrays are added at the end; using separate arrays avoids data-dependent
+ carry propagation which could otherwise pose a side-channel leakage problem.
+
+ The quotient half limbs may be between -3 and 0 from the accurate value
+ ("accurate" being the one which corresponds to a reduction to a principal
+ partial remainder). Quotient half limbs that are too small correspond to
+ remainders that are too large, which we reduce later, as described above.
+
+ In order to keep quotients from getting too big, corresponding to a negative
+ partial remainder, we use an inverse which is slightly smaller than usual.
+*/
+
#if OPERATION_sbpi1_div_qr_sec
/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
#define FNAME mpn_sbpi1_div_qr_sec
@@ -49,7 +71,7 @@
mp_limb_t dinv,
mp_ptr tp)
{
- mp_limb_t nh, cy, q1h, q0h, dummy, h;
+ mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
mp_size_t i;
mp_ptr hp;
#if OPERATION_sbpi1_div_qr_sec
@@ -72,77 +94,69 @@
#endif
}
+ /* Decrement inverse to keep quotient half limbs from being too large. */
+ dinv -= dinv != 0; /* FIXME: cmp-to-int */
+
/* Create a divisor copy shifted half a limb. */
hp = tp; /* (dn + 1) limbs */
- cy = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
- hp[dn] = dp[dn - 1] >> GMP_NUMB_BITS / 2;
+ hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
#if OPERATION_sbpi1_div_qr_sec
qlp = tp + (dn + 1); /* (nn - dn) limbs */
qhp = tp + (nn + 1); /* (nn - dn) limbs */
#endif
- np += nn;
+ np += nn - dn;
+ nh = 0;
- /* Main loop. Develop one full limb per iteration, but do it in two steps in
- order to avoid conditionals. Quotient bits will be either correct or
- underestimates. When a quotient is underestimated, the next quotient will
- compensate, since quotients are to be added at consecutive weight distance
- GMP_NUMB_BITS/2. We make two quotient arrays, each with GMP_NUMB_BITS/2+2
- bits per entry. The arrays are added late after the loop. Separate
- arrays avoid data-dependent carry propagation. */
- nh = 0;
for (i = nn - dn - 1; i >= 0; i--)
{
np--;
- nh = (nh << GMP_NUMB_BITS/2) + (np[0] >> GMP_NUMB_BITS/2);
+ nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
umul_ppmm (q1h, dummy, nh, dinv);
q1h += nh;
#if OPERATION_sbpi1_div_qr_sec
qhp[i] = q1h;
#endif
- cy = mpn_submul_1 (np - dn, hp, dn + 1, q1h);
+ mpn_submul_1 (np, hp, dn + 1, q1h);
- nh = np[0];
+ nh = np[dn];
umul_ppmm (q0h, dummy, nh, dinv);
q0h += nh;
#if OPERATION_sbpi1_div_qr_sec
qlp[i] = q0h;
#endif
- cy = mpn_submul_1 (np - dn, dp, dn, q0h);
-
- nh -= cy;
+ nh -= mpn_submul_1 (np, dp, dn, q0h);
}
- np[0] = nh;
+ /* 1st adjustment depends on extra high remainder limb. */
+ cnd = nh != 0; /* FIXME: cmp-to-int */
+#if OPERATION_sbpi1_div_qr_sec
+ qlp[0] += cnd;
+#endif
+ nh -= mpn_cnd_sub_n (cnd, np, np, dp, dn);
- np -= dn;
-
- /* 1st adjustment depends on extra high remainder limb. */
- h = np[dn];
-#if OPERATION_sbpi1_div_qr_sec
- qlp[0] += h;
-#endif
- h -= mpn_cnd_sub_n (h, np, np, dp, dn);
-
- /* 2nd adjustment depends on remainder/divisor comparision as well as whether
+ /* 2nd adjustment depends on remainder/divisor comparison as well as whether
extra remainder limb was nullified by previous subtract. */
cy = mpn_sub_n (np, np, dp, dn);
- cy = cy == h; /* FIXME: might leak on some archs */
+ cy = cy - nh;
#if OPERATION_sbpi1_div_qr_sec
- qlp[0] += cy;
+ qlp[0] += 1 - cy;
#endif
- mpn_cnd_add_n (1 - cy, np, np, dp, dn);
+ mpn_cnd_add_n (cy, np, np, dp, dn);
+ /* 3rd adjustment depends on remainder/divisor comparison. */
+ cy = mpn_sub_n (np, np, dp, dn);
+#if OPERATION_sbpi1_div_qr_sec
+ qlp[0] += 1 - cy;
+#endif
+ mpn_cnd_add_n (cy, np, np, dp, dn);
+
+#if OPERATION_sbpi1_div_qr_sec
/* Combine quotient halves into final quotient. */
-#if OPERATION_sbpi1_div_qr_sec
- qh = 0;
- if (nn - dn != 0)
- {
- qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
- qh += mpn_add_n (qp, qhp, qlp, nn - dn);
- }
+ qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+ qh += mpn_add_n (qp, qhp, qlp, nn - dn);
return qh;
#else
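The new block comment in mpn/generic/sbpi1_div_sec.c above describes how the quotient is built from two sparsely stored half-limb arrays that are only combined after the main loop. A rough C-level sketch of that final combination step (illustrative only; the limb type, names, and helper loops below are stand-ins, not GMP's mpn_lshift/mpn_add_n):

#include <stdint.h>
#include <stddef.h>

#define LIMB_BITS 64            /* stand-in for GMP_NUMB_BITS on a 64-bit limb */
typedef uint64_t limb_t;        /* stand-in for mp_limb_t */

/* Combine the high and low quotient half-limb arrays into the final
   quotient.  qh_half[i] carries weight B^i * 2^(LIMB_BITS/2) and
   ql_half[i] carries weight B^i, so the high array is shifted left by
   half a limb and the two are added in one pass.  Deferring this to a
   single final pass keeps data-dependent carry propagation out of the
   main division loop. */
static limb_t
combine_half_quotients (limb_t *qp, limb_t *qh_half, const limb_t *ql_half,
                        size_t qn)
{
  /* Multi-limb left shift by LIMB_BITS/2 (the role of mpn_lshift). */
  limb_t shifted_out = 0;
  for (size_t i = 0; i < qn; i++)
    {
      limb_t w = qh_half[i];
      qh_half[i] = (w << (LIMB_BITS / 2)) | shifted_out;
      shifted_out = w >> (LIMB_BITS / 2);
    }

  /* Multi-limb addition with carry (the role of mpn_add_n). */
  limb_t carry = 0;
  for (size_t i = 0; i < qn; i++)
    {
      limb_t s = qh_half[i] + ql_half[i];
      limb_t c1 = s < qh_half[i];       /* carry from the limb addition */
      limb_t r = s + carry;
      limb_t c2 = r < s;                /* carry from propagating carry */
      qp[i] = r;
      carry = c1 | c2;
    }

  /* High quotient limb, corresponding to qh in the patch above. */
  return shifted_out + carry;
}

Since this is one fixed-length pass over arrays of public size, the carries it resolves do not reveal which individual quotient half limbs were underestimated during the loop.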
diff -r 45c38e4c1de2 -r 8de68af9fb4b mpn/x86/cnd_aors_n.asm
--- a/mpn/x86/cnd_aors_n.asm Fri Jul 05 14:45:17 2013 +0200
+++ b/mpn/x86/cnd_aors_n.asm Fri Jul 12 12:21:42 2013 +0200
@@ -51,9 +51,6 @@
MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
ASM_START()
TEXT
ALIGN(16)
diff -r 45c38e4c1de2 -r 8de68af9fb4b mpn/x86_64/cnd_aors_n.asm
--- a/mpn/x86_64/cnd_aors_n.asm Fri Jul 05 14:45:17 2013 +0200
+++ b/mpn/x86_64/cnd_aors_n.asm Fri Jul 12 12:21:42 2013 +0200
@@ -20,15 +20,15 @@
include(`../config.m4')
C cycles/limb
-C AMD K8,K9 2.25
+C AMD K8,K9 2
C AMD K10 2
-C AMD bd1 3.55
-C AMD bobcat 2.5
+C AMD bd1 2.32
+C AMD bobcat 3
C Intel P4 13
C Intel core2 2.9
-C Intel NHM 2.9
+C Intel NHM 2.8
C Intel SBR 2.4
-C Intel atom 6.5
+C Intel atom 5.33
C VIA nano 3
C NOTES
@@ -94,19 +94,19 @@
L(b3): mov (vp,n,8), %r12
mov 8(vp,n,8), %r13
mov 16(vp,n,8), %r14
+ and cnd, %r12
mov (up,n,8), %r10
+ and cnd, %r13
mov 8(up,n,8), %rbx
+ and cnd, %r14
mov 16(up,n,8), %rbp
- and cnd, %r12
- and cnd, %r13
- and cnd, %r14
ADDSUB %r12, %r10
+ mov %r10, (rp,n,8)
ADCSBB %r13, %rbx
+ mov %rbx, 8(rp,n,8)
ADCSBB %r14, %rbp
+ mov %rbp, 16(rp,n,8)
sbb R32(%rax), R32(%rax) C save carry
- mov %r10, (rp,n,8)
- mov %rbx, 8(rp,n,8)
- mov %rbp, 16(rp,n,8)
add $3, n
js L(top)
jmp L(end)
@@ -114,14 +114,14 @@
L(b2): mov (vp,n,8), %r12
mov 8(vp,n,8), %r13
mov (up,n,8), %r10
+ and cnd, %r12
mov 8(up,n,8), %rbx
- and cnd, %r12
and cnd, %r13
ADDSUB %r12, %r10
+ mov %r10, (rp,n,8)
ADCSBB %r13, %rbx
+ mov %rbx, 8(rp,n,8)
sbb R32(%rax), R32(%rax) C save carry
- mov %r10, (rp,n,8)
- mov %rbx, 8(rp,n,8)
add $2, n
js L(top)
jmp L(end)
@@ -130,8 +130,8 @@
mov (up,n,8), %r10
and cnd, %r12
ADDSUB %r12, %r10
+ mov %r10, (rp,n,8)
sbb R32(%rax), R32(%rax) C save carry
- mov %r10, (rp,n,8)
add $1, n
jns L(end)
@@ -140,24 +140,24 @@
mov 8(vp,n,8), %r13
mov 16(vp,n,8), %r14
mov 24(vp,n,8), %r11
+ and cnd, %r12
mov (up,n,8), %r10
+ and cnd, %r13
mov 8(up,n,8), %rbx
+ and cnd, %r14
mov 16(up,n,8), %rbp
+ and cnd, %r11
mov 24(up,n,8), %r9
- and cnd, %r12
- and cnd, %r13
- and cnd, %r14
- and cnd, %r11
add R32(%rax), R32(%rax) C restore carry
ADCSBB %r12, %r10
+ mov %r10, (rp,n,8)
ADCSBB %r13, %rbx
+ mov %rbx, 8(rp,n,8)
ADCSBB %r14, %rbp
+ mov %rbp, 16(rp,n,8)
ADCSBB %r11, %r9
+ mov %r9, 24(rp,n,8)
sbb R32(%rax), R32(%rax) C save carry
- mov %r10, (rp,n,8)
- mov %rbx, 8(rp,n,8)
- mov %rbp, 16(rp,n,8)
- mov %r9, 24(rp,n,8)
add $4, n
js L(top)
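The mpn/x86/cnd_aors_n.asm and mpn/x86_64/cnd_aors_n.asm changes above retune mpn_cnd_add_n/mpn_cnd_sub_n, the conditional add/subtract primitives used by the adjustment steps in the C file. At the C level, the idea they implement is roughly the following mask-based, branch-free conditional subtract (an illustrative sketch with assumed names and types, not the library's actual implementation):

#include <stdint.h>
#include <stddef.h>

typedef uint64_t limb_t;        /* stand-in for mp_limb_t */

/* Branch-free conditional subtract in the spirit of mpn_cnd_sub_n:
   rp[] = up[] - (cnd ? vp[] : 0), returning the borrow.  The condition
   is expanded into an all-zero or all-one mask so the same instruction
   stream runs either way, avoiding a data-dependent branch that could
   leak the condition through timing. */
static limb_t
cnd_sub_n_sketch (limb_t cnd, limb_t *rp, const limb_t *up,
                  const limb_t *vp, size_t n)
{
  limb_t mask = -(limb_t) (cnd != 0);   /* 0 if cnd == 0, all ones otherwise */
  limb_t borrow = 0;
  for (size_t i = 0; i < n; i++)
    {
      limb_t v = vp[i] & mask;          /* masked operand: 0 or vp[i] */
      limb_t d = up[i] - v;
      limb_t b1 = d > up[i];            /* borrow from up[i] - v */
      limb_t r = d - borrow;
      limb_t b2 = r > d;                /* borrow from propagating borrow */
      rp[i] = r;
      borrow = b1 | b2;
    }
  return borrow;
}

The assembly does the same masking with `and cnd, ...`; the scheduling changes in this commit interleave those ANDs with the loads and move each store next to its ADDSUB/ADCSBB, which appears to be where the cycle/limb improvements in the updated table come from.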