[Gmp-commit] /var/hg/gmp: Fix arithmetic overflow bug in mpn_invert_3by2

Wed Nov 16 22:07:26 UTC 2016

details:   /var/hg/gmp/rev/e7a52acedab7
changeset: 17107:e7a52acedab7
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Wed Nov 16 23:07:02 2016 +0100
description:
Fix arithmetic overflow bug in mpn_invert_3by2

And improve documentation of the algorithm.

diffstat:

 mini-gmp/ChangeLog  |   5 +++++
 mini-gmp/mini-gmp.c |  50 +++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 46 insertions(+), 9 deletions(-)

diffs (98 lines):

diff -r b633a167ce0d -r e7a52acedab7 mini-gmp/ChangeLog

--- a/mini-gmp/ChangeLog	Tue Nov 15 19:17:51 2016 +0100
+++ b/mini-gmp/ChangeLog	Wed Nov 16 23:07:02 2016 +0100
@@ -1,3 +1,8 @@
+2016-11-16  Niels MÃ¶ller  <nisse at lysator.liu.se>
+
+	* mini-gmp/mini-gmp.c (mpn_invert_3by2): Fix arithmetic overflow
+	bug, and improve documentation of the algorithm.
+
 2016-11-15  Niels MÃ¶ller  <nisse at lysator.liu.se>
 
 	* mini-gmp/tests/t-limbs.c (testmain): Skip tests with zero
diff -r b633a167ce0d -r e7a52acedab7 mini-gmp/mini-gmp.c
--- a/mini-gmp/mini-gmp.c	Tue Nov 15 19:17:51 2016 +0100
+++ b/mini-gmp/mini-gmp.c	Wed Nov 16 23:07:02 2016 +0100
@@ -725,22 +725,44 @@
 
 

 /* MPN division interface. */
+
+/* The 3/2 inverse is defined as
+
+     m = floor( (B^3-1) / (B u1 + u0)) - B
+*/
 mp_limb_t
 mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
 {
-  mp_limb_t r, p, m;
-  unsigned ul, uh;
-  unsigned ql, qh;
-
-  /* First, do a 2/1 inverse. */
-  /* The inverse m is defined as floor( (B^2 - 1 - u1)/u1 ), so that 0 <
-   * B^2 - (B + m) u1 <= u1 */
+  mp_limb_t r, p, m, ql;
+  unsigned ul, uh, qh;
+
   assert (u1 >= GMP_LIMB_HIGHBIT);
 
+  /* For notation, let b denote the half-limb base, so that B = b^2.
+     Split u1 = b uh + ul. */
   ul = u1 & GMP_LLIMB_MASK;
   uh = u1 >> (GMP_LIMB_BITS / 2);
 
+  /* Approximation of the high half of quotient. Differs from the 2/1
+     inverse of the half limb uh, since we have already subtracted
+     u0. */
   qh = ~u1 / uh;
+
+  /* Adjust to get a half-limb 3/2 inverse, i.e., we want
+
+     qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
+         = floor( (b (~u) + b-1) / u),
+
+     and the remainder
+
+     r = b (~u) + b-1 - qh (b uh + ul)
+       = b (~u - qh uh) + b-1 - qh ul
+
+     Subtraction of qh ul may underflow, which implies adjustments.
+     But by normalization, 2 u >= B > qh ul, so we need to adjust by
+     at most 2.
+  */
+
   r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
 
   p = (mp_limb_t) qh * ul;
@@ -758,11 +780,19 @@
     }
   r -= p;
 
-  /* Do a 3/2 division (with half limb size) */
+  /* Low half of the quotient is
+
+       ql = floor ( (b r + b-1) / u1).
+
+     This is a 3/2 division (on half-limbs), for which qh is a
+     suitable inverse. */
+
   p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
+  /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
+     work, it is essential that ql is a full mp_limb_t. */
   ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
 
-  /* By the 3/2 method, we don't need the high half limb. */
+  /* By the 3/2 trick, we don't need the high half limb. */
   r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
 
   if (r >= (p << (GMP_LIMB_BITS / 2)))
@@ -777,6 +807,8 @@
       r -= u1;
     }
 
+  /* Now m is the 2/1 invers of u1. If u0 > 0, adjust it to become a
+     3/2 inverse. */
   if (u0 > 0)
     {
       mp_limb_t th, tl;