[Gmp-commit] /var/hg/gmp: 5 new changesets

Wed Oct 17 21:42:24 UTC 2018

details:   /var/hg/gmp/rev/dcd0053c83c1
changeset: 17652:dcd0053c83c1
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Oct 17 23:17:21 2018 +0200
description:
mpn/generic/fib2_ui.c: Simplify the possible -2 case.

details:   /var/hg/gmp/rev/5e4ac706014f
changeset: 17653:5e4ac706014f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Jul 18 18:24:57 2018 +0200
description:
mpz/millerrabin.c (mod_eq_m1): New function, check equality computing -1 on the fly.

details:   /var/hg/gmp/rev/26121a5c96c5
changeset: 17654:26121a5c96c5
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jul 19 07:41:19 2018 +0200
description:
Declare and mark/free TEMP only where it's needed.

details:   /var/hg/gmp/rev/60c95465c8f5
changeset: 17655:60c95465c8f5
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jul 19 08:02:49 2018 +0200
description:
mpz/powm_ui.c: Avoid a COPY

details:   /var/hg/gmp/rev/7673e5cb31ad
changeset: 17656:7673e5cb31ad
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Jul 19 08:18:34 2018 +0200
description:
mpz/powm_ui.c: Avoid a branch in the flow, handling el==1 early.

diffstat:

 mpn/generic/fib2_ui.c |  26 ++++++---------------
 mpz/millerrabin.c     |  60 ++++++++++++++++++++++++++++++++++++++------------
 mpz/powm_ui.c         |  49 +++++++++++++++++-----------------------
 3 files changed, 74 insertions(+), 61 deletions(-)

diffs (276 lines):

diff -r d393bac5d2d5 -r 7673e5cb31ad mpn/generic/fib2_ui.c

--- a/mpn/generic/fib2_ui.c	Fri Sep 07 08:58:55 2018 +0200
+++ b/mpn/generic/fib2_ui.c	Thu Jul 19 08:18:34 2018 +0200
@@ -47,19 +47,13 @@
    fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
    (for n>0).
 
-   Notes:
+   Notes: F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
 
    In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
    low limb.
 
-   In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
-   F[k-1]^2.  This F[2k+1] is an F[4m+3] and such numbers are congruent to
-   1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
-   that would leave 6 or 7 mod 8).
-
-   This property of F[4m+3] can be verified by induction on F[4m+3] =
-   7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
-   identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
+   In F[2k+1] with k odd, -2 is applied to F[k-1]^2 just by ORing into the
+   low limb.
 */
 
 mp_size_t
@@ -131,15 +125,13 @@
 
 	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
 	     n&mask is the low bit of our implied k.  */
+
+	  ASSERT ((fp[0] & 2) == 0);
+	  /* fp is F[k-1]^2 == 0 or 1 mod 4, like all squares. */
+	  fp[0] |= (n & mask ? 2 : 0);			/* possible -2 */
 #if HAVE_NATIVE_mpn_rsblsh2_n
 	  fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
-	  if ((n & mask) == 0)
-	    MPN_INCR_U(fp, size + 1, 2);	/* possible +2 */
-	  else
-	  {
-	    ASSERT (fp[0] >= 2);
-	    fp[0] -= 2;				/* possible -2 */
-	  }
+	  MPN_INCR_U(fp, size + 1, (n & mask ? 0 : 2));	/* possible +2 */
 #else
 	  {
 	    mp_limb_t  c;
@@ -147,8 +139,6 @@
 	    c = mpn_lshift (xp, xp, size, 2);
 	    xp[0] |= (n & mask ? 0 : 2);	/* possible +2 */
 	    c -= mpn_sub_n (fp, xp, fp, size);
-	    ASSERT (n & mask ? fp[0] != 0 && fp[0] != 1 : 1);
-	    fp[0] -= (n & mask ? 2 : 0);	/* possible -2 */
 	    fp[size] = c;
 	  }
 #endif
diff -r d393bac5d2d5 -r 7673e5cb31ad mpz/millerrabin.c
--- a/mpz/millerrabin.c	Fri Sep 07 08:58:55 2018 +0200
+++ b/mpz/millerrabin.c	Thu Jul 19 08:18:34 2018 +0200
@@ -42,7 +42,7 @@
 
 #include "gmp-impl.h"
 
-static int millerrabin (mpz_srcptr, mpz_srcptr,
+static int millerrabin (mpz_srcptr,
 			mpz_ptr, mpz_ptr,
 			mpz_srcptr, unsigned long int);
 
@@ -50,22 +50,22 @@
 mpz_millerrabin (mpz_srcptr n, int reps)
 {
   int r;
-  mpz_t nm1, nm3, x, y, q;
+  mpz_t nm, x, y, q;
   unsigned long int k;
   gmp_randstate_t rstate;
   int is_prime;
   TMP_DECL;
   TMP_MARK;
 
-  MPZ_TMP_INIT (nm1, SIZ (n) + 1);
-  mpz_sub_ui (nm1, n, 1L);
+  MPZ_TMP_INIT (nm, SIZ (n) + 1);
+  mpz_sub_ui (nm, n, 1L); /* nm = n - 1 */
 
   MPZ_TMP_INIT (x, SIZ (n) + 1);
   MPZ_TMP_INIT (y, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */
 
   /* Perform a Fermat test.  */
   mpz_set_ui (x, 210L);
-  mpz_powm (y, x, nm1, n);
+  mpz_powm (y, x, nm, n);
   if (mpz_cmp_ui (y, 1L) != 0)
     {
       TMP_FREE;
@@ -75,13 +75,12 @@
   MPZ_TMP_INIT (q, SIZ (n));
 
   /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
-  k = mpz_scan1 (nm1, 0L);
-  mpz_tdiv_q_2exp (q, nm1, k);
+  k = mpz_scan1 (nm, 0L); /* or mpz_scan1 (n, 1) */
+  mpz_tdiv_q_2exp (q, n, k);
 
   /* n-3 */
-  MPZ_TMP_INIT (nm3, SIZ (n) + 1);
-  mpz_sub_ui (nm3, n, 3L);
-  ASSERT (mpz_cmp_ui (nm3, 1L) >= 0);
+  mpz_sub_ui (nm, nm, 2L); /* nm = n - 1 - 2 = n - 3 */
+  ASSERT (mpz_cmp_ui (nm, 1L) >= 0);
 
   gmp_randinit_default (rstate);
 
@@ -89,10 +88,10 @@
   for (r = 0; r < reps && is_prime; r++)
     {
       /* 2 to n-2 inclusive, don't want 1, 0 or -1 */
-      mpz_urandomm (x, rstate, nm3);
+      mpz_urandomm (x, rstate, nm);
       mpz_add_ui (x, x, 2L);
 
-      is_prime = millerrabin (n, nm1, x, y, q, k);
+      is_prime = millerrabin (n, x, y, q, k);
     }
 
   gmp_randclear (rstate);
@@ -102,20 +101,51 @@
 }
 
 static int
-millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
+mod_eq_m1 (mpz_srcptr x, mpz_srcptr m)
+{
+  mp_size_t ms;
+  mp_srcptr mp, xp;
+
+  ms = SIZ (m);
+  if (SIZ (x) != ms)
+    return 0;
+  ASSERT (ms > 0);
+
+  mp = PTR (m);
+  xp = PTR (x);
+  ASSERT ((mp[0] - 1) == (mp[0] ^ 1)); /* n is odd */
+
+  if ((*xp ^ CNST_LIMB(1) ^ *mp) != CNST_LIMB(0)) /* xp[0] == mp[0] - 1 */
+    return 0;
+  else
+    {
+      int cmp;
+
+      --ms;
+      ++xp;
+      ++mp;
+
+      MPN_CMP (cmp, xp, mp, ms);
+
+      return cmp == 0;
+    }
+}
+
+static int
+millerrabin (mpz_srcptr n, mpz_ptr x, mpz_ptr y,
 	     mpz_srcptr q, unsigned long int k)
 {
   unsigned long int i;
 
   mpz_powm (y, x, q, n);
 
-  if (mpz_cmp_ui (y, 1L) == 0 || mpz_cmp (y, nm1) == 0)
+  if (mpz_cmp_ui (y, 1L) == 0 || mod_eq_m1 (y, n))
     return 1;
 
   for (i = 1; i < k; i++)
     {
       mpz_powm_ui (y, y, 2L, n);
-      if (mpz_cmp (y, nm1) == 0)
+      if (mod_eq_m1 (y, n))
 	return 1;
       /* y == 1 means that the previous y was a non-trivial square root
 	 of 1 (mod n). y == 0 means that n is a power of the base.
diff -r d393bac5d2d5 -r 7673e5cb31ad mpz/powm_ui.c
--- a/mpz/powm_ui.c	Fri Sep 07 08:58:55 2018 +0200
+++ b/mpz/powm_ui.c	Thu Jul 19 08:18:34 2018 +0200
@@ -58,11 +58,7 @@
 static void
 mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
 {
-  mp_ptr qp;
-  TMP_DECL;
-  TMP_MARK;
-
-  qp = tp;
+  mp_ptr qp = tp;
 
   if (dn == 1)
     {
@@ -89,14 +85,20 @@
       /* We need to allocate separate remainder area, since mpn_mu_div_qr does
 	 not handle overlap between the numerator and remainder areas.
 	 FIXME: Make it handle such overlap.  */
-      mp_ptr rp = TMP_BALLOC_LIMBS (dn);
-      mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
-      mp_ptr scratch = TMP_BALLOC_LIMBS (itch);
+      mp_ptr rp, scratch;
+      mp_size_t itch;
+      TMP_DECL;
+      TMP_MARK;
+
+      itch = mpn_mu_div_qr_itch (nn, dn, 0);
+      rp = TMP_BALLOC_LIMBS (dn);
+      scratch = TMP_BALLOC_LIMBS (itch);
+
       mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
       MPN_COPY (np, rp, dn);
+
+      TMP_FREE;
     }
-
-  TMP_FREE;
 }
 
 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
@@ -134,8 +136,13 @@
       if (UNLIKELY (mn == 0))
 	DIVIDE_BY_ZERO;
 
-      if (el == 0)
+      if (el <= 1)
 	{
+	  if (el == 1)
+	    {
+	      mpz_mod (r, b, m);
+	      return;
+	    }
 	  /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
 	     M equals 1.  */
 	  SIZ(r) = mn != 1 || mp[0] != 1;
@@ -191,16 +198,7 @@
       e = (e << c) << 1;		/* shift the exp bits to the left, lose msb */
       c = GMP_LIMB_BITS - 1 - c;
 
-      if (c == 0)
-	{
-	  /* If m is already normalized (high bit of high limb set), and b is
-	     the same size, but a bigger value, and e==1, then there's no
-	     modular reductions done and we can end up with a result out of
-	     range at the end. */
-	  if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
-	    mpn_sub_n (xp, xp, mp, mn);
-	}
-      else
+      ASSERT (c != 0); /* el > 1 */
 	{
 	  /* Main loop. */
 	  do
@@ -249,17 +247,12 @@
 	  cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
 	  tp[xn] = cy; xn += cy != 0;
 
-	  if (xn < mn)
-	    {
-	      MPN_COPY (xp, tp, xn);
-	    }
-	  else
+	  if (xn >= mn)
 	    {
 	      mod (tp, xn, mp, mn, &dinv, scratch);
-	      MPN_COPY (xp, tp, mn);
 	      xn = mn;
 	    }
-	  mpn_rshift (xp, xp, xn, m_zero_cnt);
+	  mpn_rshift (xp, tp, xn, m_zero_cnt);
 	}
       MPN_NORMALIZE (xp, xn);