[Gmp-commit] /home/hgfiles/gmp: Slightly faster CRT for {mul, sqr}mod_bnm1.

mercurial at gmplib.org mercurial at gmplib.org
Sun Dec 20 23:59:45 CET 2009


details:   /home/hgfiles/gmp/rev/f378a4363215
changeset: 13149:f378a4363215
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Sun Dec 20 23:59:38 2009 +0100
description:
Slightly faster CRT for {mul,sqr}mod_bnm1.

diffstat:

 ChangeLog                 |   5 +++
 mpn/generic/mulmod_bnm1.c |  77 ++++++++++++++++++++++++++++++++--------------
 mpn/generic/sqrmod_bnm1.c |  77 ++++++++++++++++++++++++++++++++--------------
 3 files changed, 111 insertions(+), 48 deletions(-)

diffs (222 lines):

diff -r 8933a257b0c7 -r f378a4363215 ChangeLog
--- a/ChangeLog	Sun Dec 20 23:54:57 2009 +0100
+++ b/ChangeLog	Sun Dec 20 23:59:38 2009 +0100
@@ -1,3 +1,8 @@
+2009-12-20  Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/generic/mulmod_bnm1.c (mpn_mulmod_bnm1): New CRT.
+	* mpn/generic/sqrmod_bnm1.c (mpn_sqrmod_bnm1): Likewise.
+
 2009-12-20  Torbjorn Granlund  <tege at gmplib.org>
 
 	* Change all bit counts for bignums to use mp_bitcnt_t.
diff -r 8933a257b0c7 -r f378a4363215 mpn/generic/mulmod_bnm1.c
--- a/mpn/generic/mulmod_bnm1.c	Sun Dec 20 23:54:57 2009 +0100
+++ b/mpn/generic/mulmod_bnm1.c	Sun Dec 20 23:59:38 2009 +0100
@@ -27,6 +27,7 @@
 
 #include "gmp.h"
 #include "gmp-impl.h"
+#include "longlong.h"
 
 /* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
    mod B^rn - 1, and values are semi-normalised; zero is represented
@@ -132,7 +133,7 @@
       /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
 	 and crt together as
 
-	 x = xm + (B^n - 1) * [B^n/2 (xp - xm) mod (B^n+1)]
+         x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
       */
 
 #define a0 ap
@@ -229,33 +230,61 @@
 	  }
       }
 
-      /* xp = xm - xp mod (B^n + 1). Assumes normalised
-	 representation. Puts high bit in hi. */
-      if (UNLIKELY (xp[n]))
-	hi = mpn_add_1 (xp, rp, n, 1);
-      else
-	{
-	  cy = mpn_sub_n (xp, rp, xp, n);
-	  MPN_INCR_U (xp, n + 1 , cy);
-	  hi = xp[n];
-	}
-      /* Multiply by -B^n/2, using
+      /* Here the CRT recomposition begins.
 
-	 -B^n/2 * (2 x1 + x0) = x1 - B^n/2 x0 (mod (B^n + 1))
+	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+	 Division by 2 is a bitwise rotation.
+
+	 Assumes xp normalised mod (B^n+1).
+
+	 The residue class [0] is represented by [B^n-1]; except when
+	 both input are ZERO.
       */
 
-      cy = mpn_rshift (rp+n, xp, n, 1);
-      if (hi != cy)
-	rp[2*n-1] |= GMP_NUMB_HIGHBIT;
-      if (hi < cy)
-	/* Underflow */
-	hi = mpn_add_1 (rp+n, rp+n, n, 1);
-      else
-	hi = 0;
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next add_ssaaaa will set cy = 1 only if rp[n-1]+=hi overflows,
+	 i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* Next increment can not overflow, read the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
 
-      cy = mpn_sub_n (rp, rp, rp+n, n);
-      cy = mpn_sub_1 (rp+n, rp+n, n, cy + hi);
-      ASSERT (cy == hi);
+      /* Compute the highest half:
+         ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n 
+       */
+      cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+      /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+         DECR will affect _at most_ the lowest n limbs. */
+      MPN_DECR_U (rp, 2*n, cy);
+
 #undef a0
 #undef a1
 #undef b0
diff -r 8933a257b0c7 -r f378a4363215 mpn/generic/sqrmod_bnm1.c
--- a/mpn/generic/sqrmod_bnm1.c	Sun Dec 20 23:54:57 2009 +0100
+++ b/mpn/generic/sqrmod_bnm1.c	Sun Dec 20 23:59:38 2009 +0100
@@ -27,6 +27,7 @@
 
 #include "gmp.h"
 #include "gmp-impl.h"
+#include "longlong.h"
 
 /* Input is {ap,rn}; output is {rp,rn}, computation is
    mod B^rn - 1, and values are semi-normalised; zero is represented
@@ -118,7 +119,7 @@
       /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
 	 and crt together as
 
-	 x = xm + (B^n - 1) * [B^n/2 (xp - xm) mod (B^n+1)]
+         x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
       */
 
 #define a0 ap
@@ -184,33 +185,61 @@
 	  }
       }
 
-      /* xp = xm - xp mod (B^n + 1). Assumes normalised
-	 representation. Puts high bit in hi. */
-      if (UNLIKELY (xp[n]))
-	hi = mpn_add_1 (xp, rp, n, 1);
-      else
-	{
-	  cy = mpn_sub_n (xp, rp, xp, n);
-	  MPN_INCR_U (xp, n + 1 , cy);
-	  hi = xp[n];
-	}
-      /* Multiply by -B^n/2, using
+      /* Here the CRT recomposition begins.
 
-	 -B^n/2 * (2 x1 + x0) = x1 - B^n/2 x0 (mod (B^n + 1))
+	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
+	 Division by 2 is a bitwise rotation.
+
+	 Assumes xp normalised mod (B^n+1).
+
+	 The residue class [0] is represented by [B^n-1]; except when
+	 both input are ZERO.
       */
 
-      cy = mpn_rshift (rp+n, xp, n, 1);
-      if (hi != cy)
-	rp[2*n-1] |= GMP_NUMB_HIGHBIT;
-      if (hi < cy)
-	/* Underflow */
-	hi = mpn_add_1 (rp+n, rp+n, n, 1);
-      else
-	hi = 0;
+#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
+#if HAVE_NATIVE_mpn_rsh1add_nc
+      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
+      hi = cy << (GMP_NUMB_BITS - 1);
+      cy = 0;
+      /* next add_ssaaaa will set cy = 1 only if rp[n-1]+=hi overflows,
+	 i.e. a further increment will not overflow again. */
+#else /* ! _nc */
+      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
+	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
+#endif
+      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
+#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
+#if HAVE_NATIVE_mpn_add_nc
+      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
+#else /* ! _nc */
+      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
+#endif
+      cy += (rp[0]&1);
+      mpn_rshift(rp, rp, n, 1);
+      ASSERT (cy <= 2);
+      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
+      cy >>= 1;
+      /* We can have cy != 0 only if hi = 0... */
+      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
+      rp[n-1] |= hi;
+      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
+#endif
+      ASSERT (cy <= 1);
+      /* Next increment can not overflow, read the previous comments about cy. */
+      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
+      MPN_INCR_U(rp, n, cy);
 
-      cy = mpn_sub_n (rp, rp, rp+n, n);
-      cy = mpn_sub_1 (rp+n, rp+n, n, cy + hi);
-      ASSERT (cy == hi);
+      /* Compute the highest half:
+         ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n 
+       */
+      cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
+      /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
+         DECR will affect _at most_ the lowest n limbs. */
+      MPN_DECR_U (rp, 2*n, cy);
+
 #undef a0
 #undef a1
 #undef xp


More information about the gmp-commit mailing list