[Gmp-commit] /var/hg/gmp: mpn_brootinv: Interface change, limbs rather than b...

mercurial at gmplib.org mercurial at gmplib.org
Thu Nov 1 22:08:11 CET 2012


details:   /var/hg/gmp/rev/48f07964d11a
changeset: 15104:48f07964d11a
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Thu Nov 01 22:01:00 2012 +0100
description:
mpn_brootinv: Interface change, limbs rather than bits.

diffstat:

 ChangeLog              |   8 +++++
 gmp-impl.h             |   2 +-
 mpn/generic/brootinv.c |  73 ++++++++++++++++++++++++++++++++++++++-----------
 mpn/generic/perfpow.c  |   2 +-
 tests/mpn/t-brootinv.c |   2 +-
 tune/speed.h           |   2 +-
 6 files changed, 68 insertions(+), 21 deletions(-)

diffs (179 lines):

diff -r e93da8a39685 -r 48f07964d11a ChangeLog
--- a/ChangeLog	Thu Nov 01 21:22:24 2012 +0100
+++ b/ChangeLog	Thu Nov 01 22:01:00 2012 +0100
@@ -1,5 +1,13 @@
 2012-11-01  Niels Möller  <nisse at lysator.liu.se>
 
+	* mpn/generic/brootinv.c (mpn_brootinv): Input size in limbs
+	rather than bits. Use single-precision iterations for the first
+	limb.
+	* mpn/generic/perfpow.c (is_kth_power): Update mpn_brootinv call.
+	* tests/mpn/t-brootinv.c (main): Likewise.
+	* tune/speed.h (SPEED_ROUTINE_MPN_BROOTINV): Likewise.
+	* gmp-impl.h (mpn_brootinv): Updated prototype.
+
 	* mpn/generic/hgcd2.c (mpn_hgcd2): Removed redundant loop exit
 	tests in the single-precision loop.
 
diff -r e93da8a39685 -r 48f07964d11a gmp-impl.h
--- a/gmp-impl.h	Thu Nov 01 21:22:24 2012 +0100
+++ b/gmp-impl.h	Thu Nov 01 22:01:00 2012 +0100
@@ -1636,7 +1636,7 @@
 __GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
 
 #define mpn_brootinv __MPN(brootinv)
-__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_limb_t, mp_ptr);
+__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
 
 #define mpn_bsqrt __MPN(bsqrt)
 __GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
diff -r e93da8a39685 -r 48f07964d11a mpn/generic/brootinv.c
--- a/mpn/generic/brootinv.c	Thu Nov 01 21:22:24 2012 +0100
+++ b/mpn/generic/brootinv.c	Thu Nov 01 22:01:00 2012 +0100
@@ -22,7 +22,22 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 
-/* Compute r such that r^k * y = 1 (mod 2^b).
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r = 1;
+  mp_limb_t s = a;
+
+  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
+    if (e & 1)
+      r *= s;
+
+  return r;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
 
    Iterates
      r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
@@ -31,7 +46,8 @@
    Works just for odd k.  Else the Hensel lifting degenerates.
 
    FIXME:
-     (1) Simplify to do precision book-keeping in limbs rather than bits.
+   
+     (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
 
      (2) Rewrite iteration as
 	   r' <-- r - k^{-1} r (r^k y - 1)
@@ -41,37 +57,60 @@
 
      (4) Use a small table to get starting value.
 
-   If we prefer to get y^{1/k} rather than y^{-1/k}, we could also switch to
-   the iteration
+   Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
 
-     r' <-- r - k^{-1} r (r^k y^{k-1} - 1)
-
-   which converges to y^{1/n - 1}, and we then get y^{1/n} by a single mullo.
-*/
 void
-mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_limb_t k, mp_ptr tp)
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
 {
   mp_ptr tp2, tp3;
-  mp_limb_t kinv, k2;
-  mp_size_t bn, order[GMP_LIMB_BITS + 1];
+  mp_limb_t kinv, k2, r0, y0;
+  mp_size_t order[GMP_LIMB_BITS + 1];
   int i, d;
 
-  ASSERT (bnb > 0);
+  ASSERT (bn > 0);
   ASSERT ((k & 1) != 0);
 
-  bn = 1 + (bnb - 1) / GMP_LIMB_BITS;
-
   tp2 = tp + bn;
   tp3 = tp + 2 * bn;
   k2 = k + 1;
 
   binvert_limb (kinv, k);
 
+  /* 4-bit initial approximation:
+        
+   y%16 | 1  3  5  7  9 11 13 15, 	
+    k%4 +-----------------------------
+     1  | 1 11 13  7  9  3  5 15
+     3  | 1  3  5  7  9 11 13 15
+
+  */
+  y0 = yp[0];
+  
+  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & ~(k << 2) & 8);		/* 4 bits */
+  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f));		/* 8 bits */
+  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0xffff));	/* 16 bits */
+  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));			/* 32 bits */
+#if GMP_NUMB_BITS > 32
+  {
+    unsigned prec = 32;
+    do
+      {
+	r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
+	prec *= 2;
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (bn == 1)
+    return;
+  
   d = 0;
-  for (; bnb != 1; bnb = (bnb + 1) >> 1)
-    order[d++] = 1 + (bnb - 1) / GMP_LIMB_BITS;
+  for (; bn > 1; bn = (bn + 1) >> 1)
+    order[d++] = bn;
 
-  rp[0] = 1;
   for (i = d - 1; i >= 0; i--)
     {
       bn = order[i];
diff -r e93da8a39685 -r 48f07964d11a mpn/generic/perfpow.c
--- a/mpn/generic/perfpow.c	Thu Nov 01 21:22:24 2012 +0100
+++ b/mpn/generic/perfpow.c	Thu Nov 01 22:01:00 2012 +0100
@@ -142,8 +142,8 @@
   else
     {
       b = 1 + (f - 1) / k;
-      mpn_brootinv (rp, ip, b, k, tp);
       rn = 1 + (b - 1) / GMP_LIMB_BITS;
+      mpn_brootinv (rp, ip, rn, k, tp);
       if ((b % GMP_LIMB_BITS) != 0)
 	rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
       MPN_NORMALIZE (rp, rn);
diff -r e93da8a39685 -r 48f07964d11a tests/mpn/t-brootinv.c
--- a/tests/mpn/t-brootinv.c	Thu Nov 01 21:22:24 2012 +0100
+++ b/tests/mpn/t-brootinv.c	Thu Nov 01 22:01:00 2012 +0100
@@ -81,7 +81,7 @@
 	  else
 	    k |= 1;
 	}
-      mpn_brootinv (rp, ap, n * GMP_NUMB_BITS, k, scratch);
+      mpn_brootinv (rp, ap, n, k, scratch);
       mpn_powlo (pp, rp, &k, 1, n, scratch);
       mpn_mullo_n (app, ap, pp, n);
       
diff -r e93da8a39685 -r 48f07964d11a tune/speed.h
--- a/tune/speed.h	Thu Nov 01 21:22:24 2012 +0100
+++ b/tune/speed.h	Thu Nov 01 22:01:00 2012 +0100
@@ -2107,7 +2107,7 @@
     speed_starttime ();					\
     i = s->reps;					\
     do							\
-      (*function) (wp, s->xp, s->size * GMP_NUMB_BITS, s->r, tp);	\
+      (*function) (wp, s->xp, s->size, s->r, tp);	\
     while (--i != 0);					\
     t = speed_endtime ();				\
 							\


More information about the gmp-commit mailing list