[Gmp-commit] /var/hg/gmp: Implemented mpn_broot.

Sun Oct 28 10:08:51 CET 2012

details:   /var/hg/gmp/rev/9623471fb083
changeset: 15092:9623471fb083
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Sun Oct 28 10:08:46 2012 +0100
description:
Implemented mpn_broot.

diffstat:

 ChangeLog             |    8 ++
 configure.in          |    2 +-
 gmp-impl.h            |    2 +
 mpn/generic/broot.c   |  184 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/mpn/Makefile.am |    2 +-
 tests/mpn/t-broot.c   |  100 +++++++++++++++++++++++++++
 6 files changed, 296 insertions(+), 2 deletions(-)

diffs (truncated from 343 to 300 lines):

diff -r 0b25e4cfd719 -r 9623471fb083 ChangeLog

--- a/ChangeLog	Sat Oct 27 13:21:09 2012 +0200
+++ b/ChangeLog	Sun Oct 28 10:08:46 2012 +0100
@@ -1,3 +1,11 @@
+2012-10-28  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/broot.c (mpn_broot): New file and function.
+	* configure.in (gmp_mpn_functions): Add broot.
+	* gmp-impl.h (mpn_broot): Declare.
+	* tests/mpn/t-broot.c: New testcase.
+	* tests/mpn/Makefile.am (check_PROGRAMS): Added t-broot.
+
 2012-10-27  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/remove.c: Get remainder allocation right.
diff -r 0b25e4cfd719 -r 9623471fb083 configure.in
--- a/configure.in	Sat Oct 27 13:21:09 2012 +0200
+++ b/configure.in	Sun Oct 28 10:08:46 2012 +0100
@@ -2679,7 +2679,7 @@
   sbpi1_bdiv_q sbpi1_bdiv_qr						   \
   dcpi1_bdiv_q dcpi1_bdiv_qr						   \
   mu_bdiv_q mu_bdiv_qr							   \
-  bdiv_q bdiv_qr							   \
+  bdiv_q bdiv_qr broot							   \
   divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec		   \
   trialdiv remove							   \
   and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n			   \
diff -r 0b25e4cfd719 -r 9623471fb083 gmp-impl.h
--- a/gmp-impl.h	Sat Oct 27 13:21:09 2012 +0200
+++ b/gmp-impl.h	Sun Oct 28 10:08:46 2012 +0100
@@ -1629,6 +1629,8 @@
 #define   mpn_rootrem __MPN(rootrem)
 __GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
 
+#define mpn_broot __MPN(broot)
+__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
 
 #if defined (_CRAY)
 #define MPN_COPY_INCR(dst, src, n)					\
diff -r 0b25e4cfd719 -r 9623471fb083 mpn/generic/broot.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/broot.c	Sun Oct 28 10:08:46 2012 +0100
@@ -0,0 +1,184 @@
+/* mpn_broot -- Compute hensel sqrt
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r = 1;
+  mp_limb_t s = a;
+
+  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
+    if (e & 1)
+      r *= s;
+
+  return r;
+}
+
+/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
+
+   Iterates
+
+     r' <-- r - r * (a^{k-1} r^k - 1) / n
+
+   If
+
+     a^{k-1} r^k = 1 (mod 2^m),
+
+   then
+
+     a^{k-1} r'^k = 1 (mod 2^{2m}),
+ */
+static void
+mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_size_t sizes[GMP_LIMB_BITS * 2];  
+  mp_ptr akm1, tp, rnp, ep, scratch;
+  mp_limb_t a0, r0, km1, kinv;
+  mp_size_t rn;
+  unsigned i;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+  ASSERT (k >= 3);
+
+  TMP_MARK;
+  
+  akm1 = TMP_ALLOC_LIMBS (4*n);
+  tp = akm1 + n;
+
+  km1 = k-1;
+  /* FIXME: Could arrange the iteration so we don't need to compute
+     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
+     that we can use wraparound also for a*r, since the low half is
+     unchanged from the previous iteration. Or possibly mulmid. Also,
+     a r = a^{1/k}, so we get that value too, for free? */
+  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
+
+  a0 = ap[0];
+  binvert_limb (kinv, k);
+
+  /* 4 bits: a^{1/k - 1} (mod 16):
+
+        a % 8
+        1 3 5 7   
+   k%4 +-------
+     1 |1 1 1 1
+     3 |1 9 9 1
+  */
+  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+  {
+    unsigned prec = 32;
+    do
+      {
+	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
+	prec *= 2;
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (n == 1)
+    {
+      TMP_FREE;
+      return;
+    }
+
+  /* FIXME: Special case for two limb iteration. */
+  rnp = TMP_ALLOC_LIMBS (2*n);  
+  ep = rnp + n;
+
+  /* FIXME: Possible to this on the fly with some bit fiddling. */
+  for (i = 0; n > 1; n = (n + 1)/2)
+    sizes[i++] = n;
+
+  rn = 1;
+
+  while (i-- > 0)
+    {
+      /* Compute x^k. What's the best way to handle the doubled
+	 precision? */
+      MPN_ZERO (rp + rn, sizes[i] - rn);
+      mpn_powlo (rnp, rp, &k, 1, sizes[i], tp); 
+
+      /* Multiply by a^{k-1}. Can use wraparound; low part is
+	 000...01. */
+
+      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
+      ASSERT (ep[0] == 1);
+      ASSERT (rn == 1 || mpn_zero_p (ep + 1, rn - 1));
+
+      ASSERT (sizes[i] <= 2*rn);
+      mpn_pi1_bdiv_q_1 (ep, ep + rn, sizes[i] - rn, k, kinv, 0);
+
+      /* Multiply by x, plain mullo. */
+      mpn_mullo_n (rp + rn, ep, rp, sizes[i] - rn);
+
+      /* FIXME: Avoid negation, e.g., by using a bdiv_q_1 variant
+	 returning -q. */
+      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
+
+      rn = sizes[i];
+    }
+  TMP_FREE;
+}
+
+/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
+void
+mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+
+  if (k == 1)
+    {
+      MPN_COPY (rp, ap, n);
+      return;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+
+  mpn_broot_invm1 (tp, ap, n, k);
+  mpn_mullo_n (rp, tp, ap, n);
+
+  TMP_FREE;  
+}
diff -r 0b25e4cfd719 -r 9623471fb083 tests/mpn/Makefile.am
--- a/tests/mpn/Makefile.am	Sat Oct 27 13:21:09 2012 +0200
+++ b/tests/mpn/Makefile.am	Sun Oct 28 10:08:46 2012 +0100
@@ -28,7 +28,7 @@
   t-toom52 t-toom53 t-toom54 t-toom62 t-toom63 t-toom6h t-toom8h	\
   t-toom2-sqr t-toom3-sqr t-toom4-sqr t-toom6-sqr t-toom8-sqr		\
   t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid			\
-  t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv
+  t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv t-broot
 
 EXTRA_DIST = toom-shared.h toom-sqr-shared.h
 
diff -r 0b25e4cfd719 -r 9623471fb083 tests/mpn/t-broot.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mpn/t-broot.c	Sun Oct 28 10:08:46 2012 +0100
@@ -0,0 +1,100 @@
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>		/* for strtol */
+#include <stdio.h>		/* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, scratch;
+  int count = COUNT;
+  unsigned i;
+  TMP_DECL;
+  
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+	{
+	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+	  return 1;
+	}
+    }
+  
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  scratch = TMP_ALLOC_LIMBS (3*MAX_LIMBS); /* For mpn_powlo */
+
+  for (i = 0; i < count; i++)
+    {