[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Wed Oct 31 14:00:03 CET 2012


details:   /var/hg/gmp/rev/c3bf4d1124f1
changeset: 15099:c3bf4d1124f1
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Wed Oct 31 12:00:39 2012 +0100
description:
Benchmarking of broot functions.

details:   /var/hg/gmp/rev/f84ea35b6178
changeset: 15100:f84ea35b6178
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Wed Oct 31 12:17:12 2012 +0100
description:
Optimization of mpn_broot.

details:   /var/hg/gmp/rev/1e767ab37a8a
changeset: 15101:1e767ab37a8a
user:      Niels M?ller <nisse at lysator.liu.se>
date:      Wed Oct 31 13:56:30 2012 +0100
description:
Test mpn_brootinv.

diffstat:

 ChangeLog              |   20 +++++++++
 gmp-impl.h             |    3 +
 mpn/generic/broot.c    |   40 +++++++++--------
 tests/mpn/Makefile.am  |    3 +-
 tests/mpn/t-brootinv.c |  104 +++++++++++++++++++++++++++++++++++++++++++++++++
 tune/common.c          |   16 +++++++
 tune/speed.c           |    4 +
 tune/speed.h           |   39 ++++++++++++++++++
 8 files changed, 209 insertions(+), 20 deletions(-)

diffs (truncated from 333 to 300 lines):

diff -r 7a309cd32a28 -r 1e767ab37a8a ChangeLog
--- a/ChangeLog	Sun Oct 28 15:51:08 2012 +0100
+++ b/ChangeLog	Wed Oct 31 13:56:30 2012 +0100
@@ -1,3 +1,23 @@
+2012-10-31  Niels Möller  <nisse at lysator.liu.se>
+
+	* tests/mpn/Makefile.am (check_PROGRAMS): Added t-brootinv.
+	* tests/mpn/t-brootinv.c: New file
+
+	* mpn/generic/broot.c (mpn_broot_invm1): Avoid a mullo_n in the
+	loop, and do powering as a plain mpn_sqr followed by mpn_powlo.
+
+	* tune/speed.c (routine): Added mpn_broot, mpn_broot_invm1,
+	mpn_brootinv.
+
+	* tune/common.c (speed_mpn_broot, speed_mpn_broot_invm1)
+	(speed_mpn_brootinv): New functions.
+	* tune/speed.h (SPEED_ROUTINE_MPN_BROOT)
+	(SPEED_ROUTINE_MPN_BROOTINV): New macros.
+
+	* mpn/generic/broot.c (mpn_broot_invm1): Made non-static (mainly
+	for benchmarking).
+	* gmp-impl.h (mpn_broot_invm1): Declare it.
+
 2012-10-28  Torbjorn Granlund  <tege at gmplib.org>
 
 	* configure.in (gmp_mpn_functions): Add new files.
diff -r 7a309cd32a28 -r 1e767ab37a8a gmp-impl.h
--- a/gmp-impl.h	Sun Oct 28 15:51:08 2012 +0100
+++ b/gmp-impl.h	Wed Oct 31 13:56:30 2012 +0100
@@ -1632,6 +1632,9 @@
 #define mpn_broot __MPN(broot)
 __GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
 
+#define mpn_broot_invm1 __MPN(broot_invm1)
+__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
 #define mpn_brootinv __MPN(brootinv)
 __GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_limb_t, mp_ptr);
 
diff -r 7a309cd32a28 -r 1e767ab37a8a mpn/generic/broot.c
--- a/mpn/generic/broot.c	Sun Oct 28 15:51:08 2012 +0100
+++ b/mpn/generic/broot.c	Wed Oct 31 13:56:30 2012 +0100
@@ -54,13 +54,20 @@
    then
 
      a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+   Compute the update term as
+
+     r' = r - (a^{k-1} r^{k+1} - r) / k
+
+   where we still have cancelation of low limbs.
+
  */
-static void
+void
 mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
 {
   mp_size_t sizes[GMP_LIMB_BITS * 2];
   mp_ptr akm1, tp, rnp, ep, scratch;
-  mp_limb_t a0, r0, km1, kinv;
+  mp_limb_t a0, r0, km1, kp1h, kinv;
   mp_size_t rn;
   unsigned i;
 
@@ -118,8 +125,11 @@
       return;
     }
 
+  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */ 
+  kp1h = k/2 + 1;
+
   /* FIXME: Special case for two limb iteration. */
-  rnp = TMP_ALLOC_LIMBS (2*n);
+  rnp = TMP_ALLOC_LIMBS (2*n + 1);
   ep = rnp + n;
 
   /* FIXME: Possible to this on the fly with some bit fiddling. */
@@ -130,28 +140,20 @@
 
   while (i-- > 0)
     {
-      /* Compute x^k. What's the best way to handle the doubled
-	 precision? */
-      MPN_ZERO (rp + rn, sizes[i] - rn);
-      mpn_powlo (rnp, rp, &k, 1, sizes[i], tp);
+      /* Compute x^{k+1}. */
+      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+			       final iteration.*/
+      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
 
-      /* Multiply by a^{k-1}. Can use wraparound; low part is
-	 000...01. */
+      /* Multiply by a^{k-1}. Can use wraparound; low part equals
+	 r. */
 
       mpn_mullo_n (ep, rnp, akm1, sizes[i]);
-      ASSERT (ep[0] == 1);
-      ASSERT (rn == 1 || mpn_zero_p (ep + 1, rn - 1));
+      ASSERT (mpn_cmp (ep, rp, rn) == 0);
 
       ASSERT (sizes[i] <= 2*rn);
-      mpn_pi1_bdiv_q_1 (ep, ep + rn, sizes[i] - rn, k, kinv, 0);
-
-      /* Multiply by x, plain mullo. */
-      mpn_mullo_n (rp + rn, ep, rp, sizes[i] - rn);
-
-      /* FIXME: Avoid negation, e.g., by using a bdiv_q_1 variant
-	 returning -q. */
+      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
       mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
-
       rn = sizes[i];
     }
   TMP_FREE;
diff -r 7a309cd32a28 -r 1e767ab37a8a tests/mpn/Makefile.am
--- a/tests/mpn/Makefile.am	Sun Oct 28 15:51:08 2012 +0100
+++ b/tests/mpn/Makefile.am	Wed Oct 31 13:56:30 2012 +0100
@@ -28,7 +28,8 @@
   t-toom52 t-toom53 t-toom54 t-toom62 t-toom63 t-toom6h t-toom8h	\
   t-toom2-sqr t-toom3-sqr t-toom4-sqr t-toom6-sqr t-toom8-sqr		\
   t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid			\
-  t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv t-broot
+  t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv			\
+  t-broot t-brootinv
 
 EXTRA_DIST = toom-shared.h toom-sqr-shared.h
 
diff -r 7a309cd32a28 -r 1e767ab37a8a tests/mpn/t-brootinv.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mpn/t-brootinv.c	Wed Oct 31 13:56:30 2012 +0100
@@ -0,0 +1,104 @@
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>		/* for strtol */
+#include <stdio.h>		/* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, app, scratch;
+  int count = COUNT;
+  unsigned i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+	{
+	  fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+	  return 1;
+	}
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  app = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  scratch = TMP_ALLOC_LIMBS (5*MAX_LIMBS);
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+      int c;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      if (i & 1)
+	mpn_random2 (ap, n);
+      else
+	mpn_random (ap, n);
+
+      ap[0] |= 1;
+
+      if (i < 100)
+	k = 3 + 2*i;
+      else
+	{
+	  mpn_random (&k, 1);
+	  if (k < 3)
+	    k = 3;
+	  else
+	    k |= 1;
+	}
+      mpn_brootinv (rp, ap, n * GMP_NUMB_BITS, k, scratch);
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+      mpn_mullo_n (app, ap, pp, n);
+      
+      if (app[0] != 1 || !mpn_zero_p (app+1, n-1))
+	{
+	  gmp_fprintf (stderr,
+		       "mpn_brootinv returned bad result: %u limbs\n",
+		       (unsigned) n);
+	  gmp_fprintf (stderr, "k     = %Mx\n", k);
+	  gmp_fprintf (stderr, "a     = %Nx\n", ap, n);
+	  gmp_fprintf (stderr, "r     = %Nx\n", rp, n);
+	  gmp_fprintf (stderr, "r^n   = %Nx\n", pp, n);
+	  gmp_fprintf (stderr, "a r^n = %Nx\n", app, n);
+	  abort ();
+	}
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff -r 7a309cd32a28 -r 1e767ab37a8a tune/common.c
--- a/tune/common.c	Sun Oct 28 15:51:08 2012 +0100
+++ b/tune/common.c	Wed Oct 31 13:56:30 2012 +0100
@@ -867,6 +867,22 @@
 }
 
 double
+speed_mpn_broot (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BROOT (mpn_broot);
+}
+double
+speed_mpn_broot_invm1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
+}
+double
+speed_mpn_brootinv (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+}
+
+double
 speed_mpn_binvert (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
diff -r 7a309cd32a28 -r 1e767ab37a8a tune/speed.c
--- a/tune/speed.c	Sun Oct 28 15:51:08 2012 +0100
+++ b/tune/speed.c	Wed Oct 31 13:56:30 2012 +0100
@@ -370,6 +370,10 @@
   { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
   { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
 
+  { "mpn_broot",               speed_mpn_broot,    FLAG_R },
+  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
+  { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },
+  
   { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
   { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
   { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
diff -r 7a309cd32a28 -r 1e767ab37a8a tune/speed.h
--- a/tune/speed.h	Sun Oct 28 15:51:08 2012 +0100
+++ b/tune/speed.h	Wed Oct 31 13:56:30 2012 +0100
@@ -281,6 +281,9 @@
 double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
 double speed_mpn_mu_bdiv_q (struct speed_params *);
 double speed_mpn_mu_bdiv_qr (struct speed_params *);
+double speed_mpn_broot (struct speed_params *);
+double speed_mpn_broot_invm1 (struct speed_params *);
+double speed_mpn_brootinv (struct speed_params *);
 double speed_mpn_invert (struct speed_params *);
 double speed_mpn_invertappr (struct speed_params *);
 double speed_mpn_ni_invertappr (struct speed_params *);
@@ -2076,6 +2079,42 @@
     return t;								\
   }
 
+#define SPEED_ROUTINE_MPN_BROOT(function)	\
+  {						\
+    SPEED_RESTRICT_COND (s->r & 1);		\
+    s->xp[0] |= 1;				\
+    SPEED_ROUTINE_MPN_UNARY_1_CALL		\
+      ((*function) (wp, s->xp, s->size, s->r));	\


More information about the gmp-commit mailing list