[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Wed Oct 31 14:00:03 CET 2012
details: /var/hg/gmp/rev/c3bf4d1124f1
changeset: 15099:c3bf4d1124f1
user: Niels M?ller <nisse at lysator.liu.se>
date: Wed Oct 31 12:00:39 2012 +0100
description:
Benchmarking of broot functions.
details: /var/hg/gmp/rev/f84ea35b6178
changeset: 15100:f84ea35b6178
user: Niels M?ller <nisse at lysator.liu.se>
date: Wed Oct 31 12:17:12 2012 +0100
description:
Optimization of mpn_broot.
details: /var/hg/gmp/rev/1e767ab37a8a
changeset: 15101:1e767ab37a8a
user: Niels M?ller <nisse at lysator.liu.se>
date: Wed Oct 31 13:56:30 2012 +0100
description:
Test mpn_brootinv.
diffstat:
ChangeLog | 20 +++++++++
gmp-impl.h | 3 +
mpn/generic/broot.c | 40 +++++++++--------
tests/mpn/Makefile.am | 3 +-
tests/mpn/t-brootinv.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++
tune/common.c | 16 +++++++
tune/speed.c | 4 +
tune/speed.h | 39 ++++++++++++++++++
8 files changed, 209 insertions(+), 20 deletions(-)
diffs (truncated from 333 to 300 lines):
diff -r 7a309cd32a28 -r 1e767ab37a8a ChangeLog
--- a/ChangeLog Sun Oct 28 15:51:08 2012 +0100
+++ b/ChangeLog Wed Oct 31 13:56:30 2012 +0100
@@ -1,3 +1,23 @@
+2012-10-31 Niels Möller <nisse at lysator.liu.se>
+
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-brootinv.
+ * tests/mpn/t-brootinv.c: New file
+
+ * mpn/generic/broot.c (mpn_broot_invm1): Avoid a mullo_n in the
+ loop, and do powering as a plain mpn_sqr followed by mpn_powlo.
+
+ * tune/speed.c (routine): Added mpn_broot, mpn_broot_invm1,
+ mpn_brootinv.
+
+ * tune/common.c (speed_mpn_broot, speed_mpn_broot_invm1)
+ (speed_mpn_brootinv): New functions.
+ * tune/speed.h (SPEED_ROUTINE_MPN_BROOT)
+ (SPEED_ROUTINE_MPN_BROOTINV): New macros.
+
+ * mpn/generic/broot.c (mpn_broot_invm1): Made non-static (mainly
+ for benchmarking).
+ * gmp-impl.h (mpn_broot_invm1): Declare it.
+
2012-10-28 Torbjorn Granlund <tege at gmplib.org>
* configure.in (gmp_mpn_functions): Add new files.
diff -r 7a309cd32a28 -r 1e767ab37a8a gmp-impl.h
--- a/gmp-impl.h Sun Oct 28 15:51:08 2012 +0100
+++ b/gmp-impl.h Wed Oct 31 13:56:30 2012 +0100
@@ -1632,6 +1632,9 @@
#define mpn_broot __MPN(broot)
__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#define mpn_broot_invm1 __MPN(broot_invm1)
+__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
#define mpn_brootinv __MPN(brootinv)
__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_limb_t, mp_ptr);
diff -r 7a309cd32a28 -r 1e767ab37a8a mpn/generic/broot.c
--- a/mpn/generic/broot.c Sun Oct 28 15:51:08 2012 +0100
+++ b/mpn/generic/broot.c Wed Oct 31 13:56:30 2012 +0100
@@ -54,13 +54,20 @@
then
a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+ Compute the update term as
+
+ r' = r - (a^{k-1} r^{k+1} - r) / k
+
+ where we still have cancelation of low limbs.
+
*/
-static void
+void
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
mp_size_t sizes[GMP_LIMB_BITS * 2];
mp_ptr akm1, tp, rnp, ep, scratch;
- mp_limb_t a0, r0, km1, kinv;
+ mp_limb_t a0, r0, km1, kp1h, kinv;
mp_size_t rn;
unsigned i;
@@ -118,8 +125,11 @@
return;
}
+ /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
+ kp1h = k/2 + 1;
+
/* FIXME: Special case for two limb iteration. */
- rnp = TMP_ALLOC_LIMBS (2*n);
+ rnp = TMP_ALLOC_LIMBS (2*n + 1);
ep = rnp + n;
/* FIXME: Possible to this on the fly with some bit fiddling. */
@@ -130,28 +140,20 @@
while (i-- > 0)
{
- /* Compute x^k. What's the best way to handle the doubled
- precision? */
- MPN_ZERO (rp + rn, sizes[i] - rn);
- mpn_powlo (rnp, rp, &k, 1, sizes[i], tp);
+ /* Compute x^{k+1}. */
+ mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+ final iteration.*/
+ mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
- /* Multiply by a^{k-1}. Can use wraparound; low part is
- 000...01. */
+ /* Multiply by a^{k-1}. Can use wraparound; low part equals
+ r. */
mpn_mullo_n (ep, rnp, akm1, sizes[i]);
- ASSERT (ep[0] == 1);
- ASSERT (rn == 1 || mpn_zero_p (ep + 1, rn - 1));
+ ASSERT (mpn_cmp (ep, rp, rn) == 0);
ASSERT (sizes[i] <= 2*rn);
- mpn_pi1_bdiv_q_1 (ep, ep + rn, sizes[i] - rn, k, kinv, 0);
-
- /* Multiply by x, plain mullo. */
- mpn_mullo_n (rp + rn, ep, rp, sizes[i] - rn);
-
- /* FIXME: Avoid negation, e.g., by using a bdiv_q_1 variant
- returning -q. */
+ mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
-
rn = sizes[i];
}
TMP_FREE;
diff -r 7a309cd32a28 -r 1e767ab37a8a tests/mpn/Makefile.am
--- a/tests/mpn/Makefile.am Sun Oct 28 15:51:08 2012 +0100
+++ b/tests/mpn/Makefile.am Wed Oct 31 13:56:30 2012 +0100
@@ -28,7 +28,8 @@
t-toom52 t-toom53 t-toom54 t-toom62 t-toom63 t-toom6h t-toom8h \
t-toom2-sqr t-toom3-sqr t-toom4-sqr t-toom6-sqr t-toom8-sqr \
t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid \
- t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv t-broot
+ t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv \
+ t-broot t-brootinv
EXTRA_DIST = toom-shared.h toom-sqr-shared.h
diff -r 7a309cd32a28 -r 1e767ab37a8a tests/mpn/t-brootinv.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/mpn/t-brootinv.c Wed Oct 31 13:56:30 2012 +0100
@@ -0,0 +1,104 @@
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see http://www.gnu.org/licenses/. */
+
+
+#include <stdlib.h> /* for strtol */
+#include <stdio.h> /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+int
+main (int argc, char **argv)
+{
+ gmp_randstate_ptr rands;
+
+ mp_ptr ap, rp, pp, app, scratch;
+ int count = COUNT;
+ unsigned i;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ if (argc > 1)
+ {
+ char *end;
+ count = strtol (argv[1], &end, 0);
+ if (*end || count <= 0)
+ {
+ fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+ return 1;
+ }
+ }
+
+ tests_start ();
+ rands = RANDS;
+
+ ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+ rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+ pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+ app = TMP_ALLOC_LIMBS (MAX_LIMBS);
+ scratch = TMP_ALLOC_LIMBS (5*MAX_LIMBS);
+
+ for (i = 0; i < count; i++)
+ {
+ mp_size_t n;
+ mp_limb_t k;
+ int c;
+
+ n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+ if (i & 1)
+ mpn_random2 (ap, n);
+ else
+ mpn_random (ap, n);
+
+ ap[0] |= 1;
+
+ if (i < 100)
+ k = 3 + 2*i;
+ else
+ {
+ mpn_random (&k, 1);
+ if (k < 3)
+ k = 3;
+ else
+ k |= 1;
+ }
+ mpn_brootinv (rp, ap, n * GMP_NUMB_BITS, k, scratch);
+ mpn_powlo (pp, rp, &k, 1, n, scratch);
+ mpn_mullo_n (app, ap, pp, n);
+
+ if (app[0] != 1 || !mpn_zero_p (app+1, n-1))
+ {
+ gmp_fprintf (stderr,
+ "mpn_brootinv returned bad result: %u limbs\n",
+ (unsigned) n);
+ gmp_fprintf (stderr, "k = %Mx\n", k);
+ gmp_fprintf (stderr, "a = %Nx\n", ap, n);
+ gmp_fprintf (stderr, "r = %Nx\n", rp, n);
+ gmp_fprintf (stderr, "r^n = %Nx\n", pp, n);
+ gmp_fprintf (stderr, "a r^n = %Nx\n", app, n);
+ abort ();
+ }
+ }
+ TMP_FREE;
+ tests_end ();
+ return 0;
+}
diff -r 7a309cd32a28 -r 1e767ab37a8a tune/common.c
--- a/tune/common.c Sun Oct 28 15:51:08 2012 +0100
+++ b/tune/common.c Wed Oct 31 13:56:30 2012 +0100
@@ -867,6 +867,22 @@
}
double
+speed_mpn_broot (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOT (mpn_broot);
+}
+double
+speed_mpn_broot_invm1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
+}
+double
+speed_mpn_brootinv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+}
+
+double
speed_mpn_binvert (struct speed_params *s)
{
SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
diff -r 7a309cd32a28 -r 1e767ab37a8a tune/speed.c
--- a/tune/speed.c Sun Oct 28 15:51:08 2012 +0100
+++ b/tune/speed.c Wed Oct 31 13:56:30 2012 +0100
@@ -370,6 +370,10 @@
{ "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q },
{ "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q },
+ { "mpn_broot", speed_mpn_broot, FLAG_R },
+ { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R },
+ { "mpn_brootinv", speed_mpn_brootinv, FLAG_R },
+
{ "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
{ "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
{ "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
diff -r 7a309cd32a28 -r 1e767ab37a8a tune/speed.h
--- a/tune/speed.h Sun Oct 28 15:51:08 2012 +0100
+++ b/tune/speed.h Wed Oct 31 13:56:30 2012 +0100
@@ -281,6 +281,9 @@
double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
double speed_mpn_mu_bdiv_q (struct speed_params *);
double speed_mpn_mu_bdiv_qr (struct speed_params *);
+double speed_mpn_broot (struct speed_params *);
+double speed_mpn_broot_invm1 (struct speed_params *);
+double speed_mpn_brootinv (struct speed_params *);
double speed_mpn_invert (struct speed_params *);
double speed_mpn_invertappr (struct speed_params *);
double speed_mpn_ni_invertappr (struct speed_params *);
@@ -2076,6 +2079,42 @@
return t; \
}
+#define SPEED_ROUTINE_MPN_BROOT(function) \
+ { \
+ SPEED_RESTRICT_COND (s->r & 1); \
+ s->xp[0] |= 1; \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL \
+ ((*function) (wp, s->xp, s->size, s->r)); \
More information about the gmp-commit
mailing list