[Gmp-commit] /home/hgfiles/gmp: mpn_invert: faster basecase, using sbpi1_div_q.

mercurial at gmplib.org mercurial at gmplib.org
Wed Dec 23 11:50:11 CET 2009


details:   /home/hgfiles/gmp/rev/112804c86fdf
changeset: 13198:112804c86fdf
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Wed Dec 23 11:49:47 2009 +0100
description:
mpn_invert: faster basecase, using sbpi1_div_q.

diffstat:

 ChangeLog                |   3 +++
 mpn/generic/invert.c     |  30 ++++++++++++++++++++----------
 mpn/generic/invertappr.c |   3 +--
 3 files changed, 24 insertions(+), 12 deletions(-)

diffs (70 lines):

diff -r 94bcfa37e666 -r 112804c86fdf ChangeLog
--- a/ChangeLog	Wed Dec 23 05:39:48 2009 +0100
+++ b/ChangeLog	Wed Dec 23 11:49:47 2009 +0100
@@ -1,5 +1,8 @@
 2009-12-23  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
+	* mpn/generic/invertappr.c (mpn_bc_invertappr): If n=2, mpn_divrem_2.
+	* mpn/generic/invert.c: Faster basecase, use mpn_sbpi1_div_q.
+
 	* mpn/generic/toom_eval_pm2exp.c: Assert support for degree 3.
 	* mpn/generic/toom6h_mul.c: Do not use obsolete _itch function any more.
 
diff -r 94bcfa37e666 -r 112804c86fdf mpn/generic/invert.c
--- a/mpn/generic/invert.c	Wed Dec 23 05:39:48 2009 +0100
+++ b/mpn/generic/invert.c	Wed Dec 23 11:49:47 2009 +0100
@@ -1,4 +1,4 @@
-/* invert.c -- Compute floor((B^{2n}-1)/U).
+/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
 
    Contributed to the GNU project by Marco Bodrato.
 
@@ -54,16 +54,26 @@
       scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));
 
     if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD)) {
-      /* Maximum scratch needed by this branch: 3*n + 2 */
-      mp_size_t i;
-      mp_ptr xp;
+      if (n == 1)
+	invert_limb (*ip, *dp);
+      else {
+	/* Maximum scratch needed by this branch: 2*n */
+	mp_size_t i;
+	mp_ptr xp;
 
-      xp = scratch + n + 2;				/* 2 * n limbs */
-      for (i = n - 1; i >= 0; i--)
-	xp[i] = GMP_NUMB_MAX;
-      mpn_com_n (xp + n, dp, n);
-      mpn_tdiv_qr (scratch, ip, 0, xp, 2 * n, dp, n);
-      MPN_COPY (ip, scratch, n);
+	xp = scratch;				/* 2 * n limbs */
+	for (i = n - 1; i >= 0; i--)
+	  xp[i] = GMP_NUMB_MAX;
+	mpn_com_n (xp + n, dp, n);
+	if (n == 2) {
+	  mpn_divrem_2 (ip, 0, xp, 4, dp);
+	} else {
+	  gmp_pi1_t inv;
+	  invert_pi1 (inv, dp[n-1], dp[n-2]);
+	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
+	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
+	}
+      }
     } else { /* Use approximated inverse; correct the result if needed. */
       mp_limb_t e; /* The possible error in the approximate inverse */
 
diff -r 94bcfa37e666 -r 112804c86fdf mpn/generic/invertappr.c
--- a/mpn/generic/invertappr.c	Wed Dec 23 05:39:48 2009 +0100
+++ b/mpn/generic/invertappr.c	Wed Dec 23 11:49:47 2009 +0100
@@ -111,8 +111,7 @@
 
     /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
     if (n == 2) {
-      mpn_tdiv_qr (tp, ip, 0, xp, 2 * n, dp, n);
-      MPN_COPY (ip, tp, n);
+      mpn_divrem_2 (ip, 0, xp, 4, dp);
     } else {
       gmp_pi1_t inv;
       invert_pi1 (inv, dp[n-1], dp[n-2]);


More information about the gmp-commit mailing list