[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Aug 6 22:07:47 UTC 2015


details:   /var/hg/gmp/rev/d4912c0f0d1e
changeset: 16758:d4912c0f0d1e
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Aug 06 22:50:23 2015 +0200
description:
mpn/generic/sqrlo_basecase.c: Use parentheses in macros.

details:   /var/hg/gmp/rev/d400e4546312
changeset: 16759:d400e4546312
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Aug 06 23:17:22 2015 +0200
description:
mpn/generic/sqrlo_basecase.c: Add special code for n==2 (and n==3, unused)

details:   /var/hg/gmp/rev/8676496b72ed
changeset: 16760:8676496b72ed
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Aug 06 23:54:27 2015 +0200
description:
mpn/generic/sqrlo_basecase.c: Add code for shortcut multiplication

details:   /var/hg/gmp/rev/b60eb1552c0a
changeset: 16761:b60eb1552c0a
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Fri Aug 07 00:06:49 2015 +0200
description:
ChangeLog

diffstat:

 ChangeLog                    |  19 +++++++++
 mpn/generic/sqrlo_basecase.c |  92 ++++++++++++++++++++++++++++++++++++-------
 2 files changed, 96 insertions(+), 15 deletions(-)

diffs (155 lines):

diff -r 8950fdf6a129 -r b60eb1552c0a ChangeLog
--- a/ChangeLog	Thu Aug 06 04:23:43 2015 +0200
+++ b/ChangeLog	Fri Aug 07 00:06:49 2015 +0200
@@ -1,3 +1,22 @@
+2015-08-04 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* tests/refmpn.c (refmpn_sqrlo): New function.
+	* tests/tests.h: Define it.
+
+	* mpn/generic/sqrlo.c: New file, new function.
+	* mpn/generic/sqrlo_basecase.c: New file, new function.
+	* gmp-impl.h (mpn_sqrlo, mpn_sqrlo_basecase): Declare them.
+	* configure.ac (gmp_mpn_functions): Add new files.
+
+	* tests/mpn/t-sqrlo.c: New file, new test.
+	* tests/mpn/Makefile.am (check_PROGRAMS): Add new test.
+	* tests/devel/try.c: Support mpn_sqrlo and mpn_sqrlo_basecase.
+
+	* tune/common.c (speed_mpn_sqrlo{,_basecase}): New functions. 
+	* tune/speed.c: Support new functions.
+	* tune/speed.h (SPEED_ROUTINE_MPN_MULLO_BASECASE): Update.
+	(SPEED_ROUTINE_MPN_SQRLO): New macro.
+
 2015-07-28 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/sqrtrem.c (mpn_dc_sqrt): Support odd sizes.
diff -r 8950fdf6a129 -r b60eb1552c0a mpn/generic/sqrlo_basecase.c
--- a/mpn/generic/sqrlo_basecase.c	Thu Aug 06 04:23:43 2015 +0200
+++ b/mpn/generic/sqrlo_basecase.c	Fri Aug 07 00:06:49 2015 +0200
@@ -59,25 +59,39 @@
 #if HAVE_NATIVE_mpn_addlsh1_n_ip1
 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
   do {									\
-    MPN_SQR_DIAGONAL (rp, up, n>>1);					\
-    if ((n & 1) != 0)							\
-      (rp)[n - 1] = ((up)[n>>1] * (up)[n>>1]) & GMP_NUMB_MASK;		\
-    mpn_addlsh1_n_ip1 (rp + 1, tp, n - 1);				\
+    mp_size_t nhalf;							\
+    nhalf = (n) >> 1;							\
+    MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
+    if (((n) & 1) != 0)							\
+      {									\
+	mp_limb_t op;							\
+	op = (up)[nhalf];						\
+	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
+      }									\
+    mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
   } while (0)
 #else
 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
   do {									\
-    MPN_SQR_DIAGONAL (rp, up, n>>1);					\
-    if ((n & 1) != 0)							\
-      (rp)[n - 1] = ((up)[n>>1] * (up)[n>>1]) & GMP_NUMB_MASK;		\
-    mpn_lshift (tp, tp, n - 1, 1);					\
-    mpn_add_n (rp + 1, rp + 1, tp, n - 1);				\
+    mp_size_t nhalf;							\
+    nhalf = (n) >> 1;							\
+    MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
+    if (((n) & 1) != 0)							\
+      {									\
+	mp_limb_t op;							\
+	op = (up)[nhalf];						\
+	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
+      }									\
+    mpn_lshift ((tp), (tp), (n) - 1, 1);				\
+    mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
   } while (0)
 #endif
 
 
 /* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
-
+#ifndef SQRLO_SPECIAL_CASES
+#define SQRLO_SPECIAL_CASES 2
+#endif
 void
 mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
 {
@@ -86,11 +100,46 @@
   ASSERT (n >= 1);
   ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
 
-  if (n <= 1)
+  if (n <= SQRLO_SPECIAL_CASES)
     {
       mp_limb_t ul;
       ul = up[0];
+#if SQRLO_SPECIAL_CASES == 1
       rp[0] = (ul * ul) & GMP_NUMB_MASK;
+#else
+      if (n == 1)
+	rp[0] = (ul * ul) & GMP_NUMB_MASK;
+      else
+	{
+	  mp_limb_t hi, lo, ul1;
+	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
+	  rp[0] = lo >> GMP_NAIL_BITS;
+	  ul1 = up[1];
+#if SQRLO_SPECIAL_CASES == 2
+	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+#else
+	  if (n == 2)
+	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
+	  else
+	    {
+	      mp_limb_t hi1;
+#if GMP_NAIL_BITS != 0
+	      ul <<= 1;
+#endif
+	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
+	      hi1 += ul * up[2];
+#if GMP_NAIL_BITS == 0
+	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
+	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
+#else
+	      hi += lo >> GMP_NAIL_BITS;
+	      rp[1] = hi & GMP_NUMB_MASK;
+	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;	      
+#endif
+	    }
+#endif
+	}
+#endif
     }
   else
     {
@@ -99,10 +148,23 @@
       /* must fit n-1 limbs in tp */
       ASSERT (n <= 2 * SQR_TOOM2_THRESHOLD);
 
-      mpn_mul_1 (tp, up + 1, n - 1, up[0]);
-      for (i = 2; 2 * i - 1 < n; ++i)
-	mpn_addmul_1 (tp + 2 * i - 2, up + i, n - 2 * i + 1, up[i - 1]);
+      --n;
+#ifdef SHORTCUT_MULTIPLICATIONS
+      {
+	mp_limb_t cy;
+      
+	cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]) + up[0] * up[n];
+	for (i = 1; 2 * i + 1 < n; ++i)
+	  cy += mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, up[i]) + up[i] * up[n - i];
+	tp [n-1] = (cy + ((n & 1)?up[i] * up[n - i]:0)) & GMP_NUMB_MASK;
+      }
+#else
+      mpn_mul_1 (tp, up + 1, n, up[0]);
+      for (i = 1; 2 * i < n; ++i)
+	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
+#endif
 
-      MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n);
+      MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
     }
 }
+#undef SQRLO_SPECIAL_CASES


More information about the gmp-commit mailing list