[Gmp-commit] /home/hgfiles/gmp: 4 new changesets

Thu Mar 25 22:51:17 CET 2010

details:   /home/hgfiles/gmp/rev/20f1005890fb
changeset: 13529:20f1005890fb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 25 22:46:51 2010 +0100
description:
Rename addsub_n.c to add_n_sub_n.c.

details:   /home/hgfiles/gmp/rev/16a1680a2ea9
changeset: 13530:16a1680a2ea9
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 25 22:49:22 2010 +0100
description:
Fix mpn_add_n_sub_n usage.

details:   /home/hgfiles/gmp/rev/fabab187e9e1
changeset: 13531:fabab187e9e1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 25 22:50:25 2010 +0100
description:
Disable mpn_add_n_sub_n call.

details:   /home/hgfiles/gmp/rev/d31f676c4628
changeset: 13532:d31f676c4628
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 25 22:51:11 2010 +0100
description:
Add itanium mpn_add_n_sub_n.

diffstat:

 ChangeLog                        |   13 +
 mpn/generic/add_n_sub_n.c        |  162 +++++++++++++++++++++
 mpn/generic/addsub_n.c           |  162 ---------------------
 mpn/generic/toom33_mul.c         |   10 +-
 mpn/generic/toom3_sqr.c          |    5 +-
 mpn/generic/toom63_mul.c         |    6 +-
 mpn/generic/toom_eval_dgr3_pm2.c |    2 +-
 mpn/generic/toom_eval_pm2.c      |    2 +-
 mpn/ia64/add_n_sub_n.asm         |  295 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 484 insertions(+), 173 deletions(-)

diffs (truncated from 760 to 300 lines):

diff -r bbd2c2155a37 -r d31f676c4628 ChangeLog

--- a/ChangeLog	Tue Mar 23 10:37:50 2010 +0100
+++ b/ChangeLog	Thu Mar 25 22:51:11 2010 +0100
@@ -1,3 +1,16 @@
+2010-03-25  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/ia64/add_n_sub_n.asm: New file.
+
+	* mpn/generic/toom_eval_pm2.c: Disable mpn_add_n_sub_n call.
+	* mpn/generic/toom_eval_dgr3_pm2.c: Likewise.
+
+	* mpn/generic/toom33_mul.c: Fix mpn_add_n_sub_n usage.
+	* mpn/generic/toom3_sqr.c: Likewise.
+	* mpn/generic/toom63_mul.c: Likewise.
+
+	* mpn/generic/add_n_sub_n.c: Renamed from addsub_n.c.
+
 2010-03-23  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/divrem_2.asm: Use mpn_invert_limb instead of div insn.
diff -r bbd2c2155a37 -r d31f676c4628 mpn/generic/add_n_sub_n.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/add_n_sub_n.c	Thu Mar 25 22:51:11 2010 +0100
@@ -0,0 +1,162 @@
+/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192	/* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
+
+
+/* mpn_add_n_sub_n.
+   r1[] = s1[] + s2[]
+   r2[] = s1[] - s2[]
+   All operands have n limbs.
+   In-place operations allowed.  */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+  mp_limb_t acyn, acyo;		/* carry for add */
+  mp_limb_t scyn, scyo;		/* carry for subtract */
+  mp_size_t off;		/* offset in operands */
+  mp_size_t this_n;		/* size of current chunk */
+
+  /* We alternatingly add and subtract in chunks that fit into the (L1)
+     cache.  Since the chunks are several hundred limbs, the function call
+     overhead is insignificant, but we get much better locality.  */
+
+  /* We have three variant of the inner loop, the proper loop is chosen
+     depending on whether r1 or r2 are the same operand as s1 or s2.  */
+
+  if (r1p != s1p && r1p != s2p)
+    {
+      /* r1 is not identical to either input operand.  We can therefore write
+	 to r1 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+	}
+    }
+  else if (r2p != s1p && r2p != s2p)
+    {
+      /* r2 is not identical to either input operand.  We can therefore write
+	 to r2 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+	}
+    }
+  else
+    {
+      /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice versa)
+	 Need temporary storage.  */
+      mp_limb_t tp[PART_SIZE];
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+	{
+	  this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+	  acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+	  acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+	  acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+	  MPN_COPY (r1p + off, tp, this_n);
+	}
+    }
+
+  return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr r1p, r2p, s1p, s2p;
+  double t;
+  mp_size_t n;
+
+  n = strtol (argv[1], 0, 0);
+
+  r1p = malloc (n * BYTES_PER_MP_LIMB);
+  r2p = malloc (n * BYTES_PER_MP_LIMB);
+  s1p = malloc (n * BYTES_PER_MP_LIMB);
+  s2p = malloc (n * BYTES_PER_MP_LIMB);
+  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+  printf ("              separate add and sub: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  printf ("combined addsub separate variables: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  printf ("        combined addsub r1 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  printf ("        combined addsub r2 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+  printf ("          combined addsub in-place: %.3f\n", t);
+
+  return 0;
+}
+#endif
diff -r bbd2c2155a37 -r d31f676c4628 mpn/generic/addsub_n.c
--- a/mpn/generic/addsub_n.c	Tue Mar 23 10:37:50 2010 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#ifndef L1_CACHE_SIZE
-#define L1_CACHE_SIZE 8192	/* only 68040 has less than this */
-#endif
-
-#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
-
-
-/* mpn_add_n_sub_n.
-   r1[] = s1[] + s2[]
-   r2[] = s1[] - s2[]
-   All operands have n limbs.
-   In-place operations allowed.  */
-mp_limb_t
-mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
-{
-  mp_limb_t acyn, acyo;		/* carry for add */
-  mp_limb_t scyn, scyo;		/* carry for subtract */
-  mp_size_t off;		/* offset in operands */
-  mp_size_t this_n;		/* size of current chunk */
-
-  /* We alternatingly add and subtract in chunks that fit into the (L1)
-     cache.  Since the chunks are several hundred limbs, the function call
-     overhead is insignificant, but we get much better locality.  */
-
-  /* We have three variant of the inner loop, the proper loop is chosen
-     depending on whether r1 or r2 are the same operand as s1 or s2.  */
-
-  if (r1p != s1p && r1p != s2p)
-    {
-      /* r1 is not identical to either input operand.  We can therefore write
-	 to r1 directly, without using temporary storage.  */
-      acyo = 0;
-      scyo = 0;
-      for (off = 0; off < n; off += PART_SIZE)
-	{
-	  this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_add_nc
-	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
-#else
-	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
-	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
-#endif
-#if HAVE_NATIVE_mpn_sub_nc
-	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
-	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
-	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
-	}
-    }
-  else if (r2p != s1p && r2p != s2p)
-    {
-      /* r2 is not identical to either input operand.  We can therefore write
-	 to r2 directly, without using temporary storage.  */
-      acyo = 0;
-      scyo = 0;
-      for (off = 0; off < n; off += PART_SIZE)
-	{
-	  this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_sub_nc
-	  scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
-	  scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
-	  scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
-#if HAVE_NATIVE_mpn_add_nc
-	  acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
-#else
-	  acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
-	  acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
-#endif
-	}
-    }
-  else
-    {
-      /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice versa)
-	 Need temporary storage.  */
-      mp_limb_t tp[PART_SIZE];
-      acyo = 0;
-      scyo = 0;
-      for (off = 0; off < n; off += PART_SIZE)
-	{
-	  this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_add_nc