[Gmp-commit] /var/hg/gmp: 9 new changesets

Sun Mar 17 19:15:27 CET 2013

details:   /var/hg/gmp/rev/462021f3e38a
changeset: 15585:462021f3e38a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Mar 16 22:07:31 2013 +0100
description:
Tweak for better A9 performance.

details:   /var/hg/gmp/rev/d6fc8e2406eb
changeset: 15586:d6fc8e2406eb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 18:36:49 2013 +0100
description:
New arm64 files.

details:   /var/hg/gmp/rev/59b4fc55a461
changeset: 15587:59b4fc55a461
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 18:37:25 2013 +0100
description:
Tweak.

details:   /var/hg/gmp/rev/adbbed8b91e6
changeset: 15588:adbbed8b91e6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 18:38:26 2013 +0100
description:
New arm/v6 file.

details:   /var/hg/gmp/rev/02954eedeb24
changeset: 15589:02954eedeb24
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 18:39:53 2013 +0100
description:
Cleanup spacing.

details:   /var/hg/gmp/rev/8af924d5f258
changeset: 15590:8af924d5f258
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 18:41:01 2013 +0100
description:
(mod): Use TMP_BALLOC in mu code.

details:   /var/hg/gmp/rev/49b9bd77d5fc
changeset: 15591:49b9bd77d5fc
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 19:11:57 2013 +0100
description:
Use TMP_BALLOC*, but combine several areas.

details:   /var/hg/gmp/rev/a26c755f7585
changeset: 15592:a26c755f7585
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 19:12:11 2013 +0100
description:
ChangeLog.

details:   /var/hg/gmp/rev/e49a084a7ec6
changeset: 15593:e49a084a7ec6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 17 19:14:51 2013 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                                                   |   22 +
 longlong.h                                                  |  212 ++++++------
 mpn/arm/v6/addmul_2.asm                                     |   51 +-
 mpn/arm/v6/addmul_3.asm                                     |  163 +++++++++
 mpn/arm/v7a/cora15/copyd.asm                                |    5 +-
 mpn/arm64/copyd.asm                                         |   82 ++++
 mpn/arm64/copyi.asm                                         |   66 +++
 mpn/generic/mul_fft.c                                       |   52 +-
 mpz/powm_ui.c                                               |    4 +-
 tests/cxx/Makefile.am                                       |    8 +-
 tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc |   38 ++
 11 files changed, 547 insertions(+), 156 deletions(-)

diffs (truncated from 1066 to 300 lines):

diff -r 6dbd2044d1ce -r e49a084a7ec6 ChangeLog

--- a/ChangeLog	Thu Mar 14 22:45:42 2013 +0100
+++ b/ChangeLog	Sun Mar 17 19:14:51 2013 +0100
@@ -1,3 +1,25 @@
+2013-03-17  Marc Glisse  <marc.glisse at inria.fr>
+
+	* tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc: New file.
+	* tests/cxx/Makefile.am: Add new file. Reorder the tests.
+
+2013-03-17  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/mul_fft.c: Use TMP_BALLOC*, but combine several areas.
+
+	* mpz/powm_ui.c (mod): Use TMP_BALLOC in mu code.
+
+	* mpn/arm/v6/addmul_3.asm: New file.
+
+	* mpn/arm/v7a/cora15/copyd.asm: Tweak.
+
+	* mpn/arm64/copyi.asm: New file.
+	* mpn/arm64/copyd.asm: New file.
+
+2013-03-16  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/arm/v6/addmul_2.asm: Tweak for better A9 performance.
+
 2013-03-14  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/ia64/cnd_aors_n.asm: New file.
diff -r 6dbd2044d1ce -r e49a084a7ec6 longlong.h
--- a/longlong.h	Thu Mar 14 22:45:42 2013 +0100
+++ b/longlong.h	Sun Mar 17 19:14:51 2013 +0100
@@ -1,7 +1,7 @@
 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
 
 Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+2004, 2005, 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
 
 This file is free software; you can redistribute it and/or modify it under the
 terms of the GNU Lesser General Public License as published by the Free
@@ -139,30 +139,30 @@
    or want.  */
 
 #ifdef _LONG_LONG_LIMB
-#define count_leading_zeros_gcc_clz(count,x)    \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_clzll (x);              \
+#define count_leading_zeros_gcc_clz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_clzll (x);		\
   } while (0)
 #else
-#define count_leading_zeros_gcc_clz(count,x)    \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_clzl (x);               \
+#define count_leading_zeros_gcc_clz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_clzl (x);		\
   } while (0)
 #endif
 
 #ifdef _LONG_LONG_LIMB
-#define count_trailing_zeros_gcc_ctz(count,x)   \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_ctzll (x);              \
+#define count_trailing_zeros_gcc_ctz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_ctzll (x);		\
   } while (0)
 #else
-#define count_trailing_zeros_gcc_ctz(count,x)   \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_ctzl (x);               \
+#define count_trailing_zeros_gcc_ctz(count,x)	\
+  do {						\
+    ASSERT ((x) != 0);				\
+    (count) = __builtin_ctzl (x);		\
   } while (0)
 #endif
 
@@ -223,27 +223,27 @@
   __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
 #endif /* clz/ctz using cix */
 
-#if ! defined (count_leading_zeros)                             \
+#if ! defined (count_leading_zeros)				\
   && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)
 /* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0.
    "$31" is written explicitly in the asm, since an "r" constraint won't
    select reg 31.  There seems no need to worry about "r31" syntax for cray,
-   since gcc itself (pre-release 3.4) emits just $31 in various places.  */
-#define ALPHA_CMPBGE_0(dst, src)                                        \
+   since gcc itself (pre-release 3.4) emits just $31 in various places.	 */
+#define ALPHA_CMPBGE_0(dst, src)					\
   do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0)
 /* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts
    them, locating the highest non-zero byte.  A second __clz_tab lookup
    counts the leading zero bits in that byte, giving the result.  */
-#define count_leading_zeros(count, x)                                   \
-  do {                                                                  \
-    UWtype  __clz__b, __clz__c, __clz__x = (x);                         \
-    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);           /* zero bytes */    \
-    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */   \
-    __clz__b = __clz__b * 8 - 7;                    /* 57 to 1 shift */ \
-    __clz__x >>= __clz__b;                                              \
-    __clz__c = __clz_tab [__clz__x];                /* 8 to 1 bit */    \
-    __clz__b = 65 - __clz__b;                                           \
-    (count) = __clz__b - __clz__c;                                      \
+#define count_leading_zeros(count, x)					\
+  do {									\
+    UWtype  __clz__b, __clz__c, __clz__x = (x);				\
+    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);	    /* zero bytes */	\
+    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */	\
+    __clz__b = __clz__b * 8 - 7;		    /* 57 to 1 shift */ \
+    __clz__x >>= __clz__b;						\
+    __clz__c = __clz_tab [__clz__x];		    /* 8 to 1 bit */	\
+    __clz__b = 65 - __clz__b;						\
+    (count) = __clz__b - __clz__c;					\
   } while (0)
 #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
 #endif /* clz using cmpbge */
@@ -299,14 +299,14 @@
    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
    register, which takes an extra cycle.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl)      \
-  do {                                          \
-    UWtype __x;                                 \
-    __x = (al) - (bl);                          \
-    if ((al) < (bl))                            \
-      (sh) = (ah) - (bh) - 1;                   \
-    else                                        \
-      (sh) = (ah) - (bh);                       \
-    (sl) = __x;                                 \
+  do {						\
+    UWtype __x;					\
+    __x = (al) - (bl);				\
+    if ((al) < (bl))				\
+      (sh) = (ah) - (bh) - 1;			\
+    else					\
+      (sh) = (ah) - (bh);			\
+    (sl) = __x;					\
   } while (0)
 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
 /* Do both product parts in assembly, since that gives better code with
@@ -946,7 +946,7 @@
    being 1 code byte smaller.  "31-__cbtmp" is a workaround, probably at the
    cost of one extra instruction.  Do this for "i386" too, since that means
    generic x86.  */
-#if ! defined (count_leading_zeros) && __GNUC__ < 3                     \
+#if ! defined (count_leading_zeros) && __GNUC__ < 3			\
   && (HAVE_HOST_CPU_i386						\
       || HAVE_HOST_CPU_i686						\
       || HAVE_HOST_CPU_pentiumpro					\
@@ -1156,7 +1156,7 @@
      || defined (__mc68030__) || defined (mc68030) \
      || defined (__mc68040__) || defined (mc68040) \
      || defined (__mc68060__) || defined (mc68060) \
-     || defined (__NeXT__))                        \
+     || defined (__NeXT__))			   \
   && ! defined (__mcpu32__)
 #define count_leading_zeros(count, x) \
   __asm__ ("bfffo %1{%b2:%b2},%0"					\
@@ -1309,37 +1309,37 @@
    the system vendor compilers.  (Is that vendor compilers with inline asm,
    or what?)  */
 
-#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)        \
+#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)	\
   && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
-      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
-	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
-      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
-	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
     else								\
-      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
 	     : "=r" (sh), "=&r" (sl)					\
 	     : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
   } while (0)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
-      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
-      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
-      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
-      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
     else								\
-      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
 	       : "=r" (sh), "=&r" (sl)					\
 	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
   } while (0)
@@ -1392,55 +1392,55 @@
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
-      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
-	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
-      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
-	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
     else								\
-      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
 	     : "=r" (sh), "=&r" (sl)					\
 	     : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
   } while (0)
 /* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
    This might seem strange, but gcc folds away the dead code late.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  do {									      \
-    if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) {	      \
-	if (__builtin_constant_p (ah) && (ah) == 0)			      \
-	  __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"		      \
+  do {									\
+    if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) {	\
+	if (__builtin_constant_p (ah) && (ah) == 0)			\
+	  __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
-	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	      \
-	  __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"		      \
+	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
+	  __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
-	else if (__builtin_constant_p (bh) && (bh) == 0)		      \
-	  __asm__ ("addic %1,%3,%4\n\taddme %0,%2"		      \
+	else if (__builtin_constant_p (bh) && (bh) == 0)		\
+	  __asm__ ("addic %1,%3,%4\n\taddme %0,%2"			\
 		   : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
-	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	      \
-	  __asm__ ("addic %1,%3,%4\n\taddze %0,%2"		      \
+	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	\
+	  __asm__ ("addic %1,%3,%4\n\taddze %0,%2"			\
 		   : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
-	else								      \
-	  __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"	      \
-		   : "=r" (sh), "=&r" (sl)				      \
-		   : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl));	      \
-      } else {								      \
-	if (__builtin_constant_p (ah) && (ah) == 0)			      \
-	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	      \
-		   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
-	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	      \
-	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	      \
-		   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
-	else if (__builtin_constant_p (bh) && (bh) == 0)		      \
-	  __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"	      \
-		   : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
-	else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	      \
-	  __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"	      \
-		   : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
-	else								      \
-	  __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	      \
-		   : "=r" (sh), "=&r" (sl)				      \
-		   : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		      \
-      }									      \
+	else								\
+	  __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"			\
+		   : "=r" (sh), "=&r" (sl)				\
+		   : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl));	\
+    } else {								\
+	if (__builtin_constant_p (ah) && (ah) == 0)			\
+	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
+		   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
+	else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	\
+	  __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
+		   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
+	else if (__builtin_constant_p (bh) && (bh) == 0)		\
+	  __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\