[Gmp-commit] /var/hg/gmp: 9 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Mar 17 19:15:27 CET 2013
details: /var/hg/gmp/rev/462021f3e38a
changeset: 15585:462021f3e38a
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Mar 16 22:07:31 2013 +0100
description:
Tweak for better A9 performance.
details: /var/hg/gmp/rev/d6fc8e2406eb
changeset: 15586:d6fc8e2406eb
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 18:36:49 2013 +0100
description:
New arm64 files.
details: /var/hg/gmp/rev/59b4fc55a461
changeset: 15587:59b4fc55a461
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 18:37:25 2013 +0100
description:
Tweak.
details: /var/hg/gmp/rev/adbbed8b91e6
changeset: 15588:adbbed8b91e6
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 18:38:26 2013 +0100
description:
New arm/v6 file.
details: /var/hg/gmp/rev/02954eedeb24
changeset: 15589:02954eedeb24
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 18:39:53 2013 +0100
description:
Cleanup spacing.
details: /var/hg/gmp/rev/8af924d5f258
changeset: 15590:8af924d5f258
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 18:41:01 2013 +0100
description:
(mod): Use TMP_BALLOC in mu code.
details: /var/hg/gmp/rev/49b9bd77d5fc
changeset: 15591:49b9bd77d5fc
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 19:11:57 2013 +0100
description:
Use TMP_BALLOC*, but combine several areas.
details: /var/hg/gmp/rev/a26c755f7585
changeset: 15592:a26c755f7585
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 19:12:11 2013 +0100
description:
ChangeLog.
details: /var/hg/gmp/rev/e49a084a7ec6
changeset: 15593:e49a084a7ec6
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 17 19:14:51 2013 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 22 +
longlong.h | 212 ++++++------
mpn/arm/v6/addmul_2.asm | 51 +-
mpn/arm/v6/addmul_3.asm | 163 +++++++++
mpn/arm/v7a/cora15/copyd.asm | 5 +-
mpn/arm64/copyd.asm | 82 ++++
mpn/arm64/copyi.asm | 66 +++
mpn/generic/mul_fft.c | 52 +-
mpz/powm_ui.c | 4 +-
tests/cxx/Makefile.am | 8 +-
tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc | 38 ++
11 files changed, 547 insertions(+), 156 deletions(-)
diffs (truncated from 1066 to 300 lines):
diff -r 6dbd2044d1ce -r e49a084a7ec6 ChangeLog
--- a/ChangeLog Thu Mar 14 22:45:42 2013 +0100
+++ b/ChangeLog Sun Mar 17 19:14:51 2013 +0100
@@ -1,3 +1,25 @@
+2012-03-17 Marc Glisse <marc.glisse at inria.fr>
+
+ * tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc: New file.
+ * tests/cxx/Makefile.am: Add new file. Reorder the tests.
+
+2013-03-17 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/mul_fft.c: Use TMP_BALLOC*, but combine several areas.
+
+ * mpz/powm_ui.c (mod): Use TMP_BALLOC in mu code.
+
+ * mpn/arm/v6/addmul_3.asm: New file.
+
+ * mpn/arm/v7a/cora15/copyd.asm: Tweak.
+
+ * mpn/arm64/copyi.asm: New file.
+ * mpn/arm64/copyd.asm: New file.
+
+2013-03-16 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/arm/v6/addmul_2.asm: Tweak for better A9 performance.
+
2013-03-14 Torbjorn Granlund <tege at gmplib.org>
* mpn/ia64/cnd_aors_n.asm: New file.
diff -r 6dbd2044d1ce -r e49a084a7ec6 longlong.h
--- a/longlong.h Thu Mar 14 22:45:42 2013 +0100
+++ b/longlong.h Sun Mar 17 19:14:51 2013 +0100
@@ -1,7 +1,7 @@
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+2004, 2005, 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
@@ -139,30 +139,30 @@
or want. */
#ifdef _LONG_LONG_LIMB
-#define count_leading_zeros_gcc_clz(count,x) \
- do { \
- ASSERT ((x) != 0); \
- (count) = __builtin_clzll (x); \
+#define count_leading_zeros_gcc_clz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_clzll (x); \
} while (0)
#else
-#define count_leading_zeros_gcc_clz(count,x) \
- do { \
- ASSERT ((x) != 0); \
- (count) = __builtin_clzl (x); \
+#define count_leading_zeros_gcc_clz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_clzl (x); \
} while (0)
#endif
#ifdef _LONG_LONG_LIMB
-#define count_trailing_zeros_gcc_ctz(count,x) \
- do { \
- ASSERT ((x) != 0); \
- (count) = __builtin_ctzll (x); \
+#define count_trailing_zeros_gcc_ctz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_ctzll (x); \
} while (0)
#else
-#define count_trailing_zeros_gcc_ctz(count,x) \
- do { \
- ASSERT ((x) != 0); \
- (count) = __builtin_ctzl (x); \
+#define count_trailing_zeros_gcc_ctz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_ctzl (x); \
} while (0)
#endif
@@ -223,27 +223,27 @@
__asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
#endif /* clz/ctz using cix */
-#if ! defined (count_leading_zeros) \
+#if ! defined (count_leading_zeros) \
&& defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)
/* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0.
"$31" is written explicitly in the asm, since an "r" constraint won't
select reg 31. There seems no need to worry about "r31" syntax for cray,
- since gcc itself (pre-release 3.4) emits just $31 in various places. */
-#define ALPHA_CMPBGE_0(dst, src) \
+ since gcc itself (pre-release 3.4) emits just $31 in various places. */
+#define ALPHA_CMPBGE_0(dst, src) \
do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0)
/* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts
them, locating the highest non-zero byte. A second __clz_tab lookup
counts the leading zero bits in that byte, giving the result. */
-#define count_leading_zeros(count, x) \
- do { \
- UWtype __clz__b, __clz__c, __clz__x = (x); \
- ALPHA_CMPBGE_0 (__clz__b, __clz__x); /* zero bytes */ \
- __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F]; /* 8 to 1 byte */ \
- __clz__b = __clz__b * 8 - 7; /* 57 to 1 shift */ \
- __clz__x >>= __clz__b; \
- __clz__c = __clz_tab [__clz__x]; /* 8 to 1 bit */ \
- __clz__b = 65 - __clz__b; \
- (count) = __clz__b - __clz__c; \
+#define count_leading_zeros(count, x) \
+ do { \
+ UWtype __clz__b, __clz__c, __clz__x = (x); \
+ ALPHA_CMPBGE_0 (__clz__b, __clz__x); /* zero bytes */ \
+ __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F]; /* 8 to 1 byte */ \
+ __clz__b = __clz__b * 8 - 7; /* 57 to 1 shift */ \
+ __clz__x >>= __clz__b; \
+ __clz__c = __clz_tab [__clz__x]; /* 8 to 1 bit */ \
+ __clz__b = 65 - __clz__b; \
+ (count) = __clz__b - __clz__c; \
} while (0)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif /* clz using cmpbge */
@@ -299,14 +299,14 @@
code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
register, which takes an extra cycle. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
- do { \
- UWtype __x; \
- __x = (al) - (bl); \
- if ((al) < (bl)) \
- (sh) = (ah) - (bh) - 1; \
- else \
- (sh) = (ah) - (bh); \
- (sl) = __x; \
+ do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ if ((al) < (bl)) \
+ (sh) = (ah) - (bh) - 1; \
+ else \
+ (sh) = (ah) - (bh); \
+ (sl) = __x; \
} while (0)
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
/* Do both product parts in assembly, since that gives better code with
@@ -946,7 +946,7 @@
being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the
cost of one extra instruction. Do this for "i386" too, since that means
generic x86. */
-#if ! defined (count_leading_zeros) && __GNUC__ < 3 \
+#if ! defined (count_leading_zeros) && __GNUC__ < 3 \
&& (HAVE_HOST_CPU_i386 \
|| HAVE_HOST_CPU_i686 \
|| HAVE_HOST_CPU_pentiumpro \
@@ -1156,7 +1156,7 @@
|| defined (__mc68030__) || defined (mc68030) \
|| defined (__mc68040__) || defined (mc68040) \
|| defined (__mc68060__) || defined (mc68060) \
- || defined (__NeXT__)) \
+ || defined (__NeXT__)) \
&& ! defined (__mcpu32__)
#define count_leading_zeros(count, x) \
__asm__ ("bfffo %1{%b2:%b2},%0" \
@@ -1309,37 +1309,37 @@
the system vendor compilers. (Is that vendor compilers with inline asm,
or what?) */
-#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc) \
+#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc) \
&& W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+ __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
- __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+ __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
else \
- __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
+ __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else \
- __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
+ __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
} while (0)
@@ -1392,55 +1392,55 @@
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+ __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
- __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+ __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
else \
- __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
+ __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
} while (0)
/* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
This might seem strange, but gcc folds away the dead code late. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
- do { \
- if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) { \
- if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
+ do { \
+ if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
- else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
- __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
+ else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
+ __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
- else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
- else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
- __asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
+ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
+ __asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
- else \
- __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl)); \
- } else { \
- if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
- else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
- else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
- else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
- __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
- else \
- __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
- } \
+ else \
+ __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl)); \
+ } else { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
+ else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
More information about the gmp-commit mailing list