[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
Fri Feb 24 20:58:11 UTC 2017
details: /var/hg/gmp/rev/ef1792bf399e
changeset: 17299:ef1792bf399e
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Feb 24 20:32:18 2017 +0100
description:
(powerpc add_sssaaaa): Fix typo.
details: /var/hg/gmp/rev/4758daf902b2
changeset: 17300:4758daf902b2
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Feb 24 21:01:18 2017 +0100
description:
Fix typo.
details: /var/hg/gmp/rev/f169ebae5637
changeset: 17301:f169ebae5637
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Feb 24 21:01:25 2017 +0100
description:
Fix typo.
details: /var/hg/gmp/rev/613212bc409a
changeset: 17302:613212bc409a
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Feb 24 21:52:10 2017 +0100
description:
Define add_sssaaaa also for arm64.
details: /var/hg/gmp/rev/7e57d6fda760
changeset: 17303:7e57d6fda760
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Feb 24 21:53:58 2017 +0100
description:
Avoid branching on flags.
diffstat:
mpn/arm64/copyd.asm | 8 ++++----
mpn/arm64/copyi.asm | 8 ++++----
mpn/generic/div_qr_1n_pi2.c | 17 +++++++++++++----
mpn/generic/div_qr_1u_pi2.c | 15 ++++++++++++---
mpn/generic/div_qr_2.c | 21 ++++++++++++++-------
5 files changed, 47 insertions(+), 22 deletions(-)
diffs (181 lines):
diff -r fb46501c49fc -r 7e57d6fda760 mpn/arm64/copyd.asm
--- a/mpn/arm64/copyd.asm Thu Feb 23 14:33:10 2017 +0100
+++ b/mpn/arm64/copyd.asm Fri Feb 24 21:53:58 2017 +0100
@@ -58,9 +58,9 @@
L(al2): sub up, up, #16
ld1 {v26.2d}, [up]
- subs n, n, #6
+ sub n, n, #6
sub rp, rp, #16 C offset rp for loop
- b.lt L(end)
+ tbnz n, #63, L(end)
sub up, up, #16 C offset up for loop
mov x12, #-16
@@ -70,8 +70,8 @@
st1 {v26.2d}, [rp], x12
ld1 {v26.2d}, [up], x12
st1 {v22.2d}, [rp], x12
- subs n, n, #4
- b.ge L(top)
+ sub n, n, #4
+ tbz n, #63, L(top)
add up, up, #16 C undo up offset
diff -r fb46501c49fc -r 7e57d6fda760 mpn/arm64/copyi.asm
--- a/mpn/arm64/copyi.asm Thu Feb 23 14:33:10 2017 +0100
+++ b/mpn/arm64/copyi.asm Fri Feb 24 21:53:58 2017 +0100
@@ -53,16 +53,16 @@
st1 {v22.1d}, [rp], #8
L(al2): ld1 {v26.2d}, [up], #16
- subs n, n, #6
- b.lt L(end)
+ sub n, n, #6
+ tbnz n, #63, L(end)
ALIGN(16)
L(top): ld1 {v22.2d}, [up], #16
st1 {v26.2d}, [rp], #16
ld1 {v26.2d}, [up], #16
st1 {v22.2d}, [rp], #16
- subs n, n, #4
- b.ge L(top)
+ sub n, n, #4
+ tbz n, #63, L(top)
L(end): st1 {v26.2d}, [rp], #16
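
The copyd/copyi hunks above carry the "Avoid branching on flags" change: the loop counter is no longer updated with a flag-setting subtract followed by a conditional branch (subs + b.lt / b.ge); instead a plain sub is followed by tbnz/tbz on bit 63, the sign bit of the count. As long as the count cannot underflow past the most negative 64-bit value, "the new count went negative" and "bit 63 of the new count is set" are the same test. A small standalone C check of that equivalence (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  for (int64_t n = -8; n <= 8; n++)
    {
      int64_t r = n - 6;                               /* sub n, n, #6 */
      /* What b.lt effectively tests after a flag-setting subtract, for
	 counts where no signed overflow can occur.  */
      int via_flags = r < 0;
      /* What tbnz r, #63 tests: the sign bit, read without using flags.  */
      int via_bit63 = (int) (((uint64_t) r >> 63) & 1);
      assert (via_flags == via_bit63);
    }
  puts ("sign-bit test matches signed-less-than for small counts");
  return 0;
}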
diff -r fb46501c49fc -r 7e57d6fda760 mpn/generic/div_qr_1n_pi2.c
--- a/mpn/generic/div_qr_1n_pi2.c Thu Feb 23 14:33:10 2017 +0100
+++ b/mpn/generic/div_qr_1n_pi2.c Fri Feb 24 21:53:58 2017 +0100
@@ -1,4 +1,4 @@
-/* mpn_div_qr_1u_pi2.
+/* mpn_div_qr_1n_pi2.
THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
@@ -46,8 +46,10 @@
#include "longlong.h"
/* Define some longlong.h-style macros, but for wider operations.
- * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
- carry-out into an additional sum operand.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+ an additional sum operand.
+ * add_csaac accepts two addends and a carry in, and generates a sum and a
+ carry out. A little like a "full adder".
*/
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
@@ -69,12 +71,19 @@
"%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0))
+#endif
+
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
processor running in 32-bit mode, since the carry flag then gets the 32-bit
carry. */
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
: "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif
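
The comment block added here (and repeated in the div_qr_1u_pi2.c and div_qr_2.c hunks below) describes add_sssaaaa: a double-limb addition whose carry-out is accumulated into a third sum operand s2. The new aarch64 macro maps this onto adds/adcs for the two limbs and a final adc of s2 with xzr to fold in the carry. The powerpc fix makes the final addze read operand 3 (the input copy of s2) instead of operand 0, the write-only output, which only happens to contain s2 when the compiler allocates it to the same register as that input. For reference, a minimal portable C sketch of the operation, using unsigned long long as a stand-in limb type (illustration only, not the GMP macro):

#include <stdio.h>

/* (s1,s0) = (a1,a0) + (b1,b0), with the carry out of the high limb added
   into s2.  */
#define ref_add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)			\
  do {									\
    unsigned long long __s0 = (a0) + (b0);				\
    unsigned long long __c0 = __s0 < (a0);   /* carry out of low limb */ \
    unsigned long long __t1 = (a1) + (b1);				\
    unsigned long long __c1 = __t1 < (a1);				\
    unsigned long long __s1 = __t1 + __c0;				\
    __c1 += __s1 < __t1;		     /* carry out of high limb */ \
    (s2) += __c1;							\
    (s1) = __s1;							\
    (s0) = __s0;							\
  } while (0)

int
main (void)
{
  unsigned long long s2 = 5, s1, s0;

  /* Low limbs: ~0 + 1 wraps to 0 with carry; high limbs: ~0 + 0 + carry
     wraps again, so s2 is bumped from 5 to 6.  */
  ref_add_sssaaaa (s2, s1, s0, ~0ULL, ~0ULL, 0ULL, 1ULL);
  printf ("s2=%llu s1=%llu s0=%llu\n", s2, s1, s0);   /* s2=6 s1=0 s0=0 */
  return 0;
}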
diff -r fb46501c49fc -r 7e57d6fda760 mpn/generic/div_qr_1u_pi2.c
--- a/mpn/generic/div_qr_1u_pi2.c Thu Feb 23 14:33:10 2017 +0100
+++ b/mpn/generic/div_qr_1u_pi2.c Fri Feb 24 21:53:58 2017 +0100
@@ -46,8 +46,10 @@
#include "longlong.h"
/* Define some longlong.h-style macros, but for wider operations.
- * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
- carry-out into an additional sum operand.
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+ an additional sum operand.
+ * add_csaac accepts two addends and a carry in, and generates a sum and a
+ carry out. A little like a "full adder".
*/
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
@@ -69,12 +71,19 @@
"%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0))
+#endif
+
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
processor running in 32-bit mode, since the carry flag then gets the 32-bit
carry. */
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
: "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif
diff -r fb46501c49fc -r 7e57d6fda760 mpn/generic/div_qr_2.c
--- a/mpn/generic/div_qr_2.c Thu Feb 23 14:33:10 2017 +0100
+++ b/mpn/generic/div_qr_2.c Fri Feb 24 21:53:58 2017 +0100
@@ -49,10 +49,10 @@
#endif
/* Define some longlong.h-style macros, but for wider operations.
- * add_sssaaaa is like longlong.h's add_ssaaaa but the propagating
- carry-out into an additional sum operand.
- * add_csaac accepts two addends and a carry in, and generates a sum
- and a carry out. A little like a "full adder".
+ * add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
+ an additional sum operand.
+ * add_csaac accepts two addends and a carry in, and generates a sum and a
+ carry out. A little like a "full adder".
*/
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
@@ -84,12 +84,19 @@
"%1" ((UDItype)(a)), "g" ((UDItype)(b)))
#endif
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
+#endif
+
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
processor running in 32-bit mode, since the carry flag then gets the 32-bit
carry. */
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
: "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif
@@ -282,9 +289,9 @@
Return the most significant limb of the quotient.
Preconditions:
- 1. qp must either not overlap with the input operands at all, or
+ 1. qp must either not overlap with the other operands at all, or
qp >= np + 2 must hold true. (This means that it's possible to put
- the quotient in the high part of {np,nn}, right above the remainder.
+ the quotient in the high part of {np,nn}, right above the remainder.)
2. nn >= 2. */
mp_limb_t
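
The reworded precondition above permits the quotient to share storage with the numerator provided qp >= np + 2, so the quotient limbs land in the high part of {np,nn}. A hypothetical in-place wrapper sketching that usage, assuming the internal signature mp_limb_t mpn_div_qr_2 (qp, rp, np, nn, dp) and a 2-limb divisor {dp,2} with nonzero high limb (not code from the patch):

#include "gmp-impl.h"

static mp_limb_t
div_qr_2_quotient_in_place (mp_ptr np, mp_size_t nn, mp_srcptr dp)
{
  mp_limb_t rp[2];
  mp_limb_t qh;

  ASSERT (nn >= 2);				/* precondition 2 */
  /* qp = np + 2 satisfies precondition 1; the nn-2 quotient limbs are
     written to np[2..nn-1].  */
  qh = mpn_div_qr_2 (np + 2, rp, np, nn, dp);
  MPN_COPY (np, rp, 2);				/* remainder right below the quotient */
  return qh;					/* most significant quotient limb */
}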