[Gmp-commit] /var/hg/gmp: 8 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Feb 25 21:20:31 UTC 2017
details: /var/hg/gmp/rev/aee2791eb8de
changeset: 17304:aee2791eb8de
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Feb 24 22:53:27 2017 +0100
description:
Amend last change.
details: /var/hg/gmp/rev/bb4e841e80e8
changeset: 17305:bb4e841e80e8
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 00:00:30 2017 +0100
description:
(arm32/arm64 add_sssaaaa): Use "subs" for some immediates.
(arm32/arm64 sub_sssaaaa): Use "adds" for some immediates.
details: /var/hg/gmp/rev/b5b9560e419b
changeset: 17306:b5b9560e419b
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 22:08:02 2017 +0100
description:
Handle BMOD_1_TO_MOD_1_THRESHOLD=MP_SIZE_T_MAX.
Streamline non-reduction path.
details: /var/hg/gmp/rev/9b2d7ae88821
changeset: 17307:9b2d7ae88821
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 22:08:25 2017 +0100
description:
Streamline small operands cases similarly to top-level code.
details: /var/hg/gmp/rev/b4b48ab45a0d
changeset: 17308:b4b48ab45a0d
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 22:09:54 2017 +0100
description:
Comments.
details: /var/hg/gmp/rev/1f2ed853d842
changeset: 17309:1f2ed853d842
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 22:11:36 2017 +0100
description:
Allow MP_SIZE_T_MAX for thresholds exported to config.m4.
details: /var/hg/gmp/rev/09ed9bd13bed
changeset: 17310:09ed9bd13bed
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 22:13:12 2017 +0100
description:
Add a copyright year.
details: /var/hg/gmp/rev/c5b84d3614b1
changeset: 17311:c5b84d3614b1
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Feb 25 22:20:27 2017 +0100
description:
ChangeLog
diffstat:
ChangeLog | 26 ++++++++++++++++
configure.ac | 2 +-
longlong.h | 48 ++++++++++++++++++++++++-------
mpn/generic/div_qr_1n_pi2.c | 4 +-
mpn/generic/div_qr_1u_pi2.c | 4 +-
mpn/generic/div_qr_2.c | 2 +-
mpn/x86/k7/gcd_1.asm | 4 +-
mpn/x86_64/core2/gcd_1.asm | 69 ++++++++++++++++++++++++--------------------
mpn/x86_64/gcd_1.asm | 57 ++++++++++++++++++++----------------
9 files changed, 139 insertions(+), 77 deletions(-)
diffs (truncated from 411 to 300 lines):
diff -r 7e57d6fda760 -r c5b84d3614b1 ChangeLog
--- a/ChangeLog Fri Feb 24 21:53:58 2017 +0100
+++ b/ChangeLog Sat Feb 25 22:20:27 2017 +0100
@@ -1,3 +1,29 @@
+2017-02-25 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac: Allow MP_SIZE_T_MAX for thresholds exported to
+ config.m4.
+
+ * mpn/x86_64/gcd_1.asm: Handle BMOD_1_TO_MOD_1_THRESHOLD=MP_SIZE_T_MAX.
+ Streamline non-reduction path.
+ * mpn/x86_64/core2/gcd_1.asm: Streamline small operands cases similarly
+ to top-level code.
+
+2017-02-24 Torbjörn Granlund <tg at gmplib.org>
+
+ * longlong.h (arm32/arm64 add_sssaaaa): Use "subs" for some immediates.
+ * longlong.h (arm32/arm64 sub_sssaaaa): Use "adds" for some immediates.
+
+ * mpn/arm64/copyi.asm: Avoid branching on flags.
+ * mpn/arm64/copyd.asm: Likewise.
+
+ * mpn/generic/div_qr_2.c (aarch64 add_sssaaaa): New.
+ * mpn/generic/div_qr_1n_pi2.c: Same.
+ * mpn/generic/div_qr_1u_pi2.c: Same.
+
+ * mpn/generic/div_qr_2.c (powerpc add_sssaaaa): Fix typo.
+ * mpn/generic/div_qr_1n_pi2.c: Same.
+ * mpn/generic/div_qr_1u_pi2.c: Same.
+
2017-02-23 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tests/devel/sqrtrem_1_2.c: New exhaustive test for sqrtrem_[12].
diff -r 7e57d6fda760 -r c5b84d3614b1 configure.ac
--- a/configure.ac Fri Feb 24 21:53:58 2017 +0100
+++ b/configure.ac Sat Feb 25 22:20:27 2017 +0100
@@ -3762,7 +3762,7 @@
#
if test -z "$fat_path"; then
for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
- value=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+ value=`sed -n 's/^#define '$i'[ ]*\([0-9A-Z][0-9A-Z_]*\).*$/\1/p' $gmp_mparam_source`
if test -n "$value"; then
GMP_DEFINE_RAW(["define(<$i>,<$value>)"])
fi
diff -r 7e57d6fda760 -r c5b84d3614b1 longlong.h
--- a/longlong.h Fri Feb 24 21:53:58 2017 +0100
+++ b/longlong.h Sat Feb 25 22:20:27 2017 +0100
@@ -1,6 +1,6 @@
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
-Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2016 Free Software
+Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2017 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
@@ -440,9 +440,19 @@
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
&& W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
- __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ do { \
+ if (__builtin_constant_p (bl) && (bl) < 0 && (-(USItype) (bl) < 0x1000)) \
+ __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+ : "r" (ah), "rI" (bh), \
+ "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC); \
+ else \
+ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC); \
+ } while (0)
+/* FIXME: Extend the immediate range for the low word by using both
+ ADDS and SUBS, since they set carry in the same way. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (al)) \
@@ -541,15 +551,31 @@
/* FIXME: Extend the immediate range for the low word by using both
ADDS and SUBS, since they set carry in the same way. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
- __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
- : "=r" (sh), "=&r" (sl) \
- : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
+ do { \
+ if (__builtin_constant_p (bl) && (bl) < 0 && (-(UDItype) (bl) < 0x1000)) \
+ __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
+ "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\
+ else \
+ __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\
+ } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
- __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
- : "=r,r" (sh), "=&r,&r" (sl) \
- : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
- "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC)
+ do { \
+ if (__builtin_constant_p (bl) && (bl) < 0 && (-(UDItype) (bl) < 0x1000)) \
+ __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
+ : "=r,r" (sh), "=&r,&r" (sl) \
+ : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
+ "r,Z" ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\
+ else \
+ __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
+ : "=r,r" (sh), "=&r,&r" (sl) \
+ : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
+ "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC);\
+ } while(0);
#define umul_ppmm(ph, pl, m0, m1) \
do { \
UDItype __m0 = (m0), __m1 = (m1); \
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/generic/div_qr_1n_pi2.c
--- a/mpn/generic/div_qr_1n_pi2.c Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/generic/div_qr_1n_pi2.c Sat Feb 25 22:20:27 2017 +0100
@@ -4,7 +4,7 @@
ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2013 Free Software Foundation, Inc.
+Copyright 2013, 2017 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -75,7 +75,7 @@
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
__asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0))
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
#endif
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/generic/div_qr_1u_pi2.c
--- a/mpn/generic/div_qr_1u_pi2.c Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/generic/div_qr_1u_pi2.c Sat Feb 25 22:20:27 2017 +0100
@@ -4,7 +4,7 @@
ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2013 Free Software Foundation, Inc.
+Copyright 2013, 2017 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -75,7 +75,7 @@
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
__asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0))
+ : "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
#endif
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/generic/div_qr_2.c
--- a/mpn/generic/div_qr_2.c Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/generic/div_qr_2.c Sat Feb 25 22:20:27 2017 +0100
@@ -8,7 +8,7 @@
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
+Copyright 1993-1996, 1999-2002, 2011, 2017 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/x86/k7/gcd_1.asm
--- a/mpn/x86/k7/gcd_1.asm Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/x86/k7/gcd_1.asm Sat Feb 25 22:20:27 2017 +0100
@@ -113,7 +113,7 @@
cmp $1, n
jnz L(reduce_nby1)
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+C Both U and V are single limbs, reduce with div if u0 >> v0.
mov (up), %ecx
mov %ecx, %eax
shr $DIV_THRES_LOG2, %ecx
@@ -145,7 +145,7 @@
L(called):
ifdef(`PIC_WITH_EBX',`dnl
- add $16, %esp C deallocate params
+ add $16, %esp C deallocate params
pop %ebx
',`
add $12, %esp C deallocate params
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/x86_64/core2/gcd_1.asm
--- a/mpn/x86_64/core2/gcd_1.asm Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/x86_64/core2/gcd_1.asm Sat Feb 25 22:20:27 2017 +0100
@@ -3,7 +3,8 @@
dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn
dnl Granlund.
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -80,16 +81,14 @@
ALIGN(16)
PROLOGUE(mpn_gcd_1)
FUNC_ENTRY(3)
- mov (up), %rax C U low limb
- or v0, %rax
- bsf %rax, %rax C min(ctz(u0),ctz(v0))
+ mov (up), %rax C U low limb
+ or v0, %rax C x | y
+ bsf %rax, %rax C min(ctz(u0),ctz(v0))
bsf v0, %rcx
shr R8(%rcx), v0
- push %rax C preserve common twos over call
- push v0 C preserve v0 argument over call
- sub $STACK_ALLOC, %rsp C maintain ABI required rsp alignment
+ push %rax C preserve common twos over call
cmp $1, n
jnz L(reduce_nby1)
@@ -100,46 +99,52 @@
shr $BMOD_THRES_LOG2, %r8
cmp %r8, v0
ja L(reduced)
- jmp L(bmod)
-L(reduce_nby1):
- cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
- jl L(bmod)
-IFDOS(` mov %rdx, %r8 ')
-IFDOS(` mov %rsi, %rdx ')
-IFDOS(` mov %rdi, %rcx ')
- ASSERT(nz, `test $15, %rsp')
- CALL( mpn_mod_1)
- jmp L(reduced)
L(bmod):
+ push v0 C preserve v0 argument over call
+ sub $STACK_ALLOC, %rsp C maintain ABI required rsp alignment
IFDOS(` mov %rdx, %r8 ')
IFDOS(` mov %rsi, %rdx ')
IFDOS(` mov %rdi, %rcx ')
ASSERT(nz, `test $15, %rsp')
CALL( mpn_modexact_1_odd)
-L(reduced):
+L(called):
add $STACK_ALLOC, %rsp
- pop %rdx
+ pop v0
+L(reduced):
bsf %rax, %rcx
-C test %rax, %rax C FIXME: does this lower latency?
+C test %rax, %rax C FIXME: does this lower latency?
jnz L(mid)
jmp L(end)
- ALIGN(16) C K10 BD C2 NHM SBR
-L(top): cmovc %r10, %rax C if x-y < 0 0,3 0,3 0,6 0,5 0,5
- cmovc %r9, %rdx C use x,y-x 0,3 0,3 2,8 1,7 1,7
-L(mid): shr R8(%rcx), %rax C 1,7 1,6 2,8 2,8 2,8
- mov %rdx, %r10 C 1 1 4 3 3
- sub %rax, %r10 C 2 2 5 4 4
- bsf %r10, %rcx C 3 3 6 5 5
- mov %rax, %r9 C 2 2 3 3 4
- sub %rdx, %rax C 2 2 4 3 4
- jnz L(top) C
+L(reduce_nby1):
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
+ jl L(bmod)
-L(end): pop %rcx
- mov %rdx, %rax
+ push v0 C preserve v0 argument over call
+ sub $STACK_ALLOC, %rsp C maintain ABI required rsp alignment
+IFDOS(` mov %rdx, %r8 ')
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
+ ASSERT(nz, `test $15, %rsp')
+ CALL( mpn_mod_1)
+ jmp L(called)
+
+ ALIGN(16) C K10 BD C2 NHM SBR
+L(top): cmovc %r10, %rax C if x-y < 0 0,3 0,3 0,6 0,5 0,5
+ cmovc %r9, v0 C use x,y-x 0,3 0,3 2,8 1,7 1,7
+L(mid): shr R8(%rcx), %rax C 1,7 1,6 2,8 2,8 2,8
+ mov v0, %r10 C 1 1 4 3 3
+ sub %rax, %r10 C 2 2 5 4 4
+ bsf %r10, %rcx C 3 3 6 5 5
+ mov %rax, %r9 C 2 2 3 3 4
+ sub v0, %rax C 2 2 4 3 4
+ jnz L(top) C
+
More information about the gmp-commit
mailing list