[Gmp-commit] /var/hg/gmp: 8 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Feb 25 21:20:31 UTC 2017


details:   /var/hg/gmp/rev/aee2791eb8de
changeset: 17304:aee2791eb8de
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Feb 24 22:53:27 2017 +0100
description:
Amend last change.

details:   /var/hg/gmp/rev/bb4e841e80e8
changeset: 17305:bb4e841e80e8
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 00:00:30 2017 +0100
description:
(arm32/arm64 add_ssaaaa): Use "subs" for some immediates.
(arm32/arm64 sub_ddmmss): Use "adds" for some immediates.

details:   /var/hg/gmp/rev/b5b9560e419b
changeset: 17306:b5b9560e419b
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 22:08:02 2017 +0100
description:
Handle BMOD_1_TO_MOD_1_THRESHOLD=MP_SIZE_T_MAX.
Streamline non-reduction path.

details:   /var/hg/gmp/rev/9b2d7ae88821
changeset: 17307:9b2d7ae88821
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 22:08:25 2017 +0100
description:
Streamline small operands cases similarly to top-level code.

details:   /var/hg/gmp/rev/b4b48ab45a0d
changeset: 17308:b4b48ab45a0d
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 22:09:54 2017 +0100
description:
Comments.

details:   /var/hg/gmp/rev/1f2ed853d842
changeset: 17309:1f2ed853d842
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 22:11:36 2017 +0100
description:
Allow MP_SIZE_T_MAX for thresholds exported to config.m4.

details:   /var/hg/gmp/rev/09ed9bd13bed
changeset: 17310:09ed9bd13bed
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 22:13:12 2017 +0100
description:
Add a copyright year.

details:   /var/hg/gmp/rev/c5b84d3614b1
changeset: 17311:c5b84d3614b1
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Feb 25 22:20:27 2017 +0100
description:
ChangeLog

diffstat:

 ChangeLog                   |  26 ++++++++++++++++
 configure.ac                |   2 +-
 longlong.h                  |  48 ++++++++++++++++++++++++-------
 mpn/generic/div_qr_1n_pi2.c |   4 +-
 mpn/generic/div_qr_1u_pi2.c |   4 +-
 mpn/generic/div_qr_2.c      |   2 +-
 mpn/x86/k7/gcd_1.asm        |   4 +-
 mpn/x86_64/core2/gcd_1.asm  |  69 ++++++++++++++++++++++++--------------------
 mpn/x86_64/gcd_1.asm        |  57 ++++++++++++++++++++----------------
 9 files changed, 139 insertions(+), 77 deletions(-)

diffs (truncated from 411 to 300 lines):

diff -r 7e57d6fda760 -r c5b84d3614b1 ChangeLog
--- a/ChangeLog	Fri Feb 24 21:53:58 2017 +0100
+++ b/ChangeLog	Sat Feb 25 22:20:27 2017 +0100
@@ -1,3 +1,29 @@
+2017-02-25  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Allow MP_SIZE_T_MAX for thresholds exported to
+	config.m4.
+
+	* mpn/x86_64/gcd_1.asm: Handle BMOD_1_TO_MOD_1_THRESHOLD=MP_SIZE_T_MAX.
+	Streamline non-reduction path.
+	* mpn/x86_64/core2/gcd_1.asm: Streamline small operands cases similarly
+	to top-level code.
+
+2017-02-24  Torbjörn Granlund  <tg at gmplib.org>
+
+	* longlong.h (arm32/arm64 add_sssaaaa): Use "subs" for some immediates.
+	* longlong.h (arm32/arm64 sub_sssaaaa): Use "adds" for some immediates.
+
+	* mpn/arm64/copyi.asm: Avoid branching on flags.
+	* mpn/arm64/copyd.asm: Likewise.
+
+	* mpn/generic/div_qr_2.c (aarch64 add_sssaaaa): New.
+	* mpn/generic/div_qr_1n_pi2.c: Same.
+	* mpn/generic/div_qr_1u_pi2.c: Same.
+
+	* mpn/generic/div_qr_2.c (powerpc add_sssaaaa): Fix typo.
+	* mpn/generic/div_qr_1n_pi2.c: Same.
+	* mpn/generic/div_qr_1u_pi2.c: Same.
+
 2017-02-23 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* tests/devel/sqrtrem_1_2.c: New exhaustive test for sqrtrem_[12].
diff -r 7e57d6fda760 -r c5b84d3614b1 configure.ac
--- a/configure.ac	Fri Feb 24 21:53:58 2017 +0100
+++ b/configure.ac	Sat Feb 25 22:20:27 2017 +0100
@@ -3762,7 +3762,7 @@
 #
 if test -z "$fat_path"; then
   for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
-    value=`sed -n 's/^#define '$i'[ 	]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+    value=`sed -n 's/^#define '$i'[ 	]*\([0-9A-Z][0-9A-Z_]*\).*$/\1/p' $gmp_mparam_source`
     if test -n "$value"; then
       GMP_DEFINE_RAW(["define(<$i>,<$value>)"])
     fi
diff -r 7e57d6fda760 -r c5b84d3614b1 longlong.h
--- a/longlong.h	Fri Feb 24 21:53:58 2017 +0100
+++ b/longlong.h	Sat Feb 25 22:20:27 2017 +0100
@@ -1,6 +1,6 @@
 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
 
-Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2016 Free Software
+Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2017 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
@@ -440,9 +440,19 @@
 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
     && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
+  do {									\
+    if (__builtin_constant_p (bl) && (bl) < 0 && (-(USItype) (bl) < 0x1000)) \
+      __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
 	   : "=r" (sh), "=&r" (sl)					\
-	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+	       : "r" (ah), "rI" (bh),					\
+		 "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC);	\
+    else								\
+      __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"			\
+	   : "=r" (sh), "=&r" (sl)					\
+	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC);	\
+  } while (0)
+/* FIXME: Extend the immediate range for the low word by using both
+   ADDS and SUBS, since they set carry in the same way.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (al))					\
@@ -541,15 +551,31 @@
 /* FIXME: Extend the immediate range for the low word by using both
    ADDS and SUBS, since they set carry in the same way.  */
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
-	   : "=r" (sh), "=&r" (sl)					\
-	   : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
-	     "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
+  do {									\
+    if (__builtin_constant_p (bl) && (bl) < 0 && (-(UDItype) (bl) < 0x1000)) \
+      __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
+		 "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\
+    else								\
+      __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),		\
+		 "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\
+  } while (0)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
-	   : "=r,r" (sh), "=&r,&r" (sl)					\
-	   : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),		\
-	     "r,Z"   ((UDItype)(al)), "rI,r"  ((UDItype)(bl)) __CLOBBER_CC)
+  do {									\
+    if (__builtin_constant_p (bl) && (bl) < 0 && (-(UDItype) (bl) < 0x1000)) \
+      __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
+	       : "=r,r" (sh), "=&r,&r" (sl)				\
+	       : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),	\
+		 "r,Z"   ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\
+    else								\
+      __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"			\
+	       : "=r,r" (sh), "=&r,&r" (sl)				\
+	       : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),	\
+	     "r,Z"   ((UDItype)(al)), "rI,r"  ((UDItype)(bl)) __CLOBBER_CC);\
+  } while(0);
 #define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     UDItype __m0 = (m0), __m1 = (m1);					\
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/generic/div_qr_1n_pi2.c
--- a/mpn/generic/div_qr_1n_pi2.c	Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/generic/div_qr_1n_pi2.c	Sat Feb 25 22:20:27 2017 +0100
@@ -4,7 +4,7 @@
    ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2013 Free Software Foundation, Inc.
+Copyright 2013, 2017 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -75,7 +75,7 @@
 #define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
   __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
 	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0))
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
 #endif
 
 #if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/generic/div_qr_1u_pi2.c
--- a/mpn/generic/div_qr_1u_pi2.c	Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/generic/div_qr_1u_pi2.c	Sat Feb 25 22:20:27 2017 +0100
@@ -4,7 +4,7 @@
    ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2013 Free Software Foundation, Inc.
+Copyright 2013, 2017 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -75,7 +75,7 @@
 #define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
   __asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
 	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0))
+	   : "rZ" (s2), "%rZ"  (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
 #endif
 
 #if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/generic/div_qr_2.c
--- a/mpn/generic/div_qr_2.c	Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/generic/div_qr_2.c	Sat Feb 25 22:20:27 2017 +0100
@@ -8,7 +8,7 @@
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
 
-Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
+Copyright 1993-1996, 1999-2002, 2011, 2017 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/x86/k7/gcd_1.asm
--- a/mpn/x86/k7/gcd_1.asm	Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/x86/k7/gcd_1.asm	Sat Feb 25 22:20:27 2017 +0100
@@ -113,7 +113,7 @@
 	cmp	$1, n
 	jnz	L(reduce_nby1)
 
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+C Both U and V are single limbs, reduce with div if u0 >> v0.
 	mov	(up), %ecx
 	mov	%ecx, %eax
 	shr	$DIV_THRES_LOG2, %ecx
@@ -145,7 +145,7 @@
 
 L(called):
 ifdef(`PIC_WITH_EBX',`dnl
-	add	$16, %esp	C deallocate params
+	add	$16, %esp		C deallocate params
 	pop	%ebx
 ',`
 	add	$12, %esp		C deallocate params
diff -r 7e57d6fda760 -r c5b84d3614b1 mpn/x86_64/core2/gcd_1.asm
--- a/mpn/x86_64/core2/gcd_1.asm	Fri Feb 24 21:53:58 2017 +0100
+++ b/mpn/x86_64/core2/gcd_1.asm	Sat Feb 25 22:20:27 2017 +0100
@@ -3,7 +3,8 @@
 dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
 dnl  Granlund.
 
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -80,16 +81,14 @@
 	ALIGN(16)
 PROLOGUE(mpn_gcd_1)
 	FUNC_ENTRY(3)
-	mov	(up), %rax	C U low limb
-	or	v0, %rax
-	bsf	%rax, %rax	C min(ctz(u0),ctz(v0))
+	mov	(up), %rax		C U low limb
+	or	v0, %rax		C x | y
+	bsf	%rax, %rax		C min(ctz(u0),ctz(v0))
 
 	bsf	v0, %rcx
 	shr	R8(%rcx), v0
 
-	push	%rax		C preserve common twos over call
-	push	v0		C preserve v0 argument over call
-	sub	$STACK_ALLOC, %rsp	C maintain ABI required rsp alignment
+	push	%rax			C preserve common twos over call
 
 	cmp	$1, n
 	jnz	L(reduce_nby1)
@@ -100,46 +99,52 @@
 	shr	$BMOD_THRES_LOG2, %r8
 	cmp	%r8, v0
 	ja	L(reduced)
-	jmp	L(bmod)
 
-L(reduce_nby1):
-	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
-	jl	L(bmod)
-IFDOS(`	mov	%rdx, %r8	')
-IFDOS(`	mov	%rsi, %rdx	')
-IFDOS(`	mov	%rdi, %rcx	')
-	ASSERT(nz, `test $15, %rsp')
-	CALL(	mpn_mod_1)
-	jmp	L(reduced)
 L(bmod):
+	push	v0			C preserve v0 argument over call
+	sub	$STACK_ALLOC, %rsp	C maintain ABI required rsp alignment
 IFDOS(`	mov	%rdx, %r8	')
 IFDOS(`	mov	%rsi, %rdx	')
 IFDOS(`	mov	%rdi, %rcx	')
 	ASSERT(nz, `test $15, %rsp')
 	CALL(	mpn_modexact_1_odd)
-L(reduced):
 
+L(called):
 	add	$STACK_ALLOC, %rsp
-	pop	%rdx
+	pop	v0
 
+L(reduced):
 	bsf	%rax, %rcx
-C	test	%rax, %rax	C FIXME: does this lower latency?
+C	test	%rax, %rax		C FIXME: does this lower latency?
 	jnz	L(mid)
 	jmp	L(end)
 
-	ALIGN(16)		C               K10   BD    C2    NHM   SBR
-L(top):	cmovc	%r10, %rax	C if x-y < 0    0,3   0,3   0,6   0,5   0,5
-	cmovc	%r9, %rdx	C use x,y-x     0,3   0,3   2,8   1,7   1,7
-L(mid):	shr	R8(%rcx), %rax	C               1,7   1,6   2,8   2,8   2,8
-	mov	%rdx, %r10	C               1     1     4     3     3
-	sub	%rax, %r10	C               2     2     5     4     4
-	bsf	%r10, %rcx	C               3     3     6     5     5
-	mov	%rax, %r9	C               2     2     3     3     4
-	sub	%rdx, %rax	C               2     2     4     3     4
-	jnz	L(top)		C
+L(reduce_nby1):
+	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
+	jl	L(bmod)
 
-L(end):	pop	%rcx
-	mov	%rdx, %rax
+	push	v0			C preserve v0 argument over call
+	sub	$STACK_ALLOC, %rsp	C maintain ABI required rsp alignment
+IFDOS(`	mov	%rdx, %r8	')
+IFDOS(`	mov	%rsi, %rdx	')
+IFDOS(`	mov	%rdi, %rcx	')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_mod_1)
+	jmp	L(called)
+
+	ALIGN(16)			C              K10  BD   C2   NHM  SBR
+L(top):	cmovc	%r10, %rax		C if x-y < 0   0,3  0,3  0,6  0,5  0,5
+	cmovc	%r9, v0			C use x,y-x    0,3  0,3  2,8  1,7  1,7
+L(mid):	shr	R8(%rcx), %rax		C              1,7  1,6  2,8  2,8  2,8
+	mov	v0, %r10		C              1    1    4    3    3
+	sub	%rax, %r10		C              2    2    5    4    4
+	bsf	%r10, %rcx		C              3    3    6    5    5
+	mov	%rax, %r9		C              2    2    3    3    4
+	sub	v0, %rax		C              2    2    4    3    4
+	jnz	L(top)			C
+


More information about the gmp-commit mailing list