[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Jul 3 09:29:52 UTC 2018


details:   /var/hg/gmp/rev/dc255abaed65
changeset: 17646:dc255abaed65
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jul 03 09:45:46 2018 +0200
description:
Add missing ABI_SUPPORT decls.

details:   /var/hg/gmp/rev/6cf9795edf70
changeset: 17647:6cf9795edf70
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jul 03 09:48:34 2018 +0200
description:
Provide support for the DOS64 ABI.

details:   /var/hg/gmp/rev/23284e7d6b18
changeset: 17648:23284e7d6b18
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jul 03 11:10:58 2018 +0200
description:
Retune.

details:   /var/hg/gmp/rev/1ad8cc22b714
changeset: 17649:1ad8cc22b714
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Tue Jul 03 11:16:06 2018 +0200
description:
Remove cnt = 1 special code.

diffstat:

 mpn/x86_64/fastsse/lshift.asm      |    6 +-
 mpn/x86_64/fastsse/lshiftc.asm     |    6 +-
 mpn/x86_64/lshift.asm              |   81 +---------------------------
 mpn/x86_64/pentium4/gmp-mparam.h   |  106 +++++++++++++++++++-----------------
 mpn/x86_64/silvermont/hamdist.asm  |    3 +
 mpn/x86_64/silvermont/popcount.asm |    3 +
 mpn/x86_64/zen/mul_1.asm           |    3 +
 7 files changed, 77 insertions(+), 131 deletions(-)

diffs (truncated from 377 to 300 lines):

diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/fastsse/lshift.asm
--- a/mpn/x86_64/fastsse/lshift.asm	Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/fastsse/lshift.asm	Tue Jul 03 11:16:06 2018 +0200
@@ -2,7 +2,7 @@
 
 dnl  Contributed to the GNU project by David Harvey and Torbjorn Granlund.
 
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2010-2012, 2018 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -69,6 +69,7 @@
 	TEXT
 	ALIGN(64)
 PROLOGUE(mpn_lshift)
+	FUNC_ENTRY(4)
 	movd	R32(%rcx), %xmm4
 	mov	$64, R32(%rax)
 	sub	R32(%rcx), R32(%rax)
@@ -124,6 +125,7 @@
 	psrlq	%xmm5, %xmm0
 	por	%xmm1, %xmm0
 	movdqa	%xmm0, (rp)
+	FUNC_EXIT()
 	ret
 C *****************************************************************************
 
@@ -149,6 +151,7 @@
 	psrlq	%xmm5, %xmm0
 	por	%xmm1, %xmm0
 	movdqa	%xmm0, (rp)
+	FUNC_EXIT()
 	ret
 C *****************************************************************************
 
@@ -165,5 +168,6 @@
 L(end8):movq	(ap), %xmm0
 	psllq	%xmm4, %xmm0
 	movq	%xmm0, (rp)
+	FUNC_EXIT()
 	ret
 EPILOGUE()
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/fastsse/lshiftc.asm
--- a/mpn/x86_64/fastsse/lshiftc.asm	Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/fastsse/lshiftc.asm	Tue Jul 03 11:16:06 2018 +0200
@@ -2,7 +2,7 @@
 
 dnl  Contributed to the GNU project by David Harvey and Torbjorn Granlund.
 
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2010-2012, 2018 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -70,6 +70,7 @@
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_lshiftc)
+	FUNC_ENTRY(4)
 	movd	R32(%rcx), %xmm4
 	mov	$64, R32(%rax)
 	sub	R32(%rcx), R32(%rax)
@@ -130,6 +131,7 @@
 	por	%xmm1, %xmm0
 	pxor	%xmm2, %xmm0
 	movdqa	%xmm0, (rp)
+	FUNC_EXIT()
 	ret
 C *****************************************************************************
 
@@ -157,6 +159,7 @@
 	por	%xmm1, %xmm0
 	pxor	%xmm2, %xmm0
 	movdqa	%xmm0, (rp)
+	FUNC_EXIT()
 	ret
 C *****************************************************************************
 
@@ -175,5 +178,6 @@
 	psllq	%xmm4, %xmm0
 	pxor	%xmm2, %xmm0
 	movq	%xmm0, (rp)
+	FUNC_EXIT()
 	ret
 EPILOGUE()
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/lshift.asm
--- a/mpn/x86_64/lshift.asm	Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/lshift.asm	Tue Jul 03 11:16:06 2018 +0200
@@ -1,7 +1,7 @@
 dnl  AMD64 mpn_lshift -- mpn left shift.
 
-dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012, 2018 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -56,82 +56,7 @@
 	ALIGN(32)
 PROLOGUE(mpn_lshift)
 	FUNC_ENTRY(4)
-	cmp	$1, R8(%rcx)
-	jne	L(gen)
-
-C For cnt=1 we want to work from lowest limb towards higher limbs.
-C Check for bad overlap (up=rp is OK!) up=rp+1..rp+n-1 is bad.
-C FIXME: this could surely be done more cleverly.
-
-	mov    rp, %rax
-	sub    up, %rax
-	je     L(fwd)			C rp = up
-	shr    $3, %rax
-	cmp    n, %rax
-	jb     L(gen)
-
-L(fwd):	mov	R32(n), R32(%rax)
-	shr	$2, n
-	je	L(e1)
-	and	$3, R32(%rax)
-
-	ALIGN(8)
-	nop
-	nop
-L(t1):	mov	(up), %r8
-	mov	8(up), %r9
-	mov	16(up), %r10
-	mov	24(up), %r11
-	lea	32(up), up
-	adc	%r8, %r8
-	mov	%r8, (rp)
-	adc	%r9, %r9
-	mov	%r9, 8(rp)
-	adc	%r10, %r10
-	mov	%r10, 16(rp)
-	adc	%r11, %r11
-	mov	%r11, 24(rp)
-	lea	32(rp), rp
-	dec	n
-	jne	L(t1)
-
-	inc	R32(%rax)
-	dec	R32(%rax)
-	jne	L(n00)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
-	ret
-L(e1):	test	R32(%rax), R32(%rax)	C clear cy
-L(n00):	mov	(up), %r8
-	dec	R32(%rax)
-	jne	L(n01)
-	adc	%r8, %r8
-	mov	%r8, (rp)
-L(ret):	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
-	ret
-L(n01):	dec	R32(%rax)
-	mov	8(up), %r9
-	jne	L(n10)
-	adc	%r8, %r8
-	adc	%r9, %r9
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
-	ret
-L(n10):	mov	16(up), %r10
-	adc	%r8, %r8
-	adc	%r9, %r9
-	adc	%r10, %r10
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	adc	$-1, R32(%rax)
-	FUNC_EXIT()
-	ret
-
-L(gen):	neg	R32(%rcx)		C put rsh count in cl
+	neg	R32(%rcx)		C put rsh count in cl
 	mov	-8(up,n,8), %rax
 	shr	R8(%rcx), %rax		C function return value
 
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/pentium4/gmp-mparam.h
--- a/mpn/x86_64/pentium4/gmp-mparam.h	Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/pentium4/gmp-mparam.h	Tue Jul 03 11:16:06 2018 +0200
@@ -1,6 +1,7 @@
 /* Pentium 4-64 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2010, 2014 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000-2010, 2014, 2018 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
@@ -39,48 +40,48 @@
 #undef HAVE_NATIVE_mpn_rsblsh_n
 
 /* 3400 MHz Pentium4 Nocona / 1024 Kibyte cache */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
+/* FFT tuning limit = 25000000, FFT tables not re-measured since long */
+/* Generated by tuneup.c, 2018-07-02, gcc 6.4 */
 
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
 #define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        14
 #define MOD_1_2_TO_MOD_1_4_THRESHOLD        32
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
 #define USE_PREINV_DIVREM_1                  1  /* native */
 #define DIV_QR_1_NORM_THRESHOLD              1
 #define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD              12
 #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
 #define BMOD_1_TO_MOD_1_THRESHOLD           20
 
-#define DIV_1_VS_MUL_1_PERCENT             222
+#define DIV_1_VS_MUL_1_PERCENT             228
 
 #define MUL_TOOM22_THRESHOLD                12
-#define MUL_TOOM33_THRESHOLD                41
-#define MUL_TOOM44_THRESHOLD               112
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               236
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               180
+#define MUL_TOOM6H_THRESHOLD                 0  /* always */
+#define MUL_TOOM8H_THRESHOLD               430
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      74
 #define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      78
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      87
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      84
 #define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
 
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 53
-#define SQR_TOOM4_THRESHOLD                154
-#define SQR_TOOM6_THRESHOLD                197
-#define SQR_TOOM8_THRESHOLD                296
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 18
+#define SQR_TOOM3_THRESHOLD                113
+#define SQR_TOOM4_THRESHOLD                214
+#define SQR_TOOM6_THRESHOLD                238
+#define SQR_TOOM8_THRESHOLD                430
 
-#define MULMID_TOOM42_THRESHOLD             22
+#define MULMID_TOOM42_THRESHOLD             24
 
 #define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               11
 
 #define MUL_FFT_MODF_THRESHOLD             252  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
@@ -192,42 +193,45 @@
 #define SQR_FFT_THRESHOLD                 1984
 
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             4392
+#define MULLO_DC_THRESHOLD                  45
+#define MULLO_MUL_N_THRESHOLD             6253
+#define SQRLO_BASECASE_THRESHOLD             6
+#define SQRLO_DC_THRESHOLD                  56
+#define SQRLO_SQR_THRESHOLD               4940
 
-#define DC_DIV_QR_THRESHOLD                 35
-#define DC_DIVAPPR_Q_THRESHOLD              68
-#define DC_BDIV_QR_THRESHOLD                32
-#define DC_BDIV_Q_THRESHOLD                 56
+#define DC_DIV_QR_THRESHOLD                 42
+#define DC_DIVAPPR_Q_THRESHOLD              95
+#define DC_BDIV_QR_THRESHOLD                40
+#define DC_BDIV_Q_THRESHOLD                 60
 
 #define INV_MULMOD_BNM1_THRESHOLD           22
-#define INV_NEWTON_THRESHOLD               195
-#define INV_APPR_THRESHOLD                 116
+#define INV_NEWTON_THRESHOLD               149
+#define INV_APPR_THRESHOLD                 117
 
-#define BINV_NEWTON_THRESHOLD              199
-#define REDC_1_TO_REDC_2_THRESHOLD           4
-#define REDC_2_TO_REDC_N_THRESHOLD          42
+#define BINV_NEWTON_THRESHOLD              204
+#define REDC_1_TO_REDC_2_THRESHOLD          24
+#define REDC_2_TO_REDC_N_THRESHOLD          55
 


More information about the gmp-commit mailing list