[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Jul 3 09:29:52 UTC 2018
details: /var/hg/gmp/rev/dc255abaed65
changeset: 17646:dc255abaed65
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jul 03 09:45:46 2018 +0200
description:
Add missing ABI_SUPPORT decls.
details: /var/hg/gmp/rev/6cf9795edf70
changeset: 17647:6cf9795edf70
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jul 03 09:48:34 2018 +0200
description:
Provide support for the DOS64 ABI.
details: /var/hg/gmp/rev/23284e7d6b18
changeset: 17648:23284e7d6b18
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jul 03 11:10:58 2018 +0200
description:
Retune.
details: /var/hg/gmp/rev/1ad8cc22b714
changeset: 17649:1ad8cc22b714
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Jul 03 11:16:06 2018 +0200
description:
Remove cnt = 1 special code.
diffstat:
mpn/x86_64/fastsse/lshift.asm | 6 +-
mpn/x86_64/fastsse/lshiftc.asm | 6 +-
mpn/x86_64/lshift.asm | 81 +---------------------------
mpn/x86_64/pentium4/gmp-mparam.h | 106 +++++++++++++++++++-----------------
mpn/x86_64/silvermont/hamdist.asm | 3 +
mpn/x86_64/silvermont/popcount.asm | 3 +
mpn/x86_64/zen/mul_1.asm | 3 +
7 files changed, 77 insertions(+), 131 deletions(-)
diffs (truncated from 377 to 300 lines):
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/fastsse/lshift.asm
--- a/mpn/x86_64/fastsse/lshift.asm Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/fastsse/lshift.asm Tue Jul 03 11:16:06 2018 +0200
@@ -2,7 +2,7 @@
dnl Contributed to the GNU project by David Harvey and Torbjorn Granlund.
-dnl Copyright 2010-2012 Free Software Foundation, Inc.
+dnl Copyright 2010-2012, 2018 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -69,6 +69,7 @@
TEXT
ALIGN(64)
PROLOGUE(mpn_lshift)
+ FUNC_ENTRY(4)
movd R32(%rcx), %xmm4
mov $64, R32(%rax)
sub R32(%rcx), R32(%rax)
@@ -124,6 +125,7 @@
psrlq %xmm5, %xmm0
por %xmm1, %xmm0
movdqa %xmm0, (rp)
+ FUNC_EXIT()
ret
C *****************************************************************************
@@ -149,6 +151,7 @@
psrlq %xmm5, %xmm0
por %xmm1, %xmm0
movdqa %xmm0, (rp)
+ FUNC_EXIT()
ret
C *****************************************************************************
@@ -165,5 +168,6 @@
L(end8):movq (ap), %xmm0
psllq %xmm4, %xmm0
movq %xmm0, (rp)
+ FUNC_EXIT()
ret
EPILOGUE()
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/fastsse/lshiftc.asm
--- a/mpn/x86_64/fastsse/lshiftc.asm Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/fastsse/lshiftc.asm Tue Jul 03 11:16:06 2018 +0200
@@ -2,7 +2,7 @@
dnl Contributed to the GNU project by David Harvey and Torbjorn Granlund.
-dnl Copyright 2010-2012 Free Software Foundation, Inc.
+dnl Copyright 2010-2012, 2018 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -70,6 +70,7 @@
TEXT
ALIGN(16)
PROLOGUE(mpn_lshiftc)
+ FUNC_ENTRY(4)
movd R32(%rcx), %xmm4
mov $64, R32(%rax)
sub R32(%rcx), R32(%rax)
@@ -130,6 +131,7 @@
por %xmm1, %xmm0
pxor %xmm2, %xmm0
movdqa %xmm0, (rp)
+ FUNC_EXIT()
ret
C *****************************************************************************
@@ -157,6 +159,7 @@
por %xmm1, %xmm0
pxor %xmm2, %xmm0
movdqa %xmm0, (rp)
+ FUNC_EXIT()
ret
C *****************************************************************************
@@ -175,5 +178,6 @@
psllq %xmm4, %xmm0
pxor %xmm2, %xmm0
movq %xmm0, (rp)
+ FUNC_EXIT()
ret
EPILOGUE()
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/lshift.asm
--- a/mpn/x86_64/lshift.asm Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/lshift.asm Tue Jul 03 11:16:06 2018 +0200
@@ -1,7 +1,7 @@
dnl AMD64 mpn_lshift -- mpn left shift.
-dnl Copyright 2003, 2005, 2007, 2009, 2011, 2012 Free Software Foundation,
-dnl Inc.
+dnl Copyright 2003, 2005, 2007, 2009, 2011, 2012, 2018 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -56,82 +56,7 @@
ALIGN(32)
PROLOGUE(mpn_lshift)
FUNC_ENTRY(4)
- cmp $1, R8(%rcx)
- jne L(gen)
-
-C For cnt=1 we want to work from lowest limb towards higher limbs.
-C Check for bad overlap (up=rp is OK!) up=rp+1..rp+n-1 is bad.
-C FIXME: this could surely be done more cleverly.
-
- mov rp, %rax
- sub up, %rax
- je L(fwd) C rp = up
- shr $3, %rax
- cmp n, %rax
- jb L(gen)
-
-L(fwd): mov R32(n), R32(%rax)
- shr $2, n
- je L(e1)
- and $3, R32(%rax)
-
- ALIGN(8)
- nop
- nop
-L(t1): mov (up), %r8
- mov 8(up), %r9
- mov 16(up), %r10
- mov 24(up), %r11
- lea 32(up), up
- adc %r8, %r8
- mov %r8, (rp)
- adc %r9, %r9
- mov %r9, 8(rp)
- adc %r10, %r10
- mov %r10, 16(rp)
- adc %r11, %r11
- mov %r11, 24(rp)
- lea 32(rp), rp
- dec n
- jne L(t1)
-
- inc R32(%rax)
- dec R32(%rax)
- jne L(n00)
- adc R32(%rax), R32(%rax)
- FUNC_EXIT()
- ret
-L(e1): test R32(%rax), R32(%rax) C clear cy
-L(n00): mov (up), %r8
- dec R32(%rax)
- jne L(n01)
- adc %r8, %r8
- mov %r8, (rp)
-L(ret): adc R32(%rax), R32(%rax)
- FUNC_EXIT()
- ret
-L(n01): dec R32(%rax)
- mov 8(up), %r9
- jne L(n10)
- adc %r8, %r8
- adc %r9, %r9
- mov %r8, (rp)
- mov %r9, 8(rp)
- adc R32(%rax), R32(%rax)
- FUNC_EXIT()
- ret
-L(n10): mov 16(up), %r10
- adc %r8, %r8
- adc %r9, %r9
- adc %r10, %r10
- mov %r8, (rp)
- mov %r9, 8(rp)
- mov %r10, 16(rp)
- adc $-1, R32(%rax)
- FUNC_EXIT()
- ret
-
-L(gen): neg R32(%rcx) C put rsh count in cl
+ neg R32(%rcx) C put rsh count in cl
mov -8(up,n,8), %rax
shr R8(%rcx), %rax C function return value
diff -r a5f07b87662a -r 1ad8cc22b714 mpn/x86_64/pentium4/gmp-mparam.h
--- a/mpn/x86_64/pentium4/gmp-mparam.h Sun Jul 01 20:27:59 2018 +0200
+++ b/mpn/x86_64/pentium4/gmp-mparam.h Tue Jul 03 11:16:06 2018 +0200
@@ -1,6 +1,7 @@
/* Pentium 4-64 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2010, 2014 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000-2010, 2014, 2018 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
@@ -39,48 +40,48 @@
#undef HAVE_NATIVE_mpn_rsblsh_n
/* 3400 MHz Pentium4 Nocona / 1024 Kibyte cache */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
+/* FFT tuning limit = 25000000, FFT tables not re-measured since long */
+/* Generated by tuneup.c, 2018-07-02, gcc 6.4 */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 14
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 32
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
#define USE_PREINV_DIVREM_1 1 /* native */
#define DIV_QR_1_NORM_THRESHOLD 1
#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD 12
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD 20
-#define DIV_1_VS_MUL_1_PERCENT 222
+#define DIV_1_VS_MUL_1_PERCENT 228
#define MUL_TOOM22_THRESHOLD 12
-#define MUL_TOOM33_THRESHOLD 41
-#define MUL_TOOM44_THRESHOLD 112
-#define MUL_TOOM6H_THRESHOLD 157
-#define MUL_TOOM8H_THRESHOLD 236
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 180
+#define MUL_TOOM6H_THRESHOLD 0 /* always */
+#define MUL_TOOM8H_THRESHOLD 430
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 74
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 78
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 87
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 84
#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106
-#define SQR_BASECASE_THRESHOLD 5
-#define SQR_TOOM2_THRESHOLD 30
-#define SQR_TOOM3_THRESHOLD 53
-#define SQR_TOOM4_THRESHOLD 154
-#define SQR_TOOM6_THRESHOLD 197
-#define SQR_TOOM8_THRESHOLD 296
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 113
+#define SQR_TOOM4_THRESHOLD 214
+#define SQR_TOOM6_THRESHOLD 238
+#define SQR_TOOM8_THRESHOLD 430
-#define MULMID_TOOM42_THRESHOLD 22
+#define MULMID_TOOM42_THRESHOLD 24
#define MULMOD_BNM1_THRESHOLD 9
-#define SQRMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 11
#define MUL_FFT_MODF_THRESHOLD 252 /* k = 5 */
#define MUL_FFT_TABLE3 \
@@ -192,42 +193,45 @@
#define SQR_FFT_THRESHOLD 1984
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 33
-#define MULLO_MUL_N_THRESHOLD 4392
+#define MULLO_DC_THRESHOLD 45
+#define MULLO_MUL_N_THRESHOLD 6253
+#define SQRLO_BASECASE_THRESHOLD 6
+#define SQRLO_DC_THRESHOLD 56
+#define SQRLO_SQR_THRESHOLD 4940
-#define DC_DIV_QR_THRESHOLD 35
-#define DC_DIVAPPR_Q_THRESHOLD 68
-#define DC_BDIV_QR_THRESHOLD 32
-#define DC_BDIV_Q_THRESHOLD 56
+#define DC_DIV_QR_THRESHOLD 42
+#define DC_DIVAPPR_Q_THRESHOLD 95
+#define DC_BDIV_QR_THRESHOLD 40
+#define DC_BDIV_Q_THRESHOLD 60
#define INV_MULMOD_BNM1_THRESHOLD 22
-#define INV_NEWTON_THRESHOLD 195
-#define INV_APPR_THRESHOLD 116
+#define INV_NEWTON_THRESHOLD 149
+#define INV_APPR_THRESHOLD 117
-#define BINV_NEWTON_THRESHOLD 199
-#define REDC_1_TO_REDC_2_THRESHOLD 4
-#define REDC_2_TO_REDC_N_THRESHOLD 42
+#define BINV_NEWTON_THRESHOLD 204
+#define REDC_1_TO_REDC_2_THRESHOLD 24
+#define REDC_2_TO_REDC_N_THRESHOLD 55
More information about the gmp-commit mailing list