[Gmp-commit] /home/hgfiles/gmp: 7 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat May 29 13:30:31 CEST 2010
details: /home/hgfiles/gmp/rev/159e1af98b99
changeset: 13644:159e1af98b99
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 24 10:21:57 2010 +0200
description:
New file.
details: /home/hgfiles/gmp/rev/35ae141984e7
changeset: 13645:35ae141984e7
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 24 10:22:26 2010 +0200
description:
Adjust cutoff point.
details: /home/hgfiles/gmp/rev/84878e79d86a
changeset: 13646:84878e79d86a
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 24 10:23:45 2010 +0200
description:
Retune.
details: /home/hgfiles/gmp/rev/befa7ebe1ab7
changeset: 13647:befa7ebe1ab7
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 24 17:02:54 2010 +0200
description:
Updates for stable MUPI_DIV_QR_THRESHOLD measuring.
details: /home/hgfiles/gmp/rev/7fcbcaf306f1
changeset: 13648:7fcbcaf306f1
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon May 24 19:03:12 2010 +0200
description:
Retune, in particular JACOBI_BASE_METHOD.
details: /home/hgfiles/gmp/rev/2541bef9e382
changeset: 13649:2541bef9e382
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue May 25 10:07:20 2010 +0200
description:
Trim space.
details: /home/hgfiles/gmp/rev/c173a930ec87
changeset: 13650:c173a930ec87
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue May 25 10:07:38 2010 +0200
description:
Retune.
diffstat:
ChangeLog | 15 ++
gmp-impl.h | 2 +
mpn/generic/mu_div_qr.c | 9 +
mpn/ia64/gmp-mparam.h | 2 +-
mpn/pa64/gmp-mparam.h | 67 +++++-----
mpn/powerpc32/lshift.asm | 4 +-
mpn/powerpc32/lshiftc.asm | 158 ++++++++++++++++++++++++
mpn/powerpc32/rshift.asm | 4 +-
mpn/powerpc64/mode64/p6/gmp-mparam.h | 16 +-
mpn/x86/k7/gmp-mparam.h | 4 +-
mpn/x86/p6/mmx/gmp-mparam.h | 12 +-
mpn/x86_64/atom/gmp-mparam.h | 34 ++--
mpn/x86_64/core2/gmp-mparam.h | 8 +-
mpn/x86_64/corei/gmp-mparam.h | 187 ++++++++++------------------
mpn/x86_64/gmp-mparam.h | 2 +-
mpn/x86_64/pentium4/gmp-mparam.h | 227 ++++++++++++----------------------
tune/common.c | 2 +-
tune/speed.h | 2 +-
18 files changed, 406 insertions(+), 349 deletions(-)
diffs (truncated from 1097 to 300 lines):
diff -r 66c54a9eb379 -r c173a930ec87 ChangeLog
--- a/ChangeLog Sat May 22 12:57:59 2010 +0200
+++ b/ChangeLog Tue May 25 10:07:38 2010 +0200
@@ -1,3 +1,18 @@
+2010-05-25 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): Trim out space
+ for inverse, since that is passed in already.
+
+2010-05-24 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): New function.
+ * gmp-impl.h: Declare it.
+ * tune/common.c (speed_mpn_mupi_div_qr): Use new itch function.
+ * tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Pass parameters right
+ for new itch function.
+
+ * mpn/powerpc32/lshiftc.asm: New file.
+
2010-05-22 Torbjorn Granlund <tege at gmplib.org>
* tune/tuneup.c (tune_mod_1): Revert to version of 2010-05-06.
diff -r 66c54a9eb379 -r c173a930ec87 gmp-impl.h
--- a/gmp-impl.h Sat May 22 12:57:59 2010 +0200
+++ b/gmp-impl.h Tue May 25 10:07:38 2010 +0200
@@ -1208,6 +1208,8 @@
#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
#define mpn_mu_divappr_q __MPN(mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
diff -r 66c54a9eb379 -r c173a930ec87 mpn/generic/mu_div_qr.c
--- a/mpn/generic/mu_div_qr.c Sat May 22 12:57:59 2010 +0200
+++ b/mpn/generic/mu_div_qr.c Tue May 25 10:07:38 2010 +0200
@@ -394,3 +394,12 @@
return in + itch_local + itch_out;
}
+
+mp_size_t
+mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
+{
+ mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+ mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+
+ return itch_local + itch_out;
+}
diff -r 66c54a9eb379 -r c173a930ec87 mpn/ia64/gmp-mparam.h
--- a/mpn/ia64/gmp-mparam.h Sat May 22 12:57:59 2010 +0200
+++ b/mpn/ia64/gmp-mparam.h Tue May 25 10:07:38 2010 +0200
@@ -199,7 +199,7 @@
#define HGCD_THRESHOLD 119
#define GCD_DC_THRESHOLD 588
#define GCDEXT_DC_THRESHOLD 469
-#define JACOBI_BASE_METHOD 1
+#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 14
#define GET_STR_PRECOMPUTE_THRESHOLD 22
diff -r 66c54a9eb379 -r c173a930ec87 mpn/pa64/gmp-mparam.h
--- a/mpn/pa64/gmp-mparam.h Sat May 22 12:57:59 2010 +0200
+++ b/mpn/pa64/gmp-mparam.h Tue May 25 10:07:38 2010 +0200
@@ -28,35 +28,34 @@
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MUL_TOOM22_THRESHOLD 30
-#define MUL_TOOM33_THRESHOLD 113
-#define MUL_TOOM44_THRESHOLD 195
+#define MUL_TOOM22_THRESHOLD 31
+#define MUL_TOOM33_THRESHOLD 114
+#define MUL_TOOM44_THRESHOLD 179
#define MUL_TOOM6H_THRESHOLD 222
-#define MUL_TOOM8H_THRESHOLD 236
+#define MUL_TOOM8H_THRESHOLD 296
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 130
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 229
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 54
-#define SQR_BASECASE_THRESHOLD 4
-#define SQR_TOOM2_THRESHOLD 54
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 56
#define SQR_TOOM3_THRESHOLD 169
#define SQR_TOOM4_THRESHOLD 280
-#define SQR_TOOM6_THRESHOLD 280
-#define SQR_TOOM8_THRESHOLD 296
+#define SQR_TOOM6_THRESHOLD 282
+#define SQR_TOOM8_THRESHOLD 309
-#define MULMOD_BNM1_THRESHOLD 15
-#define SQRMOD_BNM1_THRESHOLD 17
+#define MULMOD_BNM1_THRESHOLD 16
+#define SQRMOD_BNM1_THRESHOLD 19
#define MUL_FFT_MODF_THRESHOLD 336 /* k = 5 */
#define MUL_FFT_TABLE3 \
@@ -197,34 +196,34 @@
#define SQR_FFT_THRESHOLD 1856
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 125
-#define MULLO_MUL_N_THRESHOLD 4658
+#define MULLO_DC_THRESHOLD 133
+#define MULLO_MUL_N_THRESHOLD 4292
-#define DC_DIV_QR_THRESHOLD 123
-#define DC_DIVAPPR_Q_THRESHOLD 372
-#define DC_BDIV_QR_THRESHOLD 142
-#define DC_BDIV_Q_THRESHOLD 309
+#define DC_DIV_QR_THRESHOLD 140
+#define DC_DIVAPPR_Q_THRESHOLD 422
+#define DC_BDIV_QR_THRESHOLD 150
+#define DC_BDIV_Q_THRESHOLD 351
-#define INV_MULMOD_BNM1_THRESHOLD 56
-#define INV_NEWTON_THRESHOLD 315
-#define INV_APPR_THRESHOLD 318
+#define INV_MULMOD_BNM1_THRESHOLD 60
+#define INV_NEWTON_THRESHOLD 348
+#define INV_APPR_THRESHOLD 324
#define BINV_NEWTON_THRESHOLD 363
-#define REDC_1_TO_REDC_N_THRESHOLD 102
+#define REDC_1_TO_REDC_N_THRESHOLD 101
-#define MU_DIV_QR_THRESHOLD 979
-#define MU_DIVAPPR_Q_THRESHOLD 998
-#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 942
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 1142
+#define MUPI_DIV_QR_THRESHOLD 110
+#define MU_BDIV_QR_THRESHOLD 889
#define MU_BDIV_Q_THRESHOLD 1334
#define MATRIX22_STRASSEN_THRESHOLD 9
-#define HGCD_THRESHOLD 240
-#define GCD_DC_THRESHOLD 689
-#define GCDEXT_DC_THRESHOLD 538
+#define HGCD_THRESHOLD 242
+#define GCD_DC_THRESHOLD 1341
+#define GCDEXT_DC_THRESHOLD 545
#define JACOBI_BASE_METHOD 2
#define GET_STR_DC_THRESHOLD 21
#define GET_STR_PRECOMPUTE_THRESHOLD 24
-#define SET_STR_DC_THRESHOLD 1951
-#define SET_STR_PRECOMPUTE_THRESHOLD 4034
+#define SET_STR_DC_THRESHOLD 2008
+#define SET_STR_PRECOMPUTE_THRESHOLD 4066
diff -r 66c54a9eb379 -r c173a930ec87 mpn/powerpc32/lshift.asm
--- a/mpn/powerpc32/lshift.asm Sat May 22 12:57:59 2010 +0200
+++ b/mpn/powerpc32/lshift.asm Tue May 25 10:07:38 2010 +0200
@@ -38,7 +38,7 @@
ASM_START()
PROLOGUE(mpn_lshift)
- cmpwi cr0, r5, 12 C more than 12 limbs?
+ cmpwi cr0, r5, 30 C more than 30 limbs?
slwi r0, r5, 2
add r4, r4, r0 C make r4 point at end of s1
add r7, r3, r0 C make r7 point at end of res
@@ -153,4 +153,4 @@
stw r12, -20(r7)
lmw r24, -32(r1) C restore registers
blr
-EPILOGUE(mpn_lshift)
+EPILOGUE()
diff -r 66c54a9eb379 -r c173a930ec87 mpn/powerpc32/lshiftc.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc32/lshiftc.asm Tue May 25 10:07:38 2010 +0200
@@ -0,0 +1,158 @@
+dnl PowerPC-32 mpn_lshiftc.
+
+dnl Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ slwi r0, r5, 2
+ add r4, r4, r0 C make r4 point at end of s1
+ add r7, r3, r0 C make r7 point at end of res
+ bgt L(BIG) C branch if more than 30 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32
+ lwzu r11, -4(r4) C load first s1 limb
+ srw r3, r11, r8 C compute function return value
+ bdz L(end1)
+
+L(oop): lwzu r10, -4(r4)
+ slw r9, r11, r6
+ srw r12, r10, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdz L(end2)
+ lwzu r11, -4(r4)
+ slw r9, r10, r6
+ srw r12, r11, r8
+ nor r9, r9, r12
+ stwu r9, -4(r7)
+ bdnz L(oop)
+
+L(end1):
+ slw r0, r11, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+L(end2):
+ slw r0, r10, r6
+ nor r0, r0, r0
+ stw r0, -4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwzu r9, -4(r4)
+ subfic r8, r6, 32
+ srw r3, r9, r8 C compute function return value
+ slw r0, r9, r6
+ addi r5, r5, -1
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, -4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ slw r12, r28, r6
+ srw r24, r28, r8
+ lwzu r28, -4(r4)
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): slw r12, r28, r6
+ srw r24, r28, r8
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1
+ mtctr r5
+ lwz r28, -4(r4)
More information about the gmp-commit
mailing list