[Gmp-commit] /home/hgfiles/gmp: 7 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat May 29 13:30:31 CEST 2010


details:   /home/hgfiles/gmp/rev/159e1af98b99
changeset: 13644:159e1af98b99
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 24 10:21:57 2010 +0200
description:
New file.

details:   /home/hgfiles/gmp/rev/35ae141984e7
changeset: 13645:35ae141984e7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 24 10:22:26 2010 +0200
description:
Adjust cutoff point.

details:   /home/hgfiles/gmp/rev/84878e79d86a
changeset: 13646:84878e79d86a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 24 10:23:45 2010 +0200
description:
Retune.

details:   /home/hgfiles/gmp/rev/befa7ebe1ab7
changeset: 13647:befa7ebe1ab7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 24 17:02:54 2010 +0200
description:
Updates for stable MUPI_DIV_QR_THRESHOLD measuring.

details:   /home/hgfiles/gmp/rev/7fcbcaf306f1
changeset: 13648:7fcbcaf306f1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 24 19:03:12 2010 +0200
description:
Retune, in particular JACOBI_BASE_METHOD.

details:   /home/hgfiles/gmp/rev/2541bef9e382
changeset: 13649:2541bef9e382
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue May 25 10:07:20 2010 +0200
description:
Trim space.

details:   /home/hgfiles/gmp/rev/c173a930ec87
changeset: 13650:c173a930ec87
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue May 25 10:07:38 2010 +0200
description:
Retune.

diffstat:

 ChangeLog                            |   15 ++
 gmp-impl.h                           |    2 +
 mpn/generic/mu_div_qr.c              |    9 +
 mpn/ia64/gmp-mparam.h                |    2 +-
 mpn/pa64/gmp-mparam.h                |   67 +++++-----
 mpn/powerpc32/lshift.asm             |    4 +-
 mpn/powerpc32/lshiftc.asm            |  158 ++++++++++++++++++++++++
 mpn/powerpc32/rshift.asm             |    4 +-
 mpn/powerpc64/mode64/p6/gmp-mparam.h |   16 +-
 mpn/x86/k7/gmp-mparam.h              |    4 +-
 mpn/x86/p6/mmx/gmp-mparam.h          |   12 +-
 mpn/x86_64/atom/gmp-mparam.h         |   34 ++--
 mpn/x86_64/core2/gmp-mparam.h        |    8 +-
 mpn/x86_64/corei/gmp-mparam.h        |  187 ++++++++++------------------
 mpn/x86_64/gmp-mparam.h              |    2 +-
 mpn/x86_64/pentium4/gmp-mparam.h     |  227 ++++++++++++----------------------
 tune/common.c                        |    2 +-
 tune/speed.h                         |    2 +-
 18 files changed, 406 insertions(+), 349 deletions(-)

diffs (truncated from 1097 to 300 lines):

diff -r 66c54a9eb379 -r c173a930ec87 ChangeLog
--- a/ChangeLog	Sat May 22 12:57:59 2010 +0200
+++ b/ChangeLog	Tue May 25 10:07:38 2010 +0200
@@ -1,3 +1,18 @@
+2010-05-25  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): Trim out space
+	for inverse, since that is passed in already.
+
+2010-05-24  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): New function.
+	* gmp-impl.h: Declare it.
+	* tune/common.c (speed_mpn_mupi_div_qr): Use new itch function.
+	* tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Pass parameters right
+	for new itch function.
+
+	* mpn/powerpc32/lshiftc.asm: New file.
+
 2010-05-22  Torbjorn Granlund  <tege at gmplib.org>
 
 	* tune/tuneup.c (tune_mod_1): Revert to version of 2010-05-06.
diff -r 66c54a9eb379 -r c173a930ec87 gmp-impl.h
--- a/gmp-impl.h	Sat May 22 12:57:59 2010 +0200
+++ b/gmp-impl.h	Tue May 25 10:07:38 2010 +0200
@@ -1208,6 +1208,8 @@
 
 #define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+#define   mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
+__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
 
 #define   mpn_mu_divappr_q __MPN(mu_divappr_q)
 __GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
diff -r 66c54a9eb379 -r c173a930ec87 mpn/generic/mu_div_qr.c
--- a/mpn/generic/mu_div_qr.c	Sat May 22 12:57:59 2010 +0200
+++ b/mpn/generic/mu_div_qr.c	Tue May 25 10:07:38 2010 +0200
@@ -394,3 +394,12 @@
 
   return in + itch_local + itch_out;
 }
+
+mp_size_t
+mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
+{
+  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
+  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+
+  return itch_local + itch_out;
+}
diff -r 66c54a9eb379 -r c173a930ec87 mpn/ia64/gmp-mparam.h
--- a/mpn/ia64/gmp-mparam.h	Sat May 22 12:57:59 2010 +0200
+++ b/mpn/ia64/gmp-mparam.h	Tue May 25 10:07:38 2010 +0200
@@ -199,7 +199,7 @@
 #define HGCD_THRESHOLD                     119
 #define GCD_DC_THRESHOLD                   588
 #define GCDEXT_DC_THRESHOLD                469
-#define JACOBI_BASE_METHOD                   1
+#define JACOBI_BASE_METHOD                   4
 
 #define GET_STR_DC_THRESHOLD                14
 #define GET_STR_PRECOMPUTE_THRESHOLD        22
diff -r 66c54a9eb379 -r c173a930ec87 mpn/pa64/gmp-mparam.h
--- a/mpn/pa64/gmp-mparam.h	Sat May 22 12:57:59 2010 +0200
+++ b/mpn/pa64/gmp-mparam.h	Tue May 25 10:07:38 2010 +0200
@@ -28,35 +28,34 @@
 #define MOD_1_NORM_THRESHOLD                 0  /* always */
 #define MOD_1_UNNORM_THRESHOLD               0  /* always */
 #define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
 #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
 #define DIVEXACT_1_THRESHOLD                 0  /* always */
 #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
 
-#define MUL_TOOM22_THRESHOLD                30
-#define MUL_TOOM33_THRESHOLD               113
-#define MUL_TOOM44_THRESHOLD               195
+#define MUL_TOOM22_THRESHOLD                31
+#define MUL_TOOM33_THRESHOLD               114
+#define MUL_TOOM44_THRESHOLD               179
 #define MUL_TOOM6H_THRESHOLD               222
-#define MUL_TOOM8H_THRESHOLD               236
+#define MUL_TOOM8H_THRESHOLD               296
 
 #define MUL_TOOM32_TO_TOOM43_THRESHOLD     130
 #define MUL_TOOM32_TO_TOOM53_THRESHOLD     229
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     129
 #define MUL_TOOM42_TO_TOOM63_THRESHOLD      54
 
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 54
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 56
 #define SQR_TOOM3_THRESHOLD                169
 #define SQR_TOOM4_THRESHOLD                280
-#define SQR_TOOM6_THRESHOLD                280
-#define SQR_TOOM8_THRESHOLD                296
+#define SQR_TOOM6_THRESHOLD                282
+#define SQR_TOOM8_THRESHOLD                309
 
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               17
+#define MULMOD_BNM1_THRESHOLD               16
+#define SQRMOD_BNM1_THRESHOLD               19
 
 #define MUL_FFT_MODF_THRESHOLD             336  /* k = 5 */
 #define MUL_FFT_TABLE3                                      \
@@ -197,34 +196,34 @@
 #define SQR_FFT_THRESHOLD                 1856
 
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 125
-#define MULLO_MUL_N_THRESHOLD             4658
+#define MULLO_DC_THRESHOLD                 133
+#define MULLO_MUL_N_THRESHOLD             4292
 
-#define DC_DIV_QR_THRESHOLD                123
-#define DC_DIVAPPR_Q_THRESHOLD             372
-#define DC_BDIV_QR_THRESHOLD               142
-#define DC_BDIV_Q_THRESHOLD                309
+#define DC_DIV_QR_THRESHOLD                140
+#define DC_DIVAPPR_Q_THRESHOLD             422
+#define DC_BDIV_QR_THRESHOLD               150
+#define DC_BDIV_Q_THRESHOLD                351
 
-#define INV_MULMOD_BNM1_THRESHOLD           56
-#define INV_NEWTON_THRESHOLD               315
-#define INV_APPR_THRESHOLD                 318
+#define INV_MULMOD_BNM1_THRESHOLD           60
+#define INV_NEWTON_THRESHOLD               348
+#define INV_APPR_THRESHOLD                 324
 
 #define BINV_NEWTON_THRESHOLD              363
-#define REDC_1_TO_REDC_N_THRESHOLD         102
+#define REDC_1_TO_REDC_N_THRESHOLD         101
 
-#define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD             998
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD               942
+#define MU_DIV_QR_THRESHOLD                998
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD              110
+#define MU_BDIV_QR_THRESHOLD               889
 #define MU_BDIV_Q_THRESHOLD               1334
 
 #define MATRIX22_STRASSEN_THRESHOLD          9
-#define HGCD_THRESHOLD                     240
-#define GCD_DC_THRESHOLD                   689
-#define GCDEXT_DC_THRESHOLD                538
+#define HGCD_THRESHOLD                     242
+#define GCD_DC_THRESHOLD                  1341
+#define GCDEXT_DC_THRESHOLD                545
 #define JACOBI_BASE_METHOD                   2
 
 #define GET_STR_DC_THRESHOLD                21
 #define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD              1951
-#define SET_STR_PRECOMPUTE_THRESHOLD      4034
+#define SET_STR_DC_THRESHOLD              2008
+#define SET_STR_PRECOMPUTE_THRESHOLD      4066
diff -r 66c54a9eb379 -r c173a930ec87 mpn/powerpc32/lshift.asm
--- a/mpn/powerpc32/lshift.asm	Sat May 22 12:57:59 2010 +0200
+++ b/mpn/powerpc32/lshift.asm	Tue May 25 10:07:38 2010 +0200
@@ -38,7 +38,7 @@
 
 ASM_START()
 PROLOGUE(mpn_lshift)
-	cmpwi	cr0, r5, 12	C more than 12 limbs?
+	cmpwi	cr0, r5, 30	C more than 30 limbs?
 	slwi	r0, r5, 2
 	add	r4, r4, r0	C make r4 point at end of s1
 	add	r7, r3, r0	C make r7 point at end of res
@@ -153,4 +153,4 @@
 	stw	r12, -20(r7)
 	lmw	r24, -32(r1)	C restore registers
 	blr
-EPILOGUE(mpn_lshift)
+EPILOGUE()
diff -r 66c54a9eb379 -r c173a930ec87 mpn/powerpc32/lshiftc.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc32/lshiftc.asm	Tue May 25 10:07:38 2010 +0200
@@ -0,0 +1,158 @@
+dnl  PowerPC-32 mpn_lshiftc.
+
+dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3.0
+C 75x (G3):        3.0
+C 7400,7410 (G4):  3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970:   2.5
+C power5:          2.5
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C n	r5
+C cnt	r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+	cmpwi	cr0, r5, 30	C more than 30 limbs?
+	slwi	r0, r5, 2
+	add	r4, r4, r0	C make r4 point at end of s1
+	add	r7, r3, r0	C make r7 point at end of res
+	bgt	L(BIG)		C branch if more than 30 limbs
+
+	mtctr	r5		C copy size into CTR
+	subfic	r8, r6, 32
+	lwzu	r11, -4(r4)	C load first s1 limb
+	srw	r3, r11, r8	C compute function return value
+	bdz	L(end1)
+
+L(oop):	lwzu	r10, -4(r4)
+	slw	r9, r11, r6
+	srw	r12, r10, r8
+	nor	r9, r9, r12
+	stwu	r9, -4(r7)
+	bdz	L(end2)
+	lwzu	r11, -4(r4)
+	slw	r9, r10, r6
+	srw	r12, r11, r8
+	nor	r9, r9, r12
+	stwu	r9, -4(r7)
+	bdnz	L(oop)
+
+L(end1):
+	slw	r0, r11, r6
+	nor	r0, r0, r0
+	stw	r0, -4(r7)
+	blr
+L(end2):
+	slw	r0, r10, r6
+	nor	r0, r0, r0
+	stw	r0, -4(r7)
+	blr
+
+L(BIG):
+	stmw	r24, -32(r1)	C save registers we are supposed to preserve
+	lwzu	r9, -4(r4)
+	subfic	r8, r6, 32
+	srw	r3, r9, r8	C compute function return value
+	slw	r0, r9, r6
+	addi	r5, r5, -1
+
+	andi.	r10, r5, 3	C count for spill loop
+	beq	L(e)
+	mtctr	r10
+	lwzu	r28, -4(r4)
+	bdz	L(xe0)
+
+L(loop0):
+	slw	r12, r28, r6
+	srw	r24, r28, r8
+	lwzu	r28, -4(r4)
+	nor	r24, r0, r24
+	stwu	r24, -4(r7)
+	mr	r0, r12
+	bdnz	L(loop0)	C taken at most once!
+
+L(xe0):	slw	r12, r28, r6
+	srw	r24, r28, r8
+	nor	r24, r0, r24
+	stwu	r24, -4(r7)
+	mr	r0, r12
+
+L(e):	srwi	r5, r5, 2	C count for unrolled loop
+	addi	r5, r5, -1
+	mtctr	r5
+	lwz	r28, -4(r4)


More information about the gmp-commit mailing list