[Gmp-commit] /home/hgfiles/gmp: 9 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Mar 12 09:06:13 CET 2010


details:   /home/hgfiles/gmp/rev/3ae8c8fd84f1
changeset: 13482:3ae8c8fd84f1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Mar 06 12:37:51 2010 +0100
description:
(TESTS_REPS): Fix typo.

details:   /home/hgfiles/gmp/rev/2ca60933dcfb
changeset: 13483:2ca60933dcfb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Mar 06 13:16:17 2010 +0100
description:
Update x86_64 mod_1_* code.

details:   /home/hgfiles/gmp/rev/85b9f18acff7
changeset: 13484:85b9f18acff7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 07 20:30:01 2010 +0100
description:
(routine): Force r argument for several mod_1 calls.

details:   /home/hgfiles/gmp/rev/7f4795eff9b2
changeset: 13485:7f4795eff9b2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 10 01:52:28 2010 +0100
description:
Natively support ia64 mpn_mod_34lsub1.

details:   /home/hgfiles/gmp/rev/c6acc4c0fd57
changeset: 13486:c6acc4c0fd57
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 10 01:54:25 2010 +0100
description:
Natively support alpha/ev6 mpn_mod_1s_4p.

details:   /home/hgfiles/gmp/rev/d9718b727678
changeset: 13487:d9718b727678
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 10 01:57:14 2010 +0100
description:
Clean up cycle count tables.

details:   /home/hgfiles/gmp/rev/deedad3e677e
changeset: 13488:deedad3e677e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 10 10:47:23 2010 +0100
description:
Misc x86_64 mpn_divrem_1 improvements.

details:   /home/hgfiles/gmp/rev/4ea7826d6dc5
changeset: 13489:4ea7826d6dc5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Mar 10 10:49:35 2010 +0100
description:
Trivial merge.

details:   /home/hgfiles/gmp/rev/c10243298cf0
changeset: 13490:c10243298cf0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 11 22:32:33 2010 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                          |   55 ++++++
 doc/gmp.texi                       |   12 +-
 mpn/alpha/ev6/mod_1_4.asm          |  336 +++++++++++++++++++++++++++++++++++++
 mpn/generic/gcd_lehmer.c           |   12 +-
 mpn/generic/jacbase.c              |   66 +++++++-
 mpn/ia64/mod_34lsub1.asm           |  222 ++++++++++++++++++++++++
 mpn/x86_64/addaddmul_1msb0.asm     |   10 +-
 mpn/x86_64/addmul_2.asm            |   12 +-
 mpn/x86_64/aorrlsh1_n.asm          |   13 +-
 mpn/x86_64/aorrlsh2_n.asm          |   13 +-
 mpn/x86_64/aorrlsh_n.asm           |   11 +-
 mpn/x86_64/aors_n.asm              |   13 +-
 mpn/x86_64/aorsmul_1.asm           |   13 +-
 mpn/x86_64/atom/aors_n.asm         |   12 +-
 mpn/x86_64/bdiv_dbm1c.asm          |   13 +-
 mpn/x86_64/bdiv_q_1.asm            |   13 +-
 mpn/x86_64/com.asm                 |   11 +-
 mpn/x86_64/copyd.asm               |   12 +-
 mpn/x86_64/copyi.asm               |   11 +-
 mpn/x86_64/core2/aors_n.asm        |   12 +-
 mpn/x86_64/core2/aorslsh1_n.asm    |   11 +-
 mpn/x86_64/core2/aorsmul_1.asm     |   12 +-
 mpn/x86_64/core2/divrem_1.asm      |   67 +++---
 mpn/x86_64/core2/lshift.asm        |   12 +-
 mpn/x86_64/core2/lshiftc.asm       |   12 +-
 mpn/x86_64/core2/rshift.asm        |   12 +-
 mpn/x86_64/dive_1.asm              |   13 +-
 mpn/x86_64/divrem_1.asm            |   98 +++++-----
 mpn/x86_64/divrem_2.asm            |   11 +-
 mpn/x86_64/invert_limb.asm         |   13 +-
 mpn/x86_64/logops_n.asm            |   11 +-
 mpn/x86_64/lshift.asm              |   12 +-
 mpn/x86_64/lshiftc.asm             |   12 +-
 mpn/x86_64/lshsub_n.asm            |   11 +-
 mpn/x86_64/mod_1_1.asm             |  181 +++++++++++++++++++
 mpn/x86_64/mod_1_2.asm             |  215 +++++++++++++++++++++++
 mpn/x86_64/mod_1_4.asm             |   13 +-
 mpn/x86_64/mod_34lsub1.asm         |   12 +-
 mpn/x86_64/mode1o.asm              |   13 +-
 mpn/x86_64/mul_1.asm               |   13 +-
 mpn/x86_64/mul_2.asm               |   12 +-
 mpn/x86_64/mul_basecase.asm        |   11 +-
 mpn/x86_64/pentium4/aors_n.asm     |   11 +-
 mpn/x86_64/pentium4/aorslsh1_n.asm |   11 +-
 mpn/x86_64/pentium4/lshift.asm     |   12 +-
 mpn/x86_64/pentium4/rshift.asm     |   12 +-
 mpn/x86_64/popham.asm              |   11 +-
 mpn/x86_64/redc_1.asm              |   12 +-
 mpn/x86_64/rsh1aors_n.asm          |   11 +-
 mpn/x86_64/rshift.asm              |   12 +-
 mpn/x86_64/sublsh1_n.asm           |   14 +-
 tests/mpz/t-bin.c                  |   71 +++++++-
 tests/mpz/t-jac.c                  |   62 ++++--
 tests/tests.h                      |    2 +-
 tune/Makefile.am                   |    2 +-
 tune/common.c                      |    5 +
 tune/jacbase4.c                    |   27 ++
 tune/speed.c                       |   13 +-
 tune/speed.h                       |    2 +
 tune/tuneup.c                      |   16 +-
 60 files changed, 1624 insertions(+), 331 deletions(-)

diffs (truncated from 2787 to 300 lines):

diff -r 419f6a4cc606 -r c10243298cf0 ChangeLog
--- a/ChangeLog	Wed Mar 03 20:10:08 2010 +0100
+++ b/ChangeLog	Thu Mar 11 22:32:33 2010 +0100
@@ -1,3 +1,58 @@
+2010-03-11  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/generic/gcd_lehmer.c (gcd_2): Use sub_ddmmss.
+
+	* mpn/generic/jacbase.c (mpn_jacobi_base): Reorganized the
+	JACOBI_BASE_METHOD 4 slightly. Now requires that b > 1.
+
+2010-03-10  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/divrem_1.asm: Make fraction code take documented # of
+	cycles.  Annotate code for more CPUs.  Misc cleanups.
+	* mpn/x86_64/core2/divrem_1.asm: Annotate code for more CPUs.
+
+	* mpn/alpha/ev6/mod_1_4.asm: New file.
+
+	* mpn/ia64/mod_34lsub1.asm: New file.
+
+	* doc/gmp.texi (Language Bindings): Update Python site, add Ruby.
+
+2010-03-10  Niels Möller  <nisse at lysator.liu.se>
+
+	* tune/tuneup.c (tune_jacobi_base): Consider mpn_jacobi_base_4.
+	* tune/speed.c (routine): Added mpn_jacobi_base_4.
+	* tune/common.c (speed_mpn_jacobi_base_4): New function.
+	* tune/speed.h (speed_mpn_jacobi_base_4): Declare it.
+	* tune/Makefile.am (libspeed_la_SOURCES): Added jacbase4.c.
+	* tune/jacbase4.c: New file.
+
+	* mpn/generic/jacbase.c (mpn_jacobi_base): New function, for
+	JACOBI_BASE_METHOD 4.
+
+2010-03-09  Niels Möller  <nisse at lysator.liu.se>
+
+	* tests/mpz/t-jac.c (check_large_quotients): Also generate inputs
+	with large quotients and a large gcd.
+
+2010-03-09 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* tests/mpz/t-bin.c (randomwalk): New test-generator function.
+
+2010-03-07  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/speed.c (routine): Force r argument for several mod_1 calls.
+
+2010-03-06  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/divrem_1.asm: Disable SPECIAL_CODE_FOR_NORMALIZED_DIVISOR.
+	Misc clean up.
+
+	* mpn/x86_64/mod_1_1.asm: New file.
+	* mpn/x86_64/mod_1_2.asm: New file.
+	* mpn/x86_64/mod_1_4.asm: Update cycle counts.
+
+	* tests/tests.h (TESTS_REPS): Fix typo.
+
 2010-03-03  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/core2/divrem_1.asm: New file.
diff -r 419f6a4cc606 -r c10243298cf0 doc/gmp.texi
--- a/doc/gmp.texi	Wed Mar 03 20:10:08 2010 +0100
+++ b/doc/gmp.texi	Thu Mar 11 22:32:33 2010 +0100
@@ -1789,7 +1789,7 @@
 
 will generate better contents for the @file{gmp-mparam.h} parameter file.
 
-To use the results, put the output in the file file indicated in the
+To use the results, put the output in the file indicated in the
 @samp{Parameters for ...} header.  Then recompile from scratch.
 
 The @code{tuneup} program takes one useful parameter, @samp{-f NNN}, which
@@ -7319,9 +7319,13 @@
 @item Python
 @itemize @bullet
 @item
-mpz module in the standard distribution, @uref{http://www.python.org/}
-@item
-GMPY @uref{http://gmpy.sourceforge.net/}
+GMPY @uref{http://code.google.com/p/gmpy/}
+@end itemize
+
+@item Ruby
+@itemize @bullet
+@item
+http://rubygems.org/gems/gmp
 @end itemize
 
 @item Scheme
diff -r 419f6a4cc606 -r c10243298cf0 mpn/alpha/ev6/mod_1_4.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/alpha/ev6/mod_1_4.asm	Thu Mar 11 22:32:33 2010 +0100
@@ -0,0 +1,336 @@
+dnl Alpha mpn_mod_1s_4p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Optimise.  2.75 c/l should be possible.
+C  * Write a proper mpn_mod_1s_4p_cps.  The code below was compiler generated.
+C  * Make mpn_mod_1s_4p_cps work for ev4-ev5.
+C  * Optimise feed-in code, starting the sw pipeline in switch code.
+C  * Use fewer registers.  Use r28 and r27.
+C  * If we cannot reduce register usage, write perhaps small-n basecase.
+C  * Does it work for PIC?
+
+C      cycles/limb
+C EV4:     ?
+C EV5:    23
+C EV6:     3
+
+define(`ap',     `r16')
+define(`n',      `r17')
+define(`pl',     `r24')
+define(`ph',     `r25')
+define(`rl',     `r6')
+define(`rh',     `r7')
+define(`B1modb', `r1')
+define(`B2modb', `r2')
+define(`B3modb', `r3')
+define(`B4modb', `r4')
+define(`B5modb', `r5')
+
+ASM_START()
+	.arch	ev56
+
+PROLOGUE(mpn_mod_1s_4p)
+	lda	r30, -80(r30)
+	stq	r9, 8(r30)
+	stq	r10, 16(r30)
+	stq	r11, 24(r30)
+	stq	r12, 32(r30)
+	stq	r13, 40(r30)
+	stq	r14, 48(r30)
+	stq	r15, 56(r30)
+	s8addq	n, ap, ap		C point ap at vector end
+
+	ldq	B1modb, 16(r19)
+	ldq	B2modb, 24(r19)
+	ldq	B3modb, 32(r19)
+	ldq	B4modb, 40(r19)
+	ldq	B5modb, 48(r19)
+
+	and	n, 3, r0
+	lda	n, -4(n)
+	beq	r0, L(b0)
+	lda	r6, -2(r0)
+	blt	r6, L(b1)
+	beq	r6, L(b2)
+
+L(b3):	ldq	r21, -16(ap)
+	ldq	r22, -8(ap)
+	ldq	r20, -24(ap)
+	mulq	r21, B1modb, r8
+	umulh	r21, B1modb, r12
+	mulq	r22, B2modb, r9
+	umulh	r22, B2modb, r13
+	addq	r8, r20, pl
+	cmpult	pl, r8, r0
+	addq	r0, r12, ph
+	addq	r9, pl, rl
+	cmpult	rl, r9, r0
+	addq	r13, ph, ph
+	addq	r0, ph, rh
+	lda	ap, -56(ap)
+	br	L(com)
+
+L(b0):	ldq	r21, -24(ap)
+	ldq	r22, -16(ap)
+	ldq	r23, -8(ap)
+	ldq	r20, -32(ap)
+	mulq	r21, B1modb, r8
+	umulh	r21, B1modb, r12
+	mulq	r22, B2modb, r9
+	umulh	r22, B2modb, r13
+	mulq	r23, B3modb, r10
+	umulh	r23, B3modb, r14
+	addq	r8, r20, pl
+	cmpult	pl, r8, r0
+	addq	r0, r12, ph
+	addq	r9, pl, pl
+	cmpult	pl, r9, r0
+	addq	r13, ph, ph
+	addq	r0, ph, ph
+	addq	r10, pl, rl
+	cmpult	rl, r10, r0
+	addq	r14, ph, ph
+	addq	r0, ph, rh
+	lda	ap, -64(ap)
+	br	L(com)
+
+L(b1):	bis	r31, r31, rh
+	ldq	rl, -8(ap)
+	lda	ap, -40(ap)
+	br	L(com)
+
+L(b2):	ldq	r21, -8(ap)
+	ldq	r20, -16(ap)
+	mulq	r21, B1modb, r8
+	umulh	r21, B1modb, r12
+	addq	r8, r20, rl
+	cmpult	rl, r8, r0
+	addq	r0, r12, rh
+	lda	ap, -48(ap)
+
+L(com):	ble	n, L(ed3)
+	ldq	r21, 8(ap)
+	ldq	r22, 16(ap)
+	ldq	r23, 24(ap)
+	ldq	r20, 0(ap)
+	lda	n, -4(n)
+	lda	ap, -32(ap)
+	mulq	r21, B1modb, r8
+	umulh	r21, B1modb, r12
+	mulq	r22, B2modb, r9
+	umulh	r22, B2modb, r13
+	mulq	r23, B3modb, r10
+	umulh	r23, B3modb, r14
+	mulq	rl, B4modb, r11
+	umulh	rl, B4modb, r15
+	ble	n, L(ed2)
+
+	ALIGN(16)
+L(top):	ldq	r21, 8(ap)
+	mulq	rh, B5modb, rl
+	addq	r8, r20, pl
+	ldq	r22, 16(ap)
+	cmpult	pl, r8, r0
+	umulh	rh, B5modb, rh
+	ldq	r23, 24(ap)
+	addq	r0, r12, ph
+	addq	r9, pl, pl
+	mulq	r21, B1modb, r8
+	cmpult	pl, r9, r0
+	addq	r13, ph, ph
+	umulh	r21, B1modb, r12
+	lda	ap, -32(ap)
+	addq	r0, ph, ph
+	addq	r10, pl, pl
+	mulq	r22, B2modb, r9
+	cmpult	pl, r10, r0
+	addq	r14, ph, ph
+	addq	r11, pl, pl
+	umulh	r22, B2modb, r13
+	addq	r0, ph, ph
+	cmpult	pl, r11, r0
+	addq	r15, ph, ph
+	mulq	r23, B3modb, r10
+	ldq	r20, 32(ap)
+	addq	pl, rl, rl
+	umulh	r23, B3modb, r14
+	addq	r0, ph, ph
+	cmpult	rl, pl, r0
+	mulq	rl, B4modb, r11
+	addq	ph, rh, rh
+	umulh	rl, B4modb, r15
+	addq	r0, rh, rh
+	lda	n, -4(n)
+	bgt	n, L(top)
+
+L(ed2):	mulq	rh, B5modb, rl
+	addq	r8, r20, pl
+	umulh	rh, B5modb, rh
+	cmpult	pl, r8, r0
+	addq	r0, r12, ph
+	addq	r9, pl, pl
+	cmpult	pl, r9, r0
+	addq	r13, ph, ph
+	addq	r0, ph, ph
+	addq	r10, pl, pl
+	cmpult	pl, r10, r0
+	addq	r14, ph, ph
+	addq	r11, pl, pl
+	addq	r0, ph, ph
+	cmpult	pl, r11, r0
+	addq	r15, ph, ph
+	addq	pl, rl, rl
+	addq	r0, ph, ph
+	cmpult	rl, pl, r0


More information about the gmp-commit mailing list