[Gmp-commit] /home/hgfiles/gmp: 9 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Mar 12 09:06:13 CET 2010
details: /home/hgfiles/gmp/rev/3ae8c8fd84f1
changeset: 13482:3ae8c8fd84f1
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Mar 06 12:37:51 2010 +0100
description:
(TESTS_REPS): Fix typo.
details: /home/hgfiles/gmp/rev/2ca60933dcfb
changeset: 13483:2ca60933dcfb
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Mar 06 13:16:17 2010 +0100
description:
Update x86_64 mod_1_* code.
details: /home/hgfiles/gmp/rev/85b9f18acff7
changeset: 13484:85b9f18acff7
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Mar 07 20:30:01 2010 +0100
description:
(routine): Force r argument for several mod_1 calls.
details: /home/hgfiles/gmp/rev/7f4795eff9b2
changeset: 13485:7f4795eff9b2
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 10 01:52:28 2010 +0100
description:
Natively support ia64 mpn_mod_34lsub1.
details: /home/hgfiles/gmp/rev/c6acc4c0fd57
changeset: 13486:c6acc4c0fd57
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 10 01:54:25 2010 +0100
description:
Natively support alpha/ev6 mpn_mod_1s_4p.
details: /home/hgfiles/gmp/rev/d9718b727678
changeset: 13487:d9718b727678
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 10 01:57:14 2010 +0100
description:
Clean up cycle count tables.
details: /home/hgfiles/gmp/rev/deedad3e677e
changeset: 13488:deedad3e677e
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 10 10:47:23 2010 +0100
description:
Misc x86_64 mpn_divrem_1 improvements.
details: /home/hgfiles/gmp/rev/4ea7826d6dc5
changeset: 13489:4ea7826d6dc5
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Mar 10 10:49:35 2010 +0100
description:
Trivial merge.
details: /home/hgfiles/gmp/rev/c10243298cf0
changeset: 13490:c10243298cf0
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Mar 11 22:32:33 2010 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 55 ++++++
doc/gmp.texi | 12 +-
mpn/alpha/ev6/mod_1_4.asm | 336 +++++++++++++++++++++++++++++++++++++
mpn/generic/gcd_lehmer.c | 12 +-
mpn/generic/jacbase.c | 66 +++++++-
mpn/ia64/mod_34lsub1.asm | 222 ++++++++++++++++++++++++
mpn/x86_64/addaddmul_1msb0.asm | 10 +-
mpn/x86_64/addmul_2.asm | 12 +-
mpn/x86_64/aorrlsh1_n.asm | 13 +-
mpn/x86_64/aorrlsh2_n.asm | 13 +-
mpn/x86_64/aorrlsh_n.asm | 11 +-
mpn/x86_64/aors_n.asm | 13 +-
mpn/x86_64/aorsmul_1.asm | 13 +-
mpn/x86_64/atom/aors_n.asm | 12 +-
mpn/x86_64/bdiv_dbm1c.asm | 13 +-
mpn/x86_64/bdiv_q_1.asm | 13 +-
mpn/x86_64/com.asm | 11 +-
mpn/x86_64/copyd.asm | 12 +-
mpn/x86_64/copyi.asm | 11 +-
mpn/x86_64/core2/aors_n.asm | 12 +-
mpn/x86_64/core2/aorslsh1_n.asm | 11 +-
mpn/x86_64/core2/aorsmul_1.asm | 12 +-
mpn/x86_64/core2/divrem_1.asm | 67 +++---
mpn/x86_64/core2/lshift.asm | 12 +-
mpn/x86_64/core2/lshiftc.asm | 12 +-
mpn/x86_64/core2/rshift.asm | 12 +-
mpn/x86_64/dive_1.asm | 13 +-
mpn/x86_64/divrem_1.asm | 98 +++++-----
mpn/x86_64/divrem_2.asm | 11 +-
mpn/x86_64/invert_limb.asm | 13 +-
mpn/x86_64/logops_n.asm | 11 +-
mpn/x86_64/lshift.asm | 12 +-
mpn/x86_64/lshiftc.asm | 12 +-
mpn/x86_64/lshsub_n.asm | 11 +-
mpn/x86_64/mod_1_1.asm | 181 +++++++++++++++++++
mpn/x86_64/mod_1_2.asm | 215 +++++++++++++++++++++++
mpn/x86_64/mod_1_4.asm | 13 +-
mpn/x86_64/mod_34lsub1.asm | 12 +-
mpn/x86_64/mode1o.asm | 13 +-
mpn/x86_64/mul_1.asm | 13 +-
mpn/x86_64/mul_2.asm | 12 +-
mpn/x86_64/mul_basecase.asm | 11 +-
mpn/x86_64/pentium4/aors_n.asm | 11 +-
mpn/x86_64/pentium4/aorslsh1_n.asm | 11 +-
mpn/x86_64/pentium4/lshift.asm | 12 +-
mpn/x86_64/pentium4/rshift.asm | 12 +-
mpn/x86_64/popham.asm | 11 +-
mpn/x86_64/redc_1.asm | 12 +-
mpn/x86_64/rsh1aors_n.asm | 11 +-
mpn/x86_64/rshift.asm | 12 +-
mpn/x86_64/sublsh1_n.asm | 14 +-
tests/mpz/t-bin.c | 71 +++++++-
tests/mpz/t-jac.c | 62 ++++--
tests/tests.h | 2 +-
tune/Makefile.am | 2 +-
tune/common.c | 5 +
tune/jacbase4.c | 27 ++
tune/speed.c | 13 +-
tune/speed.h | 2 +
tune/tuneup.c | 16 +-
60 files changed, 1624 insertions(+), 331 deletions(-)
diffs (truncated from 2787 to 300 lines):
diff -r 419f6a4cc606 -r c10243298cf0 ChangeLog
--- a/ChangeLog Wed Mar 03 20:10:08 2010 +0100
+++ b/ChangeLog Thu Mar 11 22:32:33 2010 +0100
@@ -1,3 +1,58 @@
+2010-03-11 Niels Möller <nisse at lysator.liu.se>
+
+ * mpn/generic/gcd_lehmer.c (gcd_2): Use sub_ddmmss.
+
+ * mpn/generic/jacbase.c (mpn_jacobi_base): Reorganized the
+ JACOBI_BASE_METHOD 4 slightly. Now requires that b > 1.
+
+2010-03-10 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/divrem_1.asm: Make fraction code take documented # of
+ cycles. Annotate code for more CPUs. Misc cleanups.
+ * mpn/x86_64/core2/divrem_1.asm: Annotate code for more CPUs.
+
+ * mpn/alpha/ev6/mod_1_4.asm: New file.
+
+ * mpn/ia64/mod_34lsub1.asm: New file.
+
+ * doc/gmp.texi (Language Bindings): Update Python site, add Ruby.
+
+2010-03-10 Niels Möller <nisse at lysator.liu.se>
+
+ * tune/tuneup.c (tune_jacobi_base): Consider mpn_jacobi_base_4.
+ * tune/speed.c (routine): Added mpn_jacobi_base_4.
+ * tune/common.c (speed_mpn_jacobi_base_4): New function.
+ * tune/speed.h (speed_mpn_jacobi_base_4): Declare it.
+ * tune/Makefile.am (libspeed_la_SOURCES): Added jacbase4.c.
+ * tune/jacbase4.c: New file.
+
+ * mpn/generic/jacbase.c (mpn_jacobi_base): New function, for
+ JACOBI_BASE_METHOD 4.
+
+2010-03-09 Niels Möller <nisse at lysator.liu.se>
+
+ * tests/mpz/t-jac.c (check_large_quotients): Also generate inputs
+ with large quotients and a large gcd.
+
+2010-03-09 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * tests/mpz/t-bin.c (randomwalk): New test-generator function.
+
+2010-03-07 Torbjorn Granlund <tege at gmplib.org>
+
+ * tune/speed.c (routine): Force r argument for several mod_1 calls.
+
+2010-03-06 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/divrem_1.asm: Disable SPECIAL_CODE_FOR_NORMALIZED_DIVISOR.
+ Misc clean up.
+
+ * mpn/x86_64/mod_1_1.asm: New file.
+ * mpn/x86_64/mod_1_2.asm: New file.
+ * mpn/x86_64/mod_1_4.asm: Update cycle counts.
+
+ * tests/tests.h (TESTS_REPS): Fix typo.
+
2010-03-03 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/core2/divrem_1.asm: New file.
diff -r 419f6a4cc606 -r c10243298cf0 doc/gmp.texi
--- a/doc/gmp.texi Wed Mar 03 20:10:08 2010 +0100
+++ b/doc/gmp.texi Thu Mar 11 22:32:33 2010 +0100
@@ -1789,7 +1789,7 @@
will generate better contents for the @file{gmp-mparam.h} parameter file.
-To use the results, put the output in the file file indicated in the
+To use the results, put the output in the file indicated in the
@samp{Parameters for ...} header. Then recompile from scratch.
The @code{tuneup} program takes one useful parameter, @samp{-f NNN}, which
@@ -7319,9 +7319,13 @@
@item Python
@itemize @bullet
@item
-mpz module in the standard distribution, @uref{http://www.python.org/}
-@item
-GMPY @uref{http://gmpy.sourceforge.net/}
+GMPY @uref{http://code.google.com/p/gmpy/}
+@end itemize
+
+@item Ruby
+@itemize @bullet
+@item
+http://rubygems.org/gems/gmp
@end itemize
@item Scheme
diff -r 419f6a4cc606 -r c10243298cf0 mpn/alpha/ev6/mod_1_4.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/alpha/ev6/mod_1_4.asm Thu Mar 11 22:32:33 2010 +0100
@@ -0,0 +1,336 @@
+dnl Alpha mpn_mod_1s_4p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2009 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C * Optimise. 2.75 c/l should be possible.
+C * Write a proper mpn_mod_1s_4p_cps. The code below was compiler generated.
+C * Make mpn_mod_1s_4p_cps work for ev4-ev5.
+C * Optimise feed-in code, starting the sw pipeline in switch code.
+C * Use fewer registers. Use r28 and r27.
+C * If we cannot reduce register usage, write perhaps small-n basecase.
+C * Does it work for PIC?
+
+C cycles/limb
+C EV4: ?
+C EV5: 23
+C EV6: 3
+
+define(`ap', `r16')
+define(`n', `r17')
+define(`pl', `r24')
+define(`ph', `r25')
+define(`rl', `r6')
+define(`rh', `r7')
+define(`B1modb', `r1')
+define(`B2modb', `r2')
+define(`B3modb', `r3')
+define(`B4modb', `r4')
+define(`B5modb', `r5')
+
+ASM_START()
+ .arch ev56
+
+PROLOGUE(mpn_mod_1s_4p)
+ lda r30, -80(r30)
+ stq r9, 8(r30)
+ stq r10, 16(r30)
+ stq r11, 24(r30)
+ stq r12, 32(r30)
+ stq r13, 40(r30)
+ stq r14, 48(r30)
+ stq r15, 56(r30)
+ s8addq n, ap, ap C point ap at vector end
+
+ ldq B1modb, 16(r19)
+ ldq B2modb, 24(r19)
+ ldq B3modb, 32(r19)
+ ldq B4modb, 40(r19)
+ ldq B5modb, 48(r19)
+
+ and n, 3, r0
+ lda n, -4(n)
+ beq r0, L(b0)
+ lda r6, -2(r0)
+ blt r6, L(b1)
+ beq r6, L(b2)
+
+L(b3): ldq r21, -16(ap)
+ ldq r22, -8(ap)
+ ldq r20, -24(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ mulq r22, B2modb, r9
+ umulh r22, B2modb, r13
+ addq r8, r20, pl
+ cmpult pl, r8, r0
+ addq r0, r12, ph
+ addq r9, pl, rl
+ cmpult rl, r9, r0
+ addq r13, ph, ph
+ addq r0, ph, rh
+ lda ap, -56(ap)
+ br L(com)
+
+L(b0): ldq r21, -24(ap)
+ ldq r22, -16(ap)
+ ldq r23, -8(ap)
+ ldq r20, -32(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ mulq r22, B2modb, r9
+ umulh r22, B2modb, r13
+ mulq r23, B3modb, r10
+ umulh r23, B3modb, r14
+ addq r8, r20, pl
+ cmpult pl, r8, r0
+ addq r0, r12, ph
+ addq r9, pl, pl
+ cmpult pl, r9, r0
+ addq r13, ph, ph
+ addq r0, ph, ph
+ addq r10, pl, rl
+ cmpult rl, r10, r0
+ addq r14, ph, ph
+ addq r0, ph, rh
+ lda ap, -64(ap)
+ br L(com)
+
+L(b1): bis r31, r31, rh
+ ldq rl, -8(ap)
+ lda ap, -40(ap)
+ br L(com)
+
+L(b2): ldq r21, -8(ap)
+ ldq r20, -16(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ addq r8, r20, rl
+ cmpult rl, r8, r0
+ addq r0, r12, rh
+ lda ap, -48(ap)
+
+L(com): ble n, L(ed3)
+ ldq r21, 8(ap)
+ ldq r22, 16(ap)
+ ldq r23, 24(ap)
+ ldq r20, 0(ap)
+ lda n, -4(n)
+ lda ap, -32(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ mulq r22, B2modb, r9
+ umulh r22, B2modb, r13
+ mulq r23, B3modb, r10
+ umulh r23, B3modb, r14
+ mulq rl, B4modb, r11
+ umulh rl, B4modb, r15
+ ble n, L(ed2)
+
+ ALIGN(16)
+L(top): ldq r21, 8(ap)
+ mulq rh, B5modb, rl
+ addq r8, r20, pl
+ ldq r22, 16(ap)
+ cmpult pl, r8, r0
+ umulh rh, B5modb, rh
+ ldq r23, 24(ap)
+ addq r0, r12, ph
+ addq r9, pl, pl
+ mulq r21, B1modb, r8
+ cmpult pl, r9, r0
+ addq r13, ph, ph
+ umulh r21, B1modb, r12
+ lda ap, -32(ap)
+ addq r0, ph, ph
+ addq r10, pl, pl
+ mulq r22, B2modb, r9
+ cmpult pl, r10, r0
+ addq r14, ph, ph
+ addq r11, pl, pl
+ umulh r22, B2modb, r13
+ addq r0, ph, ph
+ cmpult pl, r11, r0
+ addq r15, ph, ph
+ mulq r23, B3modb, r10
+ ldq r20, 32(ap)
+ addq pl, rl, rl
+ umulh r23, B3modb, r14
+ addq r0, ph, ph
+ cmpult rl, pl, r0
+ mulq rl, B4modb, r11
+ addq ph, rh, rh
+ umulh rl, B4modb, r15
+ addq r0, rh, rh
+ lda n, -4(n)
+ bgt n, L(top)
+
+L(ed2): mulq rh, B5modb, rl
+ addq r8, r20, pl
+ umulh rh, B5modb, rh
+ cmpult pl, r8, r0
+ addq r0, r12, ph
+ addq r9, pl, pl
+ cmpult pl, r9, r0
+ addq r13, ph, ph
+ addq r0, ph, ph
+ addq r10, pl, pl
+ cmpult pl, r10, r0
+ addq r14, ph, ph
+ addq r11, pl, pl
+ addq r0, ph, ph
+ cmpult pl, r11, r0
+ addq r15, ph, ph
+ addq pl, rl, rl
+ addq r0, ph, ph
+ cmpult rl, pl, r0
More information about the gmp-commit
mailing list