[Gmp-commit] /home/hgfiles/gmp: 5 new changesets

Mon Dec 28 15:37:23 CET 2009

details:   /home/hgfiles/gmp/rev/4cf30e0589b3
changeset: 13243:4cf30e0589b3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 28 14:54:27 2009 +0100
description:
Optimise shifting for Core i.

details:   /home/hgfiles/gmp/rev/4722549eec70
changeset: 13244:4722549eec70
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 28 15:32:16 2009 +0100
description:
Make mpn_lshiftc standard internal function.

details:   /home/hgfiles/gmp/rev/4db5125f1d87
changeset: 13245:4db5125f1d87
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 28 15:33:06 2009 +0100
description:
Test mpn_lshiftc.

details:   /home/hgfiles/gmp/rev/a05582cfd8e8
changeset: 13246:a05582cfd8e8
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 28 15:35:42 2009 +0100
description:
Support measuring mpn_lshiftc.

details:   /home/hgfiles/gmp/rev/9f83d5200872
changeset: 13247:9f83d5200872
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Dec 28 15:37:05 2009 +0100
description:
(tune_mu_div, tune_mu_bdiv): Set step_factor.

diffstat:

 ChangeLog                    |   24 ++++++-
 configure.in                 |    4 +-
 mpn/generic/lshiftc.c        |   63 ++++++++++++++++
 mpn/generic/mul_fft.c        |   38 ---------
 mpn/x86_64/core2/lshift.asm  |    6 +-
 mpn/x86_64/core2/lshiftc.asm |  138 ++++++++++++++++++++++++++++++++++++
 mpn/x86_64/core2/rshift.asm  |    6 +-
 mpn/x86_64/lshiftc.asm       |  164 +++++++++++++++++++++++++++++++++++++++++++
 tests/devel/try.c            |   18 +++-
 tests/refmpn.c               |   23 ++++++
 tests/tests.h                |   13 +--
 tune/common.c                |    5 +
 tune/speed.c                 |    1 +
 tune/speed.h                 |    1 +
 tune/tuneup.c                |    6 +-
 15 files changed, 449 insertions(+), 61 deletions(-)

diffs (truncated from 719 to 300 lines):

diff -r 052c56e76bcd -r 9f83d5200872 ChangeLog

--- a/ChangeLog	Sun Dec 27 20:30:53 2009 +0100
+++ b/ChangeLog	Mon Dec 28 15:37:05 2009 +0100
@@ -1,8 +1,30 @@
+2009-12-28  Torbjorn Granlund  <tege at gmplib.org>
+
+	* tune/tuneup.c (tune_mu_div, tune_mu_bdiv): Set step_factor.
+
+	* tune/common.c, tune/speed.c, tune/speed.h: Support measuring
+	mpn_lshiftc.
+
+	* tests/devel/try.c: Test mpn_lshiftc.
+	* tests/refmpn.c (refmpn_com): New function.
+	(refmpn_lshiftc): Likewise.
+
+	* configure.in (gmp_mpn_functions_optional): Move lshiftc from here...
+	(gmp_mpn_functions): ...to here.
+	* mpn/generic/lshiftc.c: New file.
+	* mpn/x86_64/lshiftc.asm: New file.
+	* mpn/x86_64/core2/lshiftc.asm: New file.
+	* mpn/generic/mul_fft.c (mpn_lshiftc): Remove.
+
+	* mpn/x86_64/core2/lshift.asm: Tweak for better Core iN performance.
+	* mpn/x86_64/core2/rshift.asm: Likewise.
+
 2009-12-27  Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/mul.c: Use toom6h and toom8h for almost balanced.
 
-	* mpn/generic/mullo_n.c (mpn_dc_mullo_n): New ratio, to be used in Toom-8 range.
+	* mpn/generic/mullo_n.c (mpn_dc_mullo_n): New ratio, to be used in
+	Toom-8 range.
 
 2009-12-27  Torbjorn Granlund  <tege at gmplib.org>
 
diff -r 052c56e76bcd -r 9f83d5200872 configure.in
--- a/configure.in	Sun Dec 27 20:30:53 2009 +0100
+++ b/configure.in	Mon Dec 28 15:37:05 2009 +0100
@@ -2488,13 +2488,13 @@
   addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n			\
   addlsh2_n sublsh2_n rsblsh2_n						\
   addlsh_n sublsh_n rsblsh_n						\
-  add_n_sub_n addaddmul_1msb0 lshiftc"
+  add_n_sub_n addaddmul_1msb0"
 
 gmp_mpn_functions="$extra_functions					   \
   add add_1 add_n sub sub_1 sub_n neg_n mul_1 addmul_1			   \
   submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2     \
   fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump		   \
-  mod_1_1 mod_1_2 mod_1_3 mod_1_4					   \
+  mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc				   \
   mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul	   \
   random random2 pow_1							   \
   rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp	   \
diff -r 052c56e76bcd -r 9f83d5200872 mpn/generic/lshiftc.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/lshiftc.c	Mon Dec 28 15:37:05 2009 +0100
@@ -0,0 +1,63 @@
+/* mpn_lshiftc -- Shift left low level with complement.
+
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2009 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by up and n limbs long) cnt bits to the left and store
+   the complement of the n least significant limbs of the result at rp.
+   Return the bits shifted out from the most significant limb, uncomplemented.
+
+   Argument constraints:
+   1. 0 < cnt < GMP_NUMB_BITS.
+   2. If the result is to be written over the input, rp must be >= up.
+*/
+
+mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  ASSERT (n >= 1);
+  ASSERT (cnt >= 1);
+  ASSERT (cnt < GMP_NUMB_BITS);
+  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_NUMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt);
+
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
+      high_limb = low_limb << cnt;
+    }
+  *--rp = (~high_limb) & GMP_NUMB_MASK;
+
+  return retval;
+}
diff -r 052c56e76bcd -r 9f83d5200872 mpn/generic/mul_fft.c
--- a/mpn/generic/mul_fft.c	Sun Dec 27 20:30:53 2009 +0100
+++ b/mpn/generic/mul_fft.c	Mon Dec 28 15:37:05 2009 +0100
@@ -180,44 +180,6 @@
     }
 }
 
-/* Shift {up, n} of cnt bits to the left, store the complemented result
-   in {rp, n}, and output the shifted bits (not complemented).
-   Same as:
-     cc = mpn_lshift (rp, up, n, cnt);
-     mpn_com_n (rp, rp, n);
-     return cc;
-
-   Assumes n >= 1, 1 < cnt < GMP_NUMB_BITS, rp >= up.
-*/
-#ifndef HAVE_NATIVE_mpn_lshiftc
-#undef mpn_lshiftc
-static mp_limb_t
-mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
-  mp_limb_t high_limb, low_limb;
-  unsigned int tnc;
-  mp_size_t i;
-  mp_limb_t retval;
-
-  up += n;
-  rp += n;
-
-  tnc = GMP_NUMB_BITS - cnt;
-  low_limb = *--up;
-  retval = low_limb >> tnc;
-  high_limb = (low_limb << cnt);
-
-  for (i = n - 1; i != 0; i--)
-    {
-      low_limb = *--up;
-      *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
-      high_limb = low_limb << cnt;
-    }
-  *--rp = (~high_limb) & GMP_NUMB_MASK;
-
-  return retval;
-}
-#endif
 
 /* r <- a*2^e mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
    Assumes a is semi-normalized, i.e. a[n] <= 1.
diff -r 052c56e76bcd -r 9f83d5200872 mpn/x86_64/core2/lshift.asm
--- a/mpn/x86_64/core2/lshift.asm	Sun Dec 27 20:30:53 2009 +0100
+++ b/mpn/x86_64/core2/lshift.asm	Mon Dec 28 15:37:05 2009 +0100
@@ -1,6 +1,6 @@
 dnl  x86-64 mpn_lshift optimized for "Core 2".
 
-dnl  Copyright 2007 Free Software Foundation, Inc.
+dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -25,7 +25,7 @@
 C K10:		 4.25
 C P4:		14.7
 C P6 core2:	 1.27
-C P6 corei7:	 1.75
+C P6 corei7:	 1.5
 
 
 C INPUT PARAMETERS
@@ -112,8 +112,8 @@
 	mov	%r9, -16(rp)
 L(00):	shld	%cl, %r11, %r10
 	mov	-24(up), %r9
-	lea	-32(up), up
 	mov	%r10, -24(rp)
+	add	$-32, up
 	lea	-32(rp), rp
 	sub	$4, n
 	jnc	L(top)
diff -r 052c56e76bcd -r 9f83d5200872 mpn/x86_64/core2/lshiftc.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/core2/lshiftc.asm	Mon Dec 28 15:37:05 2009 +0100
@@ -0,0 +1,138 @@
+dnl  x86-64 mpn_lshiftc optimized for "Core 2".
+
+dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/limb
+C K8,K9:	 ?
+C K10:		 ?
+C P4:		 ?
+C P6 core2:	 1.5
+C P6 corei7:	 1.75
+
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`n',	`%rdx')
+define(`cnt',	`%cl')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_lshiftc)
+	lea	-8(rp,n,8), rp
+	lea	-8(up,n,8), up
+
+	mov	%edx, %eax
+	and	$3, %eax
+	jne	L(nb00)
+L(b00):	C n = 4, 8, 12, ...
+	mov	(up), %r10
+	mov	-8(up), %r11
+	xor	%eax, %eax
+	shld	%cl, %r10, %rax
+	mov	-16(up), %r8
+	lea	24(rp), rp
+	sub	$4, n
+	jmp	L(00)
+
+L(nb00):C n = 1, 5, 9, ...
+	cmp	$2, %eax
+	jae	L(nb01)
+L(b01):	mov	(up), %r9
+	xor	%eax, %eax
+	shld	%cl, %r9, %rax
+	sub	$2, n
+	jb	L(le1)
+	mov	-8(up), %r10
+	mov	-16(up), %r11
+	lea	-8(up), up
+	lea	16(rp), rp
+	jmp	L(01)
+L(le1):	shl	%cl, %r9
+	not	%r9
+	mov	%r9, (rp)
+	ret
+
+L(nb01):C n = 2, 6, 10, ...
+	jne	L(b11)
+L(b10):	mov	(up), %r8
+	mov	-8(up), %r9
+	xor	%eax, %eax
+	shld	%cl, %r8, %rax
+	sub	$3, n
+	jb	L(le2)
+	mov	-16(up), %r10
+	lea	-16(up), up
+	lea	8(rp), rp
+	jmp	L(10)
+L(le2):	shld	%cl, %r9, %r8
+	not	%r8
+	mov	%r8, (rp)
+	shl	%cl, %r9
+	not	%r9
+	mov	%r9, -8(rp)
+	ret
+
+	ALIGN(16)			C performance critical!
+L(b11):	C n = 3, 7, 11, ...
+	mov	(up), %r11