[Gmp-commit] /var/hg/gmp: 6 new changesets

Tue Mar 22 22:03:50 CET 2011

details:   /var/hg/gmp/rev/caba017f1f41
changeset: 14099:caba017f1f41
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Mar 22 20:38:17 2011 +0100
description:
Prepare for mpn_div_qr_2_pi1_norm assembly.

details:   /var/hg/gmp/rev/3cd98a699f72
changeset: 14100:3cd98a699f72
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Mar 22 20:43:32 2011 +0100
description:
First poor implementation of x86_64 div_qr_2_pi1_norm

details:   /var/hg/gmp/rev/2da46c2275a1
changeset: 14101:2da46c2275a1
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Mar 22 21:49:23 2011 +0100
description:
First poor implementation of x86_64 div_qr_2_pi1_norm

details:   /var/hg/gmp/rev/2ee8ce3904f5
changeset: 14102:2ee8ce3904f5
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Mar 22 21:50:15 2011 +0100
description:
Copied inner loop from divrem_2.asm

details:   /var/hg/gmp/rev/1a5ac470ca92
changeset: 14103:1a5ac470ca92
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Mar 22 22:00:46 2011 +0100
description:
Trivial merge.

details:   /var/hg/gmp/rev/eddb0d9d7ce2
changeset: 14104:eddb0d9d7ce2
user:      Niels Möller <nisse at lysator.liu.se>
date:      Tue Mar 22 22:03:13 2011 +0100
description:
Fixed copyright header.

diffstat:

 ChangeLog                        |   21 ++++++
 configure.in                     |   99 ++++++++++++++++-------------
 gmp-impl.h                       |    3 +
 mpn/asm-defs.m4                  |    4 +
 mpn/generic/div_qr_2.c           |   43 -------------
 mpn/generic/div_qr_2_pi1_norm.c  |   76 +++++++++++++++++++++++
 mpn/x86_64/div_qr_2_pi1_norm.asm |  128 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 286 insertions(+), 88 deletions(-)

diffs (truncated from 480 to 300 lines):

diff -r 4aeb0b693420 -r eddb0d9d7ce2 ChangeLog

--- a/ChangeLog	Tue Mar 22 15:04:44 2011 +0100
+++ b/ChangeLog	Tue Mar 22 22:03:13 2011 +0100
@@ -1,5 +1,26 @@
+2011-03-22  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/x86_64/div_qr_2_pi1_norm.asm: Copied optimized inner loop
+	from divrem_2.asm.
+
+	* mpn/x86_64/div_qr_2_pi1_norm.asm: First working, but poorly
+	optimized, implementation.
+
+	* mpn/asm-defs.m4 (define_mpn): Added div_qr_2_pi[12]_*norm.
+
+	* mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2_pi1_norm): Moved
+	to separate file, from...
+	* mpn/generic/div_qr_2.c: ... old location.
+
+	* gmp-impl.h (mpn_div_qr_2_pi1_norm): Declare.
+
+	* configure.in: Added div_qr_2_pi1_norm.
+
 2011-03-22  Torbjorn Granlund  <tege at gmplib.org>
 
+	* configure.in (powerpc): Reinsert lost AIX cpu_path 32-bit handling.
+	Reinsert lost linux/bsd cpu_path handling.
+
 	* mpn/generic/mod_1_1.c: Disable powerpc asm for _LONG_LONG_LIMB.
 	* mpn/generic/div_qr_2.c: Likewise.
 
diff -r 4aeb0b693420 -r eddb0d9d7ce2 configure.in
--- a/configure.in	Tue Mar 22 15:04:44 2011 +0100
+++ b/configure.in	Tue Mar 22 22:03:13 2011 +0100
@@ -955,44 +955,47 @@
 
     case $host in
       POWERPC64_PATTERN)
-        case $host in
-          *-*-aix*)
-            # On AIX a true 64-bit ABI is available.
-            # Need -Wc to pass object type flags through to the linker.
-            abilist="mode64 $abilist"
-            cclist_mode64="gcc xlc"
-            gcc_mode64_cflags="-O2 -maix64 -mpowerpc64"
-            gcc_mode64_cflags_optlist="cpu"
+	case $host in
+	  *-*-aix*)
+	    # On AIX a true 64-bit ABI is available.
+	    # Need -Wc to pass object type flags through to the linker.
+	    abilist="mode64 $abilist"
+	    cclist_mode64="gcc xlc"
+	    gcc_mode64_cflags="-O2 -maix64 -mpowerpc64"
+	    gcc_mode64_cflags_optlist="cpu"
 	    gcc_mode64_ldflags="-Wc,-maix64"
-            xlc_mode64_cflags="-O2 -q64 -qmaxmem=20000"
-            xlc_mode64_cflags_optlist="arch"
+	    xlc_mode64_cflags="-O2 -q64 -qmaxmem=20000"
+	    xlc_mode64_cflags_optlist="arch"
 	    xlc_mode64_ldflags="-Wc,-q64"
-            # Must indicate object type to ar and nm
+	    # Must indicate object type to ar and nm
 	    ar_mode64_flags="-X64"
 	    nm_mode64_flags="-X64"
 	    path_mode64=""
+	    p=""
 	    for i in $cpu_path
 	      do path_mode64="${path_mode64}powerpc64/mode64/$i "
 		 path_mode64="${path_mode64}powerpc64/$i "
+		 p="${p} powerpc32/$i "
 	      done
-            path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
-            # grab this object, though it's not a true cycle counter routine
-            SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
-            cyclecounter_size_mode64=0
-            ;;
-          *-*-darwin*)
-            # On Darwin we can use 64-bit instructions with a longlong limb,
-            # but the chip still in 32-bit mode.
-            # In theory this can be used on any OS which knows how to save
-            # 64-bit registers in a context switch.
-            #
-            # Note that we must use -mpowerpc64 with gcc, since the
-            # longlong.h macros expect limb operands in a single 64-bit
-            # register, not two 32-bit registers as would be given for a
-            # long long without -mpowerpc64.  In theory we could detect and
-            # accommodate both styles, but the proper 64-bit registers will
-            # be fastest and are what we really want to use.
-            #
+	    path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+	    path="$p $path"
+	    # grab this object, though it's not a true cycle counter routine
+	    SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+	    cyclecounter_size_mode64=0
+	    ;;
+	  *-*-darwin*)
+	    # On Darwin we can use 64-bit instructions with a longlong limb,
+	    # but the chip still in 32-bit mode.
+	    # In theory this can be used on any OS which knows how to save
+	    # 64-bit registers in a context switch.
+	    #
+	    # Note that we must use -mpowerpc64 with gcc, since the
+	    # longlong.h macros expect limb operands in a single 64-bit
+	    # register, not two 32-bit registers as would be given for a
+	    # long long without -mpowerpc64.  In theory we could detect and
+	    # accommodate both styles, but the proper 64-bit registers will
+	    # be fastest and are what we really want to use.
+	    #
 	    # One would think -mpowerpc64 would set the assembler in the right
 	    # mode to handle 64-bit instructions.  But for that, also
 	    # -force_cpusubtype_ALL is needed.
@@ -1025,8 +1028,8 @@
 	    path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
 	    path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
 	    path="$p $path"
-            SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
-            cyclecounter_size_mode64=0
+	    SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+	    cyclecounter_size_mode64=0
 	    any_mode64_testlist="sizeof-long-8"
 	    ;;
 	  *-*-linux* | *-*-*bsd*)
@@ -1035,38 +1038,44 @@
 	    # others require -m64, hence the use of cflags_maybe.  The
 	    # sizeof-long-8 test checks the mode is right (for the no option
 	    # case).
-            #
-            # -mpowerpc64 is not used, since it should be the default in
-            # 64-bit mode.  (We need its effect for the various longlong.h
-            # asm macros to be right of course.)
-            #
-            # gcc64 was an early port of gcc to 64-bit mode, but should be
-            # obsolete before too long.  We prefer plain gcc when it knows
-            # 64-bits.
+	    #
+	    # -mpowerpc64 is not used, since it should be the default in
+	    # 64-bit mode.  (We need its effect for the various longlong.h
+	    # asm macros to be right of course.)
+	    #
+	    # gcc64 was an early port of gcc to 64-bit mode, but should be
+	    # obsolete before too long.	 We prefer plain gcc when it knows
+	    # 64-bits.
 	    #
 	    abilist="mode64 mode32 $abilist"
 	    cclist_mode32="gcc"
 	    gcc_mode32_cflags="-mpowerpc64"
 	    gcc_mode32_cflags_optlist="cpu opt"
 	    gcc_mode32_cflags_opt="-O3 -O2 -O1"
-	    path_mode32="powerpc64/mode32 $vmx_path powerpc64"
 	    limb_mode32=longlong
 	    cclist_mode64="gcc gcc64"
 	    gcc_mode64_cflags_maybe="-m64"
 	    gcc_mode64_cflags_optlist="cpu opt"
 	    gcc_mode64_cflags_opt="-O3 -O2 -O1"
 	    path_mode64=""
+	    path_mode32=""
+	    p=""
 	    for i in $cpu_path
 	      do path_mode64="${path_mode64}powerpc64/mode64/$i "
 		 path_mode64="${path_mode64}powerpc64/$i "
+		 path_mode32="${path_mode32}powerpc64/mode32/$i "
+		 path_mode32="${path_mode32}powerpc64/$i "
+		 p="${p} powerpc32/$i "
 	      done
 	    path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
-            SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
-            cyclecounter_size_mode64=0
+	    path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+	    path="$p $path"
+	    SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+	    cyclecounter_size_mode64=0
 	    any_mode64_testlist="sizeof-long-8"
 	    ;;
-        esac
-        ;;
+	esac
+	;;
     esac
     ;;
 
@@ -2538,7 +2547,7 @@
   toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts	   \
   toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts	   \
   invertappr invert binvert mulmod_bnm1 sqrmod_bnm1			   \
-  div_qr_2								   \
+  div_qr_2 div_qr_2_pi1_norm						   \
   sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q				   \
   dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q				   \
   mu_div_qr mu_divappr_q mu_div_q					   \
diff -r 4aeb0b693420 -r eddb0d9d7ce2 gmp-impl.h
--- a/gmp-impl.h	Tue Mar 22 15:04:44 2011 +0100
+++ b/gmp-impl.h	Tue Mar 22 22:03:13 2011 +0100
@@ -1295,6 +1295,9 @@
 #define   mpn_fft_next_size __MPN(fft_next_size)
 __GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
 
+#define   mpn_div_qr_2_pi1_norm __MPN(div_qr_2_pi1_norm)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2_pi1_norm __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t));
+
 #define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
 
diff -r 4aeb0b693420 -r eddb0d9d7ce2 mpn/asm-defs.m4
--- a/mpn/asm-defs.m4	Tue Mar 22 15:04:44 2011 +0100
+++ b/mpn/asm-defs.m4	Tue Mar 22 22:03:13 2011 +0100
@@ -1352,6 +1352,10 @@
 define_mpn(count_leading_zeros)
 define_mpn(count_trailing_zeros)
 define_mpn(div_qr_2)
+define_mpn(div_qr_2_pi1_norm)
+define_mpn(div_qr_2_pi1_unnorm)
+define_mpn(div_qr_2_pi2_norm)
+define_mpn(div_qr_2_pi2_unnorm)
 define_mpn(divexact_1)
 define_mpn(divexact_by3c)
 define_mpn(divrem)
diff -r 4aeb0b693420 -r eddb0d9d7ce2 mpn/generic/div_qr_2.c
--- a/mpn/generic/div_qr_2.c	Tue Mar 22 15:04:44 2011 +0100
+++ b/mpn/generic/div_qr_2.c	Tue Mar 22 22:03:13 2011 +0100
@@ -220,49 +220,6 @@
 }
 
 static mp_limb_t
-mpn_div_qr_2_pi1_norm (mp_ptr qp, mp_ptr np, mp_size_t nn,
-		       mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
-{
-  mp_limb_t qh;
-  mp_size_t i;
-  mp_limb_t r1, r0;
-
-  ASSERT (nn >= 2);
-  ASSERT (d1 & GMP_NUMB_HIGHBIT);
-
-  np += nn - 2;
-  r1 = np[1];
-  r0 = np[0];
-
-  qh = 0;
-  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
-    {
-#if GMP_NAIL_BITS == 0
-      sub_ddmmss (r1, r0, r1, r0, d1, d0);
-#else
-      r0 = r0 - d0;
-      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
-      r0 &= GMP_NUMB_MASK;
-#endif
-      qh = 1;
-    }
-
-  for (i = nn - 2 - 1; i >= 0; i--)
-    {
-      mp_limb_t n0, q;
-      n0 = np[-1];
-      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
-      np--;
-      qp[i] = q;
-    }
-
-  np[1] = r1;
-  np[0] = r0;
-
-  return qh;
-}
-
-static mp_limb_t
 mpn_div_qr_2_pi2_norm (mp_ptr qp, mp_ptr np, mp_size_t nn,
 		       mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
 {
diff -r 4aeb0b693420 -r eddb0d9d7ce2 mpn/generic/div_qr_2_pi1_norm.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_2_pi1_norm.c	Tue Mar 22 22:03:13 2011 +0100
@@ -0,0 +1,76 @@
+/* mpn_div_qr_2_pi1_norm
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+