[Gmp-commit] /var/hg/gmp: 5 new changesets

Fri Oct 14 18:59:52 UTC 2016

details:   /var/hg/gmp/rev/e87a78ea5686
changeset: 17076:e87a78ea5686
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Oct 14 07:59:04 2016 +0200
description:
Support RISC-V.

details:   /var/hg/gmp/rev/a20eda8bdd36
changeset: 17077:a20eda8bdd36
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Oct 14 11:40:29 2016 +0200
description:
Rewrite arm support to handle armv8 CPUs in 32-bit mode.

details:   /var/hg/gmp/rev/66a0f5e25a26
changeset: 17078:66a0f5e25a26
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Oct 14 11:43:13 2016 +0200
description:
Rewrite for shallower recurrency.

details:   /var/hg/gmp/rev/94c787bc3d13
changeset: 17079:94c787bc3d13
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Oct 14 11:46:44 2016 +0200
description:
Initial RISC-V assembly support.

details:   /var/hg/gmp/rev/0daf2054873b
changeset: 17080:0daf2054873b
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Oct 14 11:48:53 2016 +0200
description:
(riscv umul_ppmm): New.

diffstat:

 configure.ac               |  90 ++++++++++++++++++++++++++++++--------------
 longlong.h                 |  11 ++++-
 mpn/generic/addmul_1.c     |  93 ++++++++++++++++++++++++---------------------
 mpn/generic/submul_1.c     |  80 +++++++++++++++++++++------------------
 mpn/riscv/64/aors_n.asm    |  89 ++++++++++++++++++++++++++++++++++++++++++++
 mpn/riscv/64/aorsmul_1.asm |  75 +++++++++++++++++++++++++++++++++++++
 mpn/riscv/64/mul_1.asm     |  58 ++++++++++++++++++++++++++++
 7 files changed, 386 insertions(+), 110 deletions(-)

diffs (truncated from 722 to 300 lines):

diff -r f92bdb96119d -r 0daf2054873b configure.ac

--- a/configure.ac	Tue Aug 30 15:55:58 2016 +0200
+++ b/configure.ac	Fri Oct 14 11:48:53 2016 +0200
@@ -594,9 +594,14 @@
   arm*-*-* | aarch64*-*-*)
     gcc_cflags="$gcc_cflags $fomit_frame_pointer"
     gcc_cflags_optlist="arch fpmode neon tune"
+    gcc_64_cflags_optlist="arch tune"
     gcc_testlist="gcc-arm-umodsi"
-    GMP_INCLUDE_MPN(arm/arm-defs.m4)
+    gcc_64_testlist=""
     CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+    CALLING_CONVENTIONS_OBJS_64=""
+    cclist_64="gcc cc"
+    any_32_testlist="sizeof-long-4"
+    any_64_testlist="sizeof-long-8"
 
     # This is needed for clang, which is not content with flags like -mfpu=neon
     # alone.
@@ -611,14 +616,14 @@
     # either enforce them, or organise to strip paths as the corresponding
     # options fail.
     case $host_cpu in
+      armxscale | arm7ej | arm9te | arm9e* | arm10* | armv5*)
+	path="arm/v5 arm"
+	gcc_cflags_arch="-march=armv5"
+	;;
       armsa1 | arm7t* | arm9t* | armv4t*)
 	path="arm"
 	gcc_cflags_arch="-march=armv4"
 	;;
-      armxscale | arm7ej | arm9te | arm9e* | arm10* | armv5*)
-	path="arm/v5 arm"
-	gcc_cflags_arch="-march=armv5"
-	;;
       arm1156 | armv6t2*)
 	path="arm/v6t2 arm/v6 arm/v5 arm"
 	gcc_cflags_arch="-march=armv6t2"
@@ -682,46 +687,60 @@
 	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
 	;;
-      armcortexa53)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64/cora53 arm64"
-	gcc_cflags_arch=""
+      armcortexa53 | armcortexa53neon)
+        abilist="64 32"
+	path="arm/neon arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64/cora53 arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=cortex-a53"
 	;;
-      armcortexa57)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64"
-	gcc_cflags_arch=""
+      armcortexa57 | armcortexa57neon)
+        abilist="64 32"
+	path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64/cora57 arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=cortex-a57"
 	;;
-      armcortexa72)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64"
-	gcc_cflags_arch=""
+      armcortexa72 | armcortexa72neon)
+        abilist="64 32"
+	path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64/cora72 arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=cortex-a72"
 	;;
       armexynosm1)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64"
-	gcc_cflags_arch=""
+        abilist="64 32"
+	path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=exynosm1"
 	;;
       armthunderx)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64"
-	gcc_cflags_arch=""
+        abilist="64 32"
+	path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=thunderx"
 	;;
       armxgene1)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64/xgene1 arm64"
-	gcc_cflags_arch=""
+        abilist="64 32"
+	path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64/xgene1 arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune="-mtune=xgene1"
 	;;
       aarch64*)
-	unset CALLING_CONVENTIONS_OBJS
-	path="arm64"
-	gcc_cflags_arch=""
+        abilist="64 32"
+	path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+	path_64="arm64"
+	gcc_cflags_arch="-march=armv8-a"
+	gcc_cflags_neon="-mfpu=neon"
 	gcc_cflags_tune=""
 	;;
       *)
@@ -1273,6 +1292,13 @@
     ;;
 
 
+  # RISC-V
+  [riscv-*-*])
+    cclist="gcc"
+    path="riscv/64"
+    ;;
+
+
   # IBM System/390 and z/Architecture
   S390_PATTERN | S390X_PATTERN)
     abilist="32"
@@ -3581,6 +3607,12 @@
   GMP_ASM_ALIGN_LOG
 
   case $host in
+    arm*-*-* | aarch64*-*-*)
+      case $ABI in
+        32)
+	  GMP_INCLUDE_MPN(arm/arm-defs.m4) ;;
+      esac
+      ;;
     hppa*-*-*)
       # for both pa32 and pa64
       GMP_INCLUDE_MPN(pa32/pa-defs.m4)
diff -r f92bdb96119d -r 0daf2054873b longlong.h
--- a/longlong.h	Tue Aug 30 15:55:58 2016 +0200
+++ b/longlong.h	Fri Oct 14 11:48:53 2016 +0200
@@ -1,6 +1,6 @@
 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
 
-Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2015 Free Software
+Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2016 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
@@ -1584,6 +1584,15 @@
   } while (0)
 #endif /* RT/ROMP */
 
+#if defined (__riscv64) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, u, v) \
+  do {									\
+    UDItype __u = (u), __v = (v);					\
+    (pl) = __u * __v;							\
+    __asm__ ("mulhu\t%2, %1, %0" : "=r" (ph) : "%r" (__u), "r" (__v));	\
+  } while (0)
+#endif
+
 #if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
 #define umul_ppmm(w1, w0, u, v) \
   __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0"		\
diff -r f92bdb96119d -r 0daf2054873b mpn/generic/addmul_1.c
--- a/mpn/generic/addmul_1.c	Tue Aug 30 15:55:58 2016 +0200
+++ b/mpn/generic/addmul_1.c	Fri Oct 14 11:48:53 2016 +0200
@@ -3,7 +3,8 @@
    pointed to by RP.  Return the most significant limb of the product,
    adjusted for carry-out from the addition.
 
-Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1992-1994, 1996, 2000, 2002, 2004, 2016 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
@@ -38,30 +39,36 @@
 #if GMP_NAIL_BITS == 0
 
 mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
 {
-  mp_limb_t ul, cl, hpl, lpl, rl;
+  mp_limb_t u0, crec, c, p1, p0, r0;
 
   ASSERT (n >= 1);
   ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
 
-  cl = 0;
+  crec = 0;
   do
     {
-      ul = *up++;
-      umul_ppmm (hpl, lpl, ul, vl);
+      u0 = *up++;
+      umul_ppmm (p1, p0, u0, v0);
 
-      lpl += cl;
-      cl = (lpl < cl) + hpl;
+      r0 = *rp;
 
-      rl = *rp;
-      lpl = rl + lpl;
-      cl += lpl < rl;
-      *rp++ = lpl;
+      p0 = r0 + p0;
+      c = r0 > p0;
+
+      p1 = p1 + c;
+
+      r0 = p0 + crec;		/* cycle 0, 3, ... */
+      c = p0 > r0;		/* cycle 1, 4, ... */
+
+      crec = p1 + c;		/* cycle 2, 5, ... */
+
+      *rp++ = r0;
     }
   while (--n != 0);
 
-  return cl;
+  return crec;
 }
 
 #endif
@@ -69,35 +76,35 @@
 #if GMP_NAIL_BITS == 1
 
 mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
 {
-  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, cl, xl, c1, c2, c3;
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, crec, xl, c1, c2, c3;
 
   ASSERT (n >= 1);
   ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
   ASSERT_MPN (rp, n);
   ASSERT_MPN (up, n);
-  ASSERT_LIMB (vl);
+  ASSERT_LIMB (v0);
 
-  shifted_vl = vl << GMP_NAIL_BITS;
-  cl = 0;
-  prev_hpl = 0;
+  shifted_v0 = v0 << GMP_NAIL_BITS;
+  crec = 0;
+  prev_p1 = 0;
   do
     {
-      ul = *up++;
-      rl = *rp;
-      umul_ppmm (hpl, lpl, ul, shifted_vl);
-      lpl >>= GMP_NAIL_BITS;
-      ADDC_LIMB (c1, xl, prev_hpl, lpl);
-      ADDC_LIMB (c2, xl, xl, rl);
-      ADDC_LIMB (c3, xl, xl, cl);
-      cl = c1 + c2 + c3;
+      u0 = *up++;
+      r0 = *rp;
+      umul_ppmm (p1, p0, u0, shifted_v0);
+      p0 >>= GMP_NAIL_BITS;
+      ADDC_LIMB (c1, xl, prev_p1, p0);
+      ADDC_LIMB (c2, xl, xl, r0);
+      ADDC_LIMB (c3, xl, xl, crec);
+      crec = c1 + c2 + c3;
       *rp++ = xl;
-      prev_hpl = hpl;
+      prev_p1 = p1;
     }
   while (--n != 0);
 
-  return prev_hpl + cl;
+  return prev_p1 + crec;
 }
 
 #endif
@@ -105,34 +112,34 @@
 #if GMP_NAIL_BITS >= 2
 
 mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
 {
-  mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, xw, cl, xl;
+  mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, crec, xl;