[Gmp-commit] /var/hg/gmp: 5 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Fri Oct 14 18:59:52 UTC 2016
details: /var/hg/gmp/rev/e87a78ea5686
changeset: 17076:e87a78ea5686
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Oct 14 07:59:04 2016 +0200
description:
Support RISC-V.
details: /var/hg/gmp/rev/a20eda8bdd36
changeset: 17077:a20eda8bdd36
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Oct 14 11:40:29 2016 +0200
description:
Rewrite arm support to handle armv8 CPUs in 32-bit mode.
details: /var/hg/gmp/rev/66a0f5e25a26
changeset: 17078:66a0f5e25a26
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Oct 14 11:43:13 2016 +0200
description:
Rewrite for shallower recurrency.
details: /var/hg/gmp/rev/94c787bc3d13
changeset: 17079:94c787bc3d13
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Oct 14 11:46:44 2016 +0200
description:
Initial RISC-V assembly support.
details: /var/hg/gmp/rev/0daf2054873b
changeset: 17080:0daf2054873b
user: Torbjorn Granlund <tg at gmplib.org>
date: Fri Oct 14 11:48:53 2016 +0200
description:
(riscv umul_ppmm): New.
diffstat:
configure.ac | 90 ++++++++++++++++++++++++++++++--------------
longlong.h | 11 ++++-
mpn/generic/addmul_1.c | 93 ++++++++++++++++++++++++---------------------
mpn/generic/submul_1.c | 80 +++++++++++++++++++++------------------
mpn/riscv/64/aors_n.asm | 89 ++++++++++++++++++++++++++++++++++++++++++++
mpn/riscv/64/aorsmul_1.asm | 75 +++++++++++++++++++++++++++++++++++++
mpn/riscv/64/mul_1.asm | 58 ++++++++++++++++++++++++++++
7 files changed, 386 insertions(+), 110 deletions(-)
diffs (truncated from 722 to 300 lines):
diff -r f92bdb96119d -r 0daf2054873b configure.ac
--- a/configure.ac Tue Aug 30 15:55:58 2016 +0200
+++ b/configure.ac Fri Oct 14 11:48:53 2016 +0200
@@ -594,9 +594,14 @@
arm*-*-* | aarch64*-*-*)
gcc_cflags="$gcc_cflags $fomit_frame_pointer"
gcc_cflags_optlist="arch fpmode neon tune"
+ gcc_64_cflags_optlist="arch tune"
gcc_testlist="gcc-arm-umodsi"
- GMP_INCLUDE_MPN(arm/arm-defs.m4)
+ gcc_64_testlist=""
CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+ CALLING_CONVENTIONS_OBJS_64=""
+ cclist_64="gcc cc"
+ any_32_testlist="sizeof-long-4"
+ any_64_testlist="sizeof-long-8"
# This is needed for clang, which is not content with flags like -mfpu=neon
# alone.
@@ -611,14 +616,14 @@
# either enforce them, or organise to strip paths as the corresponding
# options fail.
case $host_cpu in
+ armxscale | arm7ej | arm9te | arm9e* | arm10* | armv5*)
+ path="arm/v5 arm"
+ gcc_cflags_arch="-march=armv5"
+ ;;
armsa1 | arm7t* | arm9t* | armv4t*)
path="arm"
gcc_cflags_arch="-march=armv4"
;;
- armxscale | arm7ej | arm9te | arm9e* | arm10* | armv5*)
- path="arm/v5 arm"
- gcc_cflags_arch="-march=armv5"
- ;;
arm1156 | armv6t2*)
path="arm/v6t2 arm/v6 arm/v5 arm"
gcc_cflags_arch="-march=armv6t2"
@@ -682,46 +687,60 @@
gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
;;
- armcortexa53)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64/cora53 arm64"
- gcc_cflags_arch=""
+ armcortexa53 | armcortexa53neon)
+ abilist="64 32"
+ path="arm/neon arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64/cora53 arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=cortex-a53"
;;
- armcortexa57)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64"
- gcc_cflags_arch=""
+ armcortexa57 | armcortexa57neon)
+ abilist="64 32"
+ path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64/cora57 arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=cortex-a57"
;;
- armcortexa72)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64"
- gcc_cflags_arch=""
+ armcortexa72 | armcortexa72neon)
+ abilist="64 32"
+ path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64/cora72 arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=cortex-a72"
;;
armexynosm1)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64"
- gcc_cflags_arch=""
+ abilist="64 32"
+ path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=exynosm1"
;;
armthunderx)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64"
- gcc_cflags_arch=""
+ abilist="64 32"
+ path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=thunderx"
;;
armxgene1)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64/xgene1 arm64"
- gcc_cflags_arch=""
+ abilist="64 32"
+ path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64/xgene1 arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune="-mtune=xgene1"
;;
aarch64*)
- unset CALLING_CONVENTIONS_OBJS
- path="arm64"
- gcc_cflags_arch=""
+ abilist="64 32"
+ path="arm/v7a/cora15/neon arm/neon arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ path_64="arm64"
+ gcc_cflags_arch="-march=armv8-a"
+ gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune=""
;;
*)
@@ -1273,6 +1292,13 @@
;;
+ # RISC-V
+ [riscv-*-*])
+ cclist="gcc"
+ path="riscv/64"
+ ;;
+
+
# IBM System/390 and z/Architecture
S390_PATTERN | S390X_PATTERN)
abilist="32"
@@ -3581,6 +3607,12 @@
GMP_ASM_ALIGN_LOG
case $host in
+ arm*-*-* | aarch64*-*-*)
+ case $ABI in
+ 32)
+ GMP_INCLUDE_MPN(arm/arm-defs.m4) ;;
+ esac
+ ;;
hppa*-*-*)
# for both pa32 and pa64
GMP_INCLUDE_MPN(pa32/pa-defs.m4)
diff -r f92bdb96119d -r 0daf2054873b longlong.h
--- a/longlong.h Tue Aug 30 15:55:58 2016 +0200
+++ b/longlong.h Fri Oct 14 11:48:53 2016 +0200
@@ -1,6 +1,6 @@
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
-Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2015 Free Software
+Copyright 1991-1994, 1996, 1997, 1999-2005, 2007-2009, 2011-2016 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
@@ -1584,6 +1584,15 @@
} while (0)
#endif /* RT/ROMP */
+#if defined (__riscv64) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, u, v) \
+ do { \
+ UDItype __u = (u), __v = (v); \
+ (pl) = __u * __v; \
+ __asm__ ("mulhu\t%2, %1, %0" : "=r" (ph) : "%r" (__u), "r" (__v)); \
+ } while (0)
+#endif
+
#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
diff -r f92bdb96119d -r 0daf2054873b mpn/generic/addmul_1.c
--- a/mpn/generic/addmul_1.c Tue Aug 30 15:55:58 2016 +0200
+++ b/mpn/generic/addmul_1.c Fri Oct 14 11:48:53 2016 +0200
@@ -3,7 +3,8 @@
pointed to by RP. Return the most significant limb of the product,
adjusted for carry-out from the addition.
-Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1992-1994, 1996, 2000, 2002, 2004, 2016 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
@@ -38,30 +39,36 @@
#if GMP_NAIL_BITS == 0
mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
{
- mp_limb_t ul, cl, hpl, lpl, rl;
+ mp_limb_t u0, crec, c, p1, p0, r0;
ASSERT (n >= 1);
ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- cl = 0;
+ crec = 0;
do
{
- ul = *up++;
- umul_ppmm (hpl, lpl, ul, vl);
+ u0 = *up++;
+ umul_ppmm (p1, p0, u0, v0);
- lpl += cl;
- cl = (lpl < cl) + hpl;
+ r0 = *rp;
- rl = *rp;
- lpl = rl + lpl;
- cl += lpl < rl;
- *rp++ = lpl;
+ p0 = r0 + p0;
+ c = r0 > p0;
+
+ p1 = p1 + c;
+
+ r0 = p0 + crec; /* cycle 0, 3, ... */
+ c = p0 > r0; /* cycle 1, 4, ... */
+
+ crec = p1 + c; /* cycle 2, 5, ... */
+
+ *rp++ = r0;
}
while (--n != 0);
- return cl;
+ return crec;
}
#endif
@@ -69,35 +76,35 @@
#if GMP_NAIL_BITS == 1
mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
{
- mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, cl, xl, c1, c2, c3;
+ mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, crec, xl, c1, c2, c3;
ASSERT (n >= 1);
ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
ASSERT_MPN (rp, n);
ASSERT_MPN (up, n);
- ASSERT_LIMB (vl);
+ ASSERT_LIMB (v0);
- shifted_vl = vl << GMP_NAIL_BITS;
- cl = 0;
- prev_hpl = 0;
+ shifted_v0 = v0 << GMP_NAIL_BITS;
+ crec = 0;
+ prev_p1 = 0;
do
{
- ul = *up++;
- rl = *rp;
- umul_ppmm (hpl, lpl, ul, shifted_vl);
- lpl >>= GMP_NAIL_BITS;
- ADDC_LIMB (c1, xl, prev_hpl, lpl);
- ADDC_LIMB (c2, xl, xl, rl);
- ADDC_LIMB (c3, xl, xl, cl);
- cl = c1 + c2 + c3;
+ u0 = *up++;
+ r0 = *rp;
+ umul_ppmm (p1, p0, u0, shifted_v0);
+ p0 >>= GMP_NAIL_BITS;
+ ADDC_LIMB (c1, xl, prev_p1, p0);
+ ADDC_LIMB (c2, xl, xl, r0);
+ ADDC_LIMB (c3, xl, xl, crec);
+ crec = c1 + c2 + c3;
*rp++ = xl;
- prev_hpl = hpl;
+ prev_p1 = p1;
}
while (--n != 0);
- return prev_hpl + cl;
+ return prev_p1 + crec;
}
#endif
@@ -105,34 +112,34 @@
#if GMP_NAIL_BITS >= 2
mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)
{
- mp_limb_t shifted_vl, ul, rl, lpl, hpl, prev_hpl, xw, cl, xl;
+ mp_limb_t shifted_v0, u0, r0, p0, p1, prev_p1, xw, crec, xl;
More information about the gmp-commit mailing list