[Gmp-commit] /var/hg/gmp: Provide basic set of loongarch/64 asm files.
mercurial at gmplib.org
mercurial at gmplib.org
Sat Jul 15 12:32:27 CEST 2023
details: /var/hg/gmp/rev/7e88bbf045ab
changeset: 18386:7e88bbf045ab
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Jul 15 12:18:07 2023 +0200
description:
Provide basic set of loongarch/64 asm files.
diffstat:
.bootstrap | 7 +
ChangeLog | 24 +++
configure.ac | 5 +
mpn/arm64/com.asm | 60 +++++----
mpn/arm64/mul_1.asm | 1 +
mpn/loongarch/64/add_n.asm | 64 ++++++++++
mpn/loongarch/64/aorslsh1_n.asm | 50 ++++++++
mpn/loongarch/64/aorslsh2_n.asm | 50 ++++++++
mpn/loongarch/64/aorslshC_n.asm | 116 ++++++++++++++++++
mpn/loongarch/64/aorsmul_1.asm | 79 ++++++++++++
mpn/loongarch/64/cnd_aors_n.asm | 99 ++++++++++++++++
mpn/loongarch/64/copyd.asm | 75 ++++++++++++
mpn/loongarch/64/copyi.asm | 73 +++++++++++
mpn/loongarch/64/lshift.asm | 120 +++++++++++++++++++
mpn/loongarch/64/mul_1.asm | 63 ++++++++++
mpn/loongarch/64/rshift.asm | 119 +++++++++++++++++++
mpn/loongarch/64/sub_n.asm | 106 +++++++++++++++++
mpn/riscv/64/aors_n.asm | 106 ++++++++++++----
mpn/riscv/64/sec_tabselect.asm | 125 +++++++++++++++-----
mpn/s390_64/submul_1.asm | 6 +-
mpn/x86/aorsmul_1.asm | 202 +++++++++++++++++++++-----------
mpn/x86/pentium4/sse2/sqr_basecase.asm | 2 +-
mpn/x86_64/coreibwl/addmul_1.asm | 4 +-
mpn/x86_64/coreibwl/mul_basecase.asm | 5 +-
mpn/x86_64/fastavx/copyd.asm | 21 ++-
mpn/x86_64/fastavx/copyi.asm | 21 ++-
mpn/x86_64/zen/aorrlsh_n.asm | 5 +-
mpn/x86_64/zen/sbpi1_bdiv_r.asm | 18 +-
tests/devel/aors_n.c | 13 ++
29 files changed, 1449 insertions(+), 190 deletions(-)
diffs (truncated from 2015 to 300 lines):
diff -r 47f015491ce4 -r 7e88bbf045ab .bootstrap
--- a/.bootstrap Thu Jul 06 09:48:50 2023 +0200
+++ b/.bootstrap Sat Jul 15 12:18:07 2023 +0200
@@ -7,6 +7,13 @@
autoreconf -i -s
# aclocal && libtoolize && autoconf && autoheader && automake -a
+cp -L ltmain.sh foo; rm ltmain.sh; mv foo ltmain.sh
+cp -L ylwrap foo; rm ylwrap; mv foo ylwrap
+cp -L install-sh foo; rm install-sh; mv foo install-sh
+cp -L missing foo; rm missing; mv foo missing
+cp -L test-driver foo;rm test-driver;mv foo test-driver
+rm -rf autom4te.cache
+
cat >doc/version.texi <<EOF
@set UPDATED 19 January 2038
@set UPDATED-MONTH January 2038
diff -r 47f015491ce4 -r 7e88bbf045ab ChangeLog
--- a/ChangeLog Thu Jul 06 09:48:50 2023 +0200
+++ b/ChangeLog Sat Jul 15 12:18:07 2023 +0200
@@ -5,6 +5,12 @@
* configfsf.guess: Updated to version 2023-06-23, from gnulib.
* configfsf.sub: Updated to version 2023-06-26, from gnulib.
+2023-06-29 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac: Make sparc -mptr64 argument optional.
+
+ * configure.ac, config.guess, config.sub, acinclude.m4: Handle power10.
+
2023-06-23 Marius Hillenbrand <mhillen at linux.ibm.com>
Stefan Liebler <stli at linux.ibm.com>
@@ -15,6 +21,13 @@
* mpn/s390_64/z13/mul_1.c: New file.
* mpn/s390_64/z13/mul_basecase.c: New file.
+2022-11-02 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/x86_64/bd1/popcount.asm: Prefer movd to movq for register moves.
+ * mpn/x86_64/bd1/hamdist.asm: Likewise.
+ * mpn/x86_64/core2/hamdist.asm: Likewise.
+ * mpn/x86_64/core2/popcount.asm: Likewise.
+
2022-10-28 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpz/nextprime.c (findnext): Use TMP_ALLOC_TYPE to allocate
@@ -43,10 +56,21 @@
* tests/mpz/t-aorsmul.c: Test the (r,x,x) case.
* mpz/aorsmul.c: Speed-up the (r,x,x) case (tx:Fredrik Johansson).
+2022-04-24 Torbjörn Granlund <tg at gmplib.org>
+
+ * mpn/arm/v5/gcd_22.asm: New file.
+
2022-04-18 Marc Glisse <marc.glisse at inria.fr>
* gmpxx.h (mpz_class): Do not use mp directly.
+2022-03-30 Torbjörn Granlund <tg at gmplib.org>
+
+ * configure.ac, config.guess, config.sub, acinclude.m4: Handle Intel
+ tremont separately.
+
+ * longlong.h (loongarch64 umul_ppmm): New #define.
+
2022-03-18 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tests/mpz/t-lucm.c: Remove an unused var.
diff -r 47f015491ce4 -r 7e88bbf045ab configure.ac
--- a/configure.ac Thu Jul 06 09:48:50 2023 +0200
+++ b/configure.ac Sat Jul 15 12:18:07 2023 +0200
@@ -1015,6 +1015,11 @@
;;
+ loongarch64-*-*)
+ path="loongarch/64"
+ ;;
+
+
# IRIX 5 and earlier can only run 32-bit o32.
#
# IRIX 6 and up always has a 64-bit mips CPU can run n32 or 64. n32 is
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/arm64/com.asm
--- a/mpn/arm64/com.asm Thu Jul 06 09:48:50 2023 +0200
+++ b/mpn/arm64/com.asm Sat Jul 15 12:18:07 2023 +0200
@@ -1,6 +1,6 @@
dnl ARM64 mpn_com.
-dnl Copyright 2013, 2014 Free Software Foundation, Inc.
+dnl Copyright 2013, 2020 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -31,9 +31,13 @@
include(`../config.m4')
C cycles/limb
-C Cortex-A53 2.25
-C Cortex-A57 1.25
-C X-Gene 1.75
+C Cortex-A53
+C Cortex-A55
+C Cortex-A57
+C Cortex-A72
+C Cortex-A73
+C X-Gene
+C Apple M1
changecom(blah)
@@ -48,37 +52,41 @@
C Copy until rp is 128-bit aligned
tbz rp, #3, L(al2)
- ld1 {v22.1d}, [up], #8
+ ldr x4, [up],#8
sub n, n, #1
- mvn v22.8b, v22.8b
- st1 {v22.1d}, [rp], #8
+ mvn x4, x4
+ str x4, [rp],#8
-L(al2): ld1 {v26.2d}, [up], #16
- subs n, n, #6
- b.lt L(end)
+L(al2): ldp x4,x5, [up],#16
+ sub n, n, #6
+ tbnz n, #63, L(end)
ALIGN(16)
-L(top): ld1 {v22.2d}, [up], #16
- mvn v26.16b, v26.16b
- st1 {v26.2d}, [rp], #16
- ld1 {v26.2d}, [up], #16
- mvn v22.16b, v22.16b
- st1 {v22.2d}, [rp], #16
- subs n, n, #4
- b.ge L(top)
+L(top): ldp x6,x7, [up],#32
+ mvn x4, x4
+ mvn x5, x5
+ stp x4,x5, [rp],#32
+ ldp x4,x5, [up,#-16]
+ mvn x6, x6
+ mvn x7, x7
+ stp x6,x7, [rp,#-16]
+ sub n, n, #4
+ tbz n, #63, L(top)
-L(end): mvn v26.16b, v26.16b
- st1 {v26.2d}, [rp], #16
+L(end): mvn x4, x4
+ mvn x5, x5
+ stp x4,x5, [rp],#16
C Copy last 0-3 limbs. Note that rp is aligned after loop, but not when we
C arrive here via L(bc)
L(bc): tbz n, #1, L(tl1)
- ld1 {v22.2d}, [up], #16
- mvn v22.16b, v22.16b
- st1 {v22.2d}, [rp], #16
+ ldp x4,x5, [up],#16
+ mvn x4, x4
+ mvn x5, x5
+ stp x4,x5, [rp],#16
L(tl1): tbz n, #0, L(tl2)
- ld1 {v22.1d}, [up]
- mvn v22.8b, v22.8b
- st1 {v22.1d}, [rp]
+ ldr x4, [up]
+ mvn x4, x4
+ str x4, [rp]
L(tl2): ret
EPILOGUE()
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/arm64/mul_1.asm
--- a/mpn/arm64/mul_1.asm Thu Jul 06 09:48:50 2023 +0200
+++ b/mpn/arm64/mul_1.asm Sat Jul 15 12:18:07 2023 +0200
@@ -37,6 +37,7 @@
C Cortex-A57 7
C Cortex-A72
C X-Gene 4
+C Apple M1 1
C TODO
C * Start first multiply earlier.
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/loongarch/64/add_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/loongarch/64/add_n.asm Sat Jul 15 12:18:07 2023 +0200
@@ -0,0 +1,64 @@
+dnl Loongarch mpn_add_n
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2023 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp_arg',`$r4')
+define(`ap', `$r5')
+define(`bp', `$r6')
+define(`n', `$r7')
+
+define(`rp', `$r8')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ alsl.d rp, n, rp_arg, 3
+ alsl.d ap, n, ap, 3
+ alsl.d bp, n, bp, 3
+ sub.d n, $r0, n
+ slli.d n, n, 3
+ or $r4, $r0, $r0
+
+L(top): ldx.d $r14, ap, n
+ ldx.d $r13, bp, n
+ add.d $r12, $r14, $r13
+ sltu $r15, $r12, $r13 C cy0
+ add.d $r14, $r12, $r4
+ sltu $r16, $r14, $r4 C cy1 set iff r4=1 & r12=111...1
+ stx.d $r14, rp, n
+ addi.d n, n, 8
+ or $r4, $r15, $r16
+ bnez n, L(top)
+
+ jr $r1
+EPILOGUE()
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/loongarch/64/aorslsh1_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/loongarch/64/aorslsh1_n.asm Sat Jul 15 12:18:07 2023 +0200
@@ -0,0 +1,50 @@
+dnl Loongarch mpn_addlsh1_n, mpn_sublsh1_n.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2023 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(`ADDSUB', `add.d')
+ define(`CARRY', `sltu $1,$2,$3')
+ define(`func', `mpn_addlsh1_n')
More information about the gmp-commit mailing list