[Gmp-commit] /var/hg/gmp: Provide basic set of loongarch/64 asm files.
    mercurial at gmplib.org 
    mercurial at gmplib.org
       
    Sat Jul 15 12:32:27 CEST 2023
    
    
  
details:   /var/hg/gmp/rev/7e88bbf045ab
changeset: 18386:7e88bbf045ab
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Sat Jul 15 12:18:07 2023 +0200
description:
Provide basic set of loongarch/64 asm files.
diffstat:
 .bootstrap                             |    7 +
 ChangeLog                              |   24 +++
 configure.ac                           |    5 +
 mpn/arm64/com.asm                      |   60 +++++----
 mpn/arm64/mul_1.asm                    |    1 +
 mpn/loongarch/64/add_n.asm             |   64 ++++++++++
 mpn/loongarch/64/aorslsh1_n.asm        |   50 ++++++++
 mpn/loongarch/64/aorslsh2_n.asm        |   50 ++++++++
 mpn/loongarch/64/aorslshC_n.asm        |  116 ++++++++++++++++++
 mpn/loongarch/64/aorsmul_1.asm         |   79 ++++++++++++
 mpn/loongarch/64/cnd_aors_n.asm        |   99 ++++++++++++++++
 mpn/loongarch/64/copyd.asm             |   75 ++++++++++++
 mpn/loongarch/64/copyi.asm             |   73 +++++++++++
 mpn/loongarch/64/lshift.asm            |  120 +++++++++++++++++++
 mpn/loongarch/64/mul_1.asm             |   63 ++++++++++
 mpn/loongarch/64/rshift.asm            |  119 +++++++++++++++++++
 mpn/loongarch/64/sub_n.asm             |  106 +++++++++++++++++
 mpn/riscv/64/aors_n.asm                |  106 ++++++++++++----
 mpn/riscv/64/sec_tabselect.asm         |  125 +++++++++++++++-----
 mpn/s390_64/submul_1.asm               |    6 +-
 mpn/x86/aorsmul_1.asm                  |  202 +++++++++++++++++++++-----------
 mpn/x86/pentium4/sse2/sqr_basecase.asm |    2 +-
 mpn/x86_64/coreibwl/addmul_1.asm       |    4 +-
 mpn/x86_64/coreibwl/mul_basecase.asm   |    5 +-
 mpn/x86_64/fastavx/copyd.asm           |   21 ++-
 mpn/x86_64/fastavx/copyi.asm           |   21 ++-
 mpn/x86_64/zen/aorrlsh_n.asm           |    5 +-
 mpn/x86_64/zen/sbpi1_bdiv_r.asm        |   18 +-
 tests/devel/aors_n.c                   |   13 ++
 29 files changed, 1449 insertions(+), 190 deletions(-)
diffs (truncated from 2015 to 300 lines):
diff -r 47f015491ce4 -r 7e88bbf045ab .bootstrap
--- a/.bootstrap	Thu Jul 06 09:48:50 2023 +0200
+++ b/.bootstrap	Sat Jul 15 12:18:07 2023 +0200
@@ -7,6 +7,13 @@
 autoreconf -i -s
 # aclocal && libtoolize && autoconf && autoheader && automake -a
 
+cp -L ltmain.sh foo;  rm ltmain.sh;  mv foo ltmain.sh
+cp -L ylwrap foo;     rm ylwrap;     mv foo ylwrap
+cp -L install-sh foo; rm install-sh; mv foo install-sh
+cp -L missing foo;    rm missing;    mv foo missing
+cp -L test-driver foo;rm test-driver;mv foo test-driver
+rm -rf autom4te.cache
+
 cat >doc/version.texi <<EOF
 @set UPDATED 19 January 2038
 @set UPDATED-MONTH January 2038
diff -r 47f015491ce4 -r 7e88bbf045ab ChangeLog
--- a/ChangeLog	Thu Jul 06 09:48:50 2023 +0200
+++ b/ChangeLog	Sat Jul 15 12:18:07 2023 +0200
@@ -5,6 +5,12 @@
 	* configfsf.guess: Updated to version 2023-06-23, from gnulib.
 	* configfsf.sub: Updated to version 2023-06-26, from gnulib.
 
+2023-06-29  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac: Make sparc -mptr64 argument optional.
+
+	* configure.ac, config.guess, config.sub, acinclude.m4: Handle power10.
+
 2023-06-23 Marius Hillenbrand <mhillen at linux.ibm.com>
 	   Stefan Liebler <stli at linux.ibm.com>
 
@@ -15,6 +21,13 @@
 	* mpn/s390_64/z13/mul_1.c: New file.
 	* mpn/s390_64/z13/mul_basecase.c: New file.
 
+2022-11-02  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/x86_64/bd1/popcount.asm: Prefer movd to movq for register moves.
+	* mpn/x86_64/bd1/hamdist.asm: Likewise.
+	* mpn/x86_64/core2/hamdist.asm: Likewise.
+	* mpn/x86_64/core2/popcount.asm: Likewise.
+
 2022-10-28 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpz/nextprime.c (findnext): Use TMP_ALLOC_TYPE to allocate
@@ -43,10 +56,21 @@
 	* tests/mpz/t-aorsmul.c: Test the (r,x,x) case.
 	* mpz/aorsmul.c: Speed-up the (r,x,x) case (tx:Fredrik Johansson).
 
+2022-04-24  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/arm/v5/gcd_22.asm: New file.
+
 2022-04-18  Marc Glisse  <marc.glisse at inria.fr>
 
 	* gmpxx.h (mpz_class): Do not use mp directly.
 
+2022-03-30  Torbjörn Granlund  <tg at gmplib.org>
+
+	* configure.ac, config.guess, config.sub, acinclude.m4: Handle Intel
+	tremont separately.
+
+	* longlong.h (loongarch64 umul_ppmm): New #define.
+
 2022-03-18 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* tests/mpz/t-lucm.c: Remove an unused var.
diff -r 47f015491ce4 -r 7e88bbf045ab configure.ac
--- a/configure.ac	Thu Jul 06 09:48:50 2023 +0200
+++ b/configure.ac	Sat Jul 15 12:18:07 2023 +0200
@@ -1015,6 +1015,11 @@
     ;;
 
 
+  loongarch64-*-*)
+    path="loongarch/64"
+    ;;
+
+
   # IRIX 5 and earlier can only run 32-bit o32.
   #
   # IRIX 6 and up always has a 64-bit mips CPU can run n32 or 64.  n32 is
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/arm64/com.asm
--- a/mpn/arm64/com.asm	Thu Jul 06 09:48:50 2023 +0200
+++ b/mpn/arm64/com.asm	Sat Jul 15 12:18:07 2023 +0200
@@ -1,6 +1,6 @@
 dnl  ARM64 mpn_com.
 
-dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
+dnl  Copyright 2013, 2020 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -31,9 +31,13 @@
 include(`../config.m4')
 
 C	     cycles/limb
-C Cortex-A53	 2.25
-C Cortex-A57	 1.25
-C X-Gene	 1.75
+C Cortex-A53	 
+C Cortex-A55	 
+C Cortex-A57	 
+C Cortex-A72	 
+C Cortex-A73	 
+C X-Gene	 
+C Apple M1	 
 
 changecom(blah)
 
@@ -48,37 +52,41 @@
 
 C Copy until rp is 128-bit aligned
 	tbz	rp, #3, L(al2)
-	ld1	{v22.1d}, [up], #8
+	ldr	x4, [up],#8
 	sub	n, n, #1
-	mvn	v22.8b, v22.8b
-	st1	{v22.1d}, [rp], #8
+	mvn	x4, x4
+	str	x4, [rp],#8
 
-L(al2):	ld1	{v26.2d}, [up], #16
-	subs	n, n, #6
-	b.lt	L(end)
+L(al2):	ldp	x4,x5, [up],#16
+	sub	n, n, #6
+	tbnz	n, #63, L(end)
 
 	ALIGN(16)
-L(top):	ld1	{v22.2d}, [up], #16
-	mvn	v26.16b, v26.16b
-	st1	{v26.2d}, [rp], #16
-	ld1	{v26.2d}, [up], #16
-	mvn	v22.16b, v22.16b
-	st1	{v22.2d}, [rp], #16
-	subs	n, n, #4
-	b.ge	L(top)
+L(top):	ldp	x6,x7, [up],#32
+	mvn	x4, x4
+	mvn	x5, x5
+	stp	x4,x5, [rp],#32
+	ldp	x4,x5, [up,#-16]
+	mvn	x6, x6
+	mvn	x7, x7
+	stp	x6,x7, [rp,#-16]
+	sub	n, n, #4
+	tbz	n, #63, L(top)
 
-L(end):	mvn	v26.16b, v26.16b
-	st1	{v26.2d}, [rp], #16
+L(end):	mvn	x4, x4
+	mvn	x5, x5
+	stp	x4,x5, [rp],#16
 
 C Copy last 0-3 limbs.  Note that rp is aligned after loop, but not when we
 C arrive here via L(bc)
 L(bc):	tbz	n, #1, L(tl1)
-	ld1	{v22.2d}, [up], #16
-	mvn	v22.16b, v22.16b
-	st1	{v22.2d}, [rp], #16
+	ldp	x4,x5, [up],#16
+	mvn	x4, x4
+	mvn	x5, x5
+	stp	x4,x5, [rp],#16
 L(tl1):	tbz	n, #0, L(tl2)
-	ld1	{v22.1d}, [up]
-	mvn	v22.8b, v22.8b
-	st1	{v22.1d}, [rp]
+	ldr	x4, [up]
+	mvn	x4, x4
+	str	x4, [rp]
 L(tl2):	ret
 EPILOGUE()
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/arm64/mul_1.asm
--- a/mpn/arm64/mul_1.asm	Thu Jul 06 09:48:50 2023 +0200
+++ b/mpn/arm64/mul_1.asm	Sat Jul 15 12:18:07 2023 +0200
@@ -37,6 +37,7 @@
 C Cortex-A57	 7
 C Cortex-A72
 C X-Gene	 4
+C Apple M1	 1
 
 C TODO
 C  * Start first multiply earlier.
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/loongarch/64/add_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/loongarch/64/add_n.asm	Sat Jul 15 12:18:07 2023 +0200
@@ -0,0 +1,64 @@
+dnl  Loongarch mpn_add_n
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2023 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp_arg',`$r4')
+define(`ap',	`$r5')
+define(`bp',	`$r6')
+define(`n',	`$r7')
+
+define(`rp',	`$r8')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	alsl.d	rp, n, rp_arg, 3	C rp = rp_arg + 8*n, just past the last result limb
+	alsl.d	ap, n, ap, 3		C ap += 8*n, same biasing for the first source
+	alsl.d	bp, n, bp, 3		C bp += 8*n, same biasing for the second source
+	sub.d	n, $r0, n		C n = -n
+	slli.d	n, n, 3			C n = -8*n, a negative byte offset that counts up to 0
+	or	$r4, $r0, $r0		C carry = 0; r4 is free now that rp_arg was copied to rp
+
+L(top):	ldx.d	$r14, ap, n	C r14 = limb at ap + n
+	ldx.d	$r13, bp, n		C r13 = limb at bp + n
+	add.d	$r12, $r14, $r13	C r12 = sum of the two limbs, carry-in not yet added
+	sltu	$r15, $r12, $r13	C cy0 = 1 iff the limb addition wrapped
+	add.d	$r14, $r12, $r4		C r14 = sum plus incoming carry
+	sltu	$r16, $r14, $r4		C cy1 set iff r4=1 & r12=111...1
+	stx.d	$r14, rp, n		C store result limb at rp + n
+	addi.d	n, n, 8			C step the offset to the next 8-byte limb
+	or	$r4, $r15, $r16		C carry for the next limb; cy0 and cy1 cannot both be set
+	bnez	n, L(top)		C test is at the bottom, so the body runs at least once; assumes n >= 1 on entry
+
+	jr	$r1			C jump to the address in r1 with the final carry left in r4
+EPILOGUE()
diff -r 47f015491ce4 -r 7e88bbf045ab mpn/loongarch/64/aorslsh1_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/loongarch/64/aorslsh1_n.asm	Sat Jul 15 12:18:07 2023 +0200
@@ -0,0 +1,50 @@
+dnl  Loongarch mpn_addlsh1_n, mpn_sublsh1_n.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2023 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(`ADDSUB', `add.d')
+  define(`CARRY',  `sltu $1,$2,$3')
+  define(`func',   `mpn_addlsh1_n')
    
    
More information about the gmp-commit
mailing list