[Gmp-commit] /var/hg/gmp: 12 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Mar 10 09:32:01 CET 2013


details:   /var/hg/gmp/rev/d3091c1aec19
changeset: 15544:d3091c1aec19
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 01:31:08 2013 +0100
description:
Add a cycle number.

details:   /var/hg/gmp/rev/3cd3fd799c9d
changeset: 15545:3cd3fd799c9d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 01:31:31 2013 +0100
description:
Add a15 specific mul_1 and addmul_1.

details:   /var/hg/gmp/rev/75a2e83f26ab
changeset: 15546:75a2e83f26ab
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:17:47 2013 +0100
description:
Rewrite file from `haswell' subdir.

details:   /var/hg/gmp/rev/02823baddc3e
changeset: 15547:02823baddc3e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:18:35 2013 +0100
description:
New addmul_1 for mulx extension.

details:   /var/hg/gmp/rev/7e2d8d658e3e
changeset: 15548:7e2d8d658e3e
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:19:02 2013 +0100
description:
Rewrite file from `haswell' subdir.

details:   /var/hg/gmp/rev/86b976ff0ce0
changeset: 15549:86b976ff0ce0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:21:21 2013 +0100
description:
Support simulation of mulx and adx.

details:   /var/hg/gmp/rev/63d1e7c5f575
changeset: 15550:63d1e7c5f575
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:22:32 2013 +0100
description:
Remove haswell subdir for now.

details:   /var/hg/gmp/rev/e386b2859273
changeset: 15551:e386b2859273
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:22:59 2013 +0100
description:
Remove haswell subdir for now.

details:   /var/hg/gmp/rev/76e827f5830f
changeset: 15552:76e827f5830f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:27:30 2013 +0100
description:
Simulate some v9-2011 insns

details:   /var/hg/gmp/rev/be7a077f9daa
changeset: 15553:be7a077f9daa
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:28:51 2013 +0100
description:
Add missing header.

details:   /var/hg/gmp/rev/75f6bcea7544
changeset: 15554:75f6bcea7544
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:30:44 2013 +0100
description:
ChangeLog

details:   /var/hg/gmp/rev/bdef598be625
changeset: 15555:bdef598be625
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Mar 10 09:31:56 2013 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                                |   27 +++++
 configure.ac                             |    8 +-
 mpn/arm/v6/mul_1.asm                     |    2 +-
 mpn/arm/v7a/cora15/addmul_1.asm          |   96 +++++++++++++++++++
 mpn/arm/v7a/cora15/mul_1.asm             |   92 ++++++++++++++++++
 mpn/sparc64/ultrasparct3/missing.asm     |   51 ++++++++++
 mpn/sparc64/ultrasparct3/missing.m4      |   60 ++++++++++++
 mpn/x86_64/haswell/mulx/adx/addmul_1.asm |   85 -----------------
 mpn/x86_64/haswell/mulx/mul_1.asm        |   86 -----------------
 mpn/x86_64/mulx/addmul_1.asm             |  138 ++++++++++++++++++++++++++++
 mpn/x86_64/mulx/adx/addmul_1.asm         |  152 +++++++++++++++++++++++++++++++
 mpn/x86_64/mulx/adx/missing-call.m4      |   42 ++++++++
 mpn/x86_64/mulx/adx/missing-inline.m4    |   89 ++++++++++++++++++
 mpn/x86_64/mulx/adx/missing.asm          |  119 ++++++++++++++++++++++++
 mpn/x86_64/mulx/mul_1.asm                |  141 ++++++++++++++++++++++++++++
 tests/mpz/t-cong_2exp.c                  |   70 +++++++++++--
 16 files changed, 1069 insertions(+), 189 deletions(-)

diffs (truncated from 1390 to 300 lines):

diff -r b42278fb6fa2 -r bdef598be625 ChangeLog
--- a/ChangeLog	Sat Mar 09 03:24:54 2013 +0100
+++ b/ChangeLog	Sun Mar 10 09:31:56 2013 +0100
@@ -1,3 +1,30 @@
+2013-03-10  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/mulx/adx/missing.asm: Simulate some mulx/adx insns.
+	* mpn/x86_64/mulx/adx/missing-call.m4: Call variant.
+	* mpn/x86_64/mulx/adx/missing-inline.m4: Inline variant.
+
+	* mpn/sparc64/ultrasparct3/missing.asm: Simulate some v9-2011 insns.
+	* mpn/sparc64/ultrasparct3/missing.m4: Inline or invoke missing.asm for
+	v9-2011 insn.
+
+	* configure.ac: Strip `haswell' from paths for now.
+
+	* mpn/x86_64/mulx/addmul_1.asm: New.
+	* mpn/x86_64/mulx/mul_1.asm: Rewrite file from `haswell' subdir.
+	* mpn/x86_64/mulx/adx/addmul_1.asm: Likewise.
+	* mpn/x86_64/haswell: Remove.
+
+	* mpn/arm/v7a/cora15/mul_1.asm: New file.
+	* mpn/arm/v7a/cora15/addmul_1.asm: New file.
+
+2013-03-09  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/sparc64/ultrasparc1234/add_n.asm: Use g5 instead of g4.
+	* mpn/sparc64/ultrasparc1234/sub_n.asm: Likewise.
+
+	* mpn/sparc64/ultrasparct3/aormul_2.asm: Fix a typo.
+
 2013-03-07  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/arm/v7a/cora9/gmp-mparam.h: New file.
diff -r b42278fb6fa2 -r bdef598be625 configure.ac
--- a/configure.ac	Sat Mar 09 03:24:54 2013 +0100
+++ b/configure.ac	Sun Mar 10 09:31:56 2013 +0100
@@ -1663,14 +1663,14 @@
       coreihwl)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/haswell x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-	path_64="x86_64/haswell/mulx x86_64/haswell x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path_64="x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       coreibwl)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-	path="x86/haswell x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-	path_64="x86_64/haswell/mulx/adx x86_64/haswell/mulx x86_64/haswell x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+	path_64="x86_64/mulx/adx x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       atom)
 	gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
diff -r b42278fb6fa2 -r bdef598be625 mpn/arm/v6/mul_1.asm
--- a/mpn/arm/v6/mul_1.asm	Sat Mar 09 03:24:54 2013 +0100
+++ b/mpn/arm/v6/mul_1.asm	Sun Mar 10 09:31:56 2013 +0100
@@ -24,7 +24,7 @@
 C XScale	 -
 C Cortex-A8	 ?
 C Cortex-A9	 3.25
-C Cortex-A15	 ?
+C Cortex-A15	 4
 
 C TODO
 C  * Micro-optimise feed-in code.
diff -r b42278fb6fa2 -r bdef598be625 mpn/arm/v7a/cora15/addmul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/addmul_1.asm	Sun Mar 10 09:31:56 2013 +0100
@@ -0,0 +1,96 @@
+dnl  ARM mpn_addmul_1 optimised for A15.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A8	 ?
+C Cortex-A9	 6.5			3.25
+C Cortex-A15	 3			this
+
+
+C This runs well on A15 but very poorly on A9.  We have made no effort at
+C improving its A9 performance, as doubling the speed seems hard.
+
+C This is armv5 code, optimized for the armv7a cpu A15.  Its location in the
+C GMP file structure might be misleading.
+
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`v0', `r3')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)		C rp[i] += up[i] * v0 over n limbs, r0 returns the final high limb
+	push	{r4-r8}		C r4-r8 are used as scratch below and restored before return
+
+	adds	r0, r0, #0	C clear carry
+
+	tst	n, #1		C bit 0 of n picks the odd (bx1) or even (bx0) entry setup
+	beq	L(bx0)
+
+L(bx1):	mov	r5, #0		C r5 is the incoming high limb for the adcs at lo1/lo3, none yet
+	ldr	r8, [up], #4	C preload the first source limb, up advances by one limb
+	tst	n, #2
+	beq	L(lo1)		C n mod 4 = 1, enter at the last unrolled step
+	b	L(lo3)		C n mod 4 = 3, enter with three steps left in this pass
+
+L(bx0):	mov	r7, #0		C r7 is the incoming high limb for the add at lo0/lo2, none yet
+	ldr	r8, [up], #4	C preload the first source limb, up advances by one limb
+	adds	r0, r0, #0	C NOTE(review): looks redundant with the adds on entry; confirm tst leaves C untouched
+	tst	n, #2
+	beq	L(lo0)		C n mod 4 = 0, enter at the first unrolled step
+	b	L(lo2)		C n mod 4 = 2, enter with two steps left in this pass
+
+L(top):	ldr	r8, [up], #4	C next source limb
+	str	r6, [rp, #-4]	C store the limb computed at lo1 in the previous pass
+L(lo0):	ldr	r4, [rp], #4	C r4 = destination limb, rp advances by one limb
+	mov	r5, #0		C zero the high half of the accumulator pair
+	umlal	r4, r5, r8, v0	C r5:r4 = r4 + r8 * v0
+	adds	r4, r4, r7	C add previous high limb; no carry-in, it was folded into r7 before subs
+	ldr	r8, [up], #4	C next source limb
+	str	r4, [rp, #-4]	C rp was already advanced, so the finished limb is at offset -4
+L(lo3):	ldr	r6, [rp], #4	C r6 = destination limb, rp advances by one limb
+	mov	r7, #0		C zero the high half of the accumulator pair
+	umlal	r6, r7, r8, v0	C r7:r6 = r6 + r8 * v0
+	adcs	r6, r6, r5	C add previous high limb plus pending carry
+	ldr	r8, [up], #4	C next source limb
+	str	r6, [rp, #-4]	C store the finished limb
+L(lo2):	ldr	r4, [rp], #4	C r4 = destination limb, rp advances by one limb
+	mov	r5, #0		C zero the high half of the accumulator pair
+	umlal	r4, r5, r8, v0	C r5:r4 = r4 + r8 * v0
+	adcs	r4, r4, r7	C add previous high limb plus pending carry
+	ldr	r8, [up], #4	C next source limb
+	str	r4, [rp, #-4]	C store the finished limb
+L(lo1):	ldr	r6, [rp], #4	C r6 = destination limb, rp advances by one limb
+	mov	r7, #0		C zero the high half of the accumulator pair
+	umlal	r6, r7, r8, v0	C r7:r6 = r6 + r8 * v0
+	adcs	r6, r6, r5	C add previous high limb plus pending carry
+	adc	r7, r7, #0	C fold the pending carry into r7 before subs overwrites the flags
+	subs	n, n, #4	C a full pass through the loop handles four limbs
+	bgt	L(top)		C continue while limbs remain
+
+	str	r6, [rp, #-4]	C store the last limb, still held in r6
+	mov	r0, r7		C return the final high limb
+	pop	{r4-r8}
+	bx	lr
+EPILOGUE()
diff -r b42278fb6fa2 -r bdef598be625 mpn/arm/v7a/cora15/mul_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v7a/cora15/mul_1.asm	Sun Mar 10 09:31:56 2013 +0100
@@ -0,0 +1,92 @@
+dnl  ARM mpn_mul_1 optimised for A15.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb		best
+C StrongARM:	 -
+C XScale	 -
+C Cortex-A8	 ?
+C Cortex-A9	 5.25			3.25
+C Cortex-A15	 2.25			this
+
+
+C This runs well on A15 but very poorly on A9.  By scheduling loads and adds
+C it is possible to get good A9 performance as well, but at the cost of using
+C many more (callee-saves) registers.
+
+C This is armv5 code, optimized for the armv7a cpu A15.  Its location in the
+C GMP file structure might be misleading.
+
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`v0', `r3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1c)		C same as mpn_mul_1 but starts from a carry-in limb loaded from the stack
+	ldr	r12, [sp]	C r12 = word at sp on entry, read before the push in the shared body; presumably the fifth argument, confirm against the ABI
+	b	L(ent)		C join the shared body inside mpn_mul_1
+EPILOGUE()
+PROLOGUE(mpn_mul_1)		C rp[i] = low limb of up[i] * v0 plus running carry, r0 returns the final high limb
+	mov	r12, #0		C no carry-in limb for the plain entry point
+L(ent):	push	{r4-r7}		C r4-r7 are used as scratch below and restored before return
+
+	ldr	r6, [up], #4	C first source limb, up advances by one limb
+	tst	n, #1		C bit 0 of n picks the odd (bx1) or even (bx0) entry setup
+	beq	L(bx0)
+
+L(bx1):	umull	r4, r7, r6, v0	C r7:r4 = r6 * v0, high limb in r7 as lo1/lo3 expect
+	adds	r4, r4, r12	C add the carry-in limb, carry flag feeds the following adcs/adc
+	tst	n, #2
+	beq	L(lo1)		C n mod 4 = 1, only the carry fold remains in this pass
+	b	L(lo3)		C n mod 4 = 3, two more products in this pass
+
+L(bx0):	umull	r4, r5, r6, v0	C r5:r4 = r6 * v0, high limb in r5 as lo0/lo2 expect
+	adds	r4, r4, r12	C add the carry-in limb, carry flag feeds the following adcs
+	tst	n, #2
+	beq	L(lo0)		C n mod 4 = 0, three more products in this pass
+	b	L(lo2)		C n mod 4 = 2, one more product in this pass
+
+L(top):	ldr	r6, [up], #4	C next source limb
+	str	r4, [rp], #4	C store the limb finished in the previous pass, rp advances
+	umull	r4, r5, r6, v0	C r5:r4 = r6 * v0
+	adds	r4, r4, r7	C add previous high limb; no carry-in, it was folded into r7 before subs
+L(lo0):	ldr	r6, [up], #4	C next source limb
+	str	r4, [rp], #4	C store the previous result limb, rp advances
+	umull	r4, r7, r6, v0	C r7:r4 = r6 * v0
+	adcs	r4, r4, r5	C add previous high limb plus pending carry
+L(lo3):	ldr	r6, [up], #4	C next source limb
+	str	r4, [rp], #4	C store the previous result limb, rp advances
+	umull	r4, r5, r6, v0	C r5:r4 = r6 * v0
+	adcs	r4, r4, r7	C add previous high limb plus pending carry
+L(lo2):	ldr	r6, [up], #4	C next source limb
+	str	r4, [rp], #4	C store the previous result limb, rp advances
+	umull	r4, r7, r6, v0	C r7:r4 = r6 * v0
+	adcs	r4, r4, r5	C add previous high limb plus pending carry
+L(lo1):	adc	r7, r7, #0	C fold the pending carry into r7 before subs overwrites the flags
+	subs	n, n, #4	C a full pass through the loop handles four limbs
+	bgt	L(top)		C continue while limbs remain
+
+	str	r4, [rp]	C store the last result limb, still held in r4
+	mov	r0, r7		C return the final high limb
+	pop	{r4-r7}
+	bx	lr
+EPILOGUE()
diff -r b42278fb6fa2 -r bdef598be625 mpn/sparc64/ultrasparct3/missing.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/sparc64/ultrasparct3/missing.asm	Sun Mar 10 09:31:56 2013 +0100
@@ -0,0 +1,51 @@
+dnl  SPARC v9-2011 simulation support.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(__gmpn_umulh)
+	save	%sp, -176, %sp
+	ldx	[%sp+2047+176+256], %o0
+	ldx	[%sp+2047+176+256+8], %o1
+	rd	%ccr, %o4
+	srl	%o0, 0, %l4
+	srl	%o1, 0, %l1
+	srlx	%o1, 32, %o1


More information about the gmp-commit mailing list