[Gmp-commit] /var/hg/gmp: 6 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Mar 14 22:45:47 CET 2013


details:   /var/hg/gmp/rev/9150ec471dd6
changeset: 15578:9150ec471dd6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 14 15:34:55 2013 +0100
description:
Avoid ARM conditional insn execution.

details:   /var/hg/gmp/rev/581c2fb98e17
changeset: 15579:581c2fb98e17
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 14 22:25:47 2013 +0100
description:
(ADDSUB): Remove unused definition.

details:   /var/hg/gmp/rev/a5395f469f70
changeset: 15580:a5395f469f70
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 14 22:26:35 2013 +0100
description:
Remove a redundant ASM_START.

details:   /var/hg/gmp/rev/abab3a171532
changeset: 15581:abab3a171532
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 14 22:44:48 2013 +0100
description:
Provide ARM64 cnd_add_n and cnd_sub_n.

details:   /var/hg/gmp/rev/127b13d0a7bb
changeset: 15582:127b13d0a7bb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 14 22:45:29 2013 +0100
description:
Provide IA64 cnd_add_n and cnd_sub_n.

details:   /var/hg/gmp/rev/6dbd2044d1ce
changeset: 15583:6dbd2044d1ce
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Mar 14 22:45:42 2013 +0100
description:
ChangeLog

diffstat:

 ChangeLog                |   10 +
 mpn/arm/cnd_aors_n.asm   |   28 ++--
 mpn/arm64/aors_n.asm     |    2 -
 mpn/arm64/cnd_aors_n.asm |   88 ++++++++++++++++
 mpn/ia64/aors_n.asm      |    3 +-
 mpn/ia64/cnd_aors_n.asm  |  248 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 361 insertions(+), 18 deletions(-)

diffs (truncated from 476 to 300 lines):

diff -r d9d55fa98fd5 -r 6dbd2044d1ce ChangeLog
--- a/ChangeLog	Thu Mar 14 10:35:21 2013 +0100
+++ b/ChangeLog	Thu Mar 14 22:45:42 2013 +0100
@@ -1,5 +1,15 @@
 2013-03-14  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/ia64/cnd_aors_n.asm: New file.
+
+	* mpn/arm64/cnd_aors_n.asm: New file.
+
+	* mpn/arm64/aors_n.asm (ADDSUB): Remove unused definition.
+
+	* mpn/ia64/aors_n.asm: Remove a redundant ASM_START.
+
+	* mpn/arm/cnd_aors_n.asm: Avoid ARM conditional insn execution.
+
 	* mpn/x86_64/missing.asm: Move from mulx/adx since we cannot currently
 	prune missing.asm from path.
 	* mpn/x86_64/mulx/adx/missing-call.m4: Likewise.
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/arm/cnd_aors_n.asm
--- a/mpn/arm/cnd_aors_n.asm	Thu Mar 14 10:35:21 2013 +0100
+++ b/mpn/arm/cnd_aors_n.asm	Thu Mar 14 22:45:42 2013 +0100
@@ -1,6 +1,6 @@
 dnl  ARM mpn_cnd_add_n, mpn_cnd_sub_n
 
-dnl  Copyright 2012 Free Software Foundation, Inc.
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -55,11 +55,11 @@
 	push	{r4-r11}
 	ldr	n, [sp, #32]
 
+	cmp	cnd, #1
+	sbc	cnd, cnd, cnd		C conditionally set to 0xffffffff
+
 	INITCY				C really only needed for n = 0 (mod 4)
 
-	teq	cnd, #0			C could use this for clearing/setting cy
-	mvnne	cnd, #0			C conditionally set to 0xffffffff
-
 	ands	r4, n, #3
 	beq	L(top)
 	cmp	r4, #2
@@ -68,9 +68,9 @@
 
 L(b3):	ldm	vp!, {r4,r5,r6}
 	ldm	up!, {r8,r9,r10}
-	and	r4, r4, cnd
-	and	r5, r5, cnd
-	and	r6, r6, cnd
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	bic	r6, r6, cnd
 	ADDSUB	r8, r8, r4
 	ADDSUBC	r9, r9, r5
 	ADDSUBC	r10, r10, r6
@@ -82,8 +82,8 @@
 
 L(b2):	ldm	vp!, {r4,r5}
 	ldm	up!, {r8,r9}
-	and	r4, r4, cnd
-	and	r5, r5, cnd
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
 	ADDSUB	r8, r8, r4
 	ADDSUBC	r9, r9, r5
 	stm	rp!, {r8,r9}
@@ -94,7 +94,7 @@
 
 L(b1):	ldr	r4, [vp], #4
 	ldr	r8, [up], #4
-	and	r4, r4, cnd
+	bic	r4, r4, cnd
 	ADDSUB	r8, r8, r4
 	str	r8, [rp], #4
 	sub	n, n, #1
@@ -103,10 +103,10 @@
 
 L(top):	ldm	vp!, {r4,r5,r6,r7}
 	ldm	up!, {r8,r9,r10,r11}
-	and	r4, r4, cnd
-	and	r5, r5, cnd
-	and	r6, r6, cnd
-	and	r7, r7, cnd
+	bic	r4, r4, cnd
+	bic	r5, r5, cnd
+	bic	r6, r6, cnd
+	bic	r7, r7, cnd
 	ADDSUBC	r8, r8, r4
 	ADDSUBC	r9, r9, r5
 	ADDSUBC	r10, r10, r6
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/arm64/aors_n.asm
--- a/mpn/arm64/aors_n.asm	Thu Mar 14 10:35:21 2013 +0100
+++ b/mpn/arm64/aors_n.asm	Thu Mar 14 22:45:42 2013 +0100
@@ -33,7 +33,6 @@
 define(`n',  `x3')
 
 ifdef(`OPERATION_add_n', `
-  define(`ADDSUB',	adds)
   define(`ADDSUBC',	adcs)
   define(`CLRCY',	`cmn	xzr, xzr')
   define(`SETCY',	`cmp	$1, #1')
@@ -41,7 +40,6 @@
   define(`func',	mpn_add_n)
   define(`func_nc',	mpn_add_nc)')
 ifdef(`OPERATION_sub_n', `
-  define(`ADDSUB',	subs)
   define(`ADDSUBC',	sbcs)
   define(`CLRCY',	`cmp	xzr, xzr')
   define(`SETCY',	`subs	$1, xzr, $1')
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/arm64/cnd_aors_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm64/cnd_aors_n.asm	Thu Mar 14 22:45:42 2013 +0100
@@ -0,0 +1,88 @@
+dnl  ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C Cortex-A53	 ?
+C Cortex-A57	 ?
+
+changecom(@&*$)
+
+define(`cnd',	`x0')
+define(`rp',	`x1')
+define(`up',	`x2')
+define(`vp',	`x3')
+define(`n',	`x4')
+
+ifdef(`OPERATION_cnd_add_n', `
+  define(`ADDSUBC',      adcs)
+  define(`CLRCY',	`cmn	xzr, xzr')
+  define(`RETVAL',	`adc	x0, xzr, xzr')
+  define(func,		mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+  define(`ADDSUBC',      sbcs)
+  define(`CLRCY',	`cmp	xzr, xzr')
+  define(`RETVAL',	`sbc	x0, xzr, xzr
+			and	x0, x0, #1')
+  define(func,		mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	cmp	cnd, #1
+	sbc	cnd, cnd, cnd
+
+	CLRCY				C really only needed for n = 0 (mod 4)
+
+	tbz	n, #0, L(1)
+	ldr	x10, [up], #8
+	ldr	x12, [vp], #8
+	bic	x6, x12, cnd
+	ADDSUBC	x8, x10, x6
+	sub	n, n, #1
+	str	x8, [rp], #8
+	cbz	n, L(rt)
+
+L(1):	ldp	x10, x11, [up], #16
+	ldp	x12, x13, [vp], #16
+	sub	n, n, #2
+	cbz	n, L(end)
+
+L(top):	bic	x6, x12, cnd
+	bic	x7, x13, cnd
+	ldp	x12, x13, [vp], #16
+	ADDSUBC	x8, x10, x6
+	ADDSUBC	x9, x11, x7
+	ldp	x10, x11, [up], #16
+	sub	n, n, #2
+	stp	x8, x9, [rp], #16
+	cbnz	n, L(top)
+
+L(end):	bic	x6, x12, cnd
+	bic	x7, x13, cnd
+	ADDSUBC	x8, x10, x6
+	ADDSUBC	x9, x11, x7
+	stp	x8, x9, [rp]
+L(rt):	RETVAL
+	ret
+EPILOGUE()
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/ia64/aors_n.asm
--- a/mpn/ia64/aors_n.asm	Thu Mar 14 10:35:21 2013 +0100
+++ b/mpn/ia64/aors_n.asm	Thu Mar 14 22:45:42 2013 +0100
@@ -30,7 +30,7 @@
 C    "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
 C  * The non-nc code was trimmed cycle for cycle to its current state.  It is
 C    probably hard to save more than an odd cycle there.  The nc code is much
-C    rawer (since tune/speed doesn't have any applicable direct measurements).
+C    cruder (since tune/speed doesn't have any applicable direct measurements).
 C  * Without the nc entry points, this becomes around 1800 bytes of object
 C    code; the nc code adds over 1000 bytes.  We should perhaps sacrifice a
 C    few cycles for the non-nc code and let it fall into the nc code.
@@ -406,7 +406,6 @@
 
 EPILOGUE()
 
-ASM_START()
 PROLOGUE(func)
 	.prologue
 	.save	ar.lc, r2
diff -r d9d55fa98fd5 -r 6dbd2044d1ce mpn/ia64/cnd_aors_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/ia64/cnd_aors_n.asm	Thu Mar 14 22:45:42 2013 +0100
@@ -0,0 +1,248 @@
+dnl  IA-64 mpn_cnd_add_n/mpn_cnd_sub_n.
+
+dnl  Contributed to the GNU project by Torbjörn Granlund.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      ?
+C Itanium 2:    1.5
+
+C INPUT PARAMETERS
+define(`cnd', `r32')
+define(`rp',  `r33')
+define(`up',  `r34')
+define(`vp',  `r35')
+define(`n',   `r36')
+
+ifdef(`OPERATION_cnd_add_n',`
+  define(ADDSUB,	add)
+  define(CND,		ltu)
+  define(INCR,		1)
+  define(LIM,		-1)
+  define(func,    mpn_cnd_add_n)
+')
+ifdef(`OPERATION_cnd_sub_n',`
+  define(ADDSUB,	sub)
+  define(CND,		gtu)
+  define(INCR,		-1)
+  define(LIM,		0)
+  define(func,    mpn_cnd_sub_n)
+')
+
+define(cmpeqor, `cmp.eq.or')
+define(PFDIST, 160)
+
+C Some useful aliases for registers we use
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`x0',`r20') define(`x1',`r21') define(`x2',`r22') define(`x3',`r23')
+define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
+define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
+define(`up1',`up') define(`up2',`r8') define(`upadv',`r1')
+define(`vp1',`vp') define(`vp2',`r9') define(`vpadv',`r11')
+define(`rp1',`rp') define(`rp2',`r10')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+	.prologue
+	.save	ar.lc, r2
+	.body
+ifdef(`HAVE_ABI_32',`
+	addp4	rp = 0, rp		C				M I
+	addp4	up = 0, up		C				M I
+	addp4	vp = 0, vp		C				M I


More information about the gmp-commit mailing list