[Gmp-commit] /var/hg/gmp: 21 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Aug 29 18:18:46 CEST 2013


details:   /var/hg/gmp/rev/bb2f3994ff5f
changeset: 15933:bb2f3994ff5f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:39:44 2013 +0200
description:
Rewrite.

details:   /var/hg/gmp/rev/0f3e9bb2155d
changeset: 15934:0f3e9bb2155d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:40:32 2013 +0200
description:
New file.

details:   /var/hg/gmp/rev/1e678d02cc50
changeset: 15935:1e678d02cc50
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:40:42 2013 +0200
description:
New file.

details:   /var/hg/gmp/rev/6145d67a4549
changeset: 15936:6145d67a4549
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:42:29 2013 +0200
description:
Replace alpha sqr_diagonal implementations by one sqr_diag_addlsh1.

details:   /var/hg/gmp/rev/651d92c0366f
changeset: 15937:651d92c0366f
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:43:44 2013 +0200
description:
Replace powerpc32 sqr_diagonal implementation by sqr_diag_addlsh1.

details:   /var/hg/gmp/rev/f4afc53adbd2
changeset: 15938:f4afc53adbd2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:44:43 2013 +0200
description:
New file.

details:   /var/hg/gmp/rev/fe5b5dd9317d
changeset: 15939:fe5b5dd9317d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:46:23 2013 +0200
description:
Fix comment typos.

details:   /var/hg/gmp/rev/5b32c45d29ff
changeset: 15940:5b32c45d29ff
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 16:57:25 2013 +0200
description:
Use "test R8(reg)" instead of "bt".

details:   /var/hg/gmp/rev/ef41d169dba1
changeset: 15941:ef41d169dba1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 18:05:36 2013 +0200
description:
New file, closely based on copyi-palignr.asm.

details:   /var/hg/gmp/rev/3e7af4c44d40
changeset: 15942:3e7af4c44d40
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Fri Aug 23 18:08:02 2013 +0200
description:
Trivially simplify wind-down code.

details:   /var/hg/gmp/rev/3272fcdafcaf
changeset: 15943:3272fcdafcaf
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Aug 28 23:09:47 2013 +0200
description:
Grab default divrem_1.asm for sbr and thereby for ibr and hwl.

details:   /var/hg/gmp/rev/dea09c0000a0
changeset: 15944:dea09c0000a0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 16:55:02 2013 +0200
description:
Fix typo.

details:   /var/hg/gmp/rev/6da67516e0ab
changeset: 15945:6da67516e0ab
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 17:17:02 2013 +0200
description:
Handle mulx insn using m4, allowing use without assembler support.

details:   /var/hg/gmp/rev/f6f0e7f22ceb
changeset: 15946:f6f0e7f22ceb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 17:17:36 2013 +0200
description:
More of last change.

details:   /var/hg/gmp/rev/b5339c79bfc5
changeset: 15947:b5339c79bfc5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 17:18:06 2013 +0200
description:
Fix comment typo.

details:   /var/hg/gmp/rev/a8cdcbd95ae3
changeset: 15948:a8cdcbd95ae3
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 17:25:46 2013 +0200
description:
(x86): Remove any mulx paths.  Let bwl path = hwl path.
(fat_path): Add coreihwl.

details:   /var/hg/gmp/rev/6c85622c4c10
changeset: 15949:6c85622c4c10
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 17:26:44 2013 +0200
description:
Cosmetic change.

details:   /var/hg/gmp/rev/ede4cbbb7737
changeset: 15950:ede4cbbb7737
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 17:49:58 2013 +0200
description:
Support Haswell.

details:   /var/hg/gmp/rev/06c60e7089bf
changeset: 15951:06c60e7089bf
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 18:10:01 2013 +0200
description:
Add nops to ABI=32 operand extension code.

details:   /var/hg/gmp/rev/f6b6b6dd8b8c
changeset: 15952:f6b6b6dd8b8c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 18:17:44 2013 +0200
description:
Clean up some bundlings.

details:   /var/hg/gmp/rev/00242b464786
changeset: 15953:00242b464786
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 29 18:18:37 2013 +0200
description:
ChangeLog

diffstat:

 ChangeLog                                     |   54 ++
 configure.ac                                  |    6 +-
 mpn/alpha/aorslsh1_n.asm                      |  227 +++--------
 mpn/alpha/aorslsh2_n.asm                      |  156 ++++++++
 mpn/alpha/ev6/aorslsh1_n.asm                  |  161 ++++++++
 mpn/alpha/ev6/sqr_diagonal.asm                |  115 ------
 mpn/alpha/sqr_diag_addlsh1.asm                |   82 ++++
 mpn/alpha/sqr_diagonal.asm                    |   65 ---
 mpn/arm/v6/sqr_basecase.asm                   |    2 +-
 mpn/ia64/add_n_sub_n.asm                      |    1 +
 mpn/ia64/aors_n.asm                           |   13 +-
 mpn/ia64/aorsorrlshC_n.asm                    |    2 +
 mpn/ia64/cnd_aors_n.asm                       |    4 +-
 mpn/ia64/divrem_2.asm                         |    3 +-
 mpn/ia64/logops_n.asm                         |    2 +
 mpn/ia64/lorrshift.asm                        |    2 +
 mpn/ia64/lshiftc.asm                          |    2 +
 mpn/ia64/mod_34lsub1.asm                      |    1 +
 mpn/ia64/mul_2.asm                            |    2 +-
 mpn/ia64/popcount.asm                         |    1 +
 mpn/ia64/rsh1aors_n.asm                       |    2 +
 mpn/powerpc32/sqr_diag_addlsh1.asm            |   68 +++
 mpn/powerpc32/sqr_diagonal.asm                |  103 -----
 mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm |   82 ++++
 mpn/x86_64/atom/copyd.asm                     |    2 +-
 mpn/x86_64/atom/copyi.asm                     |    2 +-
 mpn/x86_64/coreihwl/addmul_2.asm              |  227 ++++++++++++
 mpn/x86_64/coreihwl/aorsmul_1.asm             |  186 ++++++++++
 mpn/x86_64/coreihwl/mul_1.asm                 |  142 +++++++
 mpn/x86_64/coreihwl/mul_2.asm                 |  162 ++++++++
 mpn/x86_64/coreihwl/mul_basecase.asm          |  431 +++++++++++++++++++++++
 mpn/x86_64/coreihwl/mulx/addmul_2.asm         |  227 ------------
 mpn/x86_64/coreihwl/mulx/aorsmul_1.asm        |  187 ----------
 mpn/x86_64/coreihwl/mulx/mul_1.asm            |  142 -------
 mpn/x86_64/coreihwl/mulx/mul_2.asm            |  162 --------
 mpn/x86_64/coreihwl/mulx/mul_basecase.asm     |  431 -----------------------
 mpn/x86_64/coreihwl/mulx/sqr_basecase.asm     |  474 -------------------------
 mpn/x86_64/coreihwl/sqr_basecase.asm          |  476 ++++++++++++++++++++++++++
 mpn/x86_64/coreisbr/divrem_1.asm              |   26 +
 mpn/x86_64/divrem_1.asm                       |    3 +-
 mpn/x86_64/fastsse/com-palignr.asm            |  291 +++++++++++++++
 mpn/x86_64/fastsse/copyd-palignr.asm          |   44 +-
 mpn/x86_64/fastsse/copyi-palignr.asm          |   44 +-
 mpn/x86_64/fastsse/copyi.asm                  |    8 +-
 mpn/x86_64/fastsse/lshift-movdqu2.asm         |    8 +-
 mpn/x86_64/fastsse/lshiftc-movdqu2.asm        |    8 +-
 mpn/x86_64/fastsse/rshift-movdqu2.asm         |    8 +-
 mpn/x86_64/fastsse/tabselect.asm              |    6 +-
 mpn/x86_64/fat/fat.c                          |    6 +
 mpn/x86_64/x86_64-defs.m4                     |   57 +++
 50 files changed, 2776 insertions(+), 2140 deletions(-)

diffs (truncated from 5553 to 300 lines):

diff -r 4b3679330116 -r 00242b464786 ChangeLog
--- a/ChangeLog	Fri Aug 16 17:57:19 2013 +0200
+++ b/ChangeLog	Thu Aug 29 18:18:37 2013 +0200
@@ -1,3 +1,57 @@
+2013-08-29  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/ia64/aors_n.asm: Clean up some bundlings.
+
+	* mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Support Haswell.
+	(fake_cpuid_table): Likewise.
+
+	* configure.ac (x86): Remove any mulx paths.  Let bwl path = hwl path.
+	(fat_path): Add coreihwl.
+
+	* mpn/x86_64/coreihwl/aorsmul_1.asm: Move from `mulx' directory, use
+	mulx() macro.
+	* mpn/x86_64/coreihwl/mul_1.asm: Likewise.
+	* mpn/x86_64/coreihwl/mul_2.asm: Likewise.
+	* mpn/x86_64/coreihwl/mul_basecase.asm: Likewise.
+	* mpn/x86_64/coreihwl/sqr_basecase.asm: Likewise.
+
+	* mpn/x86_64/x86_64-defs.m4 (mulx): New macro.
+	(regnum, regnumh, ix): Supporting macros.
+
+2013-08-28  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/coreisbr/divrem_1.asm: New file.
+
+2013-08-23  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/fastsse/com-palignr.asm: New file, closely based on
+	copyi-palignr.asm.
+
+	* mpn/x86_64/fastsse/copyi.asm: Use "test R8(reg)" instead of "bt".
+	* mpn/x86_64/fastsse/copyd-palignr.asm: Likewise.
+	* mpn/x86_64/fastsse/copyi-palignr.asm: Likewise.
+	* mpn/x86_64/fastsse/lshift-movdqu2.asm: Likewise.
+	* mpn/x86_64/fastsse/lshiftc-movdqu2.asm: Likewise.
+	* mpn/x86_64/fastsse/rshift-movdqu2.asm: Likewise.
+	* mpn/x86_64/fastsse/tabselect.asm: Likewise.
+
+	* mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm: New file.
+
+	* mpn/alpha/aorslsh2_n.asm: New file.
+	* mpn/alpha/aorslsh1_n.asm: Rewrite.
+	* mpn/alpha/ev6/aorslsh1_n.asm: New file.
+
+2013-08-21  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/alpha/sqr_diag_addlsh1.asm: New file.
+	* mpn/alpha/sqr_diagonal.asm: Remove.
+	* mpn/alpha/ev6/sqr_diagonal.asm: Remove.
+
+2013-08-20  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc32/sqr_diag_addlsh1.asm: New file.
+	* mpn/powerpc32/sqr_diagonal.asm: Remove.
+
 2013-08-15  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/coreihwl/mulx/sqr_basecase.asm: New file.
diff -r 4b3679330116 -r 00242b464786 configure.ac
--- a/configure.ac	Fri Aug 16 17:57:19 2013 +0200
+++ b/configure.ac	Thu Aug 29 18:18:37 2013 +0200
@@ -1737,13 +1737,13 @@
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
 	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
-	path_64="x86_64/coreihwl/mulx x86_64/coreihwl x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+	path_64="x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       coreibwl)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
 	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
-	path_64="x86_64/mulx/adx x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+	path_64="x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	# extra_functions_64="missing"	 # enable for bmi2/adx simulation
 	;;
       atom)
@@ -2104,7 +2104,7 @@
 	fat_path="x86_64 x86_64/fat
 		  x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat
 		  x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
-		  x86_64/atom x86_64/nano"
+		  x86_64/coreihwl x86_64/atom x86_64/nano"
 	fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
       fi
 
diff -r 4b3679330116 -r 00242b464786 mpn/alpha/aorslsh1_n.asm
--- a/mpn/alpha/aorslsh1_n.asm	Fri Aug 16 17:57:19 2013 +0200
+++ b/mpn/alpha/aorslsh1_n.asm	Thu Aug 29 18:18:37 2013 +0200
@@ -1,6 +1,6 @@
 dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
 
-dnl  Copyright 2003 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,17 +20,10 @@
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:    12.5
+C EV4:     ?
 C EV5:     6.25
-C EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
+C EV6:     4.5
 
-C TODO
-C  * Write special version for ev6, as this is a slowdown for 100 < n < 2200
-C    compared to separate mpn_lshift and mpn_add_n.
-C  * Use addq instead of sll for left shift, and similarly cmplt instead of srl
-C    for right shift.
-
-dnl  INPUT PARAMETERS
 define(`rp',`r16')
 define(`up',`r17')
 define(`vp',`r18')
@@ -38,12 +31,8 @@
 
 define(`u0', `r8')
 define(`u1', `r1')
-define(`u2', `r2')
-define(`u3', `r3')
 define(`v0', `r4')
 define(`v1', `r5')
-define(`v2', `r6')
-define(`v3', `r7')
 
 define(`cy0', `r0')
 define(`cy1', `r20')
@@ -67,168 +56,98 @@
 
 ASM_START()
 PROLOGUE(func)
-	lda	n, -4(n)
-	bis	r31, r31, cy1
-	and	n, 3, r1
-	beq	r1, $Lb00
-	cmpeq	r1, 1, r2
-	bne	r2, $Lb01
-	cmpeq	r1, 2, r2
-	bne	r2, $Lb10
-$Lb11:	C n = 3, 7, 11, ...
-	ldq	v0, 0(vp)
+	and	n, 2, cy0
+	blbs	n, L(bx1)
+L(bx0):	ldq	v1, 0(vp)
+	ldq	u1, 0(up)
+	nop
+	bne	cy0, L(b10)
+
+L(b00):	lda	vp, 48(vp)
+	lda	up, -16(up)
+	lda	rp, -8(rp)
+	br	r31, L(lo0)
+
+L(b10):	lda	vp, 32(vp)
+	lda	rp, 8(rp)
+	lda	cy0, 0(r31)
+	br	r31, L(lo2)
+
+L(bx1):	ldq	v0, 0(vp)
 	ldq	u0, 0(up)
-	ldq	v1, 8(vp)
-	ldq	u1, 8(up)
-	ldq	v2, 16(vp)
-	ldq	u2, 16(up)
+	lda	cy1, 0(r31)
+	beq	cy0, L(b01)
+
+L(b11):	lda	vp, 40(vp)
+	lda	up, -24(up)
+	lda	rp, 16(rp)
+	br	r31, L(lo3)
+
+L(b01):	lda	n, -4(n)
+	ble	n, L(end)
 	lda	vp, 24(vp)
-	lda	up, 24(up)
-	bge	n, $Loop
-	br	r31, $Lcj3
-$Lb10:	C n = 2, 6, 10, ...
-	bis	r31, r31, cy0
-	ldq	v1, 0(vp)
-	ldq	u1, 0(up)
-	ldq	v2, 8(vp)
-	ldq	u2, 8(up)
-	lda	rp, -8(rp)
-	blt	n, $Lcj2
-	ldq	v3, 16(vp)
-	ldq	u3, 16(up)
-	lda	vp, 48(vp)
-	lda	up, 16(up)
-	br	r31, $LL10
-$Lb01:	C n = 1, 5, 9, ...
-	ldq	v2, 0(vp)
-	ldq	u2, 0(up)
-	lda	rp, -16(rp)
-	blt	n, $Lcj1
-	ldq	v3, 8(vp)
-	ldq	u3, 8(up)
-	ldq	v0, 16(vp)
-	ldq	u0, 16(up)
-	lda	vp, 40(vp)
-	lda	up, 8(up)
-	lda	rp, 32(rp)
-	br	r31, $LL01
-$Lb00:	C n = 4, 8, 12, ...
-	bis	r31, r31, cy0
-	ldq	v3, 0(vp)
-	ldq	u3, 0(up)
-	ldq	v0, 8(vp)
-	ldq	u0, 8(up)
-	ldq	v1, 16(vp)
+	lda	up, -8(up)
+
+	ALIGN(16)
+L(top):	addq	v0, v0, sl	C left shift vlimb
+	ldq	v1, -16(vp)
+	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
+	cmplt	v0, r31, cy0	C carry out #1
 	ldq	u1, 16(up)
-	lda	vp, 32(vp)
-	lda	rp, 8(rp)
-	br	r31, $LL00x
-	ALIGN(16)
-C 0
-$Loop:	sll	v0, 1, sl	C left shift vlimb
-	ldq	v3, 0(vp)
-C 1
-	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
-	ldq	u3, 0(up)
-C 2
 	ADDSUB	ps, cy1, rr	C consume carry from previous operation
-	srl	v0, 63, cy0	C carry out #1
-C 3
 	CARRY(	ps, u0, cy)	C carry out #2
 	stq	rr, 0(rp)
-C 4
 	addq	cy, cy0, cy0	C combine carry out #1 and #2
 	CARRY(	rr, ps, cy)	C carry out #3
-C 5
 	addq	cy, cy0, cy0	C final carry out
 	lda	vp, 32(vp)	C bookkeeping
-C 6
-$LL10:	sll	v1, 1, sl
-	ldq	v0, -24(vp)
-C 7
+L(lo0):	addq	v1, v1, sl
+	ldq	v0, -40(vp)
 	ADDSUB	u1, sl, ps
-	ldq	u0, 8(up)
-C 8
+	cmplt	v1, r31, cy1
+	ldq	u0, 24(up)
 	ADDSUB	ps, cy0, rr
-	srl	v1, 63, cy1
-C 9
-	CARRY(	ps, u1, cy)
-	stq	rr, 8(rp)
-C 10
-	addq	cy, cy1, cy1
-	CARRY(	rr, ps, cy)
-C 11
-	addq	cy, cy1, cy1
-	lda	rp, 32(rp)	C bookkeeping
-C 12
-$LL01:	sll	v2, 1, sl
-	ldq	v1, -16(vp)
-C 13
-	ADDSUB	u2, sl, ps
-	ldq	u1, 16(up)
-C 14
-	ADDSUB	ps, cy1, rr
-	srl	v2, 63, cy0
-C 15
-	CARRY(	ps, u2, cy)
-	stq	rr, -16(rp)
-C 16
-	addq	cy, cy0, cy0
-	CARRY(	rr, ps, cy)
-C 17
-	addq	cy, cy0, cy0
-$LL00x:	lda	up, 32(up)	C bookkeeping
-C 18
-	sll	v3, 1, sl
-	ldq	v2, -8(vp)
-C 19
-	ADDSUB	u3, sl, ps
-	ldq	u2, -8(up)
-C 20
-	ADDSUB	ps, cy0, rr
-	srl	v3, 63, cy1
-C 21
-	CARRY(	ps, u3, cy)
-	stq	rr, -8(rp)
-C 22


More information about the gmp-commit mailing list