[Gmp-commit] /var/hg/gmp: 21 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Aug 29 18:18:46 CEST 2013
details: /var/hg/gmp/rev/bb2f3994ff5f
changeset: 15933:bb2f3994ff5f
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:39:44 2013 +0200
description:
Rewrite.
details: /var/hg/gmp/rev/0f3e9bb2155d
changeset: 15934:0f3e9bb2155d
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:40:32 2013 +0200
description:
New file.
details: /var/hg/gmp/rev/1e678d02cc50
changeset: 15935:1e678d02cc50
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:40:42 2013 +0200
description:
New file.
details: /var/hg/gmp/rev/6145d67a4549
changeset: 15936:6145d67a4549
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:42:29 2013 +0200
description:
Replace alpha sqr_diagonal implementations by one sqr_diag_addlsh1.
details: /var/hg/gmp/rev/651d92c0366f
changeset: 15937:651d92c0366f
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:43:44 2013 +0200
description:
Replace powerpc32 sqr_diagonal implementation by sqr_diag_addlsh1.
details: /var/hg/gmp/rev/f4afc53adbd2
changeset: 15938:f4afc53adbd2
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:44:43 2013 +0200
description:
New file.
details: /var/hg/gmp/rev/fe5b5dd9317d
changeset: 15939:fe5b5dd9317d
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:46:23 2013 +0200
description:
Fix comment typos.
details: /var/hg/gmp/rev/5b32c45d29ff
changeset: 15940:5b32c45d29ff
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 16:57:25 2013 +0200
description:
Use "test R8(reg)" instead of "bt".
details: /var/hg/gmp/rev/ef41d169dba1
changeset: 15941:ef41d169dba1
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 18:05:36 2013 +0200
description:
New file, closely based on copyi-palignr.asm.
details: /var/hg/gmp/rev/3e7af4c44d40
changeset: 15942:3e7af4c44d40
user: Torbjorn Granlund <tege at gmplib.org>
date: Fri Aug 23 18:08:02 2013 +0200
description:
Trivially simplify wind-down code.
details: /var/hg/gmp/rev/3272fcdafcaf
changeset: 15943:3272fcdafcaf
user: Torbjorn Granlund <tege at gmplib.org>
date: Wed Aug 28 23:09:47 2013 +0200
description:
Grab default divrem_1.asm for sbr and thereby for ibr and hwl.
details: /var/hg/gmp/rev/dea09c0000a0
changeset: 15944:dea09c0000a0
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 16:55:02 2013 +0200
description:
Fix typo.
details: /var/hg/gmp/rev/6da67516e0ab
changeset: 15945:6da67516e0ab
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 17:17:02 2013 +0200
description:
Handle mulx insn using m4, allowing use without assembler support.
details: /var/hg/gmp/rev/f6f0e7f22ceb
changeset: 15946:f6f0e7f22ceb
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 17:17:36 2013 +0200
description:
More of last change.
details: /var/hg/gmp/rev/b5339c79bfc5
changeset: 15947:b5339c79bfc5
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 17:18:06 2013 +0200
description:
Fix comment typo.
details: /var/hg/gmp/rev/a8cdcbd95ae3
changeset: 15948:a8cdcbd95ae3
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 17:25:46 2013 +0200
description:
(x86): Remove any mulx paths. Let bwl path = hwl path.
(fat_path): Add coreihwl.
details: /var/hg/gmp/rev/6c85622c4c10
changeset: 15949:6c85622c4c10
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 17:26:44 2013 +0200
description:
Cosmetic change.
details: /var/hg/gmp/rev/ede4cbbb7737
changeset: 15950:ede4cbbb7737
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 17:49:58 2013 +0200
description:
Support Haswell.
details: /var/hg/gmp/rev/06c60e7089bf
changeset: 15951:06c60e7089bf
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 18:10:01 2013 +0200
description:
Add nops to ABI=32 operand extension code.
details: /var/hg/gmp/rev/f6b6b6dd8b8c
changeset: 15952:f6b6b6dd8b8c
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 18:17:44 2013 +0200
description:
Clean up some bundlings.
details: /var/hg/gmp/rev/00242b464786
changeset: 15953:00242b464786
user: Torbjorn Granlund <tege at gmplib.org>
date: Thu Aug 29 18:18:37 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 54 ++
configure.ac | 6 +-
mpn/alpha/aorslsh1_n.asm | 227 +++--------
mpn/alpha/aorslsh2_n.asm | 156 ++++++++
mpn/alpha/ev6/aorslsh1_n.asm | 161 ++++++++
mpn/alpha/ev6/sqr_diagonal.asm | 115 ------
mpn/alpha/sqr_diag_addlsh1.asm | 82 ++++
mpn/alpha/sqr_diagonal.asm | 65 ---
mpn/arm/v6/sqr_basecase.asm | 2 +-
mpn/ia64/add_n_sub_n.asm | 1 +
mpn/ia64/aors_n.asm | 13 +-
mpn/ia64/aorsorrlshC_n.asm | 2 +
mpn/ia64/cnd_aors_n.asm | 4 +-
mpn/ia64/divrem_2.asm | 3 +-
mpn/ia64/logops_n.asm | 2 +
mpn/ia64/lorrshift.asm | 2 +
mpn/ia64/lshiftc.asm | 2 +
mpn/ia64/mod_34lsub1.asm | 1 +
mpn/ia64/mul_2.asm | 2 +-
mpn/ia64/popcount.asm | 1 +
mpn/ia64/rsh1aors_n.asm | 2 +
mpn/powerpc32/sqr_diag_addlsh1.asm | 68 +++
mpn/powerpc32/sqr_diagonal.asm | 103 -----
mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm | 82 ++++
mpn/x86_64/atom/copyd.asm | 2 +-
mpn/x86_64/atom/copyi.asm | 2 +-
mpn/x86_64/coreihwl/addmul_2.asm | 227 ++++++++++++
mpn/x86_64/coreihwl/aorsmul_1.asm | 186 ++++++++++
mpn/x86_64/coreihwl/mul_1.asm | 142 +++++++
mpn/x86_64/coreihwl/mul_2.asm | 162 ++++++++
mpn/x86_64/coreihwl/mul_basecase.asm | 431 +++++++++++++++++++++++
mpn/x86_64/coreihwl/mulx/addmul_2.asm | 227 ------------
mpn/x86_64/coreihwl/mulx/aorsmul_1.asm | 187 ----------
mpn/x86_64/coreihwl/mulx/mul_1.asm | 142 -------
mpn/x86_64/coreihwl/mulx/mul_2.asm | 162 --------
mpn/x86_64/coreihwl/mulx/mul_basecase.asm | 431 -----------------------
mpn/x86_64/coreihwl/mulx/sqr_basecase.asm | 474 -------------------------
mpn/x86_64/coreihwl/sqr_basecase.asm | 476 ++++++++++++++++++++++++++
mpn/x86_64/coreisbr/divrem_1.asm | 26 +
mpn/x86_64/divrem_1.asm | 3 +-
mpn/x86_64/fastsse/com-palignr.asm | 291 +++++++++++++++
mpn/x86_64/fastsse/copyd-palignr.asm | 44 +-
mpn/x86_64/fastsse/copyi-palignr.asm | 44 +-
mpn/x86_64/fastsse/copyi.asm | 8 +-
mpn/x86_64/fastsse/lshift-movdqu2.asm | 8 +-
mpn/x86_64/fastsse/lshiftc-movdqu2.asm | 8 +-
mpn/x86_64/fastsse/rshift-movdqu2.asm | 8 +-
mpn/x86_64/fastsse/tabselect.asm | 6 +-
mpn/x86_64/fat/fat.c | 6 +
mpn/x86_64/x86_64-defs.m4 | 57 +++
50 files changed, 2776 insertions(+), 2140 deletions(-)
diffs (truncated from 5553 to 300 lines):
diff -r 4b3679330116 -r 00242b464786 ChangeLog
--- a/ChangeLog Fri Aug 16 17:57:19 2013 +0200
+++ b/ChangeLog Thu Aug 29 18:18:37 2013 +0200
@@ -1,3 +1,57 @@
+2013-08-29 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/ia64/aors_n.asm: Clean up some bundlings.
+
+ * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Support Haswell.
+ (fake_cpuid_table): Likewise.
+
+ * configure.ac (x86): Remove any mulx paths. Let bwl path = hwl path.
+ (fat_path): Add coreihwl.
+
+ * mpn/x86_64/coreihwl/aorsmul_1.asm: Move from `mulx' directory, use
+ mulx() macro.
+ * mpn/x86_64/coreihwl/mul_1.asm: Likewise.
+ * mpn/x86_64/coreihwl/mul_2.asm: Likewise.
+ * mpn/x86_64/coreihwl/mul_basecase.asm: Likewise.
+ * mpn/x86_64/coreihwl/sqr_basecase.asm: Likewise.
+
+ * mpn/x86_64/x86_64-defs.m4 (mulx): New macro.
+ (regnum, regnumh, ix): Supporting macros.
+
+2013-08-28 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/coreisbr/divrem_1.asm: New file.
+
+2013-08-23 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/fastsse/com-palignr.asm: New file, closely based on
+ copyi-palignr.asm.
+
+ * mpn/x86_64/fastsse/copyi.asm Use "test R8(reg)" instead of "bt".
+ * mpn/x86_64/fastsse/copyd-palignr.asm: Likewise.
+ * mpn/x86_64/fastsse/copyi-palignr.asm: Likewise.
+ * mpn/x86_64/fastsse/lshift-movdqu2.asm: Likewise.
+ * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: Likewise.
+ * mpn/x86_64/fastsse/rshift-movdqu2.asm: Likewise.
+ * mpn/x86_64/fastsse/tabselect.asm: Likewise.
+
+ * mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/alpha/aorslsh2_n.asm: New file.
+ * mpn/alpha/aorslsh1_n.asm: Rewrite.
+ * mpn/alpha/ev6/aorslsh1_n.asm: New file.
+
+2013-08-21 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/alpha/sqr_diag_addlsh1.asm: New file.
+ * mpn/alpha/sqr_diagonal.asm: Remove.
+ * mpn/alpha/ev6/sqr_diagonal.asm: Remove.
+
+2013-08-20 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc32/sqr_diag_addlsh1.asm: New file.
+ * mpn/powerpc32/sqr_diagonal.asm: Remove.
+
2013-08-15 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/coreihwl/mulx/sqr_basecase.asm: New file.
diff -r 4b3679330116 -r 00242b464786 configure.ac
--- a/configure.ac Fri Aug 16 17:57:19 2013 +0200
+++ b/configure.ac Thu Aug 29 18:18:37 2013 +0200
@@ -1737,13 +1737,13 @@
gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
- path_64="x86_64/coreihwl/mulx x86_64/coreihwl x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+ path_64="x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
;;
coreibwl)
gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
- path_64="x86_64/mulx/adx x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+ path_64="x86_64/coreihwl x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
# extra_functions_64="missing" # enable for bmi2/adx simulation
;;
atom)
@@ -2104,7 +2104,7 @@
fat_path="x86_64 x86_64/fat
x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat
x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
- x86_64/atom x86_64/nano"
+ x86_64/coreihwl x86_64/atom x86_64/nano"
fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
fi
diff -r 4b3679330116 -r 00242b464786 mpn/alpha/aorslsh1_n.asm
--- a/mpn/alpha/aorslsh1_n.asm Fri Aug 16 17:57:19 2013 +0200
+++ b/mpn/alpha/aorslsh1_n.asm Thu Aug 29 18:18:37 2013 +0200
@@ -1,6 +1,6 @@
dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
-dnl Copyright 2003 Free Software Foundation, Inc.
+dnl Copyright 2003, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -20,17 +20,10 @@
include(`../config.m4')
C cycles/limb
-C EV4: 12.5
+C EV4: ?
C EV5: 6.25
-C EV6: 4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
+C EV6: 4.5
-C TODO
-C * Write special version for ev6, as this is a slowdown for 100 < n < 2200
-C compared to separate mpn_lshift and mpn_add_n.
-C * Use addq instead of sll for left shift, and similarly cmplt instead of srl
-C for right shift.
-
-dnl INPUT PARAMETERS
define(`rp',`r16')
define(`up',`r17')
define(`vp',`r18')
@@ -38,12 +31,8 @@
define(`u0', `r8')
define(`u1', `r1')
-define(`u2', `r2')
-define(`u3', `r3')
define(`v0', `r4')
define(`v1', `r5')
-define(`v2', `r6')
-define(`v3', `r7')
define(`cy0', `r0')
define(`cy1', `r20')
@@ -67,168 +56,98 @@
ASM_START()
PROLOGUE(func)
- lda n, -4(n)
- bis r31, r31, cy1
- and n, 3, r1
- beq r1, $Lb00
- cmpeq r1, 1, r2
- bne r2, $Lb01
- cmpeq r1, 2, r2
- bne r2, $Lb10
-$Lb11: C n = 3, 7, 11, ...
- ldq v0, 0(vp)
+ and n, 2, cy0
+ blbs n, L(bx1)
+L(bx0): ldq v1, 0(vp)
+ ldq u1, 0(up)
+ nop
+ bne cy0, L(b10)
+
+L(b00): lda vp, 48(vp)
+ lda up, -16(up)
+ lda rp, -8(rp)
+ br r31, L(lo0)
+
+L(b10): lda vp, 32(vp)
+ lda rp, 8(rp)
+ lda cy0, 0(r31)
+ br r31, L(lo2)
+
+L(bx1): ldq v0, 0(vp)
ldq u0, 0(up)
- ldq v1, 8(vp)
- ldq u1, 8(up)
- ldq v2, 16(vp)
- ldq u2, 16(up)
+ lda cy1, 0(r31)
+ beq cy0, L(b01)
+
+L(b11): lda vp, 40(vp)
+ lda up, -24(up)
+ lda rp, 16(rp)
+ br r31, L(lo3)
+
+L(b01): lda n, -4(n)
+ ble n, L(end)
lda vp, 24(vp)
- lda up, 24(up)
- bge n, $Loop
- br r31, $Lcj3
-$Lb10: C n = 2, 6, 10, ...
- bis r31, r31, cy0
- ldq v1, 0(vp)
- ldq u1, 0(up)
- ldq v2, 8(vp)
- ldq u2, 8(up)
- lda rp, -8(rp)
- blt n, $Lcj2
- ldq v3, 16(vp)
- ldq u3, 16(up)
- lda vp, 48(vp)
- lda up, 16(up)
- br r31, $LL10
-$Lb01: C n = 1, 5, 9, ...
- ldq v2, 0(vp)
- ldq u2, 0(up)
- lda rp, -16(rp)
- blt n, $Lcj1
- ldq v3, 8(vp)
- ldq u3, 8(up)
- ldq v0, 16(vp)
- ldq u0, 16(up)
- lda vp, 40(vp)
- lda up, 8(up)
- lda rp, 32(rp)
- br r31, $LL01
-$Lb00: C n = 4, 8, 12, ...
- bis r31, r31, cy0
- ldq v3, 0(vp)
- ldq u3, 0(up)
- ldq v0, 8(vp)
- ldq u0, 8(up)
- ldq v1, 16(vp)
+ lda up, -8(up)
+
+ ALIGN(16)
+L(top): addq v0, v0, sl C left shift vlimb
+ ldq v1, -16(vp)
+ ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
+ cmplt v0, r31, cy0 C carry out #1
ldq u1, 16(up)
- lda vp, 32(vp)
- lda rp, 8(rp)
- br r31, $LL00x
- ALIGN(16)
-C 0
-$Loop: sll v0, 1, sl C left shift vlimb
- ldq v3, 0(vp)
-C 1
- ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
- ldq u3, 0(up)
-C 2
ADDSUB ps, cy1, rr C consume carry from previous operation
- srl v0, 63, cy0 C carry out #1
-C 3
CARRY( ps, u0, cy) C carry out #2
stq rr, 0(rp)
-C 4
addq cy, cy0, cy0 C combine carry out #1 and #2
CARRY( rr, ps, cy) C carry out #3
-C 5
addq cy, cy0, cy0 C final carry out
lda vp, 32(vp) C bookkeeping
-C 6
-$LL10: sll v1, 1, sl
- ldq v0, -24(vp)
-C 7
+L(lo0): addq v1, v1, sl
+ ldq v0, -40(vp)
ADDSUB u1, sl, ps
- ldq u0, 8(up)
-C 8
+ cmplt v1, r31, cy1
+ ldq u0, 24(up)
ADDSUB ps, cy0, rr
- srl v1, 63, cy1
-C 9
- CARRY( ps, u1, cy)
- stq rr, 8(rp)
-C 10
- addq cy, cy1, cy1
- CARRY( rr, ps, cy)
-C 11
- addq cy, cy1, cy1
- lda rp, 32(rp) C bookkeeping
-C 12
-$LL01: sll v2, 1, sl
- ldq v1, -16(vp)
-C 13
- ADDSUB u2, sl, ps
- ldq u1, 16(up)
-C 14
- ADDSUB ps, cy1, rr
- srl v2, 63, cy0
-C 15
- CARRY( ps, u2, cy)
- stq rr, -16(rp)
-C 16
- addq cy, cy0, cy0
- CARRY( rr, ps, cy)
-C 17
- addq cy, cy0, cy0
-$LL00x: lda up, 32(up) C bookkeeping
-C 18
- sll v3, 1, sl
- ldq v2, -8(vp)
-C 19
- ADDSUB u3, sl, ps
- ldq u2, -8(up)
-C 20
- ADDSUB ps, cy0, rr
- srl v3, 63, cy1
-C 21
- CARRY( ps, u3, cy)
- stq rr, -8(rp)
-C 22
More information about the gmp-commit mailing list