[Gmp-commit] /var/hg/gmp: Implement workaround for FreeBSD's stale assembler.
mercurial at gmplib.org
Mon Apr 16 17:06:51 CEST 2012
details: /var/hg/gmp/rev/be64af8a4a0b
changeset: 14836:be64af8a4a0b
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Apr 16 17:06:48 2012 +0200
description:
Implement workaround for FreeBSD's stale assembler.
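In outline: FreeBSD ships a version of gas old enough that it does not accept SSSE3 mnemonics such as palignr, so the affected .asm files now call an m4 macro, palignr($imm, %src, %dst), which the new code in x86_64-defs.m4 expands into an equivalent .byte sequence. A hand-worked sketch of that expansion follows the diff below.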
diffstat:
ChangeLog | 10 ++++++
mpn/x86_64/fastsse/copyd-palignr.asm | 30 +++++++++---------
mpn/x86_64/fastsse/copyi-palignr.asm | 34 +++++++++++---------
mpn/x86_64/x86_64-defs.m4 | 59 +++++++++++++++++++++++++++++++++++-
4 files changed, 102 insertions(+), 31 deletions(-)
diffs (266 lines):
diff -r 1e4085291ae4 -r be64af8a4a0b ChangeLog
--- a/ChangeLog Mon Apr 16 07:35:54 2012 +0200
+++ b/ChangeLog Mon Apr 16 17:06:48 2012 +0200
@@ -1,3 +1,13 @@
+2012-04-16 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/x86_64-defs.m4 (palignr): New macro.
+ * mpn/x86_64/fastsse/copyd-palignr.asm: Use palignr macro.
+ * mpn/x86_64/fastsse/copyi-palignr.asm: Likewise.
+
+ (x86_opcode_regxmm, x86_opcode_regxmm_list): New, made from x86 mmx
+ counterparts.
+ (x86_lookup): Copy from x86/x86-defs.m4.
+
2012-04-15 Marco Bodrato <bodrato at mail.dm.unipi.it>
* tests/mpz/t-bin.c: Add more tests on small values.
diff -r 1e4085291ae4 -r be64af8a4a0b mpn/x86_64/fastsse/copyd-palignr.asm
--- a/mpn/x86_64/fastsse/copyd-palignr.asm Mon Apr 16 07:35:54 2012 +0200
+++ b/mpn/x86_64/fastsse/copyd-palignr.asm Mon Apr 16 17:06:48 2012 +0200
@@ -125,28 +125,28 @@
ALIGN(16)
L(utop):sub $16, n
movdqa -16(up), %xmm1
- palignr $8, %xmm1, %xmm0
+ palignr($8, %xmm1, %xmm0)
movdqa %xmm0, -8(rp)
movdqa -32(up), %xmm2
- palignr $8, %xmm2, %xmm1
+ palignr($8, %xmm2, %xmm1)
movdqa %xmm1, -24(rp)
movdqa -48(up), %xmm3
- palignr $8, %xmm3, %xmm2
+ palignr($8, %xmm3, %xmm2)
movdqa %xmm2, -40(rp)
movdqa -64(up), %xmm0
- palignr $8, %xmm0, %xmm3
+ palignr($8, %xmm0, %xmm3)
movdqa %xmm3, -56(rp)
movdqa -80(up), %xmm1
- palignr $8, %xmm1, %xmm0
+ palignr($8, %xmm1, %xmm0)
movdqa %xmm0, -72(rp)
movdqa -96(up), %xmm2
- palignr $8, %xmm2, %xmm1
+ palignr($8, %xmm2, %xmm1)
movdqa %xmm1, -88(rp)
movdqa -112(up), %xmm3
- palignr $8, %xmm3, %xmm2
+ palignr($8, %xmm3, %xmm2)
movdqa %xmm2, -104(rp)
movdqa -128(up), %xmm0
- palignr $8, %xmm0, %xmm3
+ palignr($8, %xmm0, %xmm3)
movdqa %xmm3, -120(rp)
lea -128(up), up
lea -128(rp), rp
@@ -155,16 +155,16 @@
L(uend):bt $3, R32(n)
jnc 1f
movdqa -16(up), %xmm1
- palignr $8, %xmm1, %xmm0
+ palignr($8, %xmm1, %xmm0)
movdqa %xmm0, -8(rp)
movdqa -32(up), %xmm0
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa %xmm1, -24(rp)
movdqa -48(up), %xmm1
- palignr $8, %xmm1, %xmm0
+ palignr($8, %xmm1, %xmm0)
movdqa %xmm0, -40(rp)
movdqa -64(up), %xmm0
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa %xmm1, -56(rp)
lea -64(up), up
lea -64(rp), rp
@@ -172,10 +172,10 @@
1: bt $2, R32(n)
jnc 1f
movdqa -16(up), %xmm1
- palignr $8, %xmm1, %xmm0
+ palignr($8, %xmm1, %xmm0)
movdqa %xmm0, -8(rp)
movdqa -32(up), %xmm0
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa %xmm1, -24(rp)
lea -32(up), up
lea -32(rp), rp
@@ -183,7 +183,7 @@
1: bt $1, R32(n)
jnc 1f
movdqa -16(up), %xmm1
- palignr $8, %xmm1, %xmm0
+ palignr($8, %xmm1, %xmm0)
movdqa %xmm0, -8(rp)
lea -16(up), up
lea -16(rp), rp
diff -r 1e4085291ae4 -r be64af8a4a0b mpn/x86_64/fastsse/copyi-palignr.asm
--- a/mpn/x86_64/fastsse/copyi-palignr.asm Mon Apr 16 07:35:54 2012 +0200
+++ b/mpn/x86_64/fastsse/copyi-palignr.asm Mon Apr 16 17:06:48 2012 +0200
@@ -127,28 +127,28 @@
sub $16, n
movdqa 104(up), %xmm2
movdqa %xmm0, -128(rp)
-L(um): palignr $8, %xmm2, %xmm3
+L(um): palignr($8, %xmm2, %xmm3)
movdqa 88(up), %xmm1
movdqa %xmm3, 112(rp)
- palignr $8, %xmm1, %xmm2
+ palignr($8, %xmm1, %xmm2)
movdqa 72(up), %xmm0
movdqa %xmm2, 96(rp)
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa 56(up), %xmm3
movdqa %xmm1, 80(rp)
- palignr $8, %xmm3, %xmm0
+ palignr($8, %xmm3, %xmm0)
movdqa 40(up), %xmm2
movdqa %xmm0, 64(rp)
- palignr $8, %xmm2, %xmm3
+ palignr($8, %xmm2, %xmm3)
movdqa 24(up), %xmm1
movdqa %xmm3, 48(rp)
- palignr $8, %xmm1, %xmm2
+ palignr($8, %xmm1, %xmm2)
movdqa 8(up), %xmm0
movdqa %xmm2, 32(rp)
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa -8(up), %xmm3
movdqa %xmm1, 16(rp)
- palignr $8, %xmm3, %xmm0
+ palignr($8, %xmm3, %xmm0)
lea 128(up), up
lea 128(rp), rp
jnc L(utop)
@@ -159,16 +159,16 @@
jnc 1f
movdqa 56(up), %xmm3
movdqa 40(up), %xmm2
- palignr $8, %xmm2, %xmm3
+ palignr($8, %xmm2, %xmm3)
movdqa 24(up), %xmm1
movdqa %xmm3, 48(rp)
- palignr $8, %xmm1, %xmm2
+ palignr($8, %xmm1, %xmm2)
movdqa 8(up), %xmm0
movdqa %xmm2, 32(rp)
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa -8(up), %xmm3
movdqa %xmm1, 16(rp)
- palignr $8, %xmm3, %xmm0
+ palignr($8, %xmm3, %xmm0)
lea 64(up), up
movdqa %xmm0, (rp)
lea 64(rp), rp
@@ -177,10 +177,10 @@
jnc 1f
movdqa 24(up), %xmm1
movdqa 8(up), %xmm0
- palignr $8, %xmm0, %xmm1
+ palignr($8, %xmm0, %xmm1)
movdqa -8(up), %xmm3
movdqa %xmm1, 16(rp)
- palignr $8, %xmm3, %xmm0
+ palignr($8, %xmm3, %xmm0)
lea 32(up), up
movdqa %xmm0, (rp)
lea 32(rp), rp
@@ -189,7 +189,7 @@
jnc 1f
movdqa 8(up), %xmm0
movdqa -8(up), %xmm3
- palignr $8, %xmm3, %xmm0
+ palignr($8, %xmm3, %xmm0)
lea 16(up), up
movdqa %xmm0, (rp)
lea 16(rp), rp
@@ -239,4 +239,8 @@
mov %r9, 16(rp)
1: DOS64_EXIT()
ret
+
+palignr($7, %xmm8, %xmm9)
+palignr($7, %xmm0, %xmm9)
+palignr($7, %xmm8, %xmm1)
EPILOGUE()
diff -r 1e4085291ae4 -r be64af8a4a0b mpn/x86_64/x86_64-defs.m4
--- a/mpn/x86_64/x86_64-defs.m4 Mon Apr 16 07:35:54 2012 +0200
+++ b/mpn/x86_64/x86_64-defs.m4 Mon Apr 16 17:06:48 2012 +0200
@@ -2,7 +2,7 @@
dnl m4 macros for amd64 assembler.
-dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2011
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2011, 2012
dnl Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -211,4 +211,61 @@
define(`PROTECT', `.protected $1')
+dnl Usage: x86_lookup(target, key,value, key,value, ...)
+dnl
+dnl Look for `target' among the `key' parameters.
+dnl
+dnl x86_lookup expands to the corresponding `value', or generates an error
+dnl if `target' isn't found.
+
+define(x86_lookup,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+
+dnl Usage: x86_opcode_regxmm(reg)
+dnl
+dnl Validate the given xmm register, and return its number, 0 to 7.
+
+define(x86_opcode_regxmm,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regxmm_list)')
+
+define(x86_opcode_regxmm_list,
+``%xmm0',0,
+`%xmm1',1,
+`%xmm2',2,
+`%xmm3',3,
+`%xmm4',4,
+`%xmm5',5,
+`%xmm6',6,
+`%xmm7',7,
+`%xmm8',8,
+`%xmm9',9,
+`%xmm10',10,
+`%xmm11',11,
+`%xmm12',12,
+`%xmm13',13,
+`%xmm14',14,
+`%xmm15',15')
+
+dnl Usage: palignr($imm,%srcreg,%dstreg)
+dnl
+dnl Emit a palignr instruction, using a .byte sequence, since obsolete but
+dnl still distributed versions of gas don't know SSSE3 instructions.
+
+define(`palignr',
+m4_assert_numargs(3)
+`.byte 0x66,dnl
+ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
+ `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
+0x0f,0x3a,0x0f,dnl
+eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
+substr($1,1)')
+
+
divert`'dnl
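For readers unfamiliar with the m4 side, here is a hand-worked sketch of what the new palignr macro emits. The byte values are computed from the definitions in the hunk above and are illustrative only, not verified assembler output; m4's eval prints its results in decimal, so the .byte lines mix hex and decimal. x86_lookup(`%xmm9', x86_opcode_regxmm_list) resolves to 9 (an unlisted name hits m4_error), and a REX prefix is inserted between the 0x66 prefix and the 0x0f 0x3a escape only when a register numbered 8 or higher is involved.

  palignr($8, %xmm1, %xmm0)
      expands to:  .byte 0x66,0x0f,0x3a,0x0f,193,8
      (193 = 0xc1 is the ModRM byte for the xmm0,xmm1 register pair;
       the trailing 8 is the immediate, recovered by substr from the `$8' argument)

  palignr($7, %xmm8, %xmm9)
      expands to:  .byte 0x66,69,0x0f,0x3a,0x0f,200,7
      (69 = 0x45 is the REX.RB prefix needed for registers 8..15, placed after
       the 0x66 prefix; 200 = 0xc8 is the ModRM byte built from the low 3 bits
       of each register number)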