[Gmp-commit] /var/hg/gmp: New code.
mercurial at gmplib.org
mercurial at gmplib.org
Sun Feb 27 15:42:33 CET 2011
details: /var/hg/gmp/rev/83477359bd29
changeset: 13918:83477359bd29
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Sun Feb 27 15:42:28 2011 +0100
description:
New code.
diffstat:
ChangeLog | 4 +
mpn/x86/atom/sse2/mul_1.asm | 95 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 98 insertions(+), 1 deletions(-)
diffs (120 lines):
diff -r 7c2810e5f9d0 -r 83477359bd29 ChangeLog
--- a/ChangeLog Sun Feb 27 11:58:15 2011 +0100
+++ b/ChangeLog Sun Feb 27 15:42:28 2011 +0100
@@ -1,3 +1,7 @@
+2011-02-27 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/mul_1.asm: New code.
+
2011-02-27 Niels Möller <nisse at lysator.liu.se>
* gmp-impl.h (udiv_rnnd_preinv): New macro.
diff -r 7c2810e5f9d0 -r 83477359bd29 mpn/x86/atom/sse2/mul_1.asm
--- a/mpn/x86/atom/sse2/mul_1.asm Sun Feb 27 11:58:15 2011 +0100
+++ b/mpn/x86/atom/sse2/mul_1.asm Sun Feb 27 15:42:28 2011 +0100
@@ -1,5 +1,7 @@
dnl Intel Atom mpn_mul_1.
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
dnl Copyright 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -19,5 +21,96 @@
include(`../config.m4')
+C cycles/limb
+C cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 7.5
+C AMD K6
+C AMD K7 -
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+define(`rp', `%edx')
+define(`up', `%esi')
+define(`n', `%ecx')
+
MULFUNC_PROLOGUE(mpn_mul_1 mpn_mul_1c)
-include_mpn(`x86/pentium4/sse2/mul_1.asm')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(mpn_mul_1c)
+ movd PARAM_CARRY, %mm6 C carry
+ jmp L(ent)
+EPILOGUE()
+
+ ALIGN(8) C for a compact code
+PROLOGUE(mpn_mul_1)
+ pxor %mm6, %mm6
+L(ent): push %esi FRAME_pushl()
+ mov PARAM_SRC, up
+ mov PARAM_SIZE, %eax C size
+ movd PARAM_MUL, %mm7
+ movd (up), %mm0
+ mov %eax, n
+ and $3, %eax
+ pmuludq %mm7, %mm0
+ mov PARAM_DST, rp
+ jz L(lo0)
+ cmp $2, %eax
+ lea -16(up,%eax,4),up
+ lea -16(rp,%eax,4),rp
+ jc L(lo1)
+ jz L(lo2)
+ jmp L(lo3)
+
+ ALIGN(16)
+L(top): movd (up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+L(lo0): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+L(lo3): paddq %mm0, %mm6
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 4(rp)
+ psrlq $32, %mm6
+L(lo2): paddq %mm0, %mm6
+ movd 12(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 8(rp)
+ psrlq $32, %mm6
+L(lo1): paddq %mm0, %mm6
+ sub $4, n
+ movd %mm6, 12(rp)
+ lea 16(up), up
+ ja L(top)
+
+ psrlq $32, %mm6
+ movd %mm6, %eax
+ emms
+ pop %esi FRAME_popl()
+ ret
+EPILOGUE()
+
+ASM_END()
More information about the gmp-commit
mailing list