[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Thu Mar 10 11:16:42 CET 2011
details: /var/hg/gmp/rev/184a69bd4ff2
changeset: 14023:184a69bd4ff2
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Mar 10 11:12:03 2011 +0100
description:
Move bdiv_dbm1c from atom to pentium4.
details: /var/hg/gmp/rev/c55f4c6e9cb1
changeset: 14024:c55f4c6e9cb1
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Thu Mar 10 11:16:38 2011 +0100
description:
Small corrections on some comments.
diffstat:
ChangeLog | 5 +
mpn/x86/atom/sse2/bdiv_dbm1c.asm | 108 +----------------------------
mpn/x86/pentium4/sse2/bdiv_dbm1c.asm | 130 +++++++++++++++++++++++++++++++++++
mpn/x86_64/aorrlsh_n.asm | 4 +-
mpn/x86_64/coreinhm/aorrlsh_n.asm | 4 +-
mpn/x86_64/coreisbr/aorrlsh_n.asm | 4 +-
6 files changed, 143 insertions(+), 112 deletions(-)
diffs (truncated from 304 to 300 lines):
diff -r a9f43e21085c -r c55f4c6e9cb1 ChangeLog
--- a/ChangeLog Wed Mar 09 23:42:45 2011 +0100
+++ b/ChangeLog Thu Mar 10 11:16:38 2011 +0100
@@ -1,3 +1,8 @@
+2011-03-10 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/pentium4/sse2/bdiv_dbm1c.asm: New file, was in atom.
+ * mpn/x86/atom/sse2/bdiv_dbm1c.asm: Grab file above.
+
2011-03-09 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86_64/aorrlsh_n.asm: Complete rewrite.
diff -r a9f43e21085c -r c55f4c6e9cb1 mpn/x86/atom/sse2/bdiv_dbm1c.asm
--- a/mpn/x86/atom/sse2/bdiv_dbm1c.asm Wed Mar 09 23:42:45 2011 +0100
+++ b/mpn/x86/atom/sse2/bdiv_dbm1c.asm Thu Mar 10 11:16:38 2011 +0100
@@ -1,7 +1,5 @@
dnl Intel Atom mpn_bdiv_dbm1.
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-dnl
dnl Copyright 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
@@ -21,107 +19,5 @@
include(`../config.m4')
-C cycles/limb
-C cycles/limb
-C P5 -
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) 9.75
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 8
-C AMD K6 -
-C AMD K7 -
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_MUL, 16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_RP,`PARAM_MUL')
-define(SAVE_UP,`PARAM_SIZE')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`n', `%ecx')
-define(`reg', `%edx')
-define(`cy', `%eax') C contains the return value
-
-ASM_START()
- TEXT
- ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(mpn_bdiv_dbm1c)
- mov PARAM_SIZE, n C size
- mov up, SAVE_UP
- mov PARAM_SRC, up
- movd PARAM_MUL, %mm7
- mov rp, SAVE_RP
- mov PARAM_DST, rp
-
- movd (up), %mm0
- pmuludq %mm7, %mm0
- shr n
- mov PARAM_CARRY, cy
- jz L(eq1)
-
- movd 4(up), %mm1
- jc L(odd)
-
- lea 4(up), up
- pmuludq %mm7, %mm1
- movd %mm0, reg
- psrlq $32, %mm0
- sub reg, cy
- movd %mm0, reg
- movq %mm1, %mm0
- dec n
- mov cy, (rp)
- lea 4(rp), rp
- jz L(end)
-
-C ALIGN(16)
-L(top): movd 4(up), %mm1
- sbb reg, cy
-L(odd): movd %mm0, reg
- psrlq $32, %mm0
- pmuludq %mm7, %mm1
- sub reg, cy
- lea 8(up), up
- movd %mm0, reg
- movd (up), %mm0
- mov cy, (rp)
- sbb reg, cy
- movd %mm1, reg
- psrlq $32, %mm1
- sub reg, cy
- movd %mm1, reg
- pmuludq %mm7, %mm0
- dec n
- mov cy, 4(rp)
- lea 8(rp), rp
- jnz L(top)
-
-L(end): sbb reg, cy
-
-L(eq1): movd %mm0, reg
- psrlq $32, %mm0
- mov SAVE_UP, up
- sub reg, cy
- movd %mm0, reg
- emms
- mov cy, (rp)
- sbb reg, cy
-
- mov SAVE_RP, rp
- ret
-EPILOGUE()
-ASM_END()
+MULFUNC_PROLOGUE(mpn_bdiv_dbm1c)
+include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm')
diff -r a9f43e21085c -r c55f4c6e9cb1 mpn/x86/pentium4/sse2/bdiv_dbm1c.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm Thu Mar 10 11:16:38 2011 +0100
@@ -0,0 +1,130 @@
+dnl Intel Atom mpn_bdiv_dbm1c.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) 9.75
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood) 8.25
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 8
+C AMD K6 -
+C AMD K7 -
+C AMD K8
+C AMD K10
+
+C TODO: This code was optimised for atom-32; consider moving it back to the atom
+C dir (atom currently grabs this code), and write a 4-way version (7c/l).
+
+defframe(PARAM_CARRY,20)       C frame offset of the incoming carry argument
+defframe(PARAM_MUL, 16)        C frame offset of the 32-bit multiplier
+defframe(PARAM_SIZE, 12)       C frame offset of the limb count
+defframe(PARAM_SRC, 8)         C frame offset of the source pointer
+defframe(PARAM_DST, 4)         C frame offset of the destination pointer
+
+dnl re-use parameter space
+define(SAVE_RP,`PARAM_MUL')    C holds the caller %edi once the multiplier is in %mm7
+define(SAVE_UP,`PARAM_SIZE')   C holds the caller %esi once the size is in %ecx
+
+define(`rp', `%edi')           C destination pointer, loaded from PARAM_DST
+define(`up', `%esi')           C source pointer, loaded from PARAM_SRC
+define(`n', `%ecx')            C limb count, halved into a count of limb pairs
+define(`reg', `%edx')          C scratch: low or high half of the current product
+define(`cy', `%eax') C contains the return value
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+C Per limb: cy -= low(up[i]*mul); rp[i] = cy; cy -= high(up[i]*mul) + borrow.  Final cy is returned.
+PROLOGUE(mpn_bdiv_dbm1c)
+ mov PARAM_SIZE, n              C n = limb count
+ mov up, SAVE_UP                C save %esi in the size slot, which was just read
+ mov PARAM_SRC, up
+ movd PARAM_MUL, %mm7           C %mm7 = multiplier, kept for the whole routine
+ mov rp, SAVE_RP                C save %edi in the multiplier slot, which was just read
+ mov PARAM_DST, rp
+C Start the first product, then split n into a pair count and a parity bit.
+ movd (up), %mm0
+ pmuludq %mm7, %mm0             C %mm0 = up[0] * multiplier, 64-bit product
+ shr n                          C n = n/2, CF = low bit of the original n
+ mov PARAM_CARRY, cy            C mov leaves the flags from shr intact
+ jz L(eq1)                      C n/2 = 0: only the limb already in %mm0 remains
+C More than one limb; CF from shr is still live for the jc below.
+ movd 4(up), %mm1
+ jc L(odd)                      C odd count: join the loop at its midpoint
+C Even count: handle limb 0 here so that an odd number of limbs is left.
+ lea 4(up), up
+ pmuludq %mm7, %mm1             C %mm1 = up[1] * multiplier
+ movd %mm0, reg                 C reg = low half of product 0
+ psrlq $32, %mm0
+ sub reg, cy                    C cy -= low half, borrow goes to CF
+ movd %mm0, reg                 C reg = high half, consumed by a later sbb
+ movq %mm1, %mm0                C the next product becomes the current one
+ dec n                          C dec sets ZF but preserves CF
+ mov cy, (rp)
+ lea 4(rp), rp
+ jz L(end)                      C no full pair left, finish with the last limb
+C Loop: two limbs per pass; %mm0 holds the current product, %mm1 the following one.
+C ALIGN(16)
+L(top): movd 4(up), %mm1
+ sbb reg, cy                    C cy -= pending high half + borrow
+L(odd): movd %mm0, reg
+ psrlq $32, %mm0
+ pmuludq %mm7, %mm1
+ sub reg, cy
+ lea 8(up), up                  C lea does not touch the flags
+ movd %mm0, reg
+ movd (up), %mm0                C load the limb that follows this pair
+ mov cy, (rp)                   C store the first result limb of the pair
+ sbb reg, cy
+ movd %mm1, reg
+ psrlq $32, %mm1
+ sub reg, cy
+ movd %mm1, reg
+ pmuludq %mm7, %mm0
+ dec n                          C CF from the sub above survives for the next sbb
+ mov cy, 4(rp)                  C store the second result limb of the pair
+ lea 8(rp), rp
+ jnz L(top)
+C Apply the high half left pending by the code above.
+L(end): sbb reg, cy
+C Final limb, waiting in %mm0; this is the whole job when the count was 1.
+L(eq1): movd %mm0, reg
+ psrlq $32, %mm0
+ mov SAVE_UP, up                C restore %esi
+ sub reg, cy
+ movd %mm0, reg
+ emms                           C leave MMX state before returning
+ mov cy, (rp)
+ sbb reg, cy                    C final cy, returned in %eax
+
+ mov SAVE_RP, rp                C restore %edi
+ ret
+EPILOGUE()
+ASM_END()
diff -r a9f43e21085c -r c55f4c6e9cb1 mpn/x86_64/aorrlsh_n.asm
--- a/mpn/x86_64/aorrlsh_n.asm Wed Mar 09 23:42:45 2011 +0100
+++ b/mpn/x86_64/aorrlsh_n.asm Thu Mar 10 11:16:38 2011 +0100
@@ -24,9 +24,9 @@
C AMD K10 2.75 < 3.85 for lshift + add_n
C Intel P4 22 > 7.33 for lshift + add_n
C Intel core2 4.1 > 3.27 for lshift + add_n
-C Intel NHM 4.4 < 3.75 for lshift + add_n
+C Intel NHM 4.4 > 3.75 for lshift + add_n
C Intel SBR 3.17 < 3.46 for lshift + add_n
-C Intel atom ? < 8.75 for lshift + add_n
+C Intel atom ? ? 8.75 for lshift + add_n
C VIA nano 4.7 < 6.25 for lshift + add_n
C TODO
diff -r a9f43e21085c -r c55f4c6e9cb1 mpn/x86_64/coreinhm/aorrlsh_n.asm
--- a/mpn/x86_64/coreinhm/aorrlsh_n.asm Wed Mar 09 23:42:45 2011 +0100
+++ b/mpn/x86_64/coreinhm/aorrlsh_n.asm Thu Mar 10 11:16:38 2011 +0100
@@ -1,5 +1,5 @@
-dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+dnl AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
+dnl AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
dnl Optimised for Nehalem.
dnl Contributed to the GNU project by Torbjorn Granlund.
diff -r a9f43e21085c -r c55f4c6e9cb1 mpn/x86_64/coreisbr/aorrlsh_n.asm
--- a/mpn/x86_64/coreisbr/aorrlsh_n.asm Wed Mar 09 23:42:45 2011 +0100
+++ b/mpn/x86_64/coreisbr/aorrlsh_n.asm Thu Mar 10 11:16:38 2011 +0100
@@ -1,5 +1,5 @@
-dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+dnl AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
More information about the gmp-commit
mailing list