[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Mar 26 00:09:12 CET 2013
details: /var/hg/gmp/rev/41c3c1ab4899
changeset: 15644:41c3c1ab4899
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 26 00:08:42 2013 +0100
description:
Add basic x86 cnd_add_n and cnd_sub_n files.
details: /var/hg/gmp/rev/e909c2822f18
changeset: 15645:e909c2822f18
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Mar 26 00:09:06 2013 +0100
description:
ChangeLog
diffstat:
ChangeLog | 6 +
mpn/x86/cnd_aors_n.asm | 116 ++++++++++++++++++++++++++++++++++++
mpn/x86/pentium4/sse2/cnd_add_n.asm | 84 ++++++++++++++++++++++++++
mpn/x86/pentium4/sse2/cnd_sub_n.asm | 103 +++++++++++++++++++++++++++++++
4 files changed, 309 insertions(+), 0 deletions(-)
diffs (truncated from 328 to 300 lines):
diff -r 82870a93a714 -r e909c2822f18 ChangeLog
--- a/ChangeLog Mon Mar 25 19:51:21 2013 +0100
+++ b/ChangeLog Tue Mar 26 00:09:06 2013 +0100
@@ -1,3 +1,9 @@
+2013-03-26 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/pentium4/sse2/cnd_add_n.asm: New file.
+ * mpn/x86/pentium4/sse2/cnd_sub_n.asm: New file.
+ * mpn/x86/cnd_aors_n.asm: New file.
+
2013-03-25 David S. Miller <davem at davemloft.net>
* mpn/sparc64/ultrasparct3/hamdist.asm: New file.
diff -r 82870a93a714 -r e909c2822f18 mpn/x86/cnd_aors_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/cnd_aors_n.asm Tue Mar 26 00:09:06 2013 +0100
@@ -0,0 +1,116 @@
+dnl X86 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 5.4
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 14.5
+C P4 model 3-4 (Prescott) 21
+C Intel atom 11
+C AMD K6 ?
+C AMD K7 3.4
+C AMD K8 ?
+
+
+define(`rp', `%edi') C destination limb pointer
+define(`up', `%esi') C first source limb pointer, always used
+define(`vp', `%ebp') C second source limb pointer, its limbs are masked by cnd
+define(`n', `%ecx') C limb count, later negated and used as the loop index
+define(`cnd', `20(%esp)') C stack slot holding cnd, overwritten with the full mask
+define(`cy', `%edx') C saved carry or borrow, kept as 0 or -1
+
+ifdef(`OPERATION_cnd_add_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_cnd_add_n)') C add variant: first limb uses add, later limbs use adc
+ifdef(`OPERATION_cnd_sub_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_cnd_sub_n)') C sub variant: first limb uses sub, later limbs use sbb
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+C NOTE(review): DOS64 and STD64 look like 64-bit ABI names, yet this file uses 32-bit registers -- confirm the two declarations below belong here
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C func: rp = up plus or minus (vp masked by cnd) over n limbs, n at least 1 since the first limbs are read unconditionally; returns carry or borrow out as 0 or 1 in eax
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(func)
+ add $-16, %esp C reserve 16 bytes for the four registers saved below
+ mov %ebp, (%esp)
+ mov %ebx, 4(%esp)
+ mov %esi, 8(%esp)
+ mov %edi, 12(%esp)
+
+ C make cnd into a full mask
+ mov cnd, %eax
+ neg %eax C carry flag is set exactly when cnd is nonzero
+ sbb %eax, %eax C eax = 0 if cnd was zero, otherwise all ones
+ mov %eax, cnd C keep the mask in the cnd stack slot
+
+ C load parameters into registers
+ mov 24(%esp), rp
+ mov 28(%esp), up
+ mov 32(%esp), vp
+ mov 36(%esp), n
+
+ mov (vp), %eax C first vp limb
+ mov (up), %ebx C first up limb
+
+ C put operand pointers just beyond their last limb
+ lea (vp,n,4), vp
+ lea (up,n,4), up
+ lea -4(rp,n,4), rp C rp is biased one limb lower because each store trails its load by one step
+ neg n C n now counts up from minus the size towards zero
+
+ and cnd, %eax C zero the vp limb when the mask is zero
+ ADDSUB %eax, %ebx C first limb, no incoming carry
+ sbb cy, cy C cy = 0 or -1, a copy of the carry flag
+ inc n
+ je L(end) C size was 1, nothing left for the loop
+
+ ALIGN(16)
+L(top): mov (vp,n,4), %eax C next vp limb
+ and cnd, %eax C masking overwrites the flags, hence carry lives in cy
+ mov %ebx, (rp,n,4) C store the result limb computed in the previous step
+ mov (up,n,4), %ebx C next up limb
+ add cy, cy C turn cy back into the carry flag
+ ADCSBB %eax, %ebx
+ sbb cy, cy C save the new carry as 0 or -1
+ inc n
+ jne L(top)
+
+L(end): mov %ebx, (rp) C store the final result limb
+ xor %eax, %eax
+ sub cy, %eax C return value is minus cy, that is 0 or 1
+
+ mov (%esp), %ebp C restore the registers saved on entry
+ mov 4(%esp), %ebx
+ mov 8(%esp), %esi
+ mov 12(%esp), %edi
+ add $16, %esp
+ ret
+EPILOGUE()
+ASM_END()
diff -r 82870a93a714 -r e909c2822f18 mpn/x86/pentium4/sse2/cnd_add_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/pentium4/sse2/cnd_add_n.asm Tue Mar 26 00:09:06 2013 +0100
@@ -0,0 +1,84 @@
+dnl Intel Pentium-4 mpn_cnd_add_n -- conditional mpn addition.
+
+dnl Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 4.67
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 5
+C P4 model 3-4 (Prescott) 5.25
+
+defframe(PARAM_SIZE, 20) C limb count
+defframe(PARAM_SRC2, 16) C source operand whose limbs are masked by cnd
+defframe(PARAM_SRC1, 12) C source operand that is always used
+defframe(PARAM_DST, 8) C destination
+defframe(PARAM_CND, 4) C condition, nonzero selects the addition
+
+dnl re-use parameter space
+define(SAVE_EBX,`PARAM_SRC1') C ebx is saved in the src1 argument slot once src1 has been loaded
+
+define(`cnd', `%mm3') C mask register, 0 or all ones in its low 32 bits
+
+ TEXT
+ ALIGN(8) C NOTE(review): ALIGN(8) is repeated two lines further down, one of the two looks redundant
+C mpn_cnd_add_n: dst = src1 + (src2 masked by cnd), limb by limb; the final carry is returned in eax
+ ALIGN(8)
+PROLOGUE(mpn_cnd_add_n)
+deflit(`FRAME',0)
+ pxor %mm0, %mm0 C mm0 holds the running carry, initially zero
+
+ mov PARAM_CND, %eax
+ neg %eax C carry flag is set exactly when cnd is nonzero
+ sbb %eax, %eax C eax = 0 if cnd was zero, otherwise all ones
+ movd %eax, cnd C mask goes into the low 32 bits of mm3
+
+ mov PARAM_SRC1, %eax
+ mov %ebx, SAVE_EBX C src1 is already in eax, so its stack slot can hold ebx
+ mov PARAM_SRC2, %ebx
+ mov PARAM_DST, %edx
+ mov PARAM_SIZE, %ecx
+
+ lea (%eax,%ecx,4), %eax C src1 end
+ lea (%ebx,%ecx,4), %ebx C src2 end
+ lea (%edx,%ecx,4), %edx C dst end
+ neg %ecx C -size
+
+L(top): movd (%ebx,%ecx,4), %mm2 C src2 limb, zero-extended to 64 bits
+ movd (%eax,%ecx,4), %mm1 C src1 limb, zero-extended to 64 bits
+ pand cnd, %mm2 C zero the src2 limb when the mask is zero
+ paddq %mm2, %mm1 C 64-bit sum, so the carry lands in bit 32
+
+ paddq %mm1, %mm0 C add the incoming carry
+ movd %mm0, (%edx,%ecx,4) C store the low 32 bits as the result limb
+
+ psrlq $32, %mm0 C keep only the carry for the next limb
+
+ add $1, %ecx
+ jnz L(top)
+
+ movd %mm0, %eax C return the final carry
+ mov SAVE_EBX, %ebx C restore ebx from the reused argument slot
+ emms C reset the MMX state before returning
+ ret
+
+EPILOGUE()
diff -r 82870a93a714 -r e909c2822f18 mpn/x86/pentium4/sse2/cnd_sub_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/pentium4/sse2/cnd_sub_n.asm Tue Mar 26 00:09:06 2013 +0100
@@ -0,0 +1,103 @@
+dnl Intel Pentium-4 mpn_cnd_sub_n -- conditional mpn subtraction.
+
+dnl Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 4.67
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 5
+C P4 model 3-4 (Prescott) 5.25
+
+defframe(PARAM_SIZE, 20) C limb count
+defframe(PARAM_SRC2, 16) C source operand whose limbs are masked by cnd
+defframe(PARAM_SRC1, 12) C source operand that is always used
+defframe(PARAM_DST, 8) C destination
+defframe(PARAM_CND, 4) C condition, nonzero selects the subtraction
+
+dnl re-use parameter space
+define(SAVE_EBX,`PARAM_SRC1') C ebx is saved in the src1 argument slot once src1 has been loaded
+
+define(`cnd', `%mm3') C mask register, 0 or all ones in its low 32 bits
+
+ TEXT
+ ALIGN(8) C NOTE(review): ALIGN(8) is repeated two lines further down, one of the two looks redundant
+
+ ALIGN(8)
+PROLOGUE(mpn_cnd_sub_n)
+deflit(`FRAME',0)
+ pxor %mm0, %mm0
+
+ mov PARAM_CND, %eax
+ neg %eax
+ sbb %eax, %eax
+ movd %eax, cnd
+
+ mov PARAM_SRC1, %eax
+ mov %ebx, SAVE_EBX
+ mov PARAM_SRC2, %ebx
+ mov PARAM_DST, %edx
+ mov PARAM_SIZE, %ecx
+
+ lea (%eax,%ecx,4), %eax C src1 end
+ lea (%ebx,%ecx,4), %ebx C src2 end
+ lea (%edx,%ecx,4), %edx C dst end
+ neg %ecx C -size
+
+L(top): movd (%ebx,%ecx,4), %mm2
+ movd (%eax,%ecx,4), %mm1
+ pand cnd, %mm2
+ psubq %mm2, %mm1
+
+ psubq %mm0, %mm1
+ movd %mm1, (%edx,%ecx,4)
+
+ psrlq $63, %mm1
+
More information about the gmp-commit
mailing list