[Gmp-commit] /var/hg/gmp: Move addaddmul_1msb0.asm to k8 subdir.
mercurial at gmplib.org
mercurial at gmplib.org
Sun Oct 10 01:45:24 UTC 2021
details: /var/hg/gmp/rev/9d7487ca7ec4
changeset: 18257:9d7487ca7ec4
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Oct 10 03:35:48 2021 +0200
description:
Move addaddmul_1msb0.asm to k8 subdir.
diffstat:
mpn/x86_64/addaddmul_1msb0.asm | 153 --------------------------------------
mpn/x86_64/k8/addaddmul_1msb0.asm | 153 ++++++++++++++++++++++++++++++++++++++
2 files changed, 153 insertions(+), 153 deletions(-)
diffs (truncated from 314 to 300 lines):
diff -r 3dee30523768 -r 9d7487ca7ec4 mpn/x86_64/addaddmul_1msb0.asm
--- a/mpn/x86_64/addaddmul_1msb0.asm Fri Oct 08 16:05:05 2021 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,153 +0,0 @@
-dnl AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
-
-dnl Copyright 2008, 2021 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C AMD K8,K9 2.167
-C AMD K10 2.167
-C Intel P4 12.0
-C Intel core2 4.0
-C Intel corei ?
-C Intel atom ?
-C VIA nano ?
-
-C TODO
-C * Perhaps handle various n mod 3 sizes better. The code now is too large.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`ap', `%rsi')
-define(`bp_param', `%rdx')
-define(`n', `%rcx')
-define(`u0', `%r8')
-define(`v0', `%r9')
-
-
-define(`bp', `%rbp')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_addaddmul_1msb0)
- FUNC_ENTRY(4)
-IFDOS(` mov 56(%rsp), %r8 ')
-IFDOS(` mov 64(%rsp), %r9 ')
- push %rbp
-
- lea (ap,n,8), ap
- lea (bp_param,n,8), bp
- lea (rp,n,8), rp
- neg n
-
- mov (ap,n,8), %rax
- mul %r8
- mov %rax, %r11
- mov (bp,n,8), %rax
- mov %rdx, %r10
- add $3, n
- jns L(end)
-
- push %r13
-
- ALIGN(16)
-L(top): mul %r9
- add %rax, %r11
- mov -16(ap,n,8), %rax
- adc %rdx, %r10
- mov %r11, -24(rp,n,8)
- mul %r8
- add %rax, %r10
- mov -16(bp,n,8), %rax
- mov $0, R32(%r13)
- adc %rdx, %r13
- mul %r9
- add %rax, %r10
- mov -8(ap,n,8), %rax
- adc %rdx, %r13
- mov %r10, -16(rp,n,8)
- mul %r8
- add %rax, %r13
- mov -8(bp,n,8), %rax
- mov $0, R32(%r11)
- adc %rdx, %r11
- mul %r9
- add %rax, %r13
- adc %rdx, %r11
- mov (ap,n,8), %rax
- mul %r8
- add %rax, %r11
- mov %r13, -8(rp,n,8)
- mov (bp,n,8), %rax
- mov $0, R32(%r10)
- adc %rdx, %r10
- add $3, n
- js L(top)
-
- pop %r13
-
-L(end): mul %r9
- add %rax, %r11
- adc %rdx, %r10
- cmp $1, R32(n)
- ja L(two)
- mov -16(ap,n,8), %rax
- mov %r11, -24(rp,n,8)
- mov %r10, %r11
- jz L(one)
-
-L(nul): mul %r8
- add %rax, %r10
- mov -16(bp), %rax
- mov $0, R32(%r11)
- adc %rdx, %r11
- mul %r9
- add %rax, %r10
- mov -8(ap), %rax
- adc %rdx, %r11
- mov %r10, -16(rp)
-L(one): mul %r8
- add %rax, %r11
- mov -8(bp), %rax
- mov $0, R32(%r10)
- adc %rdx, %r10
- mul %r9
- add %rax, %r11
- adc %rdx, %r10
-
-L(two): mov %r11, -8(rp)
- mov %r10, %rax
-L(ret): pop %rbp
- FUNC_EXIT()
- ret
-EPILOGUE()
diff -r 3dee30523768 -r 9d7487ca7ec4 mpn/x86_64/k8/addaddmul_1msb0.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k8/addaddmul_1msb0.asm Sun Oct 10 03:35:48 2021 +0200
@@ -0,0 +1,153 @@
+dnl AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
+
+dnl Copyright 2008, 2021 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 2.167
+C AMD K10 2.167
+C Intel P4 12.0
+C Intel core2 4.0
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
+
+C TODO
+C * Perhaps handle various n mod 3 sizes better. The code now is too large.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`ap', `%rsi')
+define(`bp_param', `%rdx')
+define(`n', `%rcx')
+define(`u0', `%r8')
+define(`v0', `%r9')
+
+
+define(`bp', `%rbp')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_addaddmul_1msb0)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+IFDOS(` mov 64(%rsp), %r9 ')
+ push %rbp
+
+ lea (ap,n,8), ap
+ lea (bp_param,n,8), bp
+ lea (rp,n,8), rp
+ neg n
+
+ mov (ap,n,8), %rax
+ mul %r8
+ mov %rax, %r11
+ mov (bp,n,8), %rax
+ mov %rdx, %r10
+ add $3, n
+ jns L(end)
+
+ push %r13
+
+ ALIGN(16)
+L(top): mul %r9
+ add %rax, %r11
+ mov -16(ap,n,8), %rax
+ adc %rdx, %r10
+ mov %r11, -24(rp,n,8)
+ mul %r8
+ add %rax, %r10
+ mov -16(bp,n,8), %rax
+ mov $0, R32(%r13)
+ adc %rdx, %r13
+ mul %r9
+ add %rax, %r10
+ mov -8(ap,n,8), %rax
+ adc %rdx, %r13
+ mov %r10, -16(rp,n,8)
+ mul %r8
+ add %rax, %r13
+ mov -8(bp,n,8), %rax
+ mov $0, R32(%r11)
+ adc %rdx, %r11
+ mul %r9
+ add %rax, %r13
+ adc %rdx, %r11
+ mov (ap,n,8), %rax
+ mul %r8
+ add %rax, %r11
+ mov %r13, -8(rp,n,8)
+ mov (bp,n,8), %rax
+ mov $0, R32(%r10)
+ adc %rdx, %r10
+ add $3, n
+ js L(top)
+
+ pop %r13
+
+L(end): mul %r9
+ add %rax, %r11
+ adc %rdx, %r10
+ cmp $1, R32(n)
+ ja L(two)
+ mov -16(ap,n,8), %rax
+ mov %r11, -24(rp,n,8)
+ mov %r10, %r11
+ jz L(one)
+
+L(nul): mul %r8
+ add %rax, %r10
+ mov -16(bp), %rax
+ mov $0, R32(%r11)
+ adc %rdx, %r11
+ mul %r9
+ add %rax, %r10
+ mov -8(ap), %rax
+ adc %rdx, %r11
+ mov %r10, -16(rp)
+L(one): mul %r8
More information about the gmp-commit mailing list