[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Mar 12 22:25:59 CET 2012
details: /var/hg/gmp/rev/31cba1e10917
changeset: 14750:31cba1e10917
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Mar 12 22:20:50 2012 +0100
description:
Whitespace cleanup.
details: /var/hg/gmp/rev/bd083a3fc5a0
changeset: 14751:bd083a3fc5a0
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Mar 12 22:25:25 2012 +0100
description:
Add new gcd_1 files for x86-64.
details: /var/hg/gmp/rev/f49ec37e3c41
changeset: 14752:f49ec37e3c41
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Mar 12 22:25:54 2012 +0100
description:
Cleanup.
diffstat:
ChangeLog | 4 +
mpn/x86_64/addmul_2.asm | 1 -
mpn/x86_64/bd1/gcd_1.asm | 20 ++++++++
mpn/x86_64/core2/gcd_1.asm | 112 +++++++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/gcd_1.asm | 31 ++++++------
mpn/x86_64/k10/gcd_1.asm | 20 ++++++++
6 files changed, 171 insertions(+), 17 deletions(-)
diffs (249 lines):
diff -r e8acb1f4ae01 -r f49ec37e3c41 ChangeLog
--- a/ChangeLog Mon Mar 12 14:02:42 2012 +0100
+++ b/ChangeLog Mon Mar 12 22:25:54 2012 +0100
@@ -1,5 +1,9 @@
2012-03-12 Torbjorn Granlund <tege at gmplib.org>
+ * mpn/x86_64/core2/gcd_1.asm: New file.
+ * mpn/x86_64/k10/gcd_1.asm: New file, grabbing core2 asm file.
+ * mpn/x86_64/bd1/gcd_1.asm: Likewise.
+
* mpn/x86_64/bobcat/sqr_basecase.asm: New file.
* mpn/x86_64/bobcat/mul_basecase.asm: Minor tuning.
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/addmul_2.asm
--- a/mpn/x86_64/addmul_2.asm Mon Mar 12 14:02:42 2012 +0100
+++ b/mpn/x86_64/addmul_2.asm Mon Mar 12 22:25:54 2012 +0100
@@ -171,4 +171,3 @@
DOS64_EXIT()
ret
EPILOGUE()
-
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/bd1/gcd_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bd1/gcd_1.asm Mon Mar 12 22:25:54 2012 +0100
@@ -0,0 +1,20 @@
+dnl AMD64 mpn_gcd_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`x86_64/core2/gcd_1.asm')
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/core2/gcd_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/core2/gcd_1.asm Mon Mar 12 22:25:54 2012 +0100
@@ -0,0 +1,112 @@
+dnl AMD64 mpn_gcd_1 optimised for Intel C2, NHM, SBR and AMD K10, BD.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C AMD K8,K9 8.5
+C AMD K10 5
+C AMD bd1 5
+C AMD bobcat 11
+C Intel P4 24
+C Intel core2 5.5
+C Intel NHM 6
+C Intel SBR 6
+C Intel atom 17
+C VIA nano 6.5
+
+C Numbers measured with: speed -CD -s1-64 mpn_gcd_1
+
+
+C INPUT PARAMETERS
+define(`up', `%rdi')
+define(`n', `%rsi')
+define(`v0', `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_1) C gcd of the limbs at up (n of them) and the single limb v0
+ DOS64_ENTRY(3)
+ mov (%rdi), %r8 C src low limb
+ mov %r8, %r10
+ or %rdx, %r8 C x | y
+C rcx = index of the lowest set bit of up[0] | v0, i.e. the shared power of two
+ bsf %r8, %rcx
+C NOTE(review): the shifted r10 below is not read before L(mid) overwrites r10
+ shr R8(%rcx), %r10
+ shr R8(%rcx), %rdx
+ mov R32(%rcx), R32(%r8) C common twos
+C strip the remaining low zero bits of v0 so it is odd (assumes v0 is nonzero)
+ bsf %rdx, %rcx
+ shr R8(%rcx), %rdx
+C keep the twos count and the odd v0 on the stack across the call below
+ push %r8
+ push %rdx
+ sub $8, %rsp C maintain ABI required rsp alignment
+C DOS64: args go to rcx, rdx, r8. NOTE(review): no 32-byte shadow space seen here - confirm
+IFDOS(` mov %rdx, %r8 ')
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, %rsi
+ jl L(bmod) C n below threshold (signed compare): take the modexact path
+ CALL( mpn_mod_1) C presumably rax = up mod v0; helper not shown here
+ jmp L(reduced)
+L(bmod):
+ CALL( mpn_modexact_1_odd) C called with the odd v0; exact semantics not shown here
+L(reduced):
+C rax = reduction result; drop the pad, then recover odd v0 and the twos count
+ add $8, %rsp
+ pop %rdx
+ pop %r8
+
+ test %rax, %rax
+C mov does not modify flags, so the jnz below still sees the test result
+ mov %rax, %rcx
+ jnz L(mid)
+C reduction result was zero: answer is the odd v0 times the common power of two
+ mov %rdx, %rax
+ jmp L(done)
+C loop: rax = x (made odd at L(mid)), rdx = y (odd); next pair is |x-y| and min
+ ALIGN(16) C K10 C2 NHM SBR
+L(top): cmovc %r10, %rax C if x-y carried 0,7 0,6 0,7 0
+ cmovc %rcx, %rdx C use x,y-x 0 1 1 1
+L(mid): bsf %rax, %rcx C 1 2 2 2
+ mov %rdx, %r10 C 1 3 3 3
+ shr R8(%rcx), %rax C 5 4 5 5
+ mov %rax, %rcx C 6 5 6 7
+ sub %rax, %r10 C 6 5 6 7
+ sub %rdx, %rax C 6 5 6 7
+ jnz L(top) C loop until x == y
+C x == y here and rcx still holds that common odd value
+ mov %rcx, %rax
+L(done):
+ mov %r8, %rcx C shift count = common twos saved earlier
+ shl R8(%rcx), %rax C put the shared power of two back into the result
+ DOS64_EXIT()
+ ret
+EPILOGUE()
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/gcd_1.asm
--- a/mpn/x86_64/gcd_1.asm Mon Mar 12 14:02:42 2012 +0100
+++ b/mpn/x86_64/gcd_1.asm Mon Mar 12 22:25:54 2012 +0100
@@ -3,8 +3,8 @@
dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn
dnl Granlund.
-dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011 Free Software Foundation,
-dnl Inc.
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -24,22 +24,23 @@
include(`../config.m4')
-C K8: 6.75 cycles/bit (approx) 1x1 gcd
-C 10.0 cycles/limb Nx1 reduction (modexact_1_odd)
+C cycles/bit (approx)
+C AMD K8,K9 6.75
+C AMD K10 6.75
+C AMD bd1 7.75
+C AMD bobcat 7.5
+C Intel P4 18
+C Intel core2 9
+C Intel NHM 9
+C Intel SBR 10
+C Intel atom 10.5
+C VIA nano 8.5
-
-dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl where x is the larger of the two. See tune/README for more.
-dnl
-dnl div at 80 cycles compared to the gcd at about 7 cycles/bitpair
-dnl suggests 80/7*2=23
-
-deflit(DIV_THRESHOLD, 23)
+C Numbers measured with: speed -CD -s1-64 mpn_gcd_1
C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-
deflit(MAXSHIFT, 6)
deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
@@ -50,13 +51,11 @@
')
END_OBJECT(ctz_table)
-C mp_limb_t mpn_gcd_1 (mp_srcptr up, mp_size_t n, mp_limb_t vlimb);
-
C INPUT PARAMETERS
define(`up', `%rdi')
define(`n', `%rsi')
-define(`vlimb', `%rdx')
+define(`v0', `%rdx')
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/k10/gcd_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k10/gcd_1.asm Mon Mar 12 22:25:54 2012 +0100
@@ -0,0 +1,20 @@
+dnl AMD64 mpn_gcd_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`x86_64/core2/gcd_1.asm')
More information about the gmp-commit
mailing list