[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Mar 12 22:25:59 CET 2012


details:   /var/hg/gmp/rev/31cba1e10917
changeset: 14750:31cba1e10917
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Mar 12 22:20:50 2012 +0100
description:
Whitespace cleanup.

details:   /var/hg/gmp/rev/bd083a3fc5a0
changeset: 14751:bd083a3fc5a0
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Mar 12 22:25:25 2012 +0100
description:
Add new gcd_1 files for x86-64.

details:   /var/hg/gmp/rev/f49ec37e3c41
changeset: 14752:f49ec37e3c41
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Mar 12 22:25:54 2012 +0100
description:
Cleanup.

diffstat:

 ChangeLog                  |    4 +
 mpn/x86_64/addmul_2.asm    |    1 -
 mpn/x86_64/bd1/gcd_1.asm   |   20 ++++++++
 mpn/x86_64/core2/gcd_1.asm |  112 +++++++++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/gcd_1.asm       |   31 ++++++------
 mpn/x86_64/k10/gcd_1.asm   |   20 ++++++++
 6 files changed, 171 insertions(+), 17 deletions(-)

diffs (249 lines):

diff -r e8acb1f4ae01 -r f49ec37e3c41 ChangeLog
--- a/ChangeLog	Mon Mar 12 14:02:42 2012 +0100
+++ b/ChangeLog	Mon Mar 12 22:25:54 2012 +0100
@@ -1,5 +1,9 @@
 2012-03-12  Torbjorn Granlund  <tege at gmplib.org>
 
+	* mpn/x86_64/core2/gcd_1.asm: New file.
+	* mpn/x86_64/k10/gcd_1.asm: New file, grabbing core2 asm file.
+	* mpn/x86_64/bd1/gcd_1.asm: Likewise.
+
 	* mpn/x86_64/bobcat/sqr_basecase.asm: New file.
 	* mpn/x86_64/bobcat/mul_basecase.asm: Minor tuning.
 
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/addmul_2.asm
--- a/mpn/x86_64/addmul_2.asm	Mon Mar 12 14:02:42 2012 +0100
+++ b/mpn/x86_64/addmul_2.asm	Mon Mar 12 22:25:54 2012 +0100
@@ -171,4 +171,3 @@
 	DOS64_EXIT()
 	ret
 EPILOGUE()
-
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/bd1/gcd_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/bd1/gcd_1.asm	Mon Mar 12 22:25:54 2012 +0100
@@ -0,0 +1,20 @@
+dnl  AMD64 mpn_gcd_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`x86_64/core2/gcd_1.asm')
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/core2/gcd_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/core2/gcd_1.asm	Mon Mar 12 22:25:54 2012 +0100
@@ -0,0 +1,112 @@
+dnl  AMD64 mpn_gcd_1 optimised for Intel C2, NHM, SBR and AMD K10, BD.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/bit (approx)
+C AMD K8,K9	 8.5
+C AMD K10	 5
+C AMD bd1	 5
+C AMD bobcat	11
+C Intel P4	24 
+C Intel core2	 5.5
+C Intel NHM	 6
+C Intel SBR	 6
+C Intel atom	17
+C VIA nano	 6.5
+
+C Numbers measured with: speed -CD -s1-64 mpn_gcd_1
+
+
+C INPUT PARAMETERS
+define(`up',    `%rdi')
+define(`n',     `%rsi')
+define(`v0',    `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+	DOS64_ENTRY(3)			C presumably remaps the 3 DOS64 args to rdi/rsi/rdx - confirm in macro
+	mov	(%rdi), %r8		C src low limb
+	mov	%r8, %r10		C keep a second copy of the low limb
+	or	%rdx, %r8		C x | y
+
+	bsf	%r8, %rcx		C rcx = index of lowest set bit of x|y
+
+	shr	R8(%rcx), %r10		C drop the shared low zero bits from the limb copy
+	shr	R8(%rcx), %rdx		C drop the shared low zero bits from rdx
+	mov	R32(%rcx), R32(%r8)	C common twos
+
+	bsf	%rdx, %rcx		C rcx = index of lowest set bit left in rdx
+	shr	R8(%rcx), %rdx		C shift it out so bit 0 of rdx is that set bit
+
+	push	%r8			C save common twos count across the call
+	push	%rdx			C save reduced rdx across the call
+	sub	$8, %rsp		C maintain ABI required rsp alignment
+
+IFDOS(`	mov	%rdx, %r8	')	C DOS64 only: third call argument
+IFDOS(`	mov	%rsi, %rdx	')	C DOS64 only: second call argument
+IFDOS(`	mov	%rdi, %rcx	')	C DOS64 only: first call argument
+	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, %rsi	C signed compare of rsi against threshold
+	jl	L(bmod)			C below threshold: use mpn_modexact_1_odd
+	CALL(	mpn_mod_1)		C NOTE(review): no 32-byte DOS64 shadow space reserved here - confirm
+	jmp	L(reduced)
+L(bmod):
+	CALL(	mpn_modexact_1_odd)
+L(reduced):
+
+	add	$8, %rsp		C undo the alignment padding
+	pop	%rdx			C restore reduced rdx
+	pop	%r8			C restore common twos count
+
+	test	%rax, %rax		C was the value returned by the call zero?
+
+	mov	%rax, %rcx		C mov leaves the flags from test intact
+	jnz	L(mid)			C nonzero: enter the loop at its midpoint
+
+	mov	%rdx, %rax		C zero: result before final shift is rdx
+	jmp	L(done)
+
+	ALIGN(16)		C			K10	C2	NHM	SBR
+L(top):	cmovc	%r10, %rax	C if x-y carried	0,7	0,6	0,7	0
+	cmovc	%rcx, %rdx	C use x,y-x		0	1	1	1
+L(mid):	bsf	%rax, %rcx	C			1	2	2	2
+	mov	%rdx, %r10	C			1	3	3	3
+	shr	R8(%rcx), %rax	C			5	4	5	5
+	mov	%rax, %rcx	C			6	5	6	7
+	sub	%rax, %r10	C			6	5	6	7
+	sub	%rdx, %rax	C			6	5	6	7
+	jnz	L(top)		C
+
+	mov	%rcx, %rax		C difference hit zero: take the value saved in rcx
+L(done):
+	mov	%r8, %rcx		C shift count = common twos
+	shl	R8(%rcx), %rax		C put the common twos back into the result
+	DOS64_EXIT()			C presumably undoes DOS64_ENTRY - confirm in macro
+	ret
+EPILOGUE()
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/gcd_1.asm
--- a/mpn/x86_64/gcd_1.asm	Mon Mar 12 14:02:42 2012 +0100
+++ b/mpn/x86_64/gcd_1.asm	Mon Mar 12 22:25:54 2012 +0100
@@ -3,8 +3,8 @@
 dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
 dnl  Granlund.
 
-dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -24,22 +24,23 @@
 include(`../config.m4')
 
 
-C K8: 6.75 cycles/bit (approx)  1x1 gcd
-C     10.0 cycles/limb          Nx1 reduction (modexact_1_odd)
+C	     cycles/bit (approx)
+C AMD K8,K9	 6.75
+C AMD K10	 6.75
+C AMD bd1	 7.75
+C AMD bobcat	 7.5
+C Intel P4	18
+C Intel core2	 9
+C Intel NHM	 9
+C Intel SBR	10
+C Intel atom	10.5
+C VIA nano	 8.5
 
-
-dnl  Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl  where x is the larger of the two.  See tune/README for more.
-dnl
-dnl  div at 80 cycles compared to the gcd at about 7 cycles/bitpair
-dnl  suggests 80/7*2=23
-
-deflit(DIV_THRESHOLD, 23)
+C Numbers measured with: speed -CD -s1-64 mpn_gcd_1
 
 
 C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
 
-
 deflit(MAXSHIFT, 6)
 deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
 
@@ -50,13 +51,11 @@
 ')
 END_OBJECT(ctz_table)
 
-C mp_limb_t mpn_gcd_1 (mp_srcptr up, mp_size_t n, mp_limb_t vlimb);
-
 
 C INPUT PARAMETERS
 define(`up',    `%rdi')
 define(`n',     `%rsi')
-define(`vlimb', `%rdx')
+define(`v0',    `%rdx')
 
 ABI_SUPPORT(DOS64)
 ABI_SUPPORT(STD64)
diff -r e8acb1f4ae01 -r f49ec37e3c41 mpn/x86_64/k10/gcd_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/k10/gcd_1.asm	Mon Mar 12 22:25:54 2012 +0100
@@ -0,0 +1,20 @@
+dnl  AMD64 mpn_gcd_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`x86_64/core2/gcd_1.asm')


More information about the gmp-commit mailing list