[Gmp-commit] /home/hgfiles/gmp: 5 new changesets

Mon May 10 11:22:15 CEST 2010

details:   /home/hgfiles/gmp/rev/8e1260bce208
changeset: 13618:8e1260bce208
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun May 09 23:03:15 2010 +0200
description:
Clean up comments.

details:   /home/hgfiles/gmp/rev/c767c27a18d6
changeset: 13619:c767c27a18d6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 10 00:11:45 2010 +0200
description:
Misc cleanup.

details:   /home/hgfiles/gmp/rev/7c6fd72092fb
changeset: 13620:7c6fd72092fb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 10 00:12:14 2010 +0200
description:
Remove redundant include.

details:   /home/hgfiles/gmp/rev/64228ef81450
changeset: 13621:64228ef81450
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 10 00:14:16 2010 +0200
description:
Overhaul of x86_64 {add,sub,rsb}lshC_n.asm code.

details:   /home/hgfiles/gmp/rev/563e3c3cf26a
changeset: 13622:563e3c3cf26a
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon May 10 11:22:02 2010 +0200
description:
Fix typo.

diffstat:

 ChangeLog                          |   18 ++++-
 mpn/ia64/aorslshC_n.asm            |    2 -
 mpn/x86_64/aorrlsh1_n.asm          |   12 +-
 mpn/x86_64/aorrlsh2_n.asm          |  144 +++-------------------------------
 mpn/x86_64/aorrlshC_n.asm          |  143 ++++++++++++++++++++++++++++++++++
 mpn/x86_64/core2/aorrlsh1_n.asm    |   39 +++++++++
 mpn/x86_64/core2/aorrlsh2_n.asm    |   39 +++++++++
 mpn/x86_64/core2/aorslsh1_n.asm    |  154 -------------------------------------
 mpn/x86_64/core2/sublsh1_n.asm     |   33 +++++++
 mpn/x86_64/core2/sublsh2_n.asm     |   33 +++++++
 mpn/x86_64/core2/sublshC_n.asm     |  144 ++++++++++++++++++++++++++++++++++
 mpn/x86_64/pentium4/aorslsh1_n.asm |   10 +-
 mpn/x86_64/pentium4/aorslsh2_n.asm |   10 +-
 13 files changed, 478 insertions(+), 303 deletions(-)

diffs (truncated from 892 to 300 lines):

diff -r d9d303fcd120 -r 563e3c3cf26a ChangeLog

--- a/ChangeLog	Sat May 08 00:50:30 2010 +0200
+++ b/ChangeLog	Mon May 10 11:22:02 2010 +0200
@@ -1,9 +1,25 @@
+2010-05-10  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/aorrlsh2_n.asm: Fix typo.
+
+2010-05-09  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/aorrlshC_n.asm: New file based on aorrlsh2_n.asm.
+	* mpn/x86_64/aorrlsh2_n.asm: Now just include aorrlshC_n.asm.
+	* mpn/x86_64/core2/aorrlsh1_n.asm: New file, include ../aorrlshC_n.asm.
+	* mpn/x86_64/core2/aorrlsh2_n.asm: Likewise.
+
+	* mpn/x86_64/core2/sublshC_n.asm: New file based on aorslsh1_n.asm.
+	* mpn/x86_64/core2/aorslsh1_n.asm: Remove.
+	* mpn/x86_64/core2/sublsh1_n.asm: Just include sublshC_n.asm.
+	* mpn/x86_64/core2/sublsh2_n.asm: Likewise.
+
 2010-05-08  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/x86_64/atom/gmp-mparam.h: Disable mpn_rsh1add_n, mpn_rsh1sub_n.
 
 	* mpn/x86_64/pentium4/aorslshC_n.asm: New file based on aorslsh1_n.asm.
-	* mpn/x86_64/pentium4/aorslsh1_n.asm: Just include aorslshC_n.asm.
+	* mpn/x86_64/pentium4/aorslsh1_n.asm: Now just include aorslshC_n.asm.
 	* mpn/x86_64/pentium4/aorslsh2_n.asm: New file.
 
 2010-05-07  Torbjorn Granlund  <tege at gmplib.org>
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/ia64/aorslshC_n.asm
--- a/mpn/ia64/aorslshC_n.asm	Sat May 08 00:50:30 2010 +0200
+++ b/mpn/ia64/aorslshC_n.asm	Mon May 10 11:22:02 2010 +0200
@@ -19,8 +19,6 @@
 dnl  You should have received a copy of the GNU Lesser General Public License
 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-include(`../config.m4')
-
 C           cycles/limb
 C Itanium:      ?
 C Itanium 2:    1.5
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/x86_64/aorrlsh1_n.asm
--- a/mpn/x86_64/aorrlsh1_n.asm	Sat May 08 00:50:30 2010 +0200
+++ b/mpn/x86_64/aorrlsh1_n.asm	Mon May 10 11:22:02 2010 +0200
@@ -44,13 +44,13 @@
 define(`n', `%rcx')
 
 ifdef(`OPERATION_addlsh1_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_addlsh1_n)')
+  define(ADDSUB,	add)
+  define(ADCSBB,	adc)
+  define(func,		mpn_addlsh1_n)')
 ifdef(`OPERATION_rsblsh1_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_rsblsh1_n)')
+  define(ADDSUB,	sub)
+  define(ADCSBB,	sbb)
+  define(func,		mpn_rsblsh1_n)')
 
 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
 
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/x86_64/aorrlsh2_n.asm
--- a/mpn/x86_64/aorrlsh2_n.asm	Sat May 08 00:50:30 2010 +0200
+++ b/mpn/x86_64/aorrlsh2_n.asm	Mon May 10 11:22:02 2010 +0200
@@ -1,8 +1,9 @@
-dnl  AMD64 mpn_addlsh2_n and mpn_rsblsh2_n.  R = 2*V +- U.
-dnl  ("rsb" means reversed subtract, name mandated by mpn_sublsh2_n which
-dnl  subtacts the shifted operand from the unshifted operand.)
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
 
-dnl  Copyright 2009 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -21,135 +22,18 @@
 
 include(`../config.m4')
 
-
-C	     cycles/limb
-C AMD K8,K9	 2
-C AMD K10	 2
-C Intel P4	 ?
-C Intel core2 	 3
-C Intel corei	 2.75
-C Intel atom	 ?
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
+define(LSH, 2)
+define(RSH, 62)
 
 ifdef(`OPERATION_addlsh2_n',`
-  define(ADDSUB,        `add')
-  define(ADCSBB,       `adc')
-  define(func, mpn_addlsh2_n)')
+  define(ADDSUB,	add)
+  define(ADCSBB,	adc)
+  define(func,		mpn_addlsh2_n)')
 ifdef(`OPERATION_rsblsh2_n',`
-  define(ADDSUB,        `sub')
-  define(ADCSBB,       `sbb')
-  define(func, mpn_rsblsh2_n)')
+  define(ADDSUB,	sub)
+  define(ADCSBB,	sbb)
+  define(func,		mpn_rsblsh2_n)')
 
 MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
 
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(vp), %r8
-	lea	(,%r8,4), %r12
-	shr	$62, %r8
-
-	mov	R32(n), R32(%rax)
-	lea	(rp,n,8), rp
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-	neg	n
-	and	$3, R8(%rax)
-	je	L(b00)
-	cmp	$2, R8(%rax)
-	jc	L(b01)
-	je	L(b10)
-
-L(b11):	mov	8(vp,n,8), %r10
-	lea	(%r8,%r10,4), %r14
-	shr	$62, %r10
-	mov	16(vp,n,8), %r11
-	lea	(%r10,%r11,4), %r15
-	shr	$62, %r11
-	ADDSUB	(up,n,8), %r12
-	ADCSBB	8(up,n,8), %r14
-	ADCSBB	16(up,n,8), %r15
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r12, (rp,n,8)
-	mov	%r14, 8(rp,n,8)
-	mov	%r15, 16(rp,n,8)
-	add	$3, n
-	js	L(top)
-	jmp	L(end)
-
-L(b01):	mov	%r8, %r11
-	ADDSUB	(up,n,8), %r12
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r12, (rp,n,8)
-	add	$1, n
-	js	L(top)
-	jmp	L(end)
-
-L(b10):	mov	8(vp,n,8), %r11
-	lea	(%r8,%r11,4), %r15
-	shr	$62, %r11
-	ADDSUB	(up,n,8), %r12
-	ADCSBB	8(up,n,8), %r15
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r12, (rp,n,8)
-	mov	%r15, 8(rp,n,8)
-	add	$2, n
-	js	L(top)
-	jmp	L(end)
-
-L(b00):	mov	8(vp,n,8), %r9
-	mov	16(vp,n,8), %r10
-	jmp	L(e00)
-
-	ALIGN(16)
-L(top):	mov	16(vp,n,8), %r10
-	mov	(vp,n,8), %r8
-	mov	8(vp,n,8), %r9
-	lea	(%r11,%r8,4), %r12
-	shr	$62, %r8
-L(e00):	lea	(%r8,%r9,4), %r13
-	shr	$62, %r9
-	mov	24(vp,n,8), %r11
-	lea	(%r9,%r10,4), %r14
-	shr	$62, %r10
-	lea	(%r10,%r11,4), %r15
-	shr	$62, %r11
-	add	R32(%rax), R32(%rax)		  C restore carry
-	ADCSBB	(up,n,8), %r12
-	ADCSBB	8(up,n,8), %r13
-	ADCSBB	16(up,n,8), %r14
-	ADCSBB	24(up,n,8), %r15
-	mov	%r12, (rp,n,8)
-	mov	%r13, 8(rp,n,8)
-	mov	%r14, 16(rp,n,8)
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r15, 24(rp,n,8)
-	add	$4, n
-	js	L(top)
-L(end):
-
-ifdef(`OPERATION_addlsh2_n',`
-	sub	R32(%r11), R32(%rax)
-	neg	R32(%rax)')
-ifdef(`OPERATION_rsblsh2_n',`
-	add	R32(%r11), R32(%rax)
-	movslq	R32(%rax), %rax')
-
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	ret
-EPILOGUE()
+include_mpn(`x86_64/aorrlshC_n.asm')
diff -r d9d303fcd120 -r 563e3c3cf26a mpn/x86_64/aorrlshC_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/aorrlshC_n.asm	Mon May 10 11:22:02 2010 +0200
@@ -0,0 +1,143 @@
+dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+C	     cycles/limb
+C AMD K8,K9	 2
+C AMD K10	 2
+C Intel P4	 ?
+C Intel core2 	 3
+C Intel corei	 2.75
+C Intel atom	 ?
+C VIA nano	 ?
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+
+define(M, eval(1<<LSH))
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	mov	(vp), %r8
+	lea	(,%r8,M), %r12
+	shr	$RSH, %r8
+
+	mov	R32(n), R32(%rax)
+	lea	(rp,n,8), rp
+	lea	(up,n,8), up
+	lea	(vp,n,8), vp
+	neg	n
+	and	$3, R8(%rax)
+	je	L(b00)
+	cmp	$2, R8(%rax)
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	mov	8(vp,n,8), %r10
+	lea	(%r8,%r10,M), %r14
+	shr	$RSH, %r10
+	mov	16(vp,n,8), %r11
+	lea	(%r10,%r11,M), %r15
+	shr	$RSH, %r11
+	ADDSUB	(up,n,8), %r12