[Gmp-commit] /var/hg/gmp: 7 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Jul 15 13:08:15 CEST 2013


details:   /var/hg/gmp/rev/86ae171eb987
changeset: 15865:86ae171eb987
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Jul 13 22:55:49 2013 +0200
description:
Provide Atom/32 cnd_add_n and cnd_sub_n.

details:   /var/hg/gmp/rev/e642c1816a23
changeset: 15866:e642c1816a23
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sat Jul 13 23:36:25 2013 +0200
description:
Remove dead ptr update.

details:   /var/hg/gmp/rev/483551488a65
changeset: 15867:483551488a65
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Jul 14 00:40:10 2013 +0200
description:
Remove explicit nop after CALL.

details:   /var/hg/gmp/rev/a26b2d1f8d47
changeset: 15868:a26b2d1f8d47
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Jul 14 00:41:42 2013 +0200
description:
Minor layout fix.

details:   /var/hg/gmp/rev/fefeaebaacdf
changeset: 15869:fefeaebaacdf
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Jul 14 00:42:38 2013 +0200
description:
Add more CPU types to table.

details:   /var/hg/gmp/rev/25f10d63513b
changeset: 15870:25f10d63513b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Jul 15 13:07:37 2013 +0200
description:
Compute inverse as floor(B^2/(dh+1)), per Niels' suggestion.
Remove inverse rounding-up code.

details:   /var/hg/gmp/rev/78b8732e93ae
changeset: 15871:78b8732e93ae
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Jul 15 13:08:07 2013 +0200
description:
ChangeLog

diffstat:

 ChangeLog                         |   27 +++++++++
 mpn/arm64/aors_n.asm              |    2 +-
 mpn/generic/sb_div_sec.c          |   15 +----
 mpn/generic/sbpi1_div_sec.c       |    3 -
 mpn/powerpc64/mode64/divrem_1.asm |    1 -
 mpn/powerpc64/mode64/divrem_2.asm |    1 -
 mpn/powerpc64/mode64/gcd_1.asm    |    2 +-
 mpn/powerpc64/mode64/mod_1_1.asm  |    1 -
 mpn/powerpc64/mode64/mod_1_4.asm  |    1 -
 mpn/powerpc64/mode64/p7/gcd_1.asm |    2 +-
 mpn/x86/atom/cnd_add_n.asm        |  102 ++++++++++++++++++++++++++++++++++
 mpn/x86/atom/cnd_sub_n.asm        |  113 ++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/aorrlsh1_n.asm         |    5 +-
 13 files changed, 252 insertions(+), 23 deletions(-)

diffs (truncated from 392 to 300 lines):

diff -r 8de68af9fb4b -r 78b8732e93ae ChangeLog
--- a/ChangeLog	Fri Jul 12 12:21:42 2013 +0200
+++ b/ChangeLog	Mon Jul 15 13:08:07 2013 +0200
@@ -1,3 +1,30 @@
+2013-07-15  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/sb_div_sec.c: Compute inverse as floor(B^2/(dh+1)), per
+	Niels' suggestion.
+	* mpn/generic/sbpi1_div_sec.c: Remove inverse rounding-up code.
+
+2013-07-14  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/powerpc64/mode64/divrem_1.asm: Remove explicit nop after CALL.
+	* mpn/powerpc64/mode64/divrem_2.asm: Likewise.
+	* mpn/powerpc64/mode64/mod_1_1.asm:  Likewise.
+	* mpn/powerpc64/mode64/mod_1_4.asm:  Likewise.
+
+2013-07-13  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86/atom/cnd_add_n.asm: New file.
+	* mpn/x86/atom/cnd_sub_n.asm: New file.
+
+2013-07-12  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/sbpi1_div_sec.c: Partial rewrite.
+
+2013-07-11  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/cnd_aors_n.asm: Tweak for better speed on K8, bobcat, bd1,
+	NHM, Atom.
+
 2013-07-05  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/powerpc64/p7/copyi.asm: Handle n = 0.
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/arm64/aors_n.asm
--- a/mpn/arm64/aors_n.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/arm64/aors_n.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -81,7 +81,7 @@
 	ADDSUBC	x9, x5, x7
 	cbnz	n, L(top)
 
-L(end):	stp	x8, x9, [rp],#16
+L(end):	stp	x8, x9, [rp]
 L(rt):	RETVAL
 	ret
 EPILOGUE()
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/generic/sb_div_sec.c
--- a/mpn/generic/sb_div_sec.c	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/generic/sb_div_sec.c	Mon Jul 15 13:08:07 2013 +0200
@@ -81,18 +81,9 @@
       np2 = np;
     }
 
-  if (dn == 1)
-    {
-      d0 = dp2[dn - 1];
-      invert_limb (inv32, d0);
-    }
-  else
-    {
-      d1 = dp2[dn - 1];
-      d0 = dp2[dn - 2];
-      invert_pi1 (dinv, d1, d0);
-      inv32 = dinv.inv32;
-    }
+  d0 = dp2[dn - 1];
+  d0 += (~d0 != 0);
+  invert_limb (inv32, d0);
 
   /* We add nn + dn to tp here, not nn + 1 + dn, as expected.  This is since nn
      here will have been incremented.  */
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/generic/sbpi1_div_sec.c
--- a/mpn/generic/sbpi1_div_sec.c	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/generic/sbpi1_div_sec.c	Mon Jul 15 13:08:07 2013 +0200
@@ -94,9 +94,6 @@
 #endif
     }
 
-  /* Decremenet inverse to keep quotient half limbs from being too large.  */
-  dinv -= dinv != 0;				/* FIXME: cmp-to-int */
-
   /* Create a divisor copy shifted half a limb.  */
   hp = tp;					/* (dn + 1) limbs */
   hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/divrem_1.asm
--- a/mpn/powerpc64/mode64/divrem_1.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/divrem_1.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -97,7 +97,6 @@
 	sld	r31, r31, r27
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
-	nop
 	beq-	cr4, L(110)
 	sldi	r9, r28, 3
 	addic.	r6, r28, -2
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/divrem_2.asm
--- a/mpn/powerpc64/mode64/divrem_2.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/divrem_2.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -96,7 +96,6 @@
 	blt	cr0, L(18)
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
-	nop
 	mulld	r10, r3, r30
 	mulhdu	r0, r3, r28
 	addc	r8, r10, r28
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/gcd_1.asm
--- a/mpn/powerpc64/mode64/gcd_1.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/gcd_1.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -20,7 +20,7 @@
 
 include(`../config.m4')
 
-C	 	    cycles/bit (approx)
+C		    cycles/bit (approx)
 C POWER3/PPC630		 ?
 C POWER4/PPC970		 8.5
 C POWER5		 ?
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/mod_1_1.asm
--- a/mpn/powerpc64/mode64/mod_1_1.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/mod_1_1.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -116,7 +116,6 @@
 	sld	r30, r4, r31
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
-	nop
 	cmpdi	cr7, r31, 0
 	neg	r0, r30
 	beq-	cr7, L(13)
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/mod_1_4.asm
--- a/mpn/powerpc64/mode64/mod_1_4.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/mod_1_4.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -195,7 +195,6 @@
 	sld	r30, r4, r31
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
-	nop
 	subfic	r9, r31, 64
 	li	r10, 1
 	sld	r10, r10, r31
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/p7/gcd_1.asm
--- a/mpn/powerpc64/mode64/p7/gcd_1.asm	Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/p7/gcd_1.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -20,7 +20,7 @@
 
 include(`../config.m4')
 
-C	 	    cycles/bit (approx)
+C		    cycles/bit (approx)
 C POWER3/PPC630		 -
 C POWER4/PPC970		 -
 C POWER5		 -
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/x86/atom/cnd_add_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/cnd_add_n.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -0,0 +1,102 @@
+dnl  X86 mpn_cnd_add_n optimised for Intel Atom.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9   (Banias)		 ?
+C P6 model 13  (Dothan)		 ?
+C P4 model 0-1 (Willamette)	 ?
+C P4 model 2   (Northwood)	 ?
+C P4 model 3-4 (Prescott)	 ?
+C Intel atom			 4.67
+C AMD K6			 ?
+C AMD K7			 ?
+C AMD K8			 ?
+
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebp')
+define(`n',   `%ecx')
+define(`cnd', `20(%esp)')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_cnd_add_n)
+	push	%edi
+	push	%esi
+	push	%ebx
+	push	%ebp
+
+	mov	cnd, %eax		C make cnd into a mask (1)
+	mov	24(%esp), rp
+	neg	%eax			C make cnd into a mask (1)
+	mov	28(%esp), up
+	sbb	%eax, %eax		C make cnd into a mask (1)
+	mov	32(%esp), vp
+	mov	%eax, cnd		C make cnd into a mask (1)
+	mov	36(%esp), n
+
+	xor	%edx, %edx
+
+	shr	$1, n
+	jnc	L(top)
+
+	mov	0(vp), %eax
+	and	cnd, %eax
+	lea	4(vp), vp
+	add	0(up), %eax
+	lea	4(rp), rp
+	lea	4(up), up
+	sbb	%edx, %edx
+	mov	%eax, -4(rp)
+	inc	n
+	dec	n
+	je	L(end)
+
+L(top):	sbb	%edx, %edx
+	mov	0(vp), %eax
+	and	cnd, %eax
+	lea	8(vp), vp
+	lea	8(rp), rp
+	mov	-4(vp), %ebx
+	and	cnd, %ebx
+	add	%edx, %edx
+	adc	0(up), %eax
+	lea	8(up), up
+	mov	%eax, -8(rp)
+	adc	-4(up), %ebx
+	dec	n
+	mov	%ebx, -4(rp)
+	jne	L(top)
+
+L(end):	mov	$0, %eax
+	adc	%eax, %eax
+
+	pop	%ebp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
+ASM_END()
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/x86/atom/cnd_sub_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/cnd_sub_n.asm	Mon Jul 15 13:08:07 2013 +0200
@@ -0,0 +1,113 @@
+dnl  X86 mpn_cnd_sub_n optimised for Intel Atom.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9   (Banias)		 ?
+C P6 model 13  (Dothan)		 ?
+C P4 model 0-1 (Willamette)	 ?
+C P4 model 2   (Northwood)	 ?
+C P4 model 3-4 (Prescott)	 ?
+C Intel atom			 5.67
+C AMD K6			 ?
+C AMD K7			 ?
+C AMD K8			 ?
+
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebp')


More information about the gmp-commit mailing list