[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Mar 28 14:26:18 CEST 2011


details:   /var/hg/gmp/rev/0335e516ec11
changeset: 14105:0335e516ec11
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Mar 28 14:25:40 2011 +0200
description:
Protect movzwl register parameters from being interpreted as m4 macro parameters.

details:   /var/hg/gmp/rev/b8313129fdab
changeset: 14106:b8313129fdab
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Mon Mar 28 14:26:11 2011 +0200
description:
*** empty log message ***

diffstat:

 ChangeLog                  |   5 +++++
 mpn/x86/k7/invert_limb.asm |  27 ++++++++++++++++-----------
 2 files changed, 21 insertions(+), 11 deletions(-)

diffs (71 lines):

diff -r eddb0d9d7ce2 -r b8313129fdab ChangeLog
--- a/ChangeLog	Tue Mar 22 22:03:13 2011 +0100
+++ b/ChangeLog	Mon Mar 28 14:26:11 2011 +0200
@@ -1,3 +1,8 @@
+2011-03-28  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86/k7/invert_limb.asm: Protect movzwl register parameters from
+	being interpreted as m4 macro parameters.
+
 2011-03-22  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/x86_64/div_qr_2_pi1_norm.asm: Copied optimized inner loop
diff -r eddb0d9d7ce2 -r b8313129fdab mpn/x86/k7/invert_limb.asm
--- a/mpn/x86/k7/invert_limb.asm	Tue Mar 22 22:03:13 2011 +0100
+++ b/mpn/x86/k7/invert_limb.asm	Mon Mar 28 14:26:11 2011 +0200
@@ -1,8 +1,8 @@
- dnl  x86 mpn_invert_limb
+dnl  x86 mpn_invert_limb
 
 dnl  Contributed to the GNU project by Niels Möller
 
-dnl  Copyright 2009 Free Software Foundation, Inc.
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -35,12 +35,17 @@
 C AMD K7			46		53
 C AMD K8			 ?
 
+C TODO
+C  * These c/l numbers are for a non-PIC build.  Consider falling back to using
+C    the 'div' instruction for PIC builds.
+C  * Avoid push/pop on k7.
+C  * Perhaps use this file--or at least the algorithm--for more machines than k7.
+
 C Register usage:
-
-C Input D in %edi
-C Current approximation is in %eax and/or %ecx
-C %ebx and %edx are temporaries.
-C %esi and %ebp is unused.
+C   Input D in %edi
+C   Current approximation is in %eax and/or %ecx
+C   %ebx and %edx are temporaries
+C   %esi and %ebp are unused
 
 defframe(PARAM_DIVISOR,4)
 
@@ -55,9 +60,9 @@
 	ALIGN(16)
 PROLOGUE(mpn_invert_limb)
 deflit(`FRAME', 0)
-	C Adding the push of %ebp and the corresponding pop seems to
-	C reduce running time from 46 to 43 cycles on K7. Don't know
-	C if this is a benchmark artefact or some alignment issue.
+	C Adding the unnecessary push of %ebp and the corresponding pop seems
+	C to *reduce* running time from 46 to 43 cycles on K7.  Don't know if
+	C this is a benchmark artefact or some alignment issue.
 
 	push	%ebx	FRAME_pushl()
 	C push	%ebp	FRAME_pushl()
@@ -70,7 +75,7 @@
 	LEA(	approx_tab, %ebx)
 	movzwl	-1024(%ebx, %eax, 2), %eax
 ',`
-	movzwl	-1024+approx_tab(%eax, %eax), %eax	C %eax = v0
+	movzwl	-1024+approx_tab`'(%eax, %eax), %eax	C %eax = v0
 ')
 
 	C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1


More information about the gmp-commit mailing list