[Gmp-commit] /home/hgfiles/gmp: Native mpn{_pi1, }_bdiv_q_1 for x86.

mercurial at gmplib.org mercurial at gmplib.org
Tue Jan 18 08:15:47 CET 2011


details:   /home/hgfiles/gmp/rev/0bf61597048d
changeset: 13746:0bf61597048d
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Jan 18 07:59:02 2011 +0100
description:
Native mpn{_pi1,}_bdiv_q_1 for x86.

diffstat:

 ChangeLog            |    4 +
 mpn/x86/bdiv_q_1.asm |  197 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 201 insertions(+), 0 deletions(-)

diffs (212 lines):

diff -r 2eb47368ef96 -r 0bf61597048d ChangeLog
--- a/ChangeLog	Sat Jan 15 08:25:10 2011 +0100
+++ b/ChangeLog	Tue Jan 18 07:59:02 2011 +0100
@@ -1,3 +1,7 @@
+2011-01-18 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86/bdiv_q_1.asm: New file (same core alg. as dive_1).
+
 2011-01-15 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/generic/divexact.c: Avoid COPY if not needed.
diff -r 2eb47368ef96 -r 0bf61597048d mpn/x86/bdiv_q_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/bdiv_q_1.asm	Tue Jan 18 07:59:02 2011 +0100
@@ -0,0 +1,197 @@
+dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb
+C P54    30.0
+C P55    29.0
+C P6     13.0 odd divisor, 12.0 even (strangely)
+C K6     14.0
+C K7     12.0
+C P4     42.0
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)	
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+	TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C		    mp_limb_t inverse, int shift)
+
+	ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SHIFT, %ecx
+	pushl	%ebp	FRAME_pushl()
+
+	movl	PARAM_INVERSE, %eax
+	movl	PARAM_SIZE, %ebp
+	pushl	%ebx	FRAME_pushl()
+L(common):
+	pushl	%edi	FRAME_pushl()
+	pushl	%esi	FRAME_pushl()
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_DST, %edi
+
+	leal	(%esi,%ebp,4), %esi	C src end
+	leal	(%edi,%ebp,4), %edi	C dst end
+	negl	%ebp			C -size
+
+	movl	%eax, VAR_INVERSE
+	movl	(%esi,%ebp,4), %eax	C src[0]
+
+	xorl	%ebx, %ebx
+	xorl	%edx, %edx
+
+	incl	%ebp
+	jz	L(one)
+
+	movl	(%esi,%ebp,4), %edx	C src[1]
+
+	shrdl(	%cl, %edx, %eax)
+
+	movl	VAR_INVERSE, %edx
+	jmp	L(entry)
+
+
+	ALIGN(8)
+	nop	C k6 code alignment
+	nop
+L(top):
+	C eax	q
+	C ebx	carry bit, 0 or -1
+	C ecx	shift
+	C edx	carry limb
+	C esi	src end
+	C edi	dst end
+	C ebp	counter, limbs, negative
+
+	movl	-4(%esi,%ebp,4), %eax
+	subl	%ebx, %edx		C accumulate carry bit
+
+	movl	(%esi,%ebp,4), %ebx
+
+	shrdl(	%cl, %ebx, %eax)
+
+	subl	%edx, %eax		C apply carry limb
+	movl	VAR_INVERSE, %edx
+
+	sbbl	%ebx, %ebx
+
+L(entry):
+	imull	%edx, %eax
+
+	movl	%eax, -4(%edi,%ebp,4)
+	movl	PARAM_DIVISOR, %edx
+
+	mull	%edx
+
+	incl	%ebp
+	jnz	L(top)
+
+
+	movl	-4(%esi), %eax		C src high limb
+L(one):
+	shrl	%cl, %eax
+	popl	%esi	FRAME_popl()
+
+	addl	%ebx, %eax		C apply carry bit
+
+	subl	%edx, %eax		C apply carry limb
+
+	imull	VAR_INVERSE, %eax
+
+	movl	%eax, -4(%edi)
+
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+
+	ret
+
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+
+	ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_DIVISOR, %eax
+	pushl	%ebp	FRAME_pushl()
+
+	movl	$-1, %ecx		C shift count
+	movl	PARAM_SIZE, %ebp
+
+	pushl	%ebx	FRAME_pushl()
+
+L(strip_twos):
+	incl	%ecx
+
+	shrl	%eax
+	jnc	L(strip_twos)
+
+	leal	1(%eax,%eax), %ebx	C d without twos
+	andl	$127, %eax		C d/2, 7 bits
+
+ifdef(`PIC',`
+	LEA(	binvert_limb_table, %edx)
+	movzbl	(%eax,%edx), %eax		C inv 8 bits
+',`
+	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
+')
+
+	leal	(%eax,%eax), %edx	C 2*inv
+	movl	%ebx, PARAM_DIVISOR	C d without twos
+	imull	%eax, %eax		C inv*inv
+	imull	%ebx, %eax		C inv*inv*d
+	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
+
+	leal	(%edx,%edx), %eax	C 2*inv
+	imull	%edx, %edx		C inv*inv
+	imull	%ebx, %edx		C inv*inv*d
+	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
+
+	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	pushl	%eax	FRAME_pushl()
+	imull	PARAM_DIVISOR, %eax
+	cmpl	$1, %eax
+	popl	%eax	FRAME_popl()')
+
+	jmp	L(common)
+EPILOGUE()
+


More information about the gmp-commit mailing list