[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Feb 24 22:24:52 CET 2011


details:   /var/hg/gmp/rev/56b84d514294
changeset: 13893:56b84d514294
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Feb 24 22:19:20 2011 +0100
description:
Fix typo in MULFUNC_PROLOGUE.

details:   /var/hg/gmp/rev/44b583f583d4
changeset: 13894:44b583f583d4
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Feb 24 22:20:25 2011 +0100
description:
New file.

details:   /var/hg/gmp/rev/71bcec497b55
changeset: 13895:71bcec497b55
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Feb 24 22:24:46 2011 +0100
description:
Trivial merge.

diffstat:

 ChangeLog                   |    9 ++
 mpn/x86/atom/aors_n.asm     |    6 +-
 mpn/x86/atom/logops_n.asm   |  142 ++++++++++++++++++++++++++++--
 mpn/x86/k7/mod_1_1.asm      |  197 +++++++++++++++++++++++++++++--------------
 mpn/x86/p6/sse2/mod_1_1.asm |   23 +++++
 mpn/x86/p6/sse2/mod_1_4.asm |   23 ++--
 6 files changed, 308 insertions(+), 92 deletions(-)

diffs (truncated from 510 to 300 lines):

diff -r 980fee0af6d5 -r 71bcec497b55 ChangeLog
--- a/ChangeLog	Wed Feb 23 11:28:44 2011 +0100
+++ b/ChangeLog	Thu Feb 24 22:24:46 2011 +0100
@@ -1,3 +1,12 @@
+2011-02-24  Niels Möller  <nisse at lysator.liu.se>
+
+	* mpn/x86/k7/mod_1_1.asm (mpn_mod_1_1p): Rewrite using the same
+	algorithm as the x86_64 version.
+
+2011-02-23  Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86/atom/logops_n.asm: New file (same loop as aors_n).
+
 2011-02-23  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Shaved off one
diff -r 980fee0af6d5 -r 71bcec497b55 mpn/x86/atom/aors_n.asm
--- a/mpn/x86/atom/aors_n.asm	Wed Feb 23 11:28:44 2011 +0100
+++ b/mpn/x86/atom/aors_n.asm	Thu Feb 24 22:24:46 2011 +0100
@@ -78,7 +78,7 @@
 
 define(`rp',  `%edi')
 define(`up',  `%esi')
-define(`vp',  `%ebp')
+define(`vp',  `%ebx')
 define(`cy',  `%ecx')
 define(`r1',  `%ecx')
 define(`r2',  `%edx')
@@ -95,8 +95,8 @@
 	mov	rp, SAVE_RP
 	mov	PARAM_DST, rp
 	mov	up, SAVE_UP
+	mov	PARAM_SRC1, up
 	shr	%eax			C size >> 1
-	mov	PARAM_SRC1, up
 	mov	vp, SAVE_VP
 	mov	PARAM_SRC2, vp
 	jz	L(one)			C size == 1
@@ -125,8 +125,8 @@
 	mov	r1, (rp)
 L(entry):
 	M4_inst	-4(vp), r2
+	lea	8(rp), rp
 	dec	%eax
-	lea	8(rp), rp
 	mov	(up), r1
 	mov	r2, -4(rp)
 	jnz	L(oop)
diff -r 980fee0af6d5 -r 71bcec497b55 mpn/x86/atom/logops_n.asm
--- a/mpn/x86/atom/logops_n.asm	Wed Feb 23 11:28:44 2011 +0100
+++ b/mpn/x86/atom/logops_n.asm	Thu Feb 24 22:24:46 2011 +0100
@@ -1,24 +1,140 @@
 dnl  Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
 
 dnl  Copyright 2011 Free Software Foundation, Inc.
-dnl
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
 dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
 dnl  You should have received a copy of the GNU Lesser General Public License
 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
+C				   cycles/limb
+C				op	nop	opn
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 3	 3.5	 3.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+define(M4_choose_op,
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_want_pre', `$4')
+define(`M4_inst',     `$3')
+define(`M4_want_post',`$2')
+')')
+define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
+define(M4post,`ifelse(M4_want_post,yes,`$1')')
+
+M4_choose_op( and_n,     , andl,    )
+M4_choose_op( andn_n,    , andl, yes)
+M4_choose_op( nand_n, yes, andl,    )
+M4_choose_op( ior_n,     ,  orl,    )
+M4_choose_op( iorn_n,    ,  orl, yes)
+M4_choose_op( nior_n, yes,  orl,    )
+M4_choose_op( xor_n,     , xorl,    )
+M4_choose_op( xnor_n, yes, xorl,    )
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
 
 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-include_mpn(`x86/pentium/logops_n.asm')
+
+C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size);
+C
+
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC1, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+define(`cnt', `%eax')
+define(`r1',  `%ecx')
+define(`r2',  `%edx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function)
+	mov	PARAM_SIZE, cnt		C size
+	mov	rp, SAVE_RP		C spill rp into the size slot (already read)
+	mov	PARAM_DST, rp		C rp = dst
+	mov	up, SAVE_UP		C spill up into the dst slot (already read)
+	mov	PARAM_SRC1, up		C up = src1
+	shr	cnt			C size >> 1
+	mov	vp, SAVE_VP		C spill vp into the src1 slot (already read)
+	mov	PARAM_SRC2, vp		C vp = src2
+	mov	(up), r1		C first limb; mov keeps the flags from shr
+	jz	L(end)			C size == 1
+	jnc	L(even)			C size % 2 == 0
+
+	ALIGN(16)
+L(oop):					C two limbs per pass; r1 holds the limb at up
+M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	M4_inst	(vp), r1
+	lea	8(up), up		C lea advances pointers without touching flags
+	mov	-4(up), r2		C second limb of the pair
+M4post(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	lea	8(vp), vp
+	mov	r1, (rp)		C store first result of the pair
+L(entry):				C even sizes join here with the limb in r2
+M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r2)')
+	M4_inst	-4(vp), r2
+	lea	8(rp), rp
+M4post(`	notl_or_xorl_GMP_NUMB_MASK(r2)')
+	dec	cnt			C sets ZF for the jnz below
+	mov	(up), r1		C preload limb for the next pass or for L(end)
+	mov	r2, -4(rp)		C store second result of the pair
+	jnz	L(oop)
+
+L(end):					C last limb is in r1; restore spilled registers
+M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	mov	SAVE_UP, up
+	M4_inst	(vp), r1
+M4post(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	mov	SAVE_VP, vp
+	mov	r1, (rp)
+	mov	SAVE_RP, rp
+	ret
+
+L(even):				C bias pointers so the -4 offsets at L(entry) hit limb 0
+	mov	r1, r2			C L(entry) expects the current limb in r2
+	lea	4(up), up
+	lea	4(vp), vp
+	lea	-4(rp), rp
+	jmp	L(entry)
+EPILOGUE()
+ASM_END()
diff -r 980fee0af6d5 -r 71bcec497b55 mpn/x86/k7/mod_1_1.asm
--- a/mpn/x86/k7/mod_1_1.asm	Wed Feb 23 11:28:44 2011 +0100
+++ b/mpn/x86/k7/mod_1_1.asm	Thu Feb 24 22:24:46 2011 +0100
@@ -1,8 +1,8 @@
 dnl  x86-32 mpn_mod_1_1p, requiring cmov.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
+dnl  Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
 dnl
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -25,16 +25,45 @@
 C P5				 ?
 C P6 model 0-8,10-12		 ?
 C P6 model 9  (Banias)		 ?
-C P6 model 13 (Dothan)		11.75
+C P6 model 13 (Dothan)		 ?
 C P4 model 0  (Willamette)	 ?
 C P4 model 1  (?)		 ?
 C P4 model 2  (Northwood)	 ?
 C P4 model 3  (Prescott)	 ?
 C P4 model 4  (Nocona)		 ?
 C AMD K6			 ?
-C AMD K7			 8
+C AMD K7			 7
 C AMD K8			 ?
 
+define(`B2mb', `%ebx')
+define(`r0', `%esi')
+define(`r2', `%ebp')
+define(`t0', `%edi')
+define(`ap', `%ecx')  C Also shift count
+
+C Stack frame
+C	pre	36(%esp)
+C	b	32(%esp)
+C	n	28(%esp)
+C	ap	24(%esp)
+C	return	20(%esp)
+C	%ebp	16(%esp)
+C	%edi	12(%esp)
+C	%esi	8(%esp)
+C	%ebx	4(%esp)
+C	B2mod	(%esp)
+
+define(`B2modb', `(%esp)')
+define(`n', `28(%esp)')
+define(`b', `32(%esp)')
+define(`pre', `36(%esp)')
+
+C mp_limb_t
+C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t pre[4])
+C
+C The pre array contains bi, cnt, B1modb, B2modb
+C Note: This implementation needs B1modb only when cnt > 0
+
 ASM_START()
 	TEXT
 	ALIGN(8)
@@ -43,74 +72,116 @@
 	push	%edi
 	push	%esi
 	push	%ebx
-	mov	24(%esp), %ebx
-	mov	20(%esp), %esi
-	mov	32(%esp), %ebp		C cps[]
-	lea	(%esi,%ebx,4), %esi
+	mov	32(%esp), %ebp		C pre[]
 
-	mov	8(%ebp), %edi		C B1modb
-	mov	12(%ebp), %ebp		C B2modb
-	mov	-4(%esi), %eax
-	mul	%edi
-	xor	%ecx, %ecx
-	add	-8(%esi), %eax
-	adc	%edx, %ecx
-	sub	$2, 24(%esp)
-	jle	L(end)
+	mov	12(%ebp), %eax		C B2modb
+	push	%eax			C Put it on stack
+
+	mov	4(%ebp), %cl
+	shrl	%cl, b
+
+	mov	n, %edx
+	mov	24(%esp), ap
+
+	lea	(ap, %edx, 4), ap
+	mov	-4(ap), %eax
+	cmp	$3, %edx
+	jnc	L(first)
+	mov	-8(ap), r0


More information about the gmp-commit mailing list