[Gmp-commit] /home/hgfiles/gmp: 7 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Jun 15 16:20:51 CEST 2010


details:   /home/hgfiles/gmp/rev/7490adf9c268
changeset: 13652:7490adf9c268
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 14:24:11 2010 +0200
description:
Moved from mpn/x86/invert_limb.asm.

details:   /home/hgfiles/gmp/rev/5966d4b9edfb
changeset: 13653:5966d4b9edfb
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 14:36:54 2010 +0200
description:
Store results as they are computed.

details:   /home/hgfiles/gmp/rev/2de6c3fc20e9
changeset: 13654:2de6c3fc20e9
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 14:42:01 2010 +0200
description:
New file.

details:   /home/hgfiles/gmp/rev/9094c40cba68
changeset: 13655:9094c40cba68
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 14:42:38 2010 +0200
description:
Rewrite 'cps' functions.

details:   /home/hgfiles/gmp/rev/d80a8c69e71b
changeset: 13656:d80a8c69e71b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 14:45:47 2010 +0200
description:
Simplify udiv_rnd_preinv.

details:   /home/hgfiles/gmp/rev/10b23903984d
changeset: 13657:10b23903984d
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 16:17:04 2010 +0200
description:
Rewrite 'cps' functions.

details:   /home/hgfiles/gmp/rev/e2000cb9f785
changeset: 13658:e2000cb9f785
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jun 15 16:20:47 2010 +0200
description:
Trivial merge.

diffstat:

 ChangeLog                         |   25 ++++
 gmp-impl.h                        |    3 +-
 mpn/generic/mod_1_1.c             |    9 +-
 mpn/generic/mod_1_2.c             |   12 +-
 mpn/generic/mod_1_4.c             |   18 +-
 mpn/x86/invert_limb.asm           |  170 ----------------------------
 mpn/x86/k7/invert_limb.asm        |  181 ++++++++++++++++++++++++++++++
 mpn/x86/k7/mod_1_1.asm            |  152 +++++++++++++++++++++++++
 mpn/x86/k7/mod_1_4.asm            |  227 ++++++++++++++++---------------------
 mpn/x86/pentium4/sse2/mod_1_1.asm |   71 ++++-------
 mpn/x86/pentium4/sse2/mod_1_4.asm |  126 ++++++++++----------
 mpn/x86_64/mod_1_1.asm            |   95 +++++++--------
 mpn/x86_64/mod_1_2.asm            |  110 +++++++++--------
 mpn/x86_64/mod_1_4.asm            |  135 ++++++++++++----------
 tests/mpn/Makefile.am             |    2 +-
 tests/mpn/t-mod_1.c               |  118 +++++++++++++++++++
 16 files changed, 868 insertions(+), 586 deletions(-)

diffs (truncated from 1762 to 300 lines):

diff -r c173a930ec87 -r e2000cb9f785 ChangeLog
--- a/ChangeLog	Tue May 25 10:07:38 2010 +0200
+++ b/ChangeLog	Tue Jun 15 16:20:47 2010 +0200
@@ -1,3 +1,28 @@
+2010-06-15  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Rewrite.
+	* mpn/x86_64/mod_1_2.asm (mpn_mod_1s_2p_cps): Rewrite.
+	* mpn/x86_64/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+	* gmp-impl.h (udiv_rnd_preinv): Simplify.
+
+	* mpn/x86/k7/mod_1_1.asm: New file.
+	* mpn/x86/pentium4/sse2/mod_1_1.asm (mpn_mod_1_1p_cps): Rewrite.
+	* mpn/x86/k7/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+	* mpn/x86/pentium4/sse2/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+	* mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Store results as they are
+	computed.
+	* mpn/generic/mod_1_2.c (mpn_mod_1s_2p_cps): Likewise.
+	* mpn/generic/mod_1_4.c (mpn_mod_1s_4p_cps): Likewise.
+
+	* mpn/x86/k7/invert_limb.asm: Moved from mpn/x86/invert_limb.asm.
+
+2010-06-15  Niels Möller  <nisse at lysator.liu.se>
+
+	* tests/mpn/Makefile.am (check_PROGRAMS): Added t-mod_1.
+	* tests/mpn/t-mod_1.c: New file.
+
 2010-05-25  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): Trim out space
diff -r c173a930ec87 -r e2000cb9f785 gmp-impl.h
--- a/gmp-impl.h	Tue May 25 10:07:38 2010 +0200
+++ b/gmp-impl.h	Tue Jun 15 16:20:47 2010 +0200
@@ -2767,8 +2767,7 @@
   do {									\
     mp_limb_t _qh, _ql, _r;						\
     umul_ppmm (_qh, _ql, (nh), (di));					\
-    _qh += (nh) + 1;							\
-    _r = - _qh * (d);							\
+    _r = ~(_qh + (nh)) * (d);						\
     if (_r > _ql)							\
       _r += (d);							\
     (r) = _r;								\
diff -r c173a930ec87 -r e2000cb9f785 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c	Tue May 25 10:07:38 2010 +0200
+++ b/mpn/generic/mod_1_1.c	Tue Jun 15 16:20:47 2010 +0200
@@ -41,15 +41,16 @@
   b <<= cnt;
   invert_limb (bi, b);
 
+  cps[0] = bi;
+  cps[1] = cnt;
+
   B1modb = -b;
   if (LIKELY (cnt != 0))
     B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
   udiv_rnd_preinv (B2modb, B1modb, b, bi);
-
-  cps[0] = bi;
-  cps[1] = cnt;
-  cps[2] = B1modb >> cnt;
   cps[3] = B2modb >> cnt;
 }
 
diff -r c173a930ec87 -r e2000cb9f785 mpn/generic/mod_1_2.c
--- a/mpn/generic/mod_1_2.c	Tue May 25 10:07:38 2010 +0200
+++ b/mpn/generic/mod_1_2.c	Tue Jun 15 16:20:47 2010 +0200
@@ -44,15 +44,17 @@
   b <<= cnt;
   invert_limb (bi, b);
 
+  cps[0] = bi;
+  cps[1] = cnt;
+
   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
   udiv_rnd_preinv (B2modb, B1modb, b, bi);
+  cps[3] = B2modb >> cnt;
+
   udiv_rnd_preinv (B3modb, B2modb, b, bi);
-
-  cps[0] = bi;
-  cps[1] = cnt;
-  cps[2] = B1modb >> cnt;
-  cps[3] = B2modb >> cnt;
   cps[4] = B3modb >> cnt;
 
 #if WANT_ASSERT
diff -r c173a930ec87 -r e2000cb9f785 mpn/generic/mod_1_4.c
--- a/mpn/generic/mod_1_4.c	Tue May 25 10:07:38 2010 +0200
+++ b/mpn/generic/mod_1_4.c	Tue Jun 15 16:20:47 2010 +0200
@@ -44,19 +44,23 @@
   b <<= cnt;
   invert_limb (bi, b);
 
+  cps[0] = bi;
+  cps[1] = cnt;
+
   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
+  cps[2] = B1modb >> cnt;
+
   udiv_rnd_preinv (B2modb, B1modb, b, bi);
+  cps[3] = B2modb >> cnt;
+
   udiv_rnd_preinv (B3modb, B2modb, b, bi);
+  cps[4] = B3modb >> cnt;
+
   udiv_rnd_preinv (B4modb, B3modb, b, bi);
+  cps[5] = B4modb >> cnt;
+
   udiv_rnd_preinv (B5modb, B4modb, b, bi);
-
-  cps[0] = bi;
-  cps[1] = cnt;
-  cps[2] = B1modb >> cnt;
-  cps[3] = B2modb >> cnt;
-  cps[4] = B3modb >> cnt;
-  cps[5] = B4modb >> cnt;
   cps[6] = B5modb >> cnt;
 
 #if WANT_ASSERT
diff -r c173a930ec87 -r e2000cb9f785 mpn/x86/invert_limb.asm
--- a/mpn/x86/invert_limb.asm	Tue May 25 10:07:38 2010 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,170 +0,0 @@
- dnl  x86 mpn_invert_limb
-
-dnl  Contributed to the GNU project by Niels Möller
-
-dnl  Copyright 2009 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles (approx)	div
-C K7:		 46		53
-
-C Register usage:
-
-C Input D in %edi
-C Current approximation is in %eax and/or %ecx
-C %ebx and %edx are temporaries.
-C %esi and %ebp are unused.
-
-defframe(PARAM_DIVISOR,4)
-
-ASM_START()
-
-C Make approx_tab global to work around Apple relocation bug.
-ifdef(`DARWIN',`
-	define(`approx_tab', MPN(invert_limb_tab))
-	GLOBL	approx_tab')
-
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_invert_limb)
-deflit(`FRAME', 0)
-	C Adding the push of %ebp and the corresponding pop seems to
-	C reduce running time from 46 to 43 cycles on K7. Don't know
-	C if this is a benchmark artefact or some alignment issue.
-
-	push	%ebx	FRAME_pushl()
-	C push	%ebp	FRAME_pushl()
-	push	%edi	FRAME_pushl()
-
-	mov	PARAM_DIVISOR, %edi
-	mov	%edi, %eax
-	shr	$22, %eax
-ifdef(`PIC',`
-	LEA(	approx_tab, %ebx)
-	movzwl	-1024(%ebx, %eax, 2), %eax
-',`
-	movzwl	-1024+approx_tab(%eax, %eax), %eax	C %eax = v0
-')
-
-	C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
-	mov	%eax, %ecx
-	imul	%eax, %eax
-	mov	%edi, %ebx
-	shr	$11, %ebx
-	inc	%ebx
-	mul	%ebx
-	mov	%edi, %ebx				C Prepare
-	shr	%ebx
-	sbb	%eax, %eax
-	sub	%eax, %ebx				C %ebx = d_31, %eax = mask
-	shl	$4, %ecx
-	dec	%ecx
-	sub	%edx, %ecx				C %ecx = v1
-
-	C v2 = (v1 << 15) + ((v1 * (2^48 - v1 * d_31 + ((v1 >> 1) & mask))) >> 33)
-	imul	%ecx, %ebx
-	and	%ecx, %eax
-	shr	%eax
-	sub	%ebx, %eax
-	mul	%ecx
-	mov	%edi, %eax				C Prepare for next mul
-	shl	$15, %ecx
-	shr	%edx
-	add	%edx, %ecx				C %ecx = v2
-
-	mul	%ecx
-	add	%edi, %eax
-	mov	%ecx, %eax
-	adc	%edi, %edx
-	sub	%edx, %eax				C %eax = v3
-
-	pop	%edi
-	C pop	%ebp
-	pop	%ebx
-
-	ret
-
-EPILOGUE()
-
-DEF_OBJECT(approx_tab,2)
-	.value	0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
-	.value	0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
-	.value	0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
-	.value	0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
-	.value	0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
-	.value	0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
-	.value	0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
-	.value	0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
-	.value	0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
-	.value	0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
-	.value	0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
-	.value	0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
-	.value	0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
-	.value	0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
-	.value	0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
-	.value	0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
-	.value	0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
-	.value	0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
-	.value	0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
-	.value	0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
-	.value	0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
-	.value	0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
-	.value	0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
-	.value	0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
-	.value	0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
-	.value	0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
-	.value	0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
-	.value	0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
-	.value	0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
-	.value	0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
-	.value	0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
-	.value	0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
-	.value	0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
-	.value	0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
-	.value	0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
-	.value	0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
-	.value	0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
-	.value	0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
-	.value	0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
-	.value	0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
-	.value	0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
-	.value	0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
-	.value	0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
-	.value	0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
-	.value	0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
-	.value	0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
-	.value	0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
-	.value	0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
-	.value	0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
-	.value	0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
-	.value	0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
-	.value	0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
-	.value	0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
-	.value	0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
-	.value	0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
-	.value	0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
-	.value	0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
-	.value	0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
-	.value	0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
-	.value	0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
-	.value	0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
-	.value	0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104


More information about the gmp-commit mailing list