[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sun Feb 20 15:41:31 CET 2011


details:   /var/hg/gmp/rev/28c161dea4a7
changeset: 13865:28c161dea4a7
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Feb 20 15:33:52 2011 +0100
description:
Save a jump.

details:   /var/hg/gmp/rev/3c0dbda7f526
changeset: 13866:3c0dbda7f526
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Feb 20 15:40:56 2011 +0100
description:
x86 c/l table cleanup.

diffstat:

 ChangeLog                           |   4 +++
 mpn/x86/aors_n.asm                  |  10 ++++----
 mpn/x86/aorsmul_1.asm               |  20 +++++++++---------
 mpn/x86/atom/aorsmul_1.asm          |  14 ++++++------
 mpn/x86/bdiv_dbm1c.asm              |  40 +++++++++++++++++++++---------------
 mpn/x86/copyd.asm                   |  10 ++++----
 mpn/x86/copyi.asm                   |  10 ++++----
 mpn/x86/k6/aorsmul_1.asm            |  14 ++++++------
 mpn/x86/k6/mul_1.asm                |  14 ++++++------
 mpn/x86/k7/addlsh1_n.asm            |  10 ++++----
 mpn/x86/k7/aorsmul_1.asm            |  12 +++++-----
 mpn/x86/k7/invert_limb.asm          |  10 ++++----
 mpn/x86/k7/mod_1_1.asm              |  10 ++++----
 mpn/x86/k7/mod_1_4.asm              |  10 ++++----
 mpn/x86/k7/mul_1.asm                |  10 ++++----
 mpn/x86/k7/sublsh1_n.asm            |  10 ++++----
 mpn/x86/lshift.asm                  |  12 +++++-----
 mpn/x86/mod_34lsub1.asm             |  10 ++++----
 mpn/x86/mul_1.asm                   |  20 +++++++++---------
 mpn/x86/mul_basecase.asm            |  10 ++++----
 mpn/x86/p6/aors_n.asm               |   8 +++---
 mpn/x86/p6/aorsmul_1.asm            |  14 ++++++------
 mpn/x86/pentium4/sse2/add_n.asm     |   8 +++---
 mpn/x86/pentium4/sse2/addlsh1_n.asm |  10 ++++----
 mpn/x86/pentium4/sse2/addmul_1.asm  |  16 +++++++-------
 mpn/x86/pentium4/sse2/mod_1_1.asm   |  12 +++++-----
 mpn/x86/pentium4/sse2/mod_1_4.asm   |  14 ++++++------
 mpn/x86/pentium4/sse2/mul_1.asm     |  10 ++++----
 mpn/x86/pentium4/sse2/popcount.asm  |  34 +++++++++++++++---------------
 mpn/x86/pentium4/sse2/sub_n.asm     |   8 +++---
 mpn/x86/pentium4/sse2/submul_1.asm  |  14 ++++++------
 mpn/x86/rshift.asm                  |  12 +++++-----
 mpn/x86/sqr_basecase.asm            |  10 ++++----
 33 files changed, 220 insertions(+), 210 deletions(-)

diffs (truncated from 877 to 300 lines):

diff -r 153ddf846890 -r 3c0dbda7f526 ChangeLog
--- a/ChangeLog	Sun Feb 20 10:59:12 2011 +0100
+++ b/ChangeLog	Sun Feb 20 15:40:56 2011 +0100
@@ -1,3 +1,7 @@
+2011-02-20  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86/bdiv_dbm1c.asm: Save a jump.
+
 2011-02-20 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mpn/x86/atom/aors_n.asm: New code.
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/aors_n.asm
--- a/mpn/x86/aors_n.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aors_n.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -22,11 +22,11 @@
 
 
 C     cycles/limb
-C P5:   3.375
-C P6:   3.125
-C K6:   3.5
-C K7:   2.25
-C P4:   8.75
+C P5	3.375
+C P6	3.125
+C K6	3.5
+C K7	2.25
+C P4	8.75
 
 
 ifdef(`OPERATION_add_n',`
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/aorsmul_1.asm
--- a/mpn/x86/aorsmul_1.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aorsmul_1.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -22,19 +22,19 @@
 include(`../config.m4')
 
 
-C                           cycles/limb
-C P5:                           14.75
-C P6 model 0-8,10-12)            7.5
+C			    cycles/limb
+C P5				14.75
+C P6 model 0-8,10-12		 7.5
 C P6 model 9  (Banias)		 6.7
-C P6 model 13 (Dothan)           6.75
-C P4 model 0  (Willamette)      24.0
-C P4 model 1  (?)               24.0
-C P4 model 2  (Northwood)       24.0
+C P6 model 13 (Dothan)		 6.75
+C P4 model 0  (Willamette)	24.0
+C P4 model 1  (?)		24.0
+C P4 model 2  (Northwood)	24.0
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C K6:                           12.5
-C K7:                            5.25
-C K8:
+C AMD K6			12.5
+C AMD K7			 5.25
+C AMD K8
 
 
 ifdef(`OPERATION_addmul_1',`
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/atom/aorsmul_1.asm
--- a/mpn/x86/atom/aorsmul_1.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/atom/aorsmul_1.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -20,19 +20,19 @@
 include(`../config.m4')
 
 
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)            6.35
+C			    cycles/limb
+C P5
+C P6 model 0-8,10-12		 6.35
 C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           6.25
+C P6 model 13 (Dothan)		 6.25
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C K6:
-C K7:                            3.9
-C K8:
+C AMD K6
+C AMD K7			 3.9
+C AMD K8
 
 
 dnl  K7: UNROLL_COUNT  cycles/limb
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/bdiv_dbm1c.asm
--- a/mpn/x86/bdiv_dbm1c.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/bdiv_dbm1c.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -1,6 +1,6 @@
 dnl  x86 mpn_bdiv_dbm1.
 
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -19,14 +19,22 @@
 
 include(`../config.m4')
 
-C	    cycles/limb
-C K7:		 3.5
-C P4 m0:	  ?
-C P4 m1:	  ?
-C P4 m2:	13.67
-C P4 m3:	  ?
-C P4 m4:	  ?
-C P6-13:	 5.1
+C			    cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)		 5.1
+C P4 model 0  (Willamette)	 
+C P4 model 1  (?)
+C P4 model 2  (Northwood)	13.67
+C P4 model 3  (Prescott)	 
+C P4 model 4  (Nocona)
+C Intel Atom
+C AMD K6
+C AMD K7			 3.5
+C AMD K8
+C AMD K10
+
 
 C TODO
 C  * Optimize for more x86 processors
@@ -57,18 +65,17 @@
 	cmp	$2, %eax
 	jc	L(b1)
 	jz	L(b2)
-	jmp	L(b3)
+
+L(b3):	lea	-8(%esi), %esi
+	lea	8(%edi), %edi
+	add	$-3, %ebp
+	jmp	L(3)
 
 L(b0):	mov	4(%esi), %eax
 	lea	-4(%esi), %esi
 	lea	12(%edi), %edi
 	add	$-4, %ebp
 	jmp	L(0)
-L(b3):
-	lea	-8(%esi), %esi
-	lea	8(%edi), %edi
-	add	$-3, %ebp
-	jmp	L(3)
 
 L(b2):	mov	4(%esi), %eax
 	lea	4(%esi), %esi
@@ -77,8 +84,7 @@
 	jmp	L(2)
 
 	ALIGN(8)
-L(top):
-	mov	4(%esi), %eax
+L(top):	mov	4(%esi), %eax
 	mul	%ecx
 	lea	16(%edi), %edi
 	sub	%eax, %ebx
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/copyd.asm
--- a/mpn/x86/copyd.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/copyd.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -21,11 +21,11 @@
 
 
 C     cycles/limb  startup (approx)
-C P5:     1.0         40
-C P6      2.4         70
-C K6      1.0         55
-C K7:     1.3         75
-C P4:     2.6        175
+C P5	  1.0	      40
+C P6	  2.4	      70
+C K6	  1.0	      55
+C K7	  1.3	      75
+C P4	  2.6	     175
 C
 C (Startup time includes some function call overheads.)
 
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/copyi.asm
--- a/mpn/x86/copyi.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/copyi.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -21,11 +21,11 @@
 
 
 C     cycles/limb  startup (approx)
-C P5:     1.0         35
-C P6      0.75        45
-C K6      1.0         30
-C K7:     1.3         65
-C P4:     1.0        120
+C P5	  1.0	      35
+C P6	  0.75	      45
+C K6	  1.0	      30
+C K7	  1.3	      65
+C P4	  1.0	     120
 C
 C (Startup time includes some function call overheads.)
 
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k6/aorsmul_1.asm
--- a/mpn/x86/k6/aorsmul_1.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/k6/aorsmul_1.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -21,19 +21,19 @@
 include(`../config.m4')
 
 
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)            5.94
+C			    cycles/limb
+C P5
+C P6 model 0-8,10-12		 5.94
 C P6 model 9  (Banias)		 5.51
-C P6 model 13 (Dothan)           5.57
+C P6 model 13 (Dothan)		 5.57
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C K6:                           7.65-8.5 (data dependent)
-C K7:
-C K8:
+C AMD K6			7.65-8.5 (data dependent)
+C AMD K7
+C AMD K8
 
 
 dnl  K6:           large multipliers  small multipliers
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k6/mul_1.asm
--- a/mpn/x86/k6/mul_1.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/k6/mul_1.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -20,19 +20,19 @@
 include(`../config.m4')
 
 
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)            5.5
+C			    cycles/limb
+C P5
+C P6 model 0-8,10-12		 5.5
 C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           4.87
+C P6 model 13 (Dothan)		 4.87
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C K6:                            6.25
-C K7:
-C K8:
+C AMD K6			 6.25
+C AMD K7
+C AMD K8
 
 
 C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k7/addlsh1_n.asm
--- a/mpn/x86/k7/addlsh1_n.asm	Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/k7/addlsh1_n.asm	Sun Feb 20 15:40:56 2011 +0100
@@ -26,19 +26,19 @@
 C registers.  It seems tricky to use the same structure for rsblsh1_n, since we
 C cannot feed carry between operations there.
 
-C                           cycles/limb
+C			    cycles/limb
 C P5
-C P6 model 0-8,10-12)
+C P6 model 0-8,10-12
 C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           5.4	(worse than add_n + lshift)
+C P6 model 13 (Dothan)		 5.4	(worse than add_n + lshift)
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
 C Intel Atom			 6
-C AMD K6                         ?
-C AMD K7                         2.5
+C AMD K6			 ?
+C AMD K7			 2.5
 C AMD K8
 
 C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k7/aorsmul_1.asm
--- a/mpn/x86/k7/aorsmul_1.asm	Sun Feb 20 10:59:12 2011 +0100


More information about the gmp-commit mailing list