[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
Sun Feb 20 15:41:31 CET 2011
details: /var/hg/gmp/rev/28c161dea4a7
changeset: 13865:28c161dea4a7
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Feb 20 15:33:52 2011 +0100
description:
Save a jump.
details: /var/hg/gmp/rev/3c0dbda7f526
changeset: 13866:3c0dbda7f526
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Feb 20 15:40:56 2011 +0100
description:
x86 c/l table cleanup.
diffstat:
ChangeLog | 4 +++
mpn/x86/aors_n.asm | 10 ++++----
mpn/x86/aorsmul_1.asm | 20 +++++++++---------
mpn/x86/atom/aorsmul_1.asm | 14 ++++++------
mpn/x86/bdiv_dbm1c.asm | 40 +++++++++++++++++++++---------------
mpn/x86/copyd.asm | 10 ++++----
mpn/x86/copyi.asm | 10 ++++----
mpn/x86/k6/aorsmul_1.asm | 14 ++++++------
mpn/x86/k6/mul_1.asm | 14 ++++++------
mpn/x86/k7/addlsh1_n.asm | 10 ++++----
mpn/x86/k7/aorsmul_1.asm | 12 +++++-----
mpn/x86/k7/invert_limb.asm | 10 ++++----
mpn/x86/k7/mod_1_1.asm | 10 ++++----
mpn/x86/k7/mod_1_4.asm | 10 ++++----
mpn/x86/k7/mul_1.asm | 10 ++++----
mpn/x86/k7/sublsh1_n.asm | 10 ++++----
mpn/x86/lshift.asm | 12 +++++-----
mpn/x86/mod_34lsub1.asm | 10 ++++----
mpn/x86/mul_1.asm | 20 +++++++++---------
mpn/x86/mul_basecase.asm | 10 ++++----
mpn/x86/p6/aors_n.asm | 8 +++---
mpn/x86/p6/aorsmul_1.asm | 14 ++++++------
mpn/x86/pentium4/sse2/add_n.asm | 8 +++---
mpn/x86/pentium4/sse2/addlsh1_n.asm | 10 ++++----
mpn/x86/pentium4/sse2/addmul_1.asm | 16 +++++++-------
mpn/x86/pentium4/sse2/mod_1_1.asm | 12 +++++-----
mpn/x86/pentium4/sse2/mod_1_4.asm | 14 ++++++------
mpn/x86/pentium4/sse2/mul_1.asm | 10 ++++----
mpn/x86/pentium4/sse2/popcount.asm | 34 +++++++++++++++---------------
mpn/x86/pentium4/sse2/sub_n.asm | 8 +++---
mpn/x86/pentium4/sse2/submul_1.asm | 14 ++++++------
mpn/x86/rshift.asm | 12 +++++-----
mpn/x86/sqr_basecase.asm | 10 ++++----
33 files changed, 220 insertions(+), 210 deletions(-)
diffs (truncated from 877 to 300 lines):
diff -r 153ddf846890 -r 3c0dbda7f526 ChangeLog
--- a/ChangeLog Sun Feb 20 10:59:12 2011 +0100
+++ b/ChangeLog Sun Feb 20 15:40:56 2011 +0100
@@ -1,3 +1,7 @@
+2011-02-20 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/bdiv_dbm1c.asm: Save a jump.
+
2011-02-20 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mpn/x86/atom/aors_n.asm: New code.
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/aors_n.asm
--- a/mpn/x86/aors_n.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aors_n.asm Sun Feb 20 15:40:56 2011 +0100
@@ -22,11 +22,11 @@
C cycles/limb
-C P5: 3.375
-C P6: 3.125
-C K6: 3.5
-C K7: 2.25
-C P4: 8.75
+C P5 3.375
+C P6 3.125
+C K6 3.5
+C K7 2.25
+C P4 8.75
ifdef(`OPERATION_add_n',`
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/aorsmul_1.asm
--- a/mpn/x86/aorsmul_1.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/aorsmul_1.asm Sun Feb 20 15:40:56 2011 +0100
@@ -22,19 +22,19 @@
include(`../config.m4')
-C cycles/limb
-C P5: 14.75
-C P6 model 0-8,10-12) 7.5
+C cycles/limb
+C P5 14.75
+C P6 model 0-8,10-12 7.5
C P6 model 9 (Banias) 6.7
-C P6 model 13 (Dothan) 6.75
-C P4 model 0 (Willamette) 24.0
-C P4 model 1 (?) 24.0
-C P4 model 2 (Northwood) 24.0
+C P6 model 13 (Dothan) 6.75
+C P4 model 0 (Willamette) 24.0
+C P4 model 1 (?) 24.0
+C P4 model 2 (Northwood) 24.0
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 12.5
-C K7: 5.25
-C K8:
+C AMD K6 12.5
+C AMD K7 5.25
+C AMD K8
ifdef(`OPERATION_addmul_1',`
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/atom/aorsmul_1.asm
--- a/mpn/x86/atom/aorsmul_1.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/atom/aorsmul_1.asm Sun Feb 20 15:40:56 2011 +0100
@@ -20,19 +20,19 @@
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12) 6.35
+C cycles/limb
+C P5
+C P6 model 0-8,10-12 6.35
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 6.25
+C P6 model 13 (Dothan) 6.25
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6:
-C K7: 3.9
-C K8:
+C AMD K6
+C AMD K7 3.9
+C AMD K8
dnl K7: UNROLL_COUNT cycles/limb
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/bdiv_dbm1c.asm
--- a/mpn/x86/bdiv_dbm1c.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/bdiv_dbm1c.asm Sun Feb 20 15:40:56 2011 +0100
@@ -1,6 +1,6 @@
dnl x86 mpn_bdiv_dbm1.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -19,14 +19,22 @@
include(`../config.m4')
-C cycles/limb
-C K7: 3.5
-C P4 m0: ?
-C P4 m1: ?
-C P4 m2: 13.67
-C P4 m3: ?
-C P4 m4: ?
-C P6-13: 5.1
+C cycles/limb
+C P5
+C P6 model 0-8,10-12)
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan) 5.1
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood) 13.67
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom
+C AMD K6
+C AMD K7 3.5
+C AMD K8
+C AMD K10
+
C TODO
C * Optimize for more x86 processors
@@ -57,18 +65,17 @@
cmp $2, %eax
jc L(b1)
jz L(b2)
- jmp L(b3)
+
+L(b3): lea -8(%esi), %esi
+ lea 8(%edi), %edi
+ add $-3, %ebp
+ jmp L(3)
L(b0): mov 4(%esi), %eax
lea -4(%esi), %esi
lea 12(%edi), %edi
add $-4, %ebp
jmp L(0)
-L(b3):
- lea -8(%esi), %esi
- lea 8(%edi), %edi
- add $-3, %ebp
- jmp L(3)
L(b2): mov 4(%esi), %eax
lea 4(%esi), %esi
@@ -77,8 +84,7 @@
jmp L(2)
ALIGN(8)
-L(top):
- mov 4(%esi), %eax
+L(top): mov 4(%esi), %eax
mul %ecx
lea 16(%edi), %edi
sub %eax, %ebx
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/copyd.asm
--- a/mpn/x86/copyd.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/copyd.asm Sun Feb 20 15:40:56 2011 +0100
@@ -21,11 +21,11 @@
C cycles/limb startup (approx)
-C P5: 1.0 40
-C P6 2.4 70
-C K6 1.0 55
-C K7: 1.3 75
-C P4: 2.6 175
+C P5 1.0 40
+C P6 2.4 70
+C K6 1.0 55
+C K7 1.3 75
+C P4 2.6 175
C
C (Startup time includes some function call overheads.)
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/copyi.asm
--- a/mpn/x86/copyi.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/copyi.asm Sun Feb 20 15:40:56 2011 +0100
@@ -21,11 +21,11 @@
C cycles/limb startup (approx)
-C P5: 1.0 35
-C P6 0.75 45
-C K6 1.0 30
-C K7: 1.3 65
-C P4: 1.0 120
+C P5 1.0 35
+C P6 0.75 45
+C K6 1.0 30
+C K7 1.3 65
+C P4 1.0 120
C
C (Startup time includes some function call overheads.)
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k6/aorsmul_1.asm
--- a/mpn/x86/k6/aorsmul_1.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/k6/aorsmul_1.asm Sun Feb 20 15:40:56 2011 +0100
@@ -21,19 +21,19 @@
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12) 5.94
+C cycles/limb
+C P5
+C P6 model 0-8,10-12 5.94
C P6 model 9 (Banias) 5.51
-C P6 model 13 (Dothan) 5.57
+C P6 model 13 (Dothan) 5.57
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 7.65-8.5 (data dependent)
-C K7:
-C K8:
+C AMD K6 7.65-8.5 (data dependent)
+C AMD K7
+C AMD K8
dnl K6: large multipliers small multipliers
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k6/mul_1.asm
--- a/mpn/x86/k6/mul_1.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/k6/mul_1.asm Sun Feb 20 15:40:56 2011 +0100
@@ -20,19 +20,19 @@
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12) 5.5
+C cycles/limb
+C P5
+C P6 model 0-8,10-12 5.5
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 4.87
+C P6 model 13 (Dothan) 4.87
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 6.25
-C K7:
-C K8:
+C AMD K6 6.25
+C AMD K7
+C AMD K8
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k7/addlsh1_n.asm
--- a/mpn/x86/k7/addlsh1_n.asm Sun Feb 20 10:59:12 2011 +0100
+++ b/mpn/x86/k7/addlsh1_n.asm Sun Feb 20 15:40:56 2011 +0100
@@ -26,19 +26,19 @@
C registers. It seems tricky to use the same structure for rsblsh1_n, since we
C cannot feed carry between operations there.
-C cycles/limb
+C cycles/limb
C P5
-C P6 model 0-8,10-12)
+C P6 model 0-8,10-12
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 5.4 (worse than add_n + lshift)
+C P6 model 13 (Dothan) 5.4 (worse than add_n + lshift)
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
C Intel Atom 6
-C AMD K6 ?
-C AMD K7 2.5
+C AMD K6 ?
+C AMD K7 2.5
C AMD K8
C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
diff -r 153ddf846890 -r 3c0dbda7f526 mpn/x86/k7/aorsmul_1.asm
--- a/mpn/x86/k7/aorsmul_1.asm Sun Feb 20 10:59:12 2011 +0100
More information about the gmp-commit mailing list