[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Oct 18 00:14:15 CEST 2011
details: /var/hg/gmp/rev/a8caf40f4e45
changeset: 14355:a8caf40f4e45
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Oct 18 00:13:32 2011 +0200
description:
Retune.
details: /var/hg/gmp/rev/082efa54e608
changeset: 14356:082efa54e608
user: Torbjorn Granlund <tege at gmplib.org>
date: Tue Oct 18 00:14:11 2011 +0200
description:
Add cycle tables.
diffstat:
mpn/s390_64/add_n.asm | 6 +++++-
mpn/s390_64/addmul_1.asm | 7 +++++++
mpn/s390_64/aorslsh1_n.asm | 6 +++++-
mpn/s390_64/bdiv_dbm1c.asm | 6 +++++-
mpn/s390_64/copyd.asm | 7 ++++++-
mpn/s390_64/copyi.asm | 4 ++++
mpn/s390_64/gmp-mparam.h | 16 ++++++++--------
mpn/s390_64/invert_limb.asm | 6 +++++-
mpn/s390_64/lshift.asm | 6 +++++-
mpn/s390_64/mod_34lsub1.asm | 4 ++++
mpn/s390_64/mul_1.asm | 7 +++++++
mpn/s390_64/rshift.asm | 6 +++++-
mpn/s390_64/sqr_diag_addlsh1.asm | 6 +++++-
mpn/s390_64/sub_n.asm | 6 +++++-
mpn/s390_64/submul_1.asm | 7 +++++++
15 files changed, 83 insertions(+), 17 deletions(-)
diffs (264 lines):
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/add_n.asm
--- a/mpn/s390_64/add_n.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/add_n.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 3.5
+C z900 6.5
+C z990 3.5
+C z9 ?
+C z10 ?
+C z196 ?
C TODO
C * Optimise for small n
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/addmul_1.asm Tue Oct 18 00:14:11 2011 +0200
@@ -19,6 +19,13 @@
include(`../config.m4')
+C cycles/limb
+C z900 34
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
+
C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/aorslsh1_n.asm
--- a/mpn/s390_64/aorslsh1_n.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/aorslsh1_n.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 5
+C z900 10
+C z990 5
+C z9 ?
+C z10 ?
+C z196 ?
C TODO
C * Optimise for small n
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/bdiv_dbm1c.asm
--- a/mpn/s390_64/bdiv_dbm1c.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/bdiv_dbm1c.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 23
+C z900 29
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
C INPUT PARAMETERS
define(`qp', `%r2')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/copyd.asm
--- a/mpn/s390_64/copyd.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/copyd.asm Tue Oct 18 00:14:11 2011 +0200
@@ -21,12 +21,17 @@
include(`../config.m4')
C cycles/limb
+C z900 2.67
C z990 1.5
+C z9 ?
+C z10 ?
+C z196 ?
C FIXME:
C * Avoid saving/restoring callee-saves registers for n < 3. This could be
C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
-C We coould then use r3...r10 in main loop.
+C We could then use r3...r10 in main loop.
+C * Could we use some EX trick, modifying lmg/stmg, for the feed-in code?
C INPUT PARAMETERS
define(`rp_param', `%r2')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/copyi.asm
--- a/mpn/s390_64/copyi.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/copyi.asm Tue Oct 18 00:14:11 2011 +0200
@@ -21,7 +21,11 @@
include(`../config.m4')
C cycles/limb
+C z900 1.25
C z990 0.75
+C z9 ?
+C z10 ?
+C z196 ?
C NOTE
C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/gmp-mparam.h
--- a/mpn/s390_64/gmp-mparam.h Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/gmp-mparam.h Tue Oct 18 00:14:11 2011 +0200
@@ -40,21 +40,21 @@
#define MUL_TOOM22_THRESHOLD 14
#define MUL_TOOM33_THRESHOLD 45
-#define MUL_TOOM44_THRESHOLD 106
-#define MUL_TOOM6H_THRESHOLD 148
+#define MUL_TOOM44_THRESHOLD 96
+#define MUL_TOOM6H_THRESHOLD 143
#define MUL_TOOM8H_THRESHOLD 212
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 57
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 49
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
-#define SQR_BASECASE_THRESHOLD 5
-#define SQR_TOOM2_THRESHOLD 24
+#define SQR_BASECASE_THRESHOLD 0
+#define SQR_TOOM2_THRESHOLD 22
#define SQR_TOOM3_THRESHOLD 65
#define SQR_TOOM4_THRESHOLD 112
-#define SQR_TOOM6_THRESHOLD 159
-#define SQR_TOOM8_THRESHOLD 238
+#define SQR_TOOM6_THRESHOLD 156
+#define SQR_TOOM8_THRESHOLD 224
#define MULMID_TOOM42_THRESHOLD 20
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/invert_limb.asm
--- a/mpn/s390_64/invert_limb.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/invert_limb.asm Tue Oct 18 00:14:11 2011 +0200
@@ -22,7 +22,11 @@
include(`../config.m4')
C cycles/limb
-C z990 92
+C z900 142
+C z990 88
+C z9 ?
+C z10 ?
+C z196 ?
ASM_START()
TEXT
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/lshift.asm
--- a/mpn/s390_64/lshift.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/lshift.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 4.3
+C z900 7.25
+C z990 4.25
+C z9 ?
+C z10 ?
+C z196 ?
C FIXME
C * Streamline feed-in code.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/mod_34lsub1.asm
--- a/mpn/s390_64/mod_34lsub1.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/mod_34lsub1.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
+C z900 5.8
C z990 2
+C z9 ?
+C z10 ?
+C z196 ?
C TODO
C * Optimise summation code, see x86_64.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/mul_1.asm
--- a/mpn/s390_64/mul_1.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/mul_1.asm Tue Oct 18 00:14:11 2011 +0200
@@ -19,6 +19,13 @@
include(`../config.m4')
+C cycles/limb
+C z900 29
+C z990 22
+C z9 ?
+C z10 ?
+C z196 ?
+
C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/rshift.asm
--- a/mpn/s390_64/rshift.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/rshift.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 4.3
+C z900 7.25
+C z990 4.25
+C z9 ?
+C z10 ?
+C z196 ?
C FIXME
C * Streamline feed-in code.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/sqr_diag_addlsh1.asm
--- a/mpn/s390_64/sqr_diag_addlsh1.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/sqr_diag_addlsh1.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 19
+C z900 24.5
+C z990 18.5
+C z9 ?
+C z10 ?
+C z196 ?
C INPUT PARAMETERS
define(`rp', `%r2')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/sub_n.asm
--- a/mpn/s390_64/sub_n.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/sub_n.asm Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
include(`../config.m4')
C cycles/limb
-C z990 3.5
+C z900 6.5
+C z990 3.5
+C z9 ?
+C z10 ?
+C z196 ?
C TODO
C * Optimise for small n
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/submul_1.asm
--- a/mpn/s390_64/submul_1.asm Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/submul_1.asm Tue Oct 18 00:14:11 2011 +0200
@@ -19,6 +19,13 @@
include(`../config.m4')
+C cycles/limb
+C z900 35
+C z990 24
+C z9 ?
+C z10 ?
+C z196 ?
+
C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
More information about the gmp-commit mailing list