[Gmp-commit] /var/hg/gmp: 2 new changesets

mercurial at gmplib.org
Tue Oct 18 00:14:15 CEST 2011


details:   /var/hg/gmp/rev/a8caf40f4e45
changeset: 14355:a8caf40f4e45
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Oct 18 00:13:32 2011 +0200
description:
Retune.

details:   /var/hg/gmp/rev/082efa54e608
changeset: 14356:082efa54e608
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Oct 18 00:14:11 2011 +0200
description:
Add cycle tables.

diffstat:

 mpn/s390_64/add_n.asm            |   6 +++++-
 mpn/s390_64/addmul_1.asm         |   7 +++++++
 mpn/s390_64/aorslsh1_n.asm       |   6 +++++-
 mpn/s390_64/bdiv_dbm1c.asm       |   6 +++++-
 mpn/s390_64/copyd.asm            |   7 ++++++-
 mpn/s390_64/copyi.asm            |   4 ++++
 mpn/s390_64/gmp-mparam.h         |  16 ++++++++--------
 mpn/s390_64/invert_limb.asm      |   6 +++++-
 mpn/s390_64/lshift.asm           |   6 +++++-
 mpn/s390_64/mod_34lsub1.asm      |   4 ++++
 mpn/s390_64/mul_1.asm            |   7 +++++++
 mpn/s390_64/rshift.asm           |   6 +++++-
 mpn/s390_64/sqr_diag_addlsh1.asm |   6 +++++-
 mpn/s390_64/sub_n.asm            |   6 +++++-
 mpn/s390_64/submul_1.asm         |   7 +++++++
 15 files changed, 83 insertions(+), 17 deletions(-)

diffs (264 lines):

diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/add_n.asm
--- a/mpn/s390_64/add_n.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/add_n.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           3.5
+C z900		 6.5
+C z990		 3.5
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C TODO
 C  * Optimise for small n
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/addmul_1.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -19,6 +19,13 @@
 
 include(`../config.m4')
 
+C            cycles/limb
+C z900		34
+C z990		23
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
 C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/aorslsh1_n.asm
--- a/mpn/s390_64/aorslsh1_n.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/aorslsh1_n.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           5
+C z900		10
+C z990		 5
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C TODO
 C  * Optimise for small n
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/bdiv_dbm1c.asm
--- a/mpn/s390_64/bdiv_dbm1c.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/bdiv_dbm1c.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990          23
+C z900		29
+C z990		23
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C INPUT PARAMETERS
 define(`qp',	  `%r2')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/copyd.asm
--- a/mpn/s390_64/copyd.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/copyd.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -21,12 +21,17 @@
 include(`../config.m4')
 
 C            cycles/limb
+C z900		 2.67
 C z990           1.5
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C FIXME:
 C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
 C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
-C    We coould then use r3...r10 in main loop.
+C    We could then use r3...r10 in main loop.
+C  * Could we use some EX trick, modifying lmg/stmg, for the feed-in code?
 
 C INPUT PARAMETERS
 define(`rp_param',	`%r2')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/copyi.asm
--- a/mpn/s390_64/copyi.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/copyi.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -21,7 +21,11 @@
 include(`../config.m4')
 
 C            cycles/limb
+C z900		 1.25
 C z990           0.75
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C NOTE
 C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/gmp-mparam.h
--- a/mpn/s390_64/gmp-mparam.h	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/gmp-mparam.h	Tue Oct 18 00:14:11 2011 +0200
@@ -40,21 +40,21 @@
 
 #define MUL_TOOM22_THRESHOLD                14
 #define MUL_TOOM33_THRESHOLD                45
-#define MUL_TOOM44_THRESHOLD               106
-#define MUL_TOOM6H_THRESHOLD               148
+#define MUL_TOOM44_THRESHOLD                96
+#define MUL_TOOM6H_THRESHOLD               143
 #define MUL_TOOM8H_THRESHOLD               212
 
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      57
 #define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
 #define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      49
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
 
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_BASECASE_THRESHOLD               0
+#define SQR_TOOM2_THRESHOLD                 22
 #define SQR_TOOM3_THRESHOLD                 65
 #define SQR_TOOM4_THRESHOLD                112
-#define SQR_TOOM6_THRESHOLD                159
-#define SQR_TOOM8_THRESHOLD                238
+#define SQR_TOOM6_THRESHOLD                156
+#define SQR_TOOM8_THRESHOLD                224
 
 #define MULMID_TOOM42_THRESHOLD             20
 
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/invert_limb.asm
--- a/mpn/s390_64/invert_limb.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/invert_limb.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -22,7 +22,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           92
+C z900	       142
+C z990          88
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 ASM_START()
 	TEXT
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/lshift.asm
--- a/mpn/s390_64/lshift.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/lshift.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           4.3
+C z900		 7.25
+C z990		 4.25
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C FIXME
 C  * Streamline feed-in code.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/mod_34lsub1.asm
--- a/mpn/s390_64/mod_34lsub1.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/mod_34lsub1.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
+C z900		 5.8
 C z990           2
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C TODO
 C  * Optimise summation code, see x86_64.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/mul_1.asm
--- a/mpn/s390_64/mul_1.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/mul_1.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -19,6 +19,13 @@
 
 include(`../config.m4')
 
+C            cycles/limb
+C z900		29
+C z990		22
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
 C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/rshift.asm
--- a/mpn/s390_64/rshift.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/rshift.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           4.3
+C z900		 7.25
+C z990		 4.25
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C FIXME
 C  * Streamline feed-in code.
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/sqr_diag_addlsh1.asm
--- a/mpn/s390_64/sqr_diag_addlsh1.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/sqr_diag_addlsh1.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           19
+C z900		 24.5
+C z990           18.5
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C INPUT PARAMETERS
 define(`rp',	`%r2')
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/sub_n.asm
--- a/mpn/s390_64/sub_n.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/sub_n.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -20,7 +20,11 @@
 include(`../config.m4')
 
 C            cycles/limb
-C z990           3.5
+C z900		 6.5
+C z990		 3.5
+C z9		 ?
+C z10		 ?
+C z196		 ?
 
 C TODO
 C  * Optimise for small n
diff -r 3f02dac43987 -r 082efa54e608 mpn/s390_64/submul_1.asm
--- a/mpn/s390_64/submul_1.asm	Mon Oct 17 00:34:27 2011 +0200
+++ b/mpn/s390_64/submul_1.asm	Tue Oct 18 00:14:11 2011 +0200
@@ -19,6 +19,13 @@
 
 include(`../config.m4')
 
+C            cycles/limb
+C z900		35
+C z990		24
+C z9		 ?
+C z10		 ?
+C z196		 ?
+
 C INPUT PARAMETERS
 define(`rp',	`%r2')
 define(`up',	`%r3')


More information about the gmp-commit mailing list