[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Fri Nov 30 16:35:21 UTC 2018


details:   /var/hg/gmp/rev/d0e7d3489be4
changeset: 17718:d0e7d3489be4
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Nov 30 17:33:57 2018 +0100
description:
* mpn/powerpc64/mode64/p9/add_n_sub_n.asm: New file.

details:   /var/hg/gmp/rev/ae2950bd55e4
changeset: 17719:ae2950bd55e4
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Nov 30 17:35:04 2018 +0100
description:
New file.

details:   /var/hg/gmp/rev/ba8b7b6c9b1a
changeset: 17720:ba8b7b6c9b1a
user:      Torbjorn Granlund <tg at gmplib.org>
date:      Fri Nov 30 17:35:17 2018 +0100
description:
ChangeLog

diffstat:

 ChangeLog                               |   17 +++
 mpn/powerpc64/mode64/p9/add_n_sub_n.asm |  112 +++++++++++++++++++++
 mpn/powerpc64/mode64/p9/gmp-mparam.h    |  166 ++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 0 deletions(-)

diffs (truncated from 310 to 300 lines):

diff -r 80949837f4c1 -r ba8b7b6c9b1a ChangeLog
--- a/ChangeLog	Wed Nov 28 23:18:30 2018 +0100
+++ b/ChangeLog	Fri Nov 30 17:35:17 2018 +0100
@@ -1,3 +1,20 @@
+2018-11-30  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/powerpc64/mode64/p9/gmp-mparam.h: New file.
+
+	* mpn/powerpc64/mode64/p9/add_n_sub_n.asm: New file.
+
+2018-11-28  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/powerpc64/mode64/p9/sqr_basecase.asm: New file.
+	* mpn/powerpc64/mode64/p9/mul_1.asm: New file.
+
+2018-11-18  Torbjörn Granlund  <tg at gmplib.org>
+
+	* mpn/powerpc64/mode64/p9/mul_basecase.asm: New file.
+
+	* mpn/powerpc64/mode64/p9/addmul_1.asm: Optimise.
+
 2018-11-12  Torbjörn Granlund  <tg at gmplib.org>
 
 	* mpn/powerpc64/mode64/p9/aorsmul_1.asm: New file, providing fast
diff -r 80949837f4c1 -r ba8b7b6c9b1a mpn/powerpc64/mode64/p9/add_n_sub_n.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/p9/add_n_sub_n.asm	Fri Nov 30 17:35:17 2018 +0100
@@ -0,0 +1,112 @@
+dnl  PowerPC-64 mpn_add_n_sub_n optimised for POWER9.
+
+dnl  Copyright 2018 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C POWER3/PPC630		 -
+C POWER4/PPC970		 -
+C POWER5		 -
+C POWER6		 -
+C POWER7		 -
+C POWER8		 -
+C POWER9		 2.25
+
+
+C INPUT PARAMETERS
+define(`arp',	`r3')
+define(`srp',	`r4')
+define(`up',	`r5')
+define(`vp',	`r6')
+define(`n',	`r7')
+
+ASM_START()
+PROLOGUE(mpn_add_n_sub_n)
+	cmpdi	cr7, n, 2
+	subfo	r0, r0, r0		C clear OV
+	rldicl.	r9, n, 0, 63		C n & 1
+	beq	cr0, L(bx0)
+
+L(bx1):	ld	r10, 0(up)
+	ld	r11, 0(vp)
+	ble	cr7, L(1)
+	srdi	r7, r7, 1
+	mtctr	r7
+	ld	r8, 8(up)
+	ld	r9, 8(vp)
+	addex(	r0, r10, r11, 0)
+	subfc	r12, r11, r10
+	addi	up, up, -8
+	addi	vp, vp, -8
+	b	L(lo1)
+
+L(bx0):	ld	r8, 0(up)
+	ld	r9, 0(vp)
+	ld	r10, 8(up)
+	ld	r11, 8(vp)
+	addex(	r0, r8, r9, 0)
+	subfc	r12, r9, r8
+	addi	arp, arp, 8
+	addi	srp, srp, 8
+	ble	cr7, L(end)
+	addi	r7, r7, -1
+	srdi	r7, r7, 1
+	mtctr	r7
+
+L(top):	ld	r8, 16(up)
+	ld	r9, 16(vp)
+	std	r0, -8(arp)
+	std	r12, -8(srp)
+	addex(	r0, r10, r11, 0)
+	subfe	r12, r11, r10
+L(lo1):	ld	r10, 24(up)
+	ld	r11, 24(vp)
+	std	r0, 0(arp)
+	std	r12, 0(srp)
+	addex(	r0, r8, r9, 0)
+	subfe	r12, r9, r8
+	addi	up, up, 16
+	addi	vp, vp, 16
+	addi	arp, arp, 16
+	addi	srp, srp, 16
+	bdnz	L(top)
+	
+L(end):	std	r0, -8(arp)
+	std	r12, -8(srp)
+L(1):	addex(	r0, r10, r11, 0)
+	subfe	r12, r11, r10
+	std	r0, 0(arp)
+	std	r12, 0(srp)
+	subfe	r3, r3, r3
+	addex(	r3, r3, r3, 0)
+	rldicl	r3, r3, 1, 62
+	blr
+EPILOGUE()
+ASM_END()
diff -r 80949837f4c1 -r ba8b7b6c9b1a mpn/powerpc64/mode64/p9/gmp-mparam.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/powerpc64/mode64/p9/gmp-mparam.h	Fri Nov 30 17:35:17 2018 +0100
@@ -0,0 +1,166 @@
+/* POWER9 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2018 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 3700MHz POWER9 */
+
+/* Generated by tuneup.c, 2018-11-30, gcc 4.8 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        20
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
+#define USE_PREINV_DIVREM_1                  0
+#define DIV_QR_1N_PI1_METHOD                 2
+#define DIV_QR_1_NORM_THRESHOLD              3
+#define DIV_QR_1_UNNORM_THRESHOLD            2
+#define DIV_QR_2_PI2_THRESHOLD              13
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           21
+
+#define DIV_1_VS_MUL_1_PERCENT             359
+
+#define MUL_TOOM22_THRESHOLD                34
+#define MUL_TOOM33_THRESHOLD               105
+#define MUL_TOOM44_THRESHOLD               450
+#define MUL_TOOM6H_THRESHOLD               517
+#define MUL_TOOM8H_THRESHOLD               608
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     114
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     279
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     204
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     211
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     185
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 46
+#define SQR_TOOM3_THRESHOLD                158
+#define SQR_TOOM4_THRESHOLD                686
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                866
+
+#define MULMID_TOOM42_THRESHOLD             66
+
+#define MULMOD_BNM1_THRESHOLD               19
+#define SQRMOD_BNM1_THRESHOLD               26
+
+#define MUL_FFT_MODF_THRESHOLD             530  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    530, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     27, 7}, {     14, 6}, {     29, 7}, {     15, 6}, \
+    {     31, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     36, 8}, \
+    {     19, 7}, {     39, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     49, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    143,10}, {    287,11}, {    159,12}, {     95,11}, \
+    {    191,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 68
+#define MUL_FFT_THRESHOLD                 5248
+
+#define SQR_FFT_MODF_THRESHOLD             496  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    496, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
+    {     13, 5}, {     27, 6}, {     29, 7}, {     15, 6}, \
+    {     31, 7}, {     16, 6}, {     33, 7}, {     28, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     36, 8}, \
+    {     19, 7}, {     39, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    159,11}, {     95,12}, {     63,11}, {    127,10}, \
+    {    255,11}, {    143,10}, {    287,11}, {    159,12}, \
+    {     95,11}, {    191,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 69
+#define SQR_FFT_THRESHOLD                 4224
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  40
+#define MULLO_MUL_N_THRESHOLD            10323
+#define SQRLO_BASECASE_THRESHOLD             6
+#define SQRLO_DC_THRESHOLD                  40
+#define SQRLO_SQR_THRESHOLD               8397
+
+#define DC_DIV_QR_THRESHOLD                 30
+#define DC_DIVAPPR_Q_THRESHOLD              91
+#define DC_BDIV_QR_THRESHOLD                36
+#define DC_BDIV_Q_THRESHOLD                 63
+
+#define INV_MULMOD_BNM1_THRESHOLD           78
+#define INV_NEWTON_THRESHOLD                11
+#define INV_APPR_THRESHOLD                  13
+
+#define BINV_NEWTON_THRESHOLD              272
+#define REDC_1_TO_REDC_2_THRESHOLD           8
+#define REDC_2_TO_REDC_N_THRESHOLD          79
+
+#define MU_DIV_QR_THRESHOLD               1589
+#define MU_DIVAPPR_Q_THRESHOLD            1589
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD              1652
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define POWM_SEC_TABLE  3,16,139,852
+
+#define GET_STR_DC_THRESHOLD                 9
+#define GET_STR_PRECOMPUTE_THRESHOLD        17
+#define SET_STR_DC_THRESHOLD               406
+#define SET_STR_PRECOMPUTE_THRESHOLD       956
+


More information about the gmp-commit mailing list