[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Aug 19 19:26:30 CEST 2023
details: /var/hg/gmp/rev/8c07c1f27b2c
changeset: 18431:8c07c1f27b2c
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Aug 19 18:25:02 2023 +0200
description:
Unroll top-level s390_64 mul_1, addmul_1, submul_1.
details: /var/hg/gmp/rev/d7265d0824d3
changeset: 18432:d7265d0824d3
user: Torbjorn Granlund <tg at gmplib.org>
date: Sat Aug 19 19:26:25 2023 +0200
description:
Provide z15-specific copyd.
diffstat:
mpn/s390_64/addmul_1.asm | 118 ++++++++++++++++++++++++++++++++++--------
mpn/s390_64/mul_1.asm | 92 ++++++++++++++++++++++++++-------
mpn/s390_64/submul_1.asm | 127 +++++++++++++++++++++++++++++++++++++--------
mpn/s390_64/z15/copyd.asm | 77 +++++++++++++++++++++++++++
4 files changed, 348 insertions(+), 66 deletions(-)
diffs (truncated from 506 to 300 lines):
diff -r bd0bd2059652 -r d7265d0824d3 mpn/s390_64/addmul_1.asm
--- a/mpn/s390_64/addmul_1.asm Sun Aug 13 22:08:05 2023 +0200
+++ b/mpn/s390_64/addmul_1.asm Sat Aug 19 19:26:25 2023 +0200
@@ -1,6 +1,6 @@
dnl S/390-64 mpn_addmul_1
-dnl Copyright 2011 Free Software Foundation, Inc.
+dnl Copyright 2023 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -30,43 +30,115 @@
include(`../config.m4')
+C TODO
+C * Delay saving of registers to handle n < 2 faster.
+
C cycles/limb
-C z900 34
-C z990 23
+C z900 ?
+C z990 ?
C z9 ?
-C z10 28
+C z10 ?
C z196 ?
+C z12 ?
+C z13 ?
+C z14 ?
+C z15 4.5
-C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
define(`n', `%r4')
define(`v0', `%r5')
-define(`z', `%r9')
+define(`idx', `%r10')
+define(`cy', `%r11')
ASM_START()
+PROLOGUE(mpn_addmul_1c)
+ stmg %r6, %r13, 48(%r15)
+ lgr cy, %r6
+ j L(ent)
+EPILOGUE()
PROLOGUE(mpn_addmul_1)
- stmg %r9, %r12, 72(%r15)
- lghi %r12, 0 C zero index register
- aghi %r12, 0 C clear carry flag
- lghi %r11, 0 C clear carry limb
- lghi z, 0 C keep register zero
+ stmg %r6, %r13, 48(%r15)
+ lghi cy, 0
+L(ent): tmll n, 1
+ la n, 3(n)
+ je L(bx0)
+L(bx1): tmll n, 2
+ srlg n, n, 2
+ je L(b01)
+L(b11): lg %r7, 0(up)
+ mlgr %r6, v0
+ lg %r9, 8(up)
+ lg %r13, 16(up)
+ mlgr %r8, v0
+ mlgr %r12, v0
+ algr %r7, cy
+ alcgr %r9, %r6
+ lghi cy, 0
+ alcgr %r8, %r13
+ alcgr cy, %r12
+ alg %r7, 0(rp)
+ stg %r7, 0(rp)
+ lghi idx, -8
+ j L(m3)
+
+L(b01): lg %r9, 0(up)
+ mlgr %r8, v0
+ algr %r9, cy
+ lghi cy, 0
+ alcgr cy, %r8
+ alg %r9, 0(rp)
+ stg %r9, 0(rp)
+ lghi idx, 8
+ brctg n, L(top)
+ j L(end)
-L(top): lg %r1, 0(%r12,up)
- lg %r10, 0(%r12,rp)
+L(bx0): tmll n, 2
+ lghi idx, 0
+ srlg n, n, 2
+ jne L(b00)
+L(b10): lg %r9, 0(up)
+ lg %r13, 8(up)
+ mlgr %r8, v0
+ mlgr %r12, v0
+ algr %r9, cy
+ lghi cy, 0
+ alcgr %r8, %r13
+ alcgr cy, %r12
+ alg %r9, 0(rp)
+ lghi idx, -16
+ j L(m2)
+L(b00): aghi cy, 0 C clear CF
+
+L(top): lg %r1, 0(idx,up)
+ lg %r7, 8(idx,up)
mlgr %r0, v0
- alcgr %r1, %r10
- alcgr %r0, z
- algr %r1, %r11
- lgr %r11, %r0
- stg %r1, 0(%r12,rp)
- la %r12, 8(%r12)
+ mlgr %r6, v0
+ lg %r9, 16(idx,up)
+ lg %r13, 24(idx,up)
+ mlgr %r8, v0
+ mlgr %r12, v0
+ alcgr %r1, cy
+ alcgr %r0, %r7
+ alcgr %r9, %r6
+ lghi cy, 0
+ alcgr %r8, %r13
+ alcgr cy, %r12
+ alg %r1, 0(idx,rp)
+ alcg %r0, 8(idx,rp)
+ stg %r1, 0(idx,rp)
+ stg %r0, 8(idx,rp)
+L(m3): alcg %r9, 16(idx,rp)
+L(m2): alcg %r8, 24(idx,rp)
+ stg %r9, 16(idx,rp)
+ stg %r8, 24(idx,rp)
+ la idx, 32(idx)
brctg n, L(top)
- lghi %r2, 0
- alcgr %r2, %r11
-
- lmg %r9, %r12, 72(%r15)
+L(end): lghi %r2, 0
+ alcgr %r2, cy
+ lmg %r6, %r13, 48(%r15)
br %r14
EPILOGUE()
+ .section .note.GNU-stack
diff -r bd0bd2059652 -r d7265d0824d3 mpn/s390_64/mul_1.asm
--- a/mpn/s390_64/mul_1.asm Sun Aug 13 22:08:05 2023 +0200
+++ b/mpn/s390_64/mul_1.asm Sat Aug 19 19:26:25 2023 +0200
@@ -1,6 +1,6 @@
dnl S/390-64 mpn_mul_1
-dnl Copyright 2011 Free Software Foundation, Inc.
+dnl Copyright 2023 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -30,37 +30,89 @@
include(`../config.m4')
+C TODO
+C * Delay saving of registers to handle n < 2 faster.
+
C cycles/limb
-C z900 29
-C z990 22
+C z900 ?
+C z990 ?
C z9 ?
-C z10 20
+C z10 ?
C z196 ?
+C z12 ?
+C z13 ?
+C z14 ?
+C z15 3.5
-C INPUT PARAMETERS
define(`rp', `%r2')
define(`up', `%r3')
define(`n', `%r4')
define(`v0', `%r5')
-ASM_START()
-PROLOGUE(mpn_mul_1)
- stmg %r11, %r12, 88(%r15)
- lghi %r12, 0 C zero index register
- aghi %r12, 0 C clear carry flag
- lghi %r11, 0 C clear carry limb
+define(`idx', `%r10')
-L(top): lg %r1, 0(%r12,up)
+ASM_START()
+PROLOGUE(mpn_mul_1c)
+ stmg %r6, %r10, 48(%r15)
+ lgr %r8, %r6
+ j L(ent)
+EPILOGUE()
+PROLOGUE(mpn_mul_1)
+ stmg %r6, %r10, 48(%r15)
+ lghi %r8, 0 C clear carry limb
+ lghi %r6, 0 C clear carry limb
+L(ent): tmll n, 1
+ la n, 3(n)
+ je L(bx0)
+L(bx1): tmll n, 2
+ srlg n, n, 2
+ je L(b01)
+L(b11): lg %r7, 0(up)
+ mlgr %r6, v0
+ algr %r7, %r8
+ stg %r7, 0(rp)
+ lghi idx, -8
+ j L(mid)
+L(b01): lg %r9, 0(up)
+ mlgr %r8, v0
+ algr %r9, %r6
+ lghi %r6, 0
+ alcgr %r8, %r6
+ stg %r9, 0(rp)
+ lghi idx, 8
+ brctg n, L(top)
+ j L(end)
+L(bx0): tmll n, 2
+ srlg n, n, 2
+ jne L(b00)
+L(b10): lghi idx, -16
+C aghi %r8, 0 C clear CF
+ j L(mid)
+L(b00): aghi %r8, 0 C clear CF
+ lghi idx, 0
+
+L(top): lg %r1, 0(idx,up)
+ lg %r7, 8(idx,up)
mlgr %r0, v0
- alcgr %r1, %r11
- lgr %r11, %r0 C copy high part to carry limb
- stg %r1, 0(%r12,rp)
- la %r12, 8(%r12)
+ mlgr %r6, v0
+ alcgr %r1, %r8
+ alcgr %r0, %r7
+ stg %r1, 0(idx,rp)
+ stg %r0, 8(idx,rp)
+L(mid): lg %r1, 16(idx,up)
+ lg %r9, 24(idx,up)
+ mlgr %r0, v0
+ mlgr %r8, v0
+ alcgr %r1, %r6
+ alcgr %r0, %r9
+ stg %r1, 16(idx,rp)
+ stg %r0, 24(idx,rp)
+ la idx, 32(idx)
brctg n, L(top)
- lghi %r2, 0
- alcgr %r2, %r11
-
- lmg %r11, %r12, 88(%r15)
+L(end): lghi %r2, 0
+ alcgr %r2, %r8
+ lmg %r6, %r10, 48(%r15)
br %r14
EPILOGUE()
+ .section .note.GNU-stack
diff -r bd0bd2059652 -r d7265d0824d3 mpn/s390_64/submul_1.asm
--- a/mpn/s390_64/submul_1.asm Sun Aug 13 22:08:05 2023 +0200
+++ b/mpn/s390_64/submul_1.asm Sat Aug 19 19:26:25 2023 +0200
@@ -1,6 +1,6 @@
dnl S/390-64 mpn_submul_1
-dnl Copyright 2011 Free Software Foundation, Inc.
+dnl Copyright 2023 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -30,41 +30,122 @@
include(`../config.m4')
+C TODO
+C * Delay saving of registers to handle n < 2 faster.
+C * Clean up register usage in feed-in code (b11 is worst)
+
C cycles/limb
-C z900 35
-C z990 24
+C z900 ?
+C z990 ?
C z9 ?
-C z10 28
+C z10 ?
C z196 ?
+C z12 ?
+C z13 ?
+C z14 ?
More information about the gmp-commit
mailing list