[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Nov 7 21:41:18 UTC 2021
details: /var/hg/gmp/rev/ef6992b23e7d
changeset: 18275:ef6992b23e7d
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Nov 07 22:33:45 2021 +0100
description:
Avoid "slfi" for portability.
details: /var/hg/gmp/rev/66d736dfa9ee
changeset: 18276:66d736dfa9ee
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Nov 07 22:34:45 2021 +0100
description:
New file.
details: /var/hg/gmp/rev/12f3d019cacb
changeset: 18277:12f3d019cacb
user: Torbjorn Granlund <tg at gmplib.org>
date: Sun Nov 07 22:40:43 2021 +0100
description:
Make recently added code actually work.
diffstat:
mpn/generic/sec_tabselect.c | 62 +++++++++--------
mpn/s390_32/sec_tabselect.asm | 140 ++++++++++++++++++++++++++++++++++++++++++
mpn/s390_64/sec_tabselect.asm | 11 ++-
3 files changed, 180 insertions(+), 33 deletions(-)
diffs (271 lines):
diff -r 60912bb7ed3c -r 12f3d019cacb mpn/generic/sec_tabselect.c
--- a/mpn/generic/sec_tabselect.c Tue Nov 02 02:02:59 2021 +0100
+++ b/mpn/generic/sec_tabselect.c Sun Nov 07 22:40:43 2021 +0100
@@ -28,6 +28,7 @@
GNU Lesser General Public License along with the GNU MP Library. If not,
see https://www.gnu.org/licenses/. */
+#include <stdio.h>
#include "gmp-impl.h"
#ifndef SEC_TABSELECT_METHOD
@@ -78,6 +79,38 @@
mp_limb_t mask, r0, r1, r2, r3;
volatile const mp_limb_t * restrict tp;
+ if (n & 1)
+ {
+ tp = tab;
+ r0 = 0;
+ for (k = 0; k < nents; k++)
+ {
+ mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+ r0 += tp[0] & mask;
+ tp += n;
+ }
+ rp[0] = r0;
+ rp += 1;
+ tab += 1;
+ }
+
+ if (n & 2)
+ {
+ tp = tab;
+ r0 = r1 = 0;
+ for (k = 0; k < nents; k++)
+ {
+ mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
+ r0 += tp[0] & mask;
+ r1 += tp[1] & mask;
+ tp += n;
+ }
+ rp[0] = r0;
+ rp[1] = r1;
+ rp += 2;
+ tab += 2;
+ }
+
for (i = 0; i <= n - 4; i += 4)
{
tp = tab + i;
@@ -97,34 +130,5 @@
rp[3] = r3;
rp += 4;
}
-
- if (n & 2 != 0)
- {
- tp = tab + n - 2;
- r0 = r1 = 0;
- for (k = 0; k < nents; k++)
- {
- mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
- r0 += tp[0] & mask;
- r1 += tp[1] & mask;
- tp += n;
- }
- rp[0] = r0;
- rp[1] = r1;
- rp += 2;
- }
-
- if (n & 1 != 0)
- {
- tp = tab + n - 1;
- r0 = r1 = 0;
- for (k = 0; k < nents; k++)
- {
- mask = (mp_limb_t) ((-(unsigned long) (which ^ k)) >> (BITS_PER_ULONG - 1)) - 1;
- r0 += tp[0] & mask;
- tp += n;
- }
- rp[0] = r0;
- }
}
#endif
diff -r 60912bb7ed3c -r 12f3d019cacb mpn/s390_32/sec_tabselect.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/s390_32/sec_tabselect.asm Sun Nov 07 22:40:43 2021 +0100
@@ -0,0 +1,140 @@
+dnl S/390-32 mpn_sec_tabselect
+
+dnl Copyright 2021 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 ?
+C z9 ?
+C z10 ?
+C z196 ?
+C z13 ?
+C z14 ?
+C z15 ?
+
+dnl void
+dnl mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
+dnl mp_size_t n, mp_size_t nents, mp_size_t which)
+
+define(`rp', `%r2')
+define(`tp', `%r3')
+define(`n', `%r4')
+define(`nents', `%r5')
+define(`which_arg',`%r6') C magicked to stack
+
+dnl r0 r1 r2 r3 r4 r5 r6 r7
+dnl r8 r9 r10 r11 r12 r13 r14 r15
+
+define(`mask', `%r14')
+define(`k', `%r1')
+define(`which', `%r0')
+
+define(`FRAME', 32)
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+ stm %r5, %r15, 20(%r15)
+ ahi %r15, -FRAME
+
+ sll n, 2
+ msr %r5, n
+ st %r5, 16(%r15) C nents * n * LIMB_BYTES
+
+ lr %r5, n
+ srl %r5, 2+2
+ nr %r5, %r5
+ je L(end4)
+L(outer):
+ l which, eval(24+FRAME)(%r15)
+ l k, eval(20+FRAME)(%r15) C nents
+ lhi %r6, 0
+ lhi %r7, 0
+ lhi %r8, 0
+ lhi %r9, 0
+L(tp4): lhi mask, 1
+ slr which, mask
+ slbr mask, mask
+ lm %r10, %r13, 0(tp)
+ nr %r10, mask
+ nr %r11, mask
+ nr %r12, mask
+ nr %r13, mask
+ ar %r6, %r10
+ ar %r7, %r11
+ ar %r8, %r12
+ ar %r9, %r13
+ ar tp, n
+ brct k, L(tp4)
+ stm %r6, %r9, 0(rp)
+ ahi rp, 16
+ sl tp, 16(%r15)
+ ahi tp, eval(4*4)
+ brct %r5, L(outer)
+L(end4):
+ tmll n, 8
+ je L(end2)
+ l which, eval(24+FRAME)(%r15)
+ l k, eval(20+FRAME)(%r15) C nents
+ lhi %r6, 0
+ lhi %r7, 0
+L(tp2): lhi mask, 1
+ slr which, mask
+ slbr mask, mask
+ lm %r10, %r11, 0(tp)
+ nr %r10, mask
+ nr %r11, mask
+ ar %r6, %r10
+ ar %r7, %r11
+ ar tp, n
+ brct k, L(tp2)
+ stm %r6, %r7, 0(rp)
+ ahi rp, 8
+ sl tp, 16(%r15)
+ ahi tp, eval(2*4)
+L(end2):
+ tmll n, 4
+ je L(end1)
+ l which, eval(24+FRAME)(%r15)
+ l k, eval(20+FRAME)(%r15) C nents
+ lhi %r6, 0
+L(tp1): lhi mask, 1
+ slr which, mask
+ slbr mask, mask
+ l %r10, 0(tp)
+ nr %r10, mask
+ ar %r6, %r10
+ ar tp, n
+ brct k, L(tp1)
+ st %r6, 0(rp)
+L(end1):
+ lm %r5, %r15, eval(20+FRAME)(%r15)
+ br %r14
+EPILOGUE()
diff -r 60912bb7ed3c -r 12f3d019cacb mpn/s390_64/sec_tabselect.asm
--- a/mpn/s390_64/sec_tabselect.asm Tue Nov 02 02:02:59 2021 +0100
+++ b/mpn/s390_64/sec_tabselect.asm Sun Nov 07 22:40:43 2021 +0100
@@ -31,7 +31,7 @@
include(`../config.m4')
C cycles/limb
-C z900 - slfi unsupported
+C z900 ?
C z990 ?
C z9 ?
C z10 ?
@@ -78,7 +78,8 @@
lghi %r7, 0
lghi %r8, 0
lghi %r9, 0
-L(tp4): slfi which, 1
+L(tp4): lghi mask, 1
+ slgr which, mask
slbgr mask, mask
lmg %r10, %r13, 0(tp)
ngr %r10, mask
@@ -103,7 +104,8 @@
lg k, eval(40+FRAME)(%r15) C nents
lghi %r6, 0
lghi %r7, 0
-L(tp2): slfi which, 1
+L(tp2): lghi mask, 1
+ slgr which, mask
slbgr mask, mask
lmg %r10, %r11, 0(tp)
ngr %r10, mask
@@ -122,7 +124,8 @@
lg which, eval(48+FRAME)(%r15)
lg k, eval(40+FRAME)(%r15) C nents
lghi %r6, 0
-L(tp1): slfi which, 1
+L(tp1): lghi mask, 1
+ slgr which, mask
slbgr mask, mask
lg %r10, 0(tp)
ngr %r10, mask
More information about the gmp-commit
mailing list