[Gmp-commit] /var/hg/gmp: 2 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue Nov 2 01:00:44 UTC 2021
details: /var/hg/gmp/rev/c23c7ac76d6b
changeset: 18272:c23c7ac76d6b
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Nov 02 01:47:16 2021 +0100
description:
Rewrite recently added s390_64 sec_tabselect.
details: /var/hg/gmp/rev/ce52b7504440
changeset: 18273:ce52b7504440
user: Torbjorn Granlund <tg at gmplib.org>
date: Tue Nov 02 02:00:41 2021 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 4 +
mpn/s390_64/sec_tabselect.asm | 113 +++++++++++++++++++++++++++++------------
mpz/fac_ui.c | 30 ++++++++--
3 files changed, 106 insertions(+), 41 deletions(-)
diffs (208 lines):
diff -r d8f1f6b7b51f -r ce52b7504440 ChangeLog
--- a/ChangeLog Tue Nov 02 01:40:37 2021 +0100
+++ b/ChangeLog Tue Nov 02 02:00:41 2021 +0100
@@ -1,3 +1,7 @@
+2021-10-31 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpz/fac_ui.c: Save half the products for small values.
+
2021-10-08 Niels Möller <nisse at lysator.liu.se>
* tests/mpn/t-addaddmul.c: Unit test for mpn_addaddmul_1msb0.
diff -r d8f1f6b7b51f -r ce52b7504440 mpn/s390_64/sec_tabselect.asm
--- a/mpn/s390_64/sec_tabselect.asm Tue Nov 02 01:40:37 2021 +0100
+++ b/mpn/s390_64/sec_tabselect.asm Tue Nov 02 02:00:41 2021 +0100
@@ -31,14 +31,14 @@
include(`../config.m4')
C cycles/limb
-C z900 ?
+C z900 - slfi unsupported
C z990 ?
C z9 ?
C z10 ?
C z196 ?
C z13 ?
C z14 ?
-C z15 ?
+C z15 1.6
dnl void
dnl mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
@@ -48,42 +48,89 @@
define(`tp', `%r3')
define(`n', `%r4')
define(`nents', `%r5')
-define(`which', `%r6')
+define(`which_arg',`%r6') C magicked to stack
+
+dnl r0 r1 r2 r3 r4 r5 r6 r7
+dnl r8 r9 r10 r11 r12 r13 r14 r15
+
+define(`mask', `%r14')
+define(`k', `%r1')
+define(`which', `%r0')
+
+define(`FRAME', 64)
ASM_START()
PROLOGUE(mpn_sec_tabselect)
- stmg %r7, %r8, 56(%r15)
- lgr %r8, n
- sllg n, n, 3
+ stmg %r5, %r15, 40(%r15)
+ aghi %r15, -FRAME
-L(cpy): lg %r0, 0(tp)
- stg %r0, 0(rp)
- aghi tp, 8
- aghi rp, 8
- brctg %r8, L(cpy)
+ sllg n, n, 3
+ msgr %r5, n
+ stg %r5, 16(%r15) C nents * n * LIMB_BYTES
- aghi nents, -1
- jle L(ret)
- slfi which, 1
-
+ srlg %r5, n, 2+3
+ ngr %r5, %r5
+ je L(end4)
L(outer):
- slfi which, 1
- slbgr %r0, %r0
- sgr rp, n
- srlg %r8, n, 3
-
-L(top): lg %r1, 0(rp)
- lg %r7, 0(tp)
- xgr %r7, %r1
- ngr %r7, %r0
- xgr %r1, %r7
- stg %r1, 0(rp)
- aghi tp, 8
- aghi rp, 8
- brctg %r8, L(top)
-
- brctg nents, L(outer)
-
-L(ret): lmg %r7, %r8, 56(%r15)
+ lg which, eval(48+FRAME)(%r15)
+ lg k, eval(40+FRAME)(%r15) C nents
+ lghi %r6, 0
+ lghi %r7, 0
+ lghi %r8, 0
+ lghi %r9, 0
+L(tp4): slfi which, 1
+ slbgr mask, mask
+ lmg %r10, %r13, 0(tp)
+ ngr %r10, mask
+ ngr %r11, mask
+ ngr %r12, mask
+ ngr %r13, mask
+ agr %r6, %r10
+ agr %r7, %r11
+ agr %r8, %r12
+ agr %r9, %r13
+ agr tp, n
+ brctg k, L(tp4)
+ stmg %r6, %r9, 0(rp)
+ aghi rp, 32
+ slg tp, 16(%r15)
+ aghi tp, eval(4*8)
+ brctg %r5, L(outer)
+L(end4):
+ tmll n, 16
+ je L(end2)
+ lg which, eval(48+FRAME)(%r15)
+ lg k, eval(40+FRAME)(%r15) C nents
+ lghi %r6, 0
+ lghi %r7, 0
+L(tp2): slfi which, 1
+ slbgr mask, mask
+ lmg %r10, %r11, 0(tp)
+ ngr %r10, mask
+ ngr %r11, mask
+ agr %r6, %r10
+ agr %r7, %r11
+ agr tp, n
+ brctg k, L(tp2)
+ stmg %r6, %r7, 0(rp)
+ aghi rp, 16
+ slg tp, 16(%r15)
+ aghi tp, eval(2*8)
+L(end2):
+ tmll n, 8
+ je L(end1)
+ lg which, eval(48+FRAME)(%r15)
+ lg k, eval(40+FRAME)(%r15) C nents
+ lghi %r6, 0
+L(tp1): slfi which, 1
+ slbgr mask, mask
+ lg %r10, 0(tp)
+ ngr %r10, mask
+ agr %r6, %r10
+ agr tp, n
+ brctg k, L(tp1)
+ stg %r6, 0(rp)
+L(end1):
+ lmg %r5, %r15, eval(40+FRAME)(%r15)
br %r14
EPILOGUE()
diff -r d8f1f6b7b51f -r ce52b7504440 mpz/fac_ui.c
--- a/mpz/fac_ui.c Tue Nov 02 01:40:37 2021 +0100
+++ b/mpz/fac_ui.c Tue Nov 02 02:00:41 2021 +0100
@@ -2,8 +2,8 @@
Contributed to the GNU project by Marco Bodrato.
-Copyright 1991, 1993-1995, 2000-2003, 2011, 2012, 2015 Free Software
-Foundation, Inc.
+Copyright 1991, 1993-1995, 2000-2003, 2011, 2012, 2015, 2021 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -68,21 +68,35 @@
mp_limb_t prod, max_prod;
mp_size_t j;
mp_ptr factors;
+ mp_limb_t fac, diff = n - numberof (table);
TMP_SDECL;
TMP_SMARK;
- factors = TMP_SALLOC_LIMBS (2 + (n - numberof (table)) / FACTORS_PER_LIMB);
+ factors = TMP_SALLOC_LIMBS (2 + diff / FACTORS_PER_LIMB);
factors[0] = table[numberof (table)-1];
j = 1;
- prod = n;
+ if ((diff & 1) == 0)
+ {
+ prod = n;
+ /* if (diff != 0) */
+ fac = --n * numberof (table);
+ }
+ else
+ {
+ prod = n * numberof (table);
+ fac = prod + --diff;
+ }
+
#if TUNE_PROGRAM_BUILD
- max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
+ max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT);
#else
- max_prod = GMP_NUMB_MAX / (FAC_ODD_THRESHOLD | 1);
+ max_prod = GMP_NUMB_MAX /
+ (((FAC_ODD_THRESHOLD + numberof (table) + 1) / 2) *
+ ((FAC_ODD_THRESHOLD + numberof (table)) / 2));
#endif
- while (--n >= numberof (table))
- FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+ for (;diff != 0; fac += (diff -= 2))
+ FACTOR_LIST_STORE (fac, prod, max_prod, factors, j);
factors[j++] = prod;
mpz_prodlimbs (x, factors, j);
More information about the gmp-commit mailing list