[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Tue May 29 17:53:39 CEST 2012
details: /var/hg/gmp/rev/5658de0bc96f
changeset: 15020:5658de0bc96f
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue May 29 17:46:53 2012 +0200
description:
mpn/x86_64/fat/fat.c: small formula clarification.
details: /var/hg/gmp/rev/4f686080eb56
changeset: 15021:4f686080eb56
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue May 29 17:52:41 2012 +0200
description:
mpz/remove.c: Optimise branches.
details: /var/hg/gmp/rev/67c759ab9acd
changeset: 15022:67c759ab9acd
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue May 29 17:53:04 2012 +0200
description:
Trivial merge.
details: /var/hg/gmp/rev/5e5e751d845b
changeset: 15023:5e5e751d845b
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Tue May 29 17:53:36 2012 +0200
description:
Changelog
diffstat:
ChangeLog | 8 +++
mpn/arm/v5/mod_1_1.asm | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
mpn/x86_64/fat/fat.c | 2 +-
mpz/remove.c | 100 +++++++++++++++++++++++++----------------
4 files changed, 186 insertions(+), 41 deletions(-)
diffs (287 lines):
diff -r eeff898dd3a6 -r 5e5e751d845b ChangeLog
--- a/ChangeLog Mon May 28 10:33:01 2012 +0200
+++ b/ChangeLog Tue May 29 17:53:36 2012 +0200
@@ -1,3 +1,11 @@
+2012-05-29 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpz/remove.c: Optimise branches.
+
+2012-05-29 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/arm/v5/mod_1_1.asm: New file.
+
2012-05-28 Niels Möller <nisse at lysator.liu.se>
* mpn/generic/gcdext.c (compute_v): Simplified carry handling a
diff -r eeff898dd3a6 -r 5e5e751d845b mpn/arm/v5/mod_1_1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/mod_1_1.asm Tue May 29 17:53:36 2012 +0200
@@ -0,0 +1,117 @@
+dnl ARM mpn_mod_1_1p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 7
+C Cortex-A15 ?
+
+define(`ap', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)
+ push {r4-r10}
+ add r0, r0, r1, asl #2
+ ldr r5, [r0, #-4]!
+ ldr r12, [r0, #-4]!
+ subs r1, r1, #2
+ ble L(4)
+ ldr r8, [r3, #12]
+ mov r4, r12
+ mov r10, r5
+ umull r7, r5, r10, r8
+ sub r1, r1, #1
+ b L(mid)
+
+L(top): adds r12, r6, r7
+ adcs r10, r4, r5
+ sub r1, r1, #1
+ mov r6, #0
+ movcs r6, r8
+ umull r7, r5, r10, r8
+ adds r4, r12, r6
+ subcs r4, r4, r2
+L(mid): ldr r6, [r0, #-4]!
+ teq r1, #0
+ bne L(top)
+
+ adds r12, r6, r7
+ adcs r5, r4, r5
+ subcs r5, r5, r2
+L(4): ldr r1, [r3, #4]
+ cmp r1, #0
+ beq L(7)
+ ldr r4, [r3, #8]
+ umull r0, r6, r5, r4
+ adds r12, r0, r12
+ addcs r6, r6, #1
+ rsb r0, r1, #32
+ mov r0, r12, lsr r0
+ orr r5, r0, r6, asl r1
+ mov r12, r12, asl r1
+ b L(8)
+L(7): cmp r5, r2
+ subcs r5, r5, r2
+L(8): ldr r0, [r3, #0]
+ umull r4, r3, r5, r0
+ add r5, r5, #1
+ adds r0, r4, r12
+ adc r5, r3, r5
+ mul r5, r2, r5
+ sub r12, r12, r5
+ cmp r12, r0
+ addhi r12, r12, r2
+ cmp r2, r12
+ subls r12, r12, r2
+ mov r0, r12, lsr r1
+ pop {r4-r10}
+ bx r14
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+ stmfd sp!, {r4, r5, r6, r14}
+ mov r5, r0
+ clz r4, r1
+ mov r0, r1, asl r4
+ rsb r6, r0, #0
+ bl mpn_invert_limb
+ str r0, [r5, #0]
+ str r4, [r5, #4]
+ cmp r4, #0
+ beq L(2)
+ rsb r1, r4, #32
+ mov r3, #1
+ mov r3, r3, asl r4
+ orr r3, r3, r0, lsr r1
+ mul r3, r6, r3
+ mov r4, r3, lsr r4
+ str r4, [r5, #8]
+L(2): mul r0, r6, r0
+ str r0, [r5, #12]
+ ldmfd sp!, {r4, r5, r6, pc}
+EPILOGUE()
diff -r eeff898dd3a6 -r 5e5e751d845b mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c Mon May 28 10:33:01 2012 +0200
+++ b/mpn/x86_64/fat/fat.c Tue May 29 17:53:36 2012 +0200
@@ -206,7 +206,7 @@
/* Check extended feature flags */
__gmpn_cpuid (dummy_string, 0x80000001);
- if ((dummy_string[4 + 29 / 8] & (1 << (29 - 3 * 8))) == 0)
+ if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
abort (); /* longmode-capable-bit turned off! */
/*********************************************************/
diff -r eeff898dd3a6 -r 5e5e751d845b mpz/remove.c
--- a/mpz/remove.c Mon May 28 10:33:01 2012 +0200
+++ b/mpz/remove.c Tue May 29 17:53:36 2012 +0200
@@ -1,6 +1,6 @@
/* mpz_remove -- divide out a factor and return its multiplicity.
-Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -23,45 +23,64 @@
mp_bitcnt_t
mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
{
- mpz_t fpow[GMP_LIMB_BITS]; /* Really MP_SIZE_T_BITS */
- mpz_t x, rem;
mp_bitcnt_t pwr;
- int p;
+ mp_srcptr fp;
+ mp_size_t sn, fn, afn;
+ mp_limb_t fp0;
- if (UNLIKELY (mpz_cmpabs_ui (f, 1) <= 0))
- DIVIDE_BY_ZERO;
+ sn = SIZ (src);
+ fn = SIZ (f);
+ fp = PTR (f);
+ afn = ABS (fn);
+ fp0 = fp[0];
- if (UNLIKELY (SIZ (src) == 0))
- {
- SIZ (dest) = 0;
- return 0;
- }
+ if (UNLIKELY ((afn <= (fp0 == 1)) /* mpz_cmpabs_ui (f, 1) <= 0 */
+ | (sn == 0))) {
+ /* f = 0 or f = +- 1 or src = 0 */
+ if (afn == 0)
+ DIVIDE_BY_ZERO;
+ mpz_set (dest, src);
+ return 0;
+ }
- if (mpz_cmpabs_ui (f, 2) == 0)
- {
- mp_bitcnt_t s0;
- s0 = mpz_scan1 (src, 0);
- mpz_div_2exp (dest, src, s0);
- if (s0 & (SIZ (f) < 0)) /*((s0 % 2 == 1) && (SIZ (f) < 0))*/
- mpz_neg (dest, dest);
- return s0;
- }
+#if 0
+ if ((fp0 & 1) == 1) { /* f is odd */
+ mp_ptr dp;
+ mp_size_t dn;
- /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an
- upper bound of the result we're seeking. We could also shift down the
- operands so that they become odd, to make intermediate values smaller. */
+ dn = ABS (sn);
+ dp = MPZ_REALLOC (dest, dn);
- mpz_init (rem);
- mpz_init (x);
+ pwr = mpn_remove (dp, &dn, PTR(src), dn, fp, afn, ~(mp_bitcnt_t) 0);
- pwr = 0;
- mpz_init (fpow[0]);
- mpz_set (fpow[0], f);
- mpz_set (dest, src);
+ SIZ (dest) = ((pwr & (fn < 0)) ^ (sn < 0)) ? -dn : dn;
+ } else
+#endif
+ if (afn == (fp0 == 2)) { /* mpz_cmpabs_ui (f, 2) == 0 */
+ pwr = mpz_scan1 (src, 0);
+ mpz_div_2exp (dest, src, pwr);
+ if (pwr & (SIZ (f) < 0)) /*((pwr % 2 == 1) && (SIZ (f) < 0))*/
+ mpz_neg (dest, dest);
+ } else { /* f != +-2 */
- /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k). */
- for (p = 0;; p++)
- {
+ mpz_t fpow[GMP_LIMB_BITS]; /* Really MP_SIZE_T_BITS */
+ mpz_t x, rem;
+ int p;
+
+ /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an
+ upper bound of the result we're seeking. We could also shift down the
+ operands so that they become odd, to make intermediate values smaller. */
+
+ mpz_init (rem);
+ mpz_init (x);
+
+ pwr = 0;
+ mpz_init (fpow[0]);
+ mpz_set (fpow[0], f);
+ mpz_set (dest, src);
+
+ /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k). */
+ for (p = 0;; p++) {
mpz_tdiv_qr (x, rem, dest, fpow[p]);
if (SIZ (rem) != 0)
break;
@@ -70,14 +89,13 @@
mpz_set (dest, x);
}
- pwr = (1L << p) - 1;
+ pwr = (1L << p) - 1;
- mpz_clear (fpow[p]);
+ mpz_clear (fpow[p]);
- /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
- zero remainder. */
- while (--p >= 0)
- {
+ /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
+ zero remainder. */
+ while (--p >= 0) {
mpz_tdiv_qr (x, rem, dest, fpow[p]);
if (SIZ (rem) == 0)
{
@@ -87,7 +105,9 @@
mpz_clear (fpow[p]);
}
- mpz_clear (x);
- mpz_clear (rem);
+ mpz_clear (x);
+ mpz_clear (rem);
+ }
+
return pwr;
}
More information about the gmp-commit
mailing list