[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Jun 8 20:30:27 UTC 2015
details: /var/hg/gmp/rev/ee0ec9977e8d
changeset: 16683:ee0ec9977e8d
user: Torbjorn Granlund <torbjorng at google.com>
date: Mon Jun 08 22:28:01 2015 +0200
description:
Align stack for calls.
details: /var/hg/gmp/rev/bde3705f3113
changeset: 16684:bde3705f3113
user: Torbjorn Granlund <torbjorng at google.com>
date: Mon Jun 08 22:29:07 2015 +0200
description:
ChangeLog
details: /var/hg/gmp/rev/e39fd1964873
changeset: 16685:e39fd1964873
user: Torbjorn Granlund <torbjorng at google.com>
date: Mon Jun 08 22:30:25 2015 +0200
description:
Trivial merge.
diffstat:
ChangeLog | 4 +++
mpn/generic/rootrem.c | 61 +++++++++++++++++++++++-------------------------
mpn/generic/toom2_sqr.c | 6 ++--
mpn/x86/p6/gcd_1.asm | 7 ++++-
4 files changed, 41 insertions(+), 37 deletions(-)
diffs (191 lines):
diff -r fd8f399f5b49 -r e39fd1964873 ChangeLog
--- a/ChangeLog Sat Jun 06 21:15:23 2015 +0200
+++ b/ChangeLog Mon Jun 08 22:30:25 2015 +0200
@@ -1,3 +1,7 @@
+2015-06-08 Torbjörn Granlund <torbjorng at google.com>
+
+ * mpn/x86/p6/gcd_1.asm: Align stack for calls.
+
2015-06-06 Torbjörn Granlund <torbjorng at google.com>
* config.sub: Recognise any arm*neon CPU.
diff -r fd8f399f5b49 -r e39fd1964873 mpn/generic/rootrem.c
--- a/mpn/generic/rootrem.c Sat Jun 06 21:15:23 2015 +0200
+++ b/mpn/generic/rootrem.c Mon Jun 08 22:30:25 2015 +0200
@@ -143,35 +143,22 @@
int logk;
TMP_DECL;
- TMP_MARK;
-
- if (remp == NULL)
- {
- rp = TMP_ALLOC_LIMBS (un + 1); /* will contain the remainder */
- scratch = rp; /* used by mpn_div_q */
- }
- else
- {
- scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
- rp = remp;
- }
- sp = rootp;
-
MPN_SIZEINBASE_2EXP(unb, up, un, 1);
/* unb is the number of bits of the input U */
-
xnb = (unb - 1) / k + 1; /* ceil (unb / k) */
/* xnb is the number of bits of the root R */
if (xnb == 1) /* root is 1 */
{
if (remp == NULL)
- remp = rp;
- mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
- MPN_NORMALIZE (remp, un); /* There should be at most one zero limb,
+ un -= (*up == CNST_LIMB (1)); /* Non-zero iif {up,un} > 1 */
+ else
+ {
+ mpn_sub_1 (remp, up, un, CNST_LIMB (1));
+ un -= (remp [un - 1] == 0); /* There should be at most one zero limb,
if we demand u to be normalized */
+ }
rootp[0] = 1;
- TMP_FREE;
return un;
}
@@ -180,12 +167,6 @@
r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
kk = k * (xnb - 1); /* number of truncated bits in the input */
rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
- MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
- mpn_sub_1 (rp, rp, rn, 1); /* subtract the initial approximation: since
- the non-truncated part is less than 2^k, it
- is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
- sp[0] = 1; /* initial approximation */
- sn = 1; /* it has one limb */
for (logk = 1; ((k - 1) >> logk) != 0; logk++)
;
@@ -193,7 +174,7 @@
b = xnb - 1; /* number of remaining bits to determine in the kth root */
ni = 0;
- while (b != 0)
+ do
{
/* invariant: here we want b+1 total bits for the kth root */
sizes[ni] = b;
@@ -208,7 +189,7 @@
if (b >= sizes[ni])
b = sizes[ni] - 1; /* add just one bit at a time */
ni++;
- }
+ } while (b != 0);
sizes[ni] = 0;
ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
/* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
@@ -216,6 +197,7 @@
Newton iteration will first compute sizes[ni-1] extra bits,
then sizes[ni-2], ..., then sizes[0] = b. */
+ TMP_MARK;
/* qp and wp need enough space to store S'^k where S' is an approximate
root. Since S' can be as large as S+2, the worst case is when S=2 and
S'=4. But then since we know the number of bits of S in advance, S'
@@ -224,14 +206,29 @@
fits in un limbs, the number of extra limbs needed is bounded by
ceil(k*log2(3/2)/GMP_NUMB_BITS). */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
- TMP_ALLOC_LIMBS_2 (qp, un + EXTRA, /* will contain quotient and remainder
- of R/(k*S^(k-1)), and S^k */
+ TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
+ qp, un + EXTRA, /* will contain quotient and remainder
+ of R/(k*S^(k-1)), and S^k */
wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
- and temporary for mpn_pow_1 */
+ and temporary for mpn_pow_1 */
+
+ if (remp == NULL)
+ rp = scratch; /* will contain the remainder */
+ else
+ rp = remp;
+ sp = rootp;
+
+ MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
+ mpn_sub_1 (rp, rp, rn, 1); /* subtract the initial approximation: since
+ the non-truncated part is less than 2^k, it
+ is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
+ sp[0] = 1; /* initial approximation */
+ sn = 1; /* it has one limb */
wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
wn = 1;
- for (i = ni; i != 0; i--)
+ i = ni;
+ do
{
/* 1: loop invariant:
{sp, sn} is the current approximation of the root, which has
@@ -408,7 +405,7 @@
/* otherwise we have rn > 0, thus the return value is ok */
/* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
- }
+ } while (--i != 0);
TMP_FREE;
return rn;
diff -r fd8f399f5b49 -r e39fd1964873 mpn/generic/toom2_sqr.c
--- a/mpn/generic/toom2_sqr.c Sat Jun 06 21:15:23 2015 +0200
+++ b/mpn/generic/toom2_sqr.c Mon Jun 08 22:30:25 2015 +0200
@@ -138,9 +138,9 @@
ASSERT (cy + 1 <= 3);
ASSERT (cy2 <= 2);
- mpn_incr_u (pp + 2 * n, cy2);
+ MPN_INCR_U (pp + 2 * n, s + s, cy2);
if (LIKELY (cy <= 2))
- mpn_incr_u (pp + 3 * n, cy);
+ MPN_INCR_U (pp + 3 * n, s + s - n, cy);
else
- mpn_decr_u (pp + 3 * n, 1);
+ MPN_DECR_U (pp + 3 * n, s + s - n, 1);
}
diff -r fd8f399f5b49 -r e39fd1964873 mpn/x86/p6/gcd_1.asm
--- a/mpn/x86/p6/gcd_1.asm Sat Jun 06 21:15:23 2015 +0200
+++ b/mpn/x86/p6/gcd_1.asm Mon Jun 08 22:30:25 2015 +0200
@@ -2,7 +2,8 @@
dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked by Torbjorn Granlund.
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2015 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
@@ -99,6 +100,7 @@
call L(movl_eip_to_ebx)
add $_GLOBAL_OFFSET_TABLE_, %ebx
')
+ add $-4, %esp
push v0 C param 3
push n C param 2
push up C param 1
@@ -111,13 +113,14 @@
call L(movl_eip_to_ebx)
add $_GLOBAL_OFFSET_TABLE_, %ebx
')
+ add $-4, %esp
push v0 C param 3
push n C param 2
push up C param 1
CALL( mpn_modexact_1_odd)
L(called):
- add $12, %esp C deallocate params
+ add $16, %esp C deallocate params
ifdef(`PIC_WITH_EBX',`dnl
pop %ebx
')
More information about the gmp-commit mailing list