[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
Mon Nov 12 21:06:38 CET 2012
details: /var/hg/gmp/rev/1e723be62081
changeset: 15109:1e723be62081
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Nov 12 20:54:09 2012 +0100
description:
Remove a redundant abort().
details: /var/hg/gmp/rev/228bb37c6023
changeset: 15110:228bb37c6023
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Nov 12 20:56:00 2012 +0100
description:
Fix comment typo.
details: /var/hg/gmp/rev/4e17fc006374
changeset: 15111:4e17fc006374
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Nov 12 20:59:17 2012 +0100
description:
Tune, simplify.
details: /var/hg/gmp/rev/b8fde68617eb
changeset: 15112:b8fde68617eb
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Nov 12 21:00:26 2012 +0100
description:
Fix typo.
details: /var/hg/gmp/rev/94c55274b650
changeset: 15113:94c55274b650
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Nov 12 21:02:19 2012 +0100
description:
Add ARM64 support. Add AVR support.
details: /var/hg/gmp/rev/889e31898397
changeset: 15114:889e31898397
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Nov 12 21:05:42 2012 +0100
description:
Update recommendations for M-R counts. Misc wording improvements.
diffstat:
ChangeLog | 5 +
doc/gmp.texi | 14 +++--
longlong.h | 38 +++++++++++++++
mpn/generic/perfsqr.c | 2 +-
mpn/powerpc64/mode64/divrem_1.asm | 98 ++++++++++----------------------------
mpn/s390_64/lshift.asm | 2 +-
tests/mpz/t-popcount.c | 3 +-
7 files changed, 80 insertions(+), 82 deletions(-)
diffs (truncated from 319 to 300 lines):
diff -r 16951630978f -r 889e31898397 ChangeLog
--- a/ChangeLog Mon Nov 12 20:52:35 2012 +0100
+++ b/ChangeLog Mon Nov 12 21:05:42 2012 +0100
@@ -1,5 +1,10 @@
2012-11-12 Torbjorn Granlund <tege at gmplib.org>
+ * longlong.h: Add ARM64 support.
+ * longlong.h: Add AVR support.
+
+ * mpn/powerpc64/mode64/divrem_1.asm: Tune, simplify.
+
* mpq/md_2exp.c: Use MPN_COPY_INCR, not MPN_COPY_DECR.
* tests/mpq/t-md_2exp.c (check_random): New function.
diff -r 16951630978f -r 889e31898397 doc/gmp.texi
--- a/doc/gmp.texi Mon Nov 12 20:52:35 2012 +0100
+++ b/doc/gmp.texi Mon Nov 12 21:05:42 2012 +0100
@@ -3494,9 +3494,10 @@
@var{n} is definitely composite.
This function does some trial divisions, then some Miller-Rabin probabilistic
-primality tests. @var{reps} controls how many such tests are done, 5 to 10 is
-a reasonable number, more will reduce the chances of a composite being
-returned as ``probably prime''.
+primality tests. The argument @var{reps} controls how many such tests are
+done; a higher value will reduce the chances of a composite being returned as
+``probably prime''. 25 is a reasonable number; a composite number will then be
+identified as a prime with a probability of less than @m{2^{-50}}.
Miller-Rabin and similar tests can be more properly called compositeness
tests. Numbers which fail are known to be composite but those which pass
@@ -8343,9 +8344,10 @@
divisions saved. When @math{d} is a single limb some simplifications arise,
providing good speedups on a number of processors.
-@code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the @code{mpn_redc_X}
-functions differ subtly in how they return @math{r}, leading to some negations
-in the above formula, but all are essentially the same.
+The functions @code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the
+internal @code{mpn_redc_X} functions differ subtly in how they return @math{r},
+leading to some negations in the above formula, but all are essentially the
+same.
@cindex Divisibility algorithm
@cindex Congruence algorithm
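
As a usage sketch of the updated gmp.texi recommendation above (illustrative only, not part of the patch; the test value is arbitrary): with reps = 25, a composite slips through as "probably prime" with probability below 2^-50.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t n;
  mpz_init_set_str (n, "2305843009213693951", 10);   /* 2^61 - 1, a Mersenne prime */

  /* 25 Miller-Rabin reps, per the new recommendation.
     Returns 2 = definitely prime, 1 = probably prime, 0 = composite.  */
  int r = mpz_probab_prime_p (n, 25);
  printf ("mpz_probab_prime_p: %d\n", r);

  mpz_clear (n);
  return 0;
}
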
diff -r 16951630978f -r 889e31898397 longlong.h
--- a/longlong.h Mon Nov 12 20:52:35 2012 +0100
+++ b/longlong.h Mon Nov 12 21:05:42 2012 +0100
@@ -259,6 +259,15 @@
#endif /* clz using mpn */
#endif /* __alpha */
+#if defined (__AVR) && W_TYPE_SIZE == 8
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ unsigned short __p = (unsigned short) (m0) * (m1); \
+ (ph) = __p >> 8; \
+ (pl) = __p; \
+ } while (0)
+#endif /* AVR */
+
#if defined (_CRAY) && W_TYPE_SIZE == 64
#include <intrinsics.h>
#define UDIV_PREINV_ALWAYS 1
@@ -520,6 +529,35 @@
#endif
#endif /* __arm__ */
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rZ" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (bl)) \
+ { \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+ } \
+ else /* only bh might be a constant */ \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rZ" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
+ } while (0)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#endif /* __aarch64__ */
+
#if defined (__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
({union {UDItype __ll; \
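
For reference (not part of the patch), the values these longlong.h primitives produce can be written portably as below; the AArch64 macros above compute the same results with umulh/adds/adc, and the AVR umul_ppmm is the same idea at W_TYPE_SIZE == 8.  This sketch assumes a compiler with unsigned __int128; the _ref helper names are illustrative.

#include <stdint.h>

/* umul_ppmm (ph, pl, m0, m1): split the full 64x64->128 product.  */
static void
umul_ppmm_ref (uint64_t *ph, uint64_t *pl, uint64_t m0, uint64_t m1)
{
  unsigned __int128 p = (unsigned __int128) m0 * m1;
  *ph = (uint64_t) (p >> 64);   /* high limb, what ARM64 umulh yields */
  *pl = (uint64_t) p;           /* low limb, a plain 64x64->64 multiply */
}

/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-limb add with carry propagation.  */
static void
add_ssaaaa_ref (uint64_t *sh, uint64_t *sl,
                uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl)
{
  *sl = al + bl;
  *sh = ah + bh + (*sl < al);   /* carry out of the low-limb addition */
}
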
diff -r 16951630978f -r 889e31898397 mpn/generic/perfsqr.c
--- a/mpn/generic/perfsqr.c Mon Nov 12 20:52:35 2012 +0100
+++ b/mpn/generic/perfsqr.c Mon Nov 12 21:05:42 2012 +0100
@@ -185,7 +185,7 @@
/* Check that we have even multiplicity of 2, and then check that the rest is
a possible perfect square. Leave disabled until we can determine this
really is an improvement. It it is, it could completely replace the
- simple probe above, since this should through out more non-squares, but at
+ simple probe above, since this should throw out more non-squares, but at
the expense of somewhat more cycles. */
{
mp_limb_t lo;
diff -r 16951630978f -r 889e31898397 mpn/powerpc64/mode64/divrem_1.asm
--- a/mpn/powerpc64/mode64/divrem_1.asm Mon Nov 12 20:52:35 2012 +0100
+++ b/mpn/powerpc64/mode64/divrem_1.asm Mon Nov 12 21:05:42 2012 +0100
@@ -1,7 +1,7 @@
dnl PowerPC-64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.
-dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
-dnl Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
@@ -22,11 +22,11 @@
C cycles/limb
C norm unorm frac
-C POWER3/PPC630 16-34 16-34 ~11
-C POWER4/PPC970 29 19
-C POWER5 29 29 ~20
-C POWER6 50 59 ~42
-C POWER7 25 25 ~14
+C POWER3/PPC630 16-34 16-34 ~11 outdated figures
+C POWER4/PPC970 28 28 19
+C POWER5 29 29 ~19
+C POWER6 49 59 ~42
+C POWER7 24.5 23 ~14
C INPUT PARAMETERS
C qp = r3
@@ -113,10 +113,11 @@
sldi r6, r6, 3
ALIGN(16)
L(uloop):
- addi r11, r31, 1
ldx r8, r26, r6
+ nop
mulld r0, r31, r3
mulhdu r10, r31, r3
+ addi r11, r31, 1
srd r9, r8, r5
addi r6, r6, -8
or r9, r7, r9
@@ -124,12 +125,11 @@
adde r10, r10, r11
mulld r31, r10, r30
subf r31, r31, r9
- subfc r0, r0, r31 C r >= ql
- subfe r0, r0, r0 C r0 = -(r >= ql)
- not r7, r0
- add r10, r7, r10 C qh -= (r >= ql)
- andc r0, r30, r0
- add r31, r31, r0
+ subfc r0, r31, r0 C r <= ql
+ subfe r0, r0, r0 C r0 = -(r <= ql)
+ and r9, r30, r0
+ add r31, r31, r9
+ add r10, r0, r10 C qh -= (r >= ql)
cmpld cr7, r31, r30
bge- cr7, L(164)
L(123):
@@ -166,19 +166,19 @@
L(ufloop):
addi r11, r31, 1
nop
- mulld r7, r3, r31
+ mulld r0, r3, r31
mulhdu r10, r3, r31
add r10, r10, r11
mulld r31, r9, r10
ifelse(0,1,`
- subfc r0, r7, r31
+ subfc r0, r0, r31
subfe r0, r0, r0 C r0 = -(r >= ql)
not r7, r0
add r10, r7, r10 C qh -= (r >= ql)
andc r0, r30, r0
add r31, r31, r0
',`
- cmpld cr7, r31, r7
+ cmpld cr7, r31, r0
blt cr7, L(29)
add r31, r30, r31
addi r10, r10, -1
@@ -219,12 +219,11 @@
and r0, r0, r7
subf r31, r0, r31
L(8):
-L(10):
mr r3, r30
CALL( mpn_invert_limb)
- nop
+ li r27, 0
addic. r6, r28, -1
- blt- cr0, L(150)
+ blt- cr0, L(110)
mtctr r28
sldi r6, r6, 3
ALIGN(16)
@@ -234,68 +233,23 @@
mulld r0, r31, r3
mulhdu r10, r31, r3
addi r6, r6, -8
- addc r7, r0, r8
+ addc r0, r0, r8
adde r10, r10, r11
mulld r31, r10, r30
subf r31, r31, r8 C r = nl - qh * d
- subfc r0, r7, r31 C r >= ql
- subfe r0, r0, r0 C r0 = -(r >= ql)
- not r7, r0
- add r10, r7, r10 C qh -= (r >= ql)
- andc r0, r30, r0
- add r31, r31, r0
+ subfc r0, r31, r0 C r <= ql
+ subfe r0, r0, r0 C r0 = -(r <= ql)
+ and r9, r30, r0
+ add r31, r31, r9
+ add r10, r0, r10 C qh -= (r >= ql)
cmpld cr7, r31, r30
bge- cr7, L(167)
L(51):
std r10, 0(r29)
addi r29, r29, -8
bdnz L(nloop)
+ b L(110)
-L(150):
- addic. r9, r25, -1
- blt- cr0, L(152)
- mtctr r25
- neg r9, r30
- ALIGN(16)
-L(nfloop):
- addi r11, r31, 1
- nop
- mulld r7, r3, r31
- mulhdu r10, r3, r31
- add r10, r10, r11
- mulld r31, r9, r10
-ifelse(0,1,`
- subfc r0, r7, r31
- subfe r0, r0, r0 C r0 = -(r >= ql)
- not r7, r0
- add r10, r7, r10 C qh -= (r >= ql)
- andc r0, r30, r0
- add r31, r31, r0
-',`
- cmpld cr7, r31, r7
- blt cr7, L(28)
- add r31, r30, r31
- addi r10, r10, -1
-L(28):
-')
- std r10, 0(r29)
- addi r29, r29, -8
- bdnz L(nfloop)
-L(152):
- addi r1, r1, 176
- mr r3, r31
- ld r0, 16(r1)
- lwz r12, 8(r1)
- mtlr r0
- ld r25, -56(r1)
- ld r26, -48(r1)
- mtcrf 8, r12
- ld r27, -40(r1)
- ld r28, -32(r1)
- ld r29, -24(r1)
- ld r30, -16(r1)
- ld r31, -8(r1)
- blr
L(164):
subf r31, r30, r31
addi r10, r10, 1
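
The rescheduled subfc/subfe/and/add sequences in both loops compute the usual branch-free adjustment of a candidate quotient/remainder pair.  Roughly, in C (a sketch with illustrative names, assuming a 64-bit limb; not code from this patch):

/* After forming candidate quotient qh, its low limb ql, and the
   candidate remainder r = nl - qh * d:  */
typedef unsigned long long limb;

static void
divstep_fixup (limb *qh, limb *r, limb ql, limb d)
{
  limb mask = -(limb) (*r > ql);   /* all ones iff the candidate quotient was one too big */
  *qh += mask;                     /* qh -= 1 in that case */
  *r  += mask & d;                 /* and add d back into the remainder */

  if (*r >= d)                     /* final correction; the cmpld/bge to L(164) path above */
    {
      *r -= d;
      *qh += 1;
    }
}
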
diff -r 16951630978f -r 889e31898397 mpn/s390_64/lshift.asm
--- a/mpn/s390_64/lshift.asm Mon Nov 12 20:52:35 2012 +0100
+++ b/mpn/s390_64/lshift.asm Mon Nov 12 21:05:42 2012 +0100
@@ -32,7 +32,7 @@
C * One could assume more pipelining could approach 2.5 c/l, but we have not
C found any 8-way loop that runs better than the current 4-way loop.
C * Consider using the same feed-in code for 1 <= n <= 3 as for n mod 4,
-C similrly to the x86_64 sqr_basecase feed-in.
+C similarly to the x86_64 sqr_basecase feed-in.
C INPUT PARAMETERS
define(`rp', `%r2')
diff -r 16951630978f -r 889e31898397 tests/mpz/t-popcount.c