[Gmp-commit] /var/hg/gmp: 6 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sun Apr 7 23:58:55 CEST 2013
details: /var/hg/gmp/rev/f2d5f9542a47
changeset: 15687:f2d5f9542a47
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 07 22:26:45 2013 +0200
description:
Put all multiplies low-limb first.
details: /var/hg/gmp/rev/03abecfad25d
changeset: 15688:03abecfad25d
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 07 22:44:18 2013 +0200
description:
(sparc64): Define umul_ppmm, add_ssaaaa, and count_leading_zeros conditionally under the symbol __sparc_vis3.
details: /var/hg/gmp/rev/3e21364a4020
changeset: 15689:3e21364a4020
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 07 22:46:50 2013 +0200
description:
Tabify.
details: /var/hg/gmp/rev/02a13fc63f87
changeset: 15690:02a13fc63f87
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 07 22:52:21 2013 +0200
description:
Make trivial change to avoid v6t2 instruction, move file accordingly.
details: /var/hg/gmp/rev/e8d8279fd12e
changeset: 15691:e8d8279fd12e
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 07 22:54:41 2013 +0200
description:
Correct cycle table, add arch table.
details: /var/hg/gmp/rev/8c0775f4aa4d
changeset: 15692:8c0775f4aa4d
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Apr 07 23:58:44 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 15 ++++
longlong.h | 19 +++++
mpn/alpha/invert_limb.asm | 64 +++++++++---------
mpn/arm/mode1o.asm | 12 ++-
mpn/arm/v6/mode1o.asm | 84 ++++++++++++++++++++++++
mpn/arm/v6t2/mode1o.asm | 76 ---------------------
mpn/powerpc64/mode64/invert_limb.asm | 69 ++++++++++---------
mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm | 4 +-
mpn/sparc64/ultrasparct3/dive_1.asm | 4 +-
mpn/sparc64/ultrasparct3/invert_limb.asm | 4 +-
mpn/sparc64/ultrasparct3/mod_1_4.asm | 4 +-
mpn/sparc64/ultrasparct3/mode1o.asm | 4 +-
mpn/x86_64/aors_err1_n.asm | 108 +++++++++++++++---------------
mpn/x86_64/aors_err2_n.asm | 54 +++++++-------
mpn/x86_64/core2/aors_err1_n.asm | 106 +++++++++++++++---------------
15 files changed, 339 insertions(+), 288 deletions(-)
diffs (truncated from 841 to 300 lines):
diff -r 4904502d6332 -r 8c0775f4aa4d ChangeLog
--- a/ChangeLog Thu Apr 04 23:43:28 2013 +0200
+++ b/ChangeLog Sun Apr 07 23:58:44 2013 +0200
@@ -1,3 +1,18 @@
+2013-04-07 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/sparc64/ultrasparct3/mod_34lsub1.asm: New file.
+
+ * longlong.h (sparc64): Define umul_ppmm, add_ssaaaa, and
+ count_leading_zeros conditionally under the symbol__sparc_vis3.
+
+ * mpn/arm/dive_1.asm: New file.
+ * mpn/arm/v6/dive_1.asm: New file.
+
+ * mpn/arm/v6t2/mode1o.asm: Make trivial change to avoid v6t2...
+ * mpn/arm/v6/mode1o.asm: ...instruction, move file accordingly.
+
+ * mpn/powerpc64/mode64/invert_limb.asm: Put all multiplies low-limb first.
+
2013-04-04 David S. Miller <davem at davemloft.net>
* mpn/sparc64/ultrasparct3/add_n.asm: Rewrite.
diff -r 4904502d6332 -r 8c0775f4aa4d longlong.h
--- a/longlong.h Thu Apr 04 23:43:28 2013 +0200
+++ b/longlong.h Sun Apr 07 23:58:44 2013 +0200
@@ -1757,6 +1757,25 @@
: "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \
"rJ" ((al) >> 32), "rI" ((bl) >> 32) \
__CLOBBER_CC)
+#ifdef __sparc_vis3
+#undef add_ssaaaa
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ( \
+ "addcc %r4, %5, %1\n" \
+ " addxc %r2, %r3, %0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rI" (bl) __CLOBBER_CC)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ (pl) = __m0 * __m1; \
+ __asm__ ("umulxhi\t%2, %1, %0" \
+ : "=r" (ph) \
+ : "%r" (__m0), "r" (__m1)); \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("lzd\t%1,%0" : "=r" (count) : "r" (x))
+#endif
#endif
#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32
diff -r 4904502d6332 -r 8c0775f4aa4d mpn/alpha/invert_limb.asm
--- a/mpn/alpha/invert_limb.asm Thu Apr 04 23:43:28 2013 +0200
+++ b/mpn/alpha/invert_limb.asm Sun Apr 07 23:58:44 2013 +0200
@@ -77,37 +77,37 @@
ret r31, (r26), 1
EPILOGUE()
DATASTART(approx_tab)
- .word 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
- .word 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
- .word 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
- .word 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
- .word 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
- .word 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
- .word 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
- .word 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
- .word 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
- .word 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
- .word 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
- .word 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
- .word 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
- .word 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
- .word 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
- .word 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
- .word 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
- .word 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
- .word 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
- .word 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
- .word 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
- .word 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
- .word 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
- .word 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
- .word 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
- .word 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
- .word 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
- .word 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
- .word 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
- .word 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
- .word 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
- .word 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
+ .word 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+ .word 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+ .word 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+ .word 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+ .word 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+ .word 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+ .word 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+ .word 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+ .word 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+ .word 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+ .word 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+ .word 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+ .word 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+ .word 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+ .word 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+ .word 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+ .word 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+ .word 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+ .word 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+ .word 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+ .word 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+ .word 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+ .word 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+ .word 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+ .word 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+ .word 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+ .word 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+ .word 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+ .word 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+ .word 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+ .word 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+ .word 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
DATAEND()
ASM_END()
diff -r 4904502d6332 -r 8c0775f4aa4d mpn/arm/mode1o.asm
--- a/mpn/arm/mode1o.asm Thu Apr 04 23:43:28 2013 +0200
+++ b/mpn/arm/mode1o.asm Sun Apr 07 23:58:44 2013 +0200
@@ -26,8 +26,16 @@
C XScale ?
C Cortex-A7 ?
C Cortex-A8 ?
-C Cortex-A9 9
-C Cortex-A15 7
+C Cortex-A9 10
+C Cortex-A15 9
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te -
+C v6 -
+C v6t2 -
+C v7a -
define(`up', `r0')
define(`n', `r1')
diff -r 4904502d6332 -r 8c0775f4aa4d mpn/arm/v6/mode1o.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v6/mode1o.asm Sun Apr 07 23:58:44 2013 +0200
@@ -0,0 +1,84 @@
+dnl ARM v6 mpn_modexact_1c_odd
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM -
+C XScale -
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 9
+C Cortex-A15 7
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te smulbb
+C v6 umaal
+C v6t2 -
+C v7a -
+
+define(`up', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cy', `r3')
+
+ .protected binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+ stmfd sp!, {r4, r5, r6, r7}
+
+ LEA( r4, binvert_limb_table)
+
+ ldr r6, [up], #4 C up[0]
+
+ and r12, d, #254
+ ldrb r4, [r4, r12, lsr #1]
+ smulbb r12, r4, r4
+ mul r12, d, r12
+ rsb r12, r12, r4, asl #1
+ mul r4, r12, r12
+ mul r4, d, r4
+ rsb r4, r4, r12, asl #1 C r4 = inverse
+
+ subs n, n, #1
+ sub r6, r6, cy
+ mul r6, r6, r4
+ beq L(end)
+
+ rsb r5, r4, #0 C r5 = -inverse
+
+L(top): ldr r7, [up], #4
+ mov r12, #0
+ umaal r12, cy, r6, d
+ mul r6, r7, r4
+ mla r6, cy, r5, r6
+ subs n, n, #1
+ bne L(top)
+
+L(end): mov r12, #0
+ umaal r12, cy, r6, d
+ mov r0, cy
+
+ ldmfd sp!, {r4, r5, r6, r7}
+ bx r14
+EPILOGUE()
diff -r 4904502d6332 -r 8c0775f4aa4d mpn/arm/v6t2/mode1o.asm
--- a/mpn/arm/v6t2/mode1o.asm Thu Apr 04 23:43:28 2013 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-dnl ARM mpn_modexact_1c_odd
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 9
-C Cortex-A15 7
-
-define(`up', `r0')
-define(`n', `r1')
-define(`d', `r2')
-define(`cy', `r3')
-
- .protected binvert_limb_table
-ASM_START()
-PROLOGUE(mpn_modexact_1c_odd)
- stmfd sp!, {r4, r5, r6, r7}
-
- LEA( r4, binvert_limb_table)
-
- ldr r6, [up], #4 C up[0]
-
- ubfx r12, d, #1, #7
- ldrb r4, [r4, r12]
- smulbb r12, r4, r4
- mul r12, d, r12
- rsb r12, r12, r4, asl #1
- mul r4, r12, r12
- mul r4, d, r4
- rsb r4, r4, r12, asl #1 C r4 = inverse
-
- subs n, n, #1
- sub r6, r6, cy
- mul r6, r6, r4
- beq L(end)
-
- rsb r5, r4, #0 C r5 = -inverse
-
-L(top): ldr r7, [up], #4
More information about the gmp-commit
mailing list