[Gmp-commit] /var/hg/gmp: 7 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Jul 15 13:08:15 CEST 2013
details: /var/hg/gmp/rev/86ae171eb987
changeset: 15865:86ae171eb987
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jul 13 22:55:49 2013 +0200
description:
Provide Atom/32 cnd_add_n and cnd_sub_n.
details: /var/hg/gmp/rev/e642c1816a23
changeset: 15866:e642c1816a23
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jul 13 23:36:25 2013 +0200
description:
Remove dead ptr update.
details: /var/hg/gmp/rev/483551488a65
changeset: 15867:483551488a65
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Jul 14 00:40:10 2013 +0200
description:
Remove explicit nop after CALL.
details: /var/hg/gmp/rev/a26b2d1f8d47
changeset: 15868:a26b2d1f8d47
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Jul 14 00:41:42 2013 +0200
description:
Minor layout fix.
details: /var/hg/gmp/rev/fefeaebaacdf
changeset: 15869:fefeaebaacdf
user: Torbjorn Granlund <tege at gmplib.org>
date: Sun Jul 14 00:42:38 2013 +0200
description:
Add more CPU types to table.
details: /var/hg/gmp/rev/25f10d63513b
changeset: 15870:25f10d63513b
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Jul 15 13:07:37 2013 +0200
description:
Compute inverse as floor(B^2/(dh+1)), per Niels' suggestion.
Remove inverse rounding-up code.
details: /var/hg/gmp/rev/78b8732e93ae
changeset: 15871:78b8732e93ae
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Jul 15 13:08:07 2013 +0200
description:
ChangeLog
diffstat:
ChangeLog | 27 +++++++++
mpn/arm64/aors_n.asm | 2 +-
mpn/generic/sb_div_sec.c | 15 +----
mpn/generic/sbpi1_div_sec.c | 3 -
mpn/powerpc64/mode64/divrem_1.asm | 1 -
mpn/powerpc64/mode64/divrem_2.asm | 1 -
mpn/powerpc64/mode64/gcd_1.asm | 2 +-
mpn/powerpc64/mode64/mod_1_1.asm | 1 -
mpn/powerpc64/mode64/mod_1_4.asm | 1 -
mpn/powerpc64/mode64/p7/gcd_1.asm | 2 +-
mpn/x86/atom/cnd_add_n.asm | 102 ++++++++++++++++++++++++++++++++++
mpn/x86/atom/cnd_sub_n.asm | 113 ++++++++++++++++++++++++++++++++++++++
mpn/x86_64/aorrlsh1_n.asm | 5 +-
13 files changed, 252 insertions(+), 23 deletions(-)
diffs (truncated from 392 to 300 lines):
diff -r 8de68af9fb4b -r 78b8732e93ae ChangeLog
--- a/ChangeLog Fri Jul 12 12:21:42 2013 +0200
+++ b/ChangeLog Mon Jul 15 13:08:07 2013 +0200
@@ -1,3 +1,30 @@
+2013-07-15 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/sb_div_sec.c: Compute inverse as floor(B^2/(dh+1)), per
+ Niels' suggestion.
+ * mpn/generic/sbpi1_div_sec.c: Remove inverse rounding-up code.
+
+2013-07-14 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/mode64/divrem_1.asm: Remove explicit nop after CALL.
+ * mpn/powerpc64/mode64/divrem_2.asm: Likewise.
+ * mpn/powerpc64/mode64/mod_1_1.asm: Likewise.
+ * mpn/powerpc64/mode64/mod_1_4.asm: Likewise.
+
+2013-07-13 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86/atom/cnd_add_n.asm: New file.
+ * mpn/x86/atom/cnd_sub_n.asm: New file.
+
+2013-07-12 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/generic/sbpi1_div_sec.c: Partial rewrite.
+
+2013-07-11 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/x86_64/cnd_aors_n.asm: Tweak for better speed on K8, bobcat, bd1,
+ NHM, Atom.
+
2013-07-05 Torbjorn Granlund <tege at gmplib.org>
* mpn/powerpc64/p7/copyi.asm: Handle n = 0.
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/arm64/aors_n.asm
--- a/mpn/arm64/aors_n.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/arm64/aors_n.asm Mon Jul 15 13:08:07 2013 +0200
@@ -81,7 +81,7 @@
ADDSUBC x9, x5, x7
cbnz n, L(top)
-L(end): stp x8, x9, [rp],#16
+L(end): stp x8, x9, [rp]
L(rt): RETVAL
ret
EPILOGUE()
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/generic/sb_div_sec.c
--- a/mpn/generic/sb_div_sec.c Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/generic/sb_div_sec.c Mon Jul 15 13:08:07 2013 +0200
@@ -81,18 +81,9 @@
np2 = np;
}
- if (dn == 1)
- {
- d0 = dp2[dn - 1];
- invert_limb (inv32, d0);
- }
- else
- {
- d1 = dp2[dn - 1];
- d0 = dp2[dn - 2];
- invert_pi1 (dinv, d1, d0);
- inv32 = dinv.inv32;
- }
+ d0 = dp2[dn - 1];
+ d0 += (~d0 != 0);
+ invert_limb (inv32, d0);
/* We add nn + dn to tp here, not nn + 1 + dn, as expected. This is since nn
here will have been incremented. */
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/generic/sbpi1_div_sec.c
--- a/mpn/generic/sbpi1_div_sec.c Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/generic/sbpi1_div_sec.c Mon Jul 15 13:08:07 2013 +0200
@@ -94,9 +94,6 @@
#endif
}
- /* Decrement inverse to keep quotient half limbs from being too large. */
- dinv -= dinv != 0; /* FIXME: cmp-to-int */
-
/* Create a divisor copy shifted half a limb. */
hp = tp; /* (dn + 1) limbs */
hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/divrem_1.asm
--- a/mpn/powerpc64/mode64/divrem_1.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/divrem_1.asm Mon Jul 15 13:08:07 2013 +0200
@@ -97,7 +97,6 @@
sld r31, r31, r27
mr r3, r30
CALL( mpn_invert_limb)
- nop
beq- cr4, L(110)
sldi r9, r28, 3
addic. r6, r28, -2
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/divrem_2.asm
--- a/mpn/powerpc64/mode64/divrem_2.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/divrem_2.asm Mon Jul 15 13:08:07 2013 +0200
@@ -96,7 +96,6 @@
blt cr0, L(18)
mr r3, r30
CALL( mpn_invert_limb)
- nop
mulld r10, r3, r30
mulhdu r0, r3, r28
addc r8, r10, r28
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/gcd_1.asm
--- a/mpn/powerpc64/mode64/gcd_1.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/gcd_1.asm Mon Jul 15 13:08:07 2013 +0200
@@ -20,7 +20,7 @@
include(`../config.m4')
-C cycles/bit (approx)
+C cycles/bit (approx)
C POWER3/PPC630 ?
C POWER4/PPC970 8.5
C POWER5 ?
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/mod_1_1.asm
--- a/mpn/powerpc64/mode64/mod_1_1.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/mod_1_1.asm Mon Jul 15 13:08:07 2013 +0200
@@ -116,7 +116,6 @@
sld r30, r4, r31
mr r3, r30
CALL( mpn_invert_limb)
- nop
cmpdi cr7, r31, 0
neg r0, r30
beq- cr7, L(13)
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/mod_1_4.asm
--- a/mpn/powerpc64/mode64/mod_1_4.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/mod_1_4.asm Mon Jul 15 13:08:07 2013 +0200
@@ -195,7 +195,6 @@
sld r30, r4, r31
mr r3, r30
CALL( mpn_invert_limb)
- nop
subfic r9, r31, 64
li r10, 1
sld r10, r10, r31
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/powerpc64/mode64/p7/gcd_1.asm
--- a/mpn/powerpc64/mode64/p7/gcd_1.asm Fri Jul 12 12:21:42 2013 +0200
+++ b/mpn/powerpc64/mode64/p7/gcd_1.asm Mon Jul 15 13:08:07 2013 +0200
@@ -20,7 +20,7 @@
include(`../config.m4')
-C cycles/bit (approx)
+C cycles/bit (approx)
C POWER3/PPC630 -
C POWER4/PPC970 -
C POWER5 -
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/x86/atom/cnd_add_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/cnd_add_n.asm Mon Jul 15 13:08:07 2013 +0200
@@ -0,0 +1,102 @@
+dnl X86 mpn_cnd_add_n optimised for Intel Atom.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) ?
+C P4 model 3-4 (Prescott) ?
+C Intel atom 4.67
+C AMD K6 ?
+C AMD K7 ?
+C AMD K8 ?
+
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`vp', `%ebp')
+define(`n', `%ecx')
+define(`cnd', `20(%esp)')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_cnd_add_n)
+ push %edi
+ push %esi
+ push %ebx
+ push %ebp
+
+ mov cnd, %eax C make cnd into a mask (1)
+ mov 24(%esp), rp
+ neg %eax C make cnd into a mask (1)
+ mov 28(%esp), up
+ sbb %eax, %eax C make cnd into a mask (1)
+ mov 32(%esp), vp
+ mov %eax, cnd C make cnd into a mask (1)
+ mov 36(%esp), n
+
+ xor %edx, %edx
+
+ shr $1, n
+ jnc L(top)
+
+ mov 0(vp), %eax
+ and cnd, %eax
+ lea 4(vp), vp
+ add 0(up), %eax
+ lea 4(rp), rp
+ lea 4(up), up
+ sbb %edx, %edx
+ mov %eax, -4(rp)
+ inc n
+ dec n
+ je L(end)
+
+L(top): sbb %edx, %edx
+ mov 0(vp), %eax
+ and cnd, %eax
+ lea 8(vp), vp
+ lea 8(rp), rp
+ mov -4(vp), %ebx
+ and cnd, %ebx
+ add %edx, %edx
+ adc 0(up), %eax
+ lea 8(up), up
+ mov %eax, -8(rp)
+ adc -4(up), %ebx
+ dec n
+ mov %ebx, -4(rp)
+ jne L(top)
+
+L(end): mov $0, %eax
+ adc %eax, %eax
+
+ pop %ebp
+ pop %ebx
+ pop %esi
+ pop %edi
+ ret
+EPILOGUE()
+ASM_END()
diff -r 8de68af9fb4b -r 78b8732e93ae mpn/x86/atom/cnd_sub_n.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86/atom/cnd_sub_n.asm Mon Jul 15 13:08:07 2013 +0200
@@ -0,0 +1,113 @@
+dnl X86 mpn_cnd_sub_n optimised for Intel Atom.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) ?
+C P4 model 3-4 (Prescott) ?
+C Intel atom 5.67
+C AMD K6 ?
+C AMD K7 ?
+C AMD K8 ?
+
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`vp', `%ebp')
More information about the gmp-commit mailing list