[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Feb 26 17:05:53 CET 2011
details: /var/hg/gmp/rev/ef472f66a4e0
changeset: 13909:ef472f66a4e0
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 26 17:04:48 2011 +0100
description:
From Marco: Optimise non-loop code.
details: /var/hg/gmp/rev/4a38cf2fd7c6
changeset: 13910:4a38cf2fd7c6
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 26 17:04:55 2011 +0100
description:
*** empty log message ***
details: /var/hg/gmp/rev/3588847580e7
changeset: 13911:3588847580e7
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Feb 26 17:05:50 2011 +0100
description:
Trivial merge.
diffstat:
ChangeLog | 9 +++++
mpn/generic/mod_1_1.c | 5 +++
mpn/x86/atom/sse2/aorsmul_1.asm | 68 ++++++++++++++++++----------------------
3 files changed, 45 insertions(+), 37 deletions(-)
diffs (148 lines):
diff -r 3ea54f250ae5 -r 3588847580e7 ChangeLog
--- a/ChangeLog Fri Feb 25 21:42:43 2011 +0100
+++ b/ChangeLog Sat Feb 26 17:05:50 2011 +0100
@@ -1,3 +1,12 @@
+2011-02-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/aorsmul_1.asm: Optimise non-loop code.
+
+2011-02-26 Torbjorn Granlund <tege at gmplib.org>
+
+ * mpn/powerpc64/mode64/aorsmul_1.asm: Add missing MULFUNC_PROLOGUE.
+ * mpn/m68k/mc68020/aorsmul_1.asm: Likewise.
+
2011-02-25 Torbjorn Granlund <tege at gmplib.org>
* mpn/x86/atom/sse2/aorsmul_1.asm: New file.
diff -r 3ea54f250ae5 -r 3588847580e7 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c Fri Feb 25 21:42:43 2011 +0100
+++ b/mpn/generic/mod_1_1.c Sat Feb 26 17:05:50 2011 +0100
@@ -50,6 +50,11 @@
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
+ /* In the normalized case, this can be simplified to
+ *
+ * B2modb = - b * bi;
+ * ASSERT (B2modb <= b); // NB: equality iff b = B/2
+ */
udiv_rnd_preinv (B2modb, B1modb, b, bi);
cps[3] = B2modb >> cnt;
}
diff -r 3ea54f250ae5 -r 3588847580e7 mpn/x86/atom/sse2/aorsmul_1.asm
--- a/mpn/x86/atom/sse2/aorsmul_1.asm Fri Feb 25 21:42:43 2011 +0100
+++ b/mpn/x86/atom/sse2/aorsmul_1.asm Sat Feb 26 17:05:50 2011 +0100
@@ -1,6 +1,6 @@
dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
-dnl Contributed to the GNU project by Torbjorn Granlund.
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
dnl
dnl Copyright 2011 Free Software Foundation, Inc.
dnl
@@ -62,51 +62,46 @@
push %ebx
mov 16(%esp), rp
mov 20(%esp), up
- mov 24(%esp), n
+ mov 24(%esp), %eax
movd 28(%esp), %mm7
- mov n, %eax
+ mov %eax, n
+ and $1, %eax
+ jz L(fi0or2)
+ movd (up), %mm1
+ pmuludq %mm7, %mm1
shr $2, n
- and $3, %eax
- jz L(fi0)
- cmp $2, %eax
- jc L(fi1)
- jz L(fi2)
+ jnc L(fi1)
-L(fi3): lea -12(rp), rp
- movd (up), %mm1
- lea 4(up), up
- pmuludq %mm7, %mm1
+L(fi3): lea 4(up), up
+ lea -12(rp), rp
movd %mm1, %ebx
+ add $1, n C increment and clear carry
movd (up), %mm0
- inc n
jmp L(lo3)
-L(fi0): lea -8(rp), rp
+L(fi1): lea -4(rp), rp
+ movd %mm1, %ebx
+ jz L(wd1)
+ movd 4(up), %mm0
+ lea -4(up), up
+ pmuludq %mm7, %mm0
+ jmp L(lo1)
+
+L(fi0or2):
movd (up), %mm0
+ pmuludq %mm7, %mm0
+ shr $2, n
+ movd 4(up), %mm1
+ jc L(fi2)
lea -8(up), up
- pmuludq %mm7, %mm0
+ lea -8(rp), rp
movd %mm0, %eax
- movd 12(up), %mm1
pmuludq %mm7, %mm1
jmp L(lo0)
-L(fi1): lea -4(rp), rp
- movd (up), %mm1
- lea -4(up), up
- pmuludq %mm7, %mm1
- movd %mm1, %ebx
- test n, n
- jz L(wd1)
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- jmp L(lo1)
-
-L(fi2): movd (up), %mm0
- pmuludq %mm7, %mm0
- movd 4(up), %mm1
+L(fi2): test n, n C clear carry
movd %mm0, %eax
pmuludq %mm7, %mm1
- test n, n
jnz L(lo2)
jmp L(wd2)
@@ -148,7 +143,7 @@
movd 4(up), %mm1
jnz L(top)
-L(end): adc $0, %edx
+L(end): adc n, %edx C n is zero here
ADDSUB %ebx, 12(rp)
movd %mm0, %eax
pmuludq %mm7, %mm1
@@ -157,15 +152,14 @@
adc %edx, %eax
movd %mm0, %edx
movd %mm1, %ebx
- adc $0, %edx
+ adc n, %edx
ADDSUB %eax, (rp)
L(wd1): psrlq $32, %mm1
adc %edx, %ebx
- movd %mm1, %edx
- adc $0, %edx
+ movd %mm1, %eax
+ adc n, %eax
ADDSUB %ebx, 4(rp)
- mov %edx, %eax
- adc $0, %eax
+ adc n, %eax
emms
pop %ebx
pop %esi
More information about the gmp-commit
mailing list