[Gmp-commit] /var/hg/gmp: 3 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Mon Mar 26 08:01:38 CEST 2012
details: /var/hg/gmp/rev/4cc4e90dbfa8
changeset: 14778:4cc4e90dbfa8
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Mar 26 07:57:34 2012 +0200
description:
mpn/x86_64/gcd_1.asm: Reduce latency.
details: /var/hg/gmp/rev/9099d85fa5ab
changeset: 14779:9099d85fa5ab
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Mar 26 07:58:19 2012 +0200
description:
mpn/x86_64/mul_basecase.asm: Save one jump.
details: /var/hg/gmp/rev/0e5486f3204c
changeset: 14780:0e5486f3204c
user: Marco Bodrato <bodrato at mail.dm.unipi.it>
date: Mon Mar 26 08:01:31 2012 +0200
description:
mpz/iset_ui.c: Don't realloc.
diffstat:
ChangeLog | 7 ++++++
mpn/x86_64/gcd_1.asm | 48 +++++++++++++++++++++++---------------------
mpn/x86_64/mul_basecase.asm | 15 ++++++-------
mpz/iset_ui.c | 17 +++++++++------
4 files changed, 49 insertions(+), 38 deletions(-)
diffs (168 lines):
diff -r 0b2e01fac6b0 -r 0e5486f3204c ChangeLog
--- a/ChangeLog Wed Mar 21 15:53:02 2012 +0100
+++ b/ChangeLog Mon Mar 26 08:01:31 2012 +0200
@@ -1,3 +1,10 @@
+2012-03-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+ * mpn/x86_64/gcd_1.asm: Reduce latency.
+ * mpn/x86_64/mul_basecase.asm: Save one jump.
+
+ * mpz/iset_ui.c: Don't realloc.
+
2012-03-20 Marco Bodrato <bodrato at mail.dm.unipi.it>
* mp_clz_tab.c: Add __clz_tab[128].
diff -r 0b2e01fac6b0 -r 0e5486f3204c mpn/x86_64/gcd_1.asm
--- a/mpn/x86_64/gcd_1.asm Wed Mar 21 15:53:02 2012 +0100
+++ b/mpn/x86_64/gcd_1.asm Mon Mar 26 08:01:31 2012 +0200
@@ -67,8 +67,8 @@
PROLOGUE(mpn_gcd_1)
DOS64_ENTRY(3)
mov (up), %rax C U low limb
+ mov $-1, R32(%rcx)
or v0, %rax C x | y
- mov $-1, R32(%rcx)
L(twos):
inc R32(%rcx)
@@ -76,17 +76,13 @@
jnc L(twos)
shr R8(%rcx), v0
- mov R32(%rcx), R32(%rax) C common twos
+ push %rcx C common twos
L(divide_strip_y):
shr v0
jnc L(divide_strip_y)
adc v0, v0
- push %rax
- push v0
- sub $8, %rsp C maintain ABI required rsp alignment
-
cmp $1, n
jnz L(reduce_nby1)
@@ -95,10 +91,31 @@
mov %r8, %rax
shr $BMOD_THRES_LOG2, %r8
cmp %r8, v0
- ja L(reduced)
- jmp L(bmod)
+ ja L(noreduce)
+ push v0
+ sub $8, %rsp C maintain ABI required rsp alignment
+
+L(bmod):
+IFDOS(` mov %rdx, %r8 ')
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
+ CALL( mpn_modexact_1_odd)
+
+L(reduced):
+ add $8, %rsp
+ pop %rdx
+
+L(noreduce):
+ LEA( ctz_table, %rsi)
+ test %rax, %rax
+ mov %rax, %rcx
+ jnz L(mid)
+ jmp L(end)
L(reduce_nby1):
+ push v0
+ sub $8, %rsp C maintain ABI required rsp alignment
+
cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
jl L(bmod)
IFDOS(` mov %rdx, %r8 ')
@@ -106,21 +123,6 @@
IFDOS(` mov %rdi, %rcx ')
CALL( mpn_mod_1)
jmp L(reduced)
-L(bmod):
-IFDOS(` mov %rdx, %r8 ')
-IFDOS(` mov %rsi, %rdx ')
-IFDOS(` mov %rdi, %rcx ')
- CALL( mpn_modexact_1_odd)
-L(reduced):
-
- add $8, %rsp
- pop %rdx
-
- LEA( ctz_table, %rsi)
- test %rax, %rax
- mov %rax, %rcx
- jnz L(mid)
- jmp L(end)
ALIGN(16) C K8 BC P4 NHM SBR
L(top): cmovc %rcx, %rax C if x-y < 0 0
diff -r 0b2e01fac6b0 -r 0e5486f3204c mpn/x86_64/mul_basecase.asm
--- a/mpn/x86_64/mul_basecase.asm Wed Mar 21 15:53:02 2012 +0100
+++ b/mpn/x86_64/mul_basecase.asm Mon Mar 26 08:01:31 2012 +0200
@@ -100,7 +100,13 @@
cmp $2, R32(w0)
jc L(mul_1_prologue_1)
jz L(mul_1_prologue_2)
- jmp L(mul_1_prologue_3)
+
+L(mul_1_prologue_3):
+ add $-1, n
+ lea L(addmul_outer_3)(%rip), outer_addr
+ mov %rax, w3
+ mov %rdx, w0
+ jmp L(mul_1_entry_3)
L(mul_1_prologue_0):
mov %rax, w2
@@ -132,13 +138,6 @@
xor R32(w3), R32(w3)
jmp L(mul_1_entry_2)
-L(mul_1_prologue_3):
- add $-1, n
- lea L(addmul_outer_3)(%rip), outer_addr
- mov %rax, w3
- mov %rdx, w0
- jmp L(mul_1_entry_3)
-
C this loop is 10 c/loop = 2.5 c/l on K8, for all up/rp alignments
diff -r 0b2e01fac6b0 -r 0e5486f3204c mpz/iset_ui.c
--- a/mpz/iset_ui.c Wed Mar 21 15:53:02 2012 +0100
+++ b/mpz/iset_ui.c Mon Mar 26 08:01:31 2012 +0200
@@ -27,20 +27,23 @@
{
mp_size_t size;
- ALLOC (dest) = 1;
- PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-
- PTR (dest)[0] = val & GMP_NUMB_MASK;
- size = val != 0;
-
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
if (val > GMP_NUMB_MAX)
{
- MPZ_REALLOC (dest, 2);
+ ALLOC (dest) = 2;
+ PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB*2);
PTR (dest)[1] = val >> GMP_NUMB_BITS;
size = 2;
}
+ else
#endif
+ {
+ ALLOC (dest) = 1;
+ PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+
+ size = val != 0;
+ }
+ PTR (dest)[0] = val & GMP_NUMB_MASK;
SIZ (dest) = size;
}
More information about the gmp-commit mailing list