[Gmp-commit] /var/hg/gmp: 3 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Mon Mar 26 08:01:38 CEST 2012


details:   /var/hg/gmp/rev/4cc4e90dbfa8
changeset: 14778:4cc4e90dbfa8
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Mar 26 07:57:34 2012 +0200
description:
mpn/x86_64/gcd_1.asm: Reduce latency.

details:   /var/hg/gmp/rev/9099d85fa5ab
changeset: 14779:9099d85fa5ab
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Mar 26 07:58:19 2012 +0200
description:
mpn/x86_64/mul_basecase.asm: Save one jump.

details:   /var/hg/gmp/rev/0e5486f3204c
changeset: 14780:0e5486f3204c
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Mon Mar 26 08:01:31 2012 +0200
description:
mpz/iset_ui.c: Don't realloc.

diffstat:

 ChangeLog                   |   7 ++++++
 mpn/x86_64/gcd_1.asm        |  48 +++++++++++++++++++++++---------------------
 mpn/x86_64/mul_basecase.asm |  15 ++++++-------
 mpz/iset_ui.c               |  17 +++++++++------
 4 files changed, 49 insertions(+), 38 deletions(-)

diffs (168 lines):

diff -r 0b2e01fac6b0 -r 0e5486f3204c ChangeLog
--- a/ChangeLog	Wed Mar 21 15:53:02 2012 +0100
+++ b/ChangeLog	Mon Mar 26 08:01:31 2012 +0200
@@ -1,3 +1,10 @@
+2012-03-26 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpn/x86_64/gcd_1.asm: Reduce latency.
+	* mpn/x86_64/mul_basecase.asm: Save one jump.
+
+	* mpz/iset_ui.c: Don't realloc.
+
 2012-03-20 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* mp_clz_tab.c: Add __clz_tab[128].
diff -r 0b2e01fac6b0 -r 0e5486f3204c mpn/x86_64/gcd_1.asm
--- a/mpn/x86_64/gcd_1.asm	Wed Mar 21 15:53:02 2012 +0100
+++ b/mpn/x86_64/gcd_1.asm	Mon Mar 26 08:01:31 2012 +0200
@@ -67,8 +67,8 @@
 PROLOGUE(mpn_gcd_1)
 	DOS64_ENTRY(3)
 	mov	(up), %rax		C U low limb
+	mov	$-1, R32(%rcx)
 	or	v0, %rax		C x | y
-	mov	$-1, R32(%rcx)
 
 L(twos):
 	inc	R32(%rcx)
@@ -76,17 +76,13 @@
 	jnc	L(twos)
 
 	shr	R8(%rcx), v0
-	mov	R32(%rcx), R32(%rax)	C common twos
+	push	%rcx			C common twos
 
 L(divide_strip_y):
 	shr	v0
 	jnc	L(divide_strip_y)
 	adc	v0, v0
 
-	push	%rax
-	push	v0
-	sub	$8, %rsp		C maintain ABI required rsp alignment
-
 	cmp	$1, n
 	jnz	L(reduce_nby1)
 
@@ -95,10 +91,31 @@
 	mov	%r8, %rax
 	shr	$BMOD_THRES_LOG2, %r8
 	cmp	%r8, v0
-	ja	L(reduced)
-	jmp	L(bmod)
+	ja	L(noreduce)
+	push	v0
+	sub	$8, %rsp		C maintain ABI required rsp alignment
+
+L(bmod):
+IFDOS(`	mov	%rdx, %r8	')
+IFDOS(`	mov	%rsi, %rdx	')
+IFDOS(`	mov	%rdi, %rcx	')
+	CALL(	mpn_modexact_1_odd)
+
+L(reduced):
+	add	$8, %rsp
+	pop	%rdx
+
+L(noreduce):
+	LEA(	ctz_table, %rsi)
+	test	%rax, %rax
+	mov	%rax, %rcx
+	jnz	L(mid)
+	jmp	L(end)
 
 L(reduce_nby1):
+	push	v0
+	sub	$8, %rsp		C maintain ABI required rsp alignment
+
 	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
 	jl	L(bmod)
 IFDOS(`	mov	%rdx, %r8	')
@@ -106,21 +123,6 @@
 IFDOS(`	mov	%rdi, %rcx	')
 	CALL(	mpn_mod_1)
 	jmp	L(reduced)
-L(bmod):
-IFDOS(`	mov	%rdx, %r8	')
-IFDOS(`	mov	%rsi, %rdx	')
-IFDOS(`	mov	%rdi, %rcx	')
-	CALL(	mpn_modexact_1_odd)
-L(reduced):
-
-	add	$8, %rsp
-	pop	%rdx
-
-	LEA(	ctz_table, %rsi)
-	test	%rax, %rax
-	mov	%rax, %rcx
-	jnz	L(mid)
-	jmp	L(end)
 
 	ALIGN(16)			C               K8    BC    P4    NHM   SBR
 L(top):	cmovc	%rcx, %rax		C if x-y < 0	0
diff -r 0b2e01fac6b0 -r 0e5486f3204c mpn/x86_64/mul_basecase.asm
--- a/mpn/x86_64/mul_basecase.asm	Wed Mar 21 15:53:02 2012 +0100
+++ b/mpn/x86_64/mul_basecase.asm	Mon Mar 26 08:01:31 2012 +0200
@@ -100,7 +100,13 @@
 	cmp	$2, R32(w0)
 	jc	L(mul_1_prologue_1)
 	jz	L(mul_1_prologue_2)
-	jmp	L(mul_1_prologue_3)
+
+L(mul_1_prologue_3):
+	add	$-1, n
+	lea	L(addmul_outer_3)(%rip), outer_addr
+	mov	%rax, w3
+	mov	%rdx, w0
+	jmp	L(mul_1_entry_3)
 
 L(mul_1_prologue_0):
 	mov	%rax, w2
@@ -132,13 +138,6 @@
 	xor	R32(w3), R32(w3)
 	jmp	L(mul_1_entry_2)
 
-L(mul_1_prologue_3):
-	add	$-1, n
-	lea	L(addmul_outer_3)(%rip), outer_addr
-	mov	%rax, w3
-	mov	%rdx, w0
-	jmp	L(mul_1_entry_3)
-
 
 	C this loop is 10 c/loop = 2.5 c/l on K8, for all up/rp alignments
 
diff -r 0b2e01fac6b0 -r 0e5486f3204c mpz/iset_ui.c
--- a/mpz/iset_ui.c	Wed Mar 21 15:53:02 2012 +0100
+++ b/mpz/iset_ui.c	Mon Mar 26 08:01:31 2012 +0200
@@ -27,20 +27,23 @@
 {
   mp_size_t size;
 
-  ALLOC (dest) = 1;
-  PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-
-  PTR (dest)[0] = val & GMP_NUMB_MASK;
-  size = val != 0;
-
 #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
   if (val > GMP_NUMB_MAX)
     {
-      MPZ_REALLOC (dest, 2);
+      ALLOC (dest) = 2;
+      PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB*2);
       PTR (dest)[1] = val >> GMP_NUMB_BITS;
       size = 2;
     }
+  else
 #endif
+    {
+      ALLOC (dest) = 1;
+      PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+
+      size = val != 0;
+    }
+  PTR (dest)[0] = val & GMP_NUMB_MASK;
 
   SIZ (dest) = size;
 }


More information about the gmp-commit mailing list