[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Sat Apr 9 13:25:27 CEST 2011


details:   /var/hg/gmp/rev/b58b2ec6af9c
changeset: 14116:b58b2ec6af9c
user:      Niels Möller <nisse at lysator.liu.se>
date:      Wed Mar 30 23:06:19 2011 +0200
description:
Added TODO comment.

details:   /var/hg/gmp/rev/6617eb8e4b0f
changeset: 14117:6617eb8e4b0f
user:      Niels Möller <nisse at lysator.liu.se>
date:      Wed Mar 30 23:08:12 2011 +0200
description:
Moved mpn_div_qr_2u_pi1 to a separate file.

details:   /var/hg/gmp/rev/4b7ba5f71347
changeset: 14118:4b7ba5f71347
user:      Niels Möller <nisse at lysator.liu.se>
date:      Wed Mar 30 23:13:48 2011 +0200
description:
x86_64 implementation of mpn_div_qr_2u_pi1.

details:   /var/hg/gmp/rev/1f26709da9c1
changeset: 14119:1f26709da9c1
user:      Niels Möller <nisse at lysator.liu.se>
date:      Sat Apr 09 13:25:22 2011 +0200
description:
Fixed include guard #endif

diffstat:

 ChangeLog                    |   17 +++-
 configure.in                 |    2 +-
 gmp-impl.h                   |    3 +
 mpn/generic/div_qr_2.c       |   32 -------
 mpn/generic/div_qr_2u_pi1.c  |   66 +++++++++++++++
 mpn/x86_64/div_qr_2n_pi1.asm |    5 +
 mpn/x86_64/div_qr_2u_pi1.asm |  189 +++++++++++++++++++++++++++++++++++++++++++
 tune/speed.h                 |    6 +-
 8 files changed, 283 insertions(+), 37 deletions(-)

diffs (truncated from 398 to 300 lines):

diff -r 0594e395043e -r 1f26709da9c1 ChangeLog
--- a/ChangeLog	Wed Mar 30 14:09:06 2011 +0200
+++ b/ChangeLog	Sat Apr 09 13:25:22 2011 +0200
@@ -1,5 +1,20 @@
 2011-03-30  Niels Möller  <nisse at lysator.liu.se>
 
+	* mpn/x86_64/div_qr_2u_pi1.asm: New file.
+
+	* configure.in (gmp_mpn_functions): Add div_qr_2u_pi1.
+
+	* gmp-impl.h (mpn_div_qr_2u_pi1): Declare.
+
+	* mpn/generic/div_qr_2u_pi1.c (mpn_div_qr_2u_pi1): Moved to
+	separate file, from...
+	* mpn/generic/div_qr_2.c: ... old location.
+
+	* mpn/generic/div_qr_2n_pi1.c: Renamed file, from...
+	* mpn/generic/div_qr_2_pi1_norm.c: ...old name.
+	* mpn/x86_64/div_qr_2n_pi1.asm: Renamed file, from...
+	* mpn/x86_64/div_qr_2_pi1_norm.asm: ...old name.
+
 	* gmp-impl.h (mpn_div_qr_2n_pi1): Use new name in declaration.
 	* tune/speed.h (speed_mpn_div_qr_2n): Likewise.
 	(speed_mpn_div_qr_2u): Likewise.
@@ -70,7 +85,7 @@
 
 	* gmp-impl.h (mpn_div_qr_2_pi1_norm): Declare.
 
-	* configure.in: Added div_qr_2_pi1_norm.
+	* configure.in (gmp_mpn_functions): Added div_qr_2_pi1_norm.
 
 2011-03-22  Torbjorn Granlund  <tege at gmplib.org>
 
diff -r 0594e395043e -r 1f26709da9c1 configure.in
--- a/configure.in	Wed Mar 30 14:09:06 2011 +0200
+++ b/configure.in	Sat Apr 09 13:25:22 2011 +0200
@@ -2547,7 +2547,7 @@
   toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts	   \
   toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts	   \
   invertappr invert binvert mulmod_bnm1 sqrmod_bnm1			   \
-  div_qr_2 div_qr_2n_pi1						   \
+  div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1					   \
   sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q				   \
   dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q				   \
   mu_div_qr mu_divappr_q mu_div_q					   \
diff -r 0594e395043e -r 1f26709da9c1 gmp-impl.h
--- a/gmp-impl.h	Wed Mar 30 14:09:06 2011 +0200
+++ b/gmp-impl.h	Sat Apr 09 13:25:22 2011 +0200
@@ -1298,6 +1298,9 @@
 #define   mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
   __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t));
 
+#define   mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t));
+
 #define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
 
diff -r 0594e395043e -r 1f26709da9c1 mpn/generic/div_qr_2.c
--- a/mpn/generic/div_qr_2.c	Wed Mar 30 14:09:06 2011 +0200
+++ b/mpn/generic/div_qr_2.c	Sat Apr 09 13:25:22 2011 +0200
@@ -268,38 +268,6 @@
   return qh;
 }
 
-static mp_limb_t
-mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
-		   mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
-{
-  mp_limb_t qh;
-  mp_limb_t r2, r1, r0;
-  mp_size_t i;
-
-  ASSERT (nn >= 2);
-  ASSERT (d1 & GMP_NUMB_HIGHBIT);
-  ASSERT (shift > 0);
-
-  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
-  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
-  r0 = np[nn-2] << shift;
-
-  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
-
-  for (i = nn - 2 - 1; i >= 0; i--)
-    {
-      mp_limb_t q;
-      r0 = np[i];
-      r1 |= r0 >> (GMP_LIMB_BITS - shift);
-      r0 <<= shift;
-      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
-      qp[i] = q;
-    }
-
-  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
-  rp[1] = r2 >> shift;
-  return qh;
-}
 
 /* Divide num {np,nn} by den {dp,2} and write the nn-2 least
    significant quotient limbs at qp and the 2 long remainder at np.
diff -r 0594e395043e -r 1f26709da9c1 mpn/generic/div_qr_2u_pi1.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_2u_pi1.c	Sat Apr 09 13:25:22 2011 +0200
@@ -0,0 +1,66 @@
+/* mpn_div_qr_2u_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor: {np,nn} is shifted left on the fly.  */
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+  mp_limb_t qh;			/* high quotient limb, returned */
+  mp_limb_t r2, r1, r0;		/* current three-limb partial remainder */
+  mp_size_t i;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);	/* same precondition as before the move */
+  ASSERT (shift > 0);		/* 0 would shift by GMP_LIMB_BITS below */
+
+  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
+  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
+  r0 = np[nn-2] << shift;
+
+  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      r0 = np[i];
+      r1 |= r0 >> (GMP_LIMB_BITS - shift);	/* top bits of np[i] */
+      r0 <<= shift;
+      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+      qp[i] = q;
+    }
+
+  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift)); /* unshift */
+  rp[1] = r2 >> shift;
+
+  return qh;
+}
diff -r 0594e395043e -r 1f26709da9c1 mpn/x86_64/div_qr_2n_pi1.asm
--- a/mpn/x86_64/div_qr_2n_pi1.asm	Wed Mar 30 14:09:06 2011 +0200
+++ b/mpn/x86_64/div_qr_2n_pi1.asm	Sat Apr 09 13:25:22 2011 +0200
@@ -40,6 +40,11 @@
 define(`t0',		`%r14')
 define(`md1',		`%r15')
 
+C TODO
+C * Store qh in the same stack slot as di_param, instead of pushing
+C   it. (we could put it in register %rbp, but then we would need to
+C   save and restore that instead, which doesn't seem like a win).
+	
 ASM_START()
 	TEXT
 	ALIGN(16)
diff -r 0594e395043e -r 1f26709da9c1 mpn/x86_64/div_qr_2u_pi1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/div_qr_2u_pi1.asm	Sat Apr 09 13:25:22 2011 +0200
@@ -0,0 +1,189 @@
+dnl  x86-64 mpn_div_qr_2u_pi1
+dnl  -- Divide an mpn number by an unnormalized 2-limb number,
+dnl     using a single-limb inverse and shifting the dividend on the fly.
+
+dnl  Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C		c/l
+C INPUT PARAMETERS
+define(`qp',		`%rdi')
+define(`rp',		`%rsi')
+define(`up_param',	`%rdx')
+define(`un_param',	`%rcx') dnl %rcx needed for shift count
+define(`d1',		`%r8')
+define(`d0',		`%r9')
+define(`shift_param',	`FRAME+8(%rsp)')
+define(`di_param',	`FRAME+16(%rsp)')
+
+define(`di',		`%r10')
+define(`up',		`%r11')
+define(`un',		`%rbp')
+define(`u2',		`%rbx')
+define(`u1',		`%r12')
+define(`u0',		`%rsi') dnl Same as rp, which is saved and restored.
+define(`t1',		`%r13')
+define(`t0',		`%r14')
+define(`md1',		`%r15')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+deflit(`FRAME', 0)
+PROLOGUE(mpn_div_qr_2u_pi1)
+	mov	di_param, di
+	mov	up_param, up
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	push	%rbx
+	push	%rbp
+	push	rp
+deflit(`FRAME', 56)
+	lea	-2(un_param), un
+	mov	d1, md1
+	neg	md1
+
+	C int parameter, 32 bits only
+	movl	shift_param, R32(%rcx)
+
+	C FIXME: Different code for SHLD_SLOW
+	
+	xor	R32(u2), R32(u2)
+	mov	8(up, un, 8), u1
+	shld	%cl, u1, u2
+	C Remains to read (up, un, 8) and shift u1, u0
+	C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
+	mov	di, %rax
+	mul	u2
+	mov	(up, un, 8), u0
+	shld	%cl, u0, u1
+	mov	u1, t0
+	add	%rax, t0	C q0 in t0
+	adc	u2, %rdx
+	mov	%rdx, t1	C q in t1
+	imul	md1, %rdx
+	mov	d0, %rax
+	lea	(%rdx, u1), u2
+	mul	t1
+	mov	u0, u1
+	shl	%cl, u1
+	sub	d0, u1
+	sbb	d1, u2
+	sub	%rax, u1
+	sbb	%rdx, u2
+	xor	R32(%rax), R32(%rax)
+	xor	R32(%rdx), R32(%rdx)
+	cmp	t0, u2
+	cmovnc	d0, %rax
+	cmovnc	d1, %rdx
+	adc	$0, t1
+	nop
+	add	%rax, u1
+	adc	%rdx, u2
+	cmp	d1, u2
+	jae	L(fix_qh)
+L(bck_qh):
+	push	t1	C push qh on stack
+
+	jmp	L(next)
+
+	ALIGN(16)
+L(loop):
+	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)


More information about the gmp-commit mailing list