[Gmp-commit] /var/hg/gmp: 4 new changesets
mercurial at gmplib.org
mercurial at gmplib.org
Sat Apr 9 13:25:27 CEST 2011
details: /var/hg/gmp/rev/b58b2ec6af9c
changeset: 14116:b58b2ec6af9c
user: Niels Möller <nisse at lysator.liu.se>
date: Wed Mar 30 23:06:19 2011 +0200
description:
Added TODO comment.
details: /var/hg/gmp/rev/6617eb8e4b0f
changeset: 14117:6617eb8e4b0f
user: Niels Möller <nisse at lysator.liu.se>
date: Wed Mar 30 23:08:12 2011 +0200
description:
Moved mpn_div_qr_2u_pi1 to a separate file.
details: /var/hg/gmp/rev/4b7ba5f71347
changeset: 14118:4b7ba5f71347
user: Niels Möller <nisse at lysator.liu.se>
date: Wed Mar 30 23:13:48 2011 +0200
description:
x86_64 implementation of mpn_div_qr_2u_pi1.
details: /var/hg/gmp/rev/1f26709da9c1
changeset: 14119:1f26709da9c1
user: Niels Möller <nisse at lysator.liu.se>
date: Sat Apr 09 13:25:22 2011 +0200
description:
Fixed include guard #endif
diffstat:
ChangeLog | 17 +++-
configure.in | 2 +-
gmp-impl.h | 3 +
mpn/generic/div_qr_2.c | 32 -------
mpn/generic/div_qr_2u_pi1.c | 66 +++++++++++++++
mpn/x86_64/div_qr_2n_pi1.asm | 5 +
mpn/x86_64/div_qr_2u_pi1.asm | 189 +++++++++++++++++++++++++++++++++++++++++++
tune/speed.h | 6 +-
8 files changed, 283 insertions(+), 37 deletions(-)
diffs (truncated from 398 to 300 lines):
diff -r 0594e395043e -r 1f26709da9c1 ChangeLog
--- a/ChangeLog Wed Mar 30 14:09:06 2011 +0200
+++ b/ChangeLog Sat Apr 09 13:25:22 2011 +0200
@@ -1,5 +1,20 @@
2011-03-30 Niels Möller <nisse at lysator.liu.se>
+ * mpn/x86_64/div_qr_2u_pi1.asm: New file.
+
+ * configure.in (gmp_mpn_functions): Add div_qr_2u_pi1.
+
+ * gmp-impl.h (mpn_div_qr_2u_pi1): Declare.
+
+ * mpn/generic/div_qr_2u_pi1.c (mpn_div_qr_2u_pi1): Moved to
+ separate file, from...
+ * mpn/generic/div_qr_2.c: ... old location.
+
+ * mpn/generic/div_qr_2n_pi1.c: Renamed file, from...
+ * mpn/generic/div_qr_2_pi1_norm.c: ...old name.
+ * mpn/x86_64/div_qr_2n_pi1.asm: Renamed file, from...
+ * mpn/x86_64/div_qr_2_pi1_norm.asm: ...old name.
+
* gmp-impl.h (mpn_div_qr_2n_pi1): Use new name in declaration.
* tune/speed.h (speed_mpn_div_qr_2n): Likewise.
(speed_mpn_div_qr_2u): Likewise.
@@ -70,7 +85,7 @@
* gmp-impl.h (mpn_div_qr_2_pi1_norm): Declare.
- * configure.in: Added div_qr_2_pi1_norm.
+ * configure.in (gmp_mpn_functions): Added div_qr_2_pi1_norm.
2011-03-22 Torbjorn Granlund <tege at gmplib.org>
diff -r 0594e395043e -r 1f26709da9c1 configure.in
--- a/configure.in Wed Mar 30 14:09:06 2011 +0200
+++ b/configure.in Sat Apr 09 13:25:22 2011 +0200
@@ -2547,7 +2547,7 @@
toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \
invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \
- div_qr_2 div_qr_2n_pi1 \
+ div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1 \
sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
mu_div_qr mu_divappr_q mu_div_q \
diff -r 0594e395043e -r 1f26709da9c1 gmp-impl.h
--- a/gmp-impl.h Wed Mar 30 14:09:06 2011 +0200
+++ b/gmp-impl.h Sat Apr 09 13:25:22 2011 +0200
@@ -1298,6 +1298,9 @@
#define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
__GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t));
+#define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
+ __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t));
+
#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
diff -r 0594e395043e -r 1f26709da9c1 mpn/generic/div_qr_2.c
--- a/mpn/generic/div_qr_2.c Wed Mar 30 14:09:06 2011 +0200
+++ b/mpn/generic/div_qr_2.c Sat Apr 09 13:25:22 2011 +0200
@@ -268,38 +268,6 @@
return qh;
}
-static mp_limb_t
-mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
- mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
-{
- mp_limb_t qh;
- mp_limb_t r2, r1, r0;
- mp_size_t i;
-
- ASSERT (nn >= 2);
- ASSERT (d1 & GMP_NUMB_HIGHBIT);
- ASSERT (shift > 0);
-
- r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
- r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
- r0 = np[nn-2] << shift;
-
- udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
-
- for (i = nn - 2 - 1; i >= 0; i--)
- {
- mp_limb_t q;
- r0 = np[i];
- r1 |= r0 >> (GMP_LIMB_BITS - shift);
- r0 <<= shift;
- udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
- qp[i] = q;
- }
-
- rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
- rp[1] = r2 >> shift;
- return qh;
-}
/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
significant quotient limbs at qp and the 2 long remainder at np.
diff -r 0594e395043e -r 1f26709da9c1 mpn/generic/div_qr_2u_pi1.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/generic/div_qr_2u_pi1.c Sat Apr 09 13:25:22 2011 +0200
@@ -0,0 +1,66 @@
+/* mpn_div_qr_2u_pi1
+
+ Contributed to the GNU project by Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor.
+
+   Divides {np,nn}, shifted left by SHIFT bits on the fly, by the two-limb
+   value <d1,d0> using udiv_qr_3by2 with the inverse DI.  Stores the nn-2
+   low quotient limbs at {qp,nn-2} and the two-limb remainder, shifted
+   back right by SHIFT bits, at rp[0] and rp[1].  Returns the high quotient
+   limb.
+
+   Requires nn >= 2 and shift > 0; a zero shift would make the
+   (GMP_LIMB_BITS - shift) shift counts below equal to the limb width.
+   d1 must have its high bit set, exactly as asserted by the static version
+   in div_qr_2.c that this function was moved from.  Presumably the caller
+   passes the divisor already shifted left by SHIFT, together with the
+   matching inverse -- confirm against the caller.  */
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_limb_t r2, r1, r0;
+  mp_size_t i;
+
+  ASSERT (nn >= 2);
+  /* The high divisor limb handed to udiv_qr_3by2 must be normalized.  */
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+  ASSERT (shift > 0);
+
+  /* Form the three most significant limbs of the shifted dividend from
+     the two top input limbs.  */
+  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
+  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
+  r0 = np[nn-2] << shift;
+
+  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      /* Bring in the next lower limb: its top SHIFT bits are OR-ed into
+	 r1 and the remaining bits become r0.  Presumably the low SHIFT
+	 bits of r1 are zero here, which holds when the low SHIFT bits of
+	 d0 are zero.  */
+      r0 = np[i];
+      r1 |= r0 >> (GMP_LIMB_BITS - shift);
+      r0 <<= shift;
+      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+      qp[i] = q;
+    }
+
+  /* Undo the shift on the remainder <r2,r1> before storing it.  */
+  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
+  rp[1] = r2 >> shift;
+
+  return qh;
+}
diff -r 0594e395043e -r 1f26709da9c1 mpn/x86_64/div_qr_2n_pi1.asm
--- a/mpn/x86_64/div_qr_2n_pi1.asm Wed Mar 30 14:09:06 2011 +0200
+++ b/mpn/x86_64/div_qr_2n_pi1.asm Sat Apr 09 13:25:22 2011 +0200
@@ -40,6 +40,11 @@
define(`t0', `%r14')
define(`md1', `%r15')
+C TODO
+C * Store qh in the same stack slot as di_param, instead of pushing
+C it. (we could put it in register %rbp, but then we would need to
+C save and restore that instead, which doesn't seem like a win).
+
ASM_START()
TEXT
ALIGN(16)
diff -r 0594e395043e -r 1f26709da9c1 mpn/x86_64/div_qr_2u_pi1.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/div_qr_2u_pi1.asm Sat Apr 09 13:25:22 2011 +0200
@@ -0,0 +1,189 @@
+dnl x86-64 mpn_div_qr_2u_pi1
+dnl -- Divide an mpn number by an unnormalized 2-limb number,
+dnl using a single-limb inverse and shifting the dividend on the fly.
+
+dnl Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C c/l
+C INPUT PARAMETERS
+define(`qp', `%rdi')
+define(`rp', `%rsi')
+define(`up_param', `%rdx')
+define(`un_param', `%rcx') dnl %rcx needed for shift count
+define(`d1', `%r8')
+define(`d0', `%r9')
+define(`shift_param', `FRAME+8(%rsp)')
+define(`di_param', `FRAME+16(%rsp)')
+
+define(`di', `%r10')
+define(`up', `%r11')
+define(`un', `%rbp')
+define(`u2', `%rbx')
+define(`u1', `%r12')
+define(`u0', `%rsi') dnl Same as rp, which is saved and restored.
+define(`t1', `%r13')
+define(`t0', `%r14')
+define(`md1', `%r15')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME', 0)
+PROLOGUE(mpn_div_qr_2u_pi1)
+ mov di_param, di
+ mov up_param, up
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbx
+ push %rbp
+ push rp
+deflit(`FRAME', 56)
+ lea -2(un_param), un
+ mov d1, md1
+ neg md1
+
+ C int parameter, 32 bits only
+ movl shift_param, R32(%rcx)
+
+ C FIXME: Different code for SHLD_SLOW
+
+ xor R32(u2), R32(u2)
+ mov 8(up, un, 8), u1
+ shld %cl, u1, u2
+ C Remains to read (up, un, 8) and shift u1, u0
+ C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
+ mov di, %rax
+ mul u2
+ mov (up, un, 8), u0
+ shld %cl, u0, u1
+ mov u1, t0
+ add %rax, t0 C q0 in t0
+ adc u2, %rdx
+ mov %rdx, t1 C q in t1
+ imul md1, %rdx
+ mov d0, %rax
+ lea (%rdx, u1), u2
+ mul t1
+ mov u0, u1
+ shl %cl, u1
+ sub d0, u1
+ sbb d1, u2
+ sub %rax, u1
+ sbb %rdx, u2
+ xor R32(%rax), R32(%rax)
+ xor R32(%rdx), R32(%rdx)
+ cmp t0, u2
+ cmovnc d0, %rax
+ cmovnc d1, %rdx
+ adc $0, t1
+ nop
+ add %rax, u1
+ adc %rdx, u2
+ cmp d1, u2
+ jae L(fix_qh)
+L(bck_qh):
+ push t1 C push qh on stack
+
+ jmp L(next)
+
+ ALIGN(16)
+L(loop):
+ C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
More information about the gmp-commit
mailing list