[Gmp-commit] /var/hg/gmp: 4 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue May 29 17:53:39 CEST 2012


details:   /var/hg/gmp/rev/5658de0bc96f
changeset: 15020:5658de0bc96f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue May 29 17:46:53 2012 +0200
description:
mpn/x86_64/fat/fat.c: small formula clarification.

details:   /var/hg/gmp/rev/4f686080eb56
changeset: 15021:4f686080eb56
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue May 29 17:52:41 2012 +0200
description:
mpz/remove.c: Optimise branches.

details:   /var/hg/gmp/rev/67c759ab9acd
changeset: 15022:67c759ab9acd
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue May 29 17:53:04 2012 +0200
description:
Trivial merge.

details:   /var/hg/gmp/rev/5e5e751d845b
changeset: 15023:5e5e751d845b
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue May 29 17:53:36 2012 +0200
description:
Changelog

diffstat:

 ChangeLog              |    8 +++
 mpn/arm/v5/mod_1_1.asm |  117 +++++++++++++++++++++++++++++++++++++++++++++++++
 mpn/x86_64/fat/fat.c   |    2 +-
 mpz/remove.c           |  100 +++++++++++++++++++++++++----------------
 4 files changed, 186 insertions(+), 41 deletions(-)

diffs (287 lines):

diff -r eeff898dd3a6 -r 5e5e751d845b ChangeLog
--- a/ChangeLog	Mon May 28 10:33:01 2012 +0200
+++ b/ChangeLog	Tue May 29 17:53:36 2012 +0200
@@ -1,3 +1,11 @@
+2012-05-29 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* mpz/remove.c: Optimise branches.
+
+2012-05-29  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/arm/v5/mod_1_1.asm: New file.
+
 2012-05-28  Niels Möller  <nisse at lysator.liu.se>
 
 	* mpn/generic/gcdext.c (compute_v): Simplified carry handling a
diff -r eeff898dd3a6 -r 5e5e751d845b mpn/arm/v5/mod_1_1.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/arm/v5/mod_1_1.asm	Tue May 29 17:53:36 2012 +0200
@@ -0,0 +1,117 @@
+dnl  ARM mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 7
+C Cortex-A15	 ?
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)
+	push	{r4-r10}
+	add	r0, r0, r1, asl #2
+	ldr	r5, [r0, #-4]!
+	ldr	r12, [r0, #-4]!
+	subs	r1, r1, #2
+	ble	L(4)
+	ldr	r8, [r3, #12]
+	mov	r4, r12
+	mov	r10, r5
+	umull	r7, r5, r10, r8
+	sub	r1, r1, #1
+	b	L(mid)
+
+L(top):	adds	r12, r6, r7
+	adcs	r10, r4, r5
+	sub	r1, r1, #1
+	mov	r6, #0
+	movcs	r6, r8
+	umull	r7, r5, r10, r8
+	adds	r4, r12, r6
+	subcs	r4, r4, r2
+L(mid):	ldr	r6, [r0, #-4]!
+	teq	r1, #0
+	bne	L(top)
+
+	adds	r12, r6, r7
+	adcs	r5, r4, r5
+	subcs	r5, r5, r2
+L(4):	ldr	r1, [r3, #4]
+	cmp	r1, #0
+	beq	L(7)
+	ldr	r4, [r3, #8]
+	umull	r0, r6, r5, r4
+	adds	r12, r0, r12
+	addcs	r6, r6, #1
+	rsb	r0, r1, #32
+	mov	r0, r12, lsr r0
+	orr	r5, r0, r6, asl r1
+	mov	r12, r12, asl r1
+	b	L(8)
+L(7):	cmp	r5, r2
+	subcs	r5, r5, r2
+L(8):	ldr	r0, [r3, #0]
+	umull	r4, r3, r5, r0
+	add	r5, r5, #1
+	adds	r0, r4, r12
+	adc	r5, r3, r5
+	mul	r5, r2, r5
+	sub	r12, r12, r5
+	cmp	r12, r0
+	addhi	r12, r12, r2
+	cmp	r2, r12
+	subls	r12, r12, r2
+	mov	r0, r12, lsr r1
+	pop	{r4-r10}
+	bx	r14
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+	stmfd	sp!, {r4, r5, r6, r14}
+	mov	r5, r0
+	clz	r4, r1
+	mov	r0, r1, asl r4
+	rsb	r6, r0, #0
+	bl	mpn_invert_limb
+	str	r0, [r5, #0]
+	str	r4, [r5, #4]
+	cmp	r4, #0
+	beq	L(2)
+	rsb	r1, r4, #32
+	mov	r3, #1
+	mov	r3, r3, asl r4
+	orr	r3, r3, r0, lsr r1
+	mul	r3, r6, r3
+	mov	r4, r3, lsr r4
+	str	r4, [r5, #8]
+L(2):	mul	r0, r6, r0
+	str	r0, [r5, #12]
+	ldmfd	sp!, {r4, r5, r6, pc}
+EPILOGUE()
diff -r eeff898dd3a6 -r 5e5e751d845b mpn/x86_64/fat/fat.c
--- a/mpn/x86_64/fat/fat.c	Mon May 28 10:33:01 2012 +0200
+++ b/mpn/x86_64/fat/fat.c	Tue May 29 17:53:36 2012 +0200
@@ -206,7 +206,7 @@
 
   /* Check extended feature flags */
   __gmpn_cpuid (dummy_string, 0x80000001);
-  if ((dummy_string[4 + 29 / 8] & (1 << (29 - 3 * 8))) == 0)
+  if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
     abort (); /* longmode-capable-bit turned off! */
 
   /*********************************************************/
diff -r eeff898dd3a6 -r 5e5e751d845b mpz/remove.c
--- a/mpz/remove.c	Mon May 28 10:33:01 2012 +0200
+++ b/mpz/remove.c	Tue May 29 17:53:36 2012 +0200
@@ -1,6 +1,6 @@
 /* mpz_remove -- divide out a factor and return its multiplicity.
 
-Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -23,45 +23,64 @@
 mp_bitcnt_t
 mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
 {
-  mpz_t fpow[GMP_LIMB_BITS];		/* Really MP_SIZE_T_BITS */
-  mpz_t x, rem;
   mp_bitcnt_t pwr;
-  int p;
+  mp_srcptr fp;
+  mp_size_t sn, fn, afn;
+  mp_limb_t fp0;
 
-  if (UNLIKELY (mpz_cmpabs_ui (f, 1) <= 0))
-    DIVIDE_BY_ZERO;
+  sn = SIZ (src);
+  fn = SIZ (f);
+  fp = PTR (f);
+  afn = ABS (fn);
+  fp0 = fp[0];
 
-  if (UNLIKELY (SIZ (src) == 0))
-    {
-      SIZ (dest) = 0;
-      return 0;
-    }
+  if (UNLIKELY ((afn <= (fp0 == 1)) /* mpz_cmpabs_ui (f, 1) <= 0 */
+		| (sn == 0))) {
+    /*  f = 0 or f = +- 1 or src = 0 */
+    if (afn == 0)
+      DIVIDE_BY_ZERO;
+    mpz_set (dest, src);
+    return 0;
+  }
 
-  if (mpz_cmpabs_ui (f, 2) == 0)
-    {
-      mp_bitcnt_t s0;
-      s0 = mpz_scan1 (src, 0);
-      mpz_div_2exp (dest, src, s0);
-      if (s0 & (SIZ (f) < 0)) /*((s0 % 2 == 1) && (SIZ (f) < 0))*/
-	mpz_neg (dest, dest);
-      return s0;
-    }
+#if 0
+  if ((fp0 & 1) == 1) { /* f is odd */
+    mp_ptr dp;
+    mp_size_t dn;
 
-  /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an
-     upper bound of the result we're seeking.  We could also shift down the
-     operands so that they become odd, to make intermediate values smaller.  */
+    dn = ABS (sn);
+    dp = MPZ_REALLOC (dest, dn);
 
-  mpz_init (rem);
-  mpz_init (x);
+    pwr = mpn_remove (dp, &dn, PTR(src), dn, fp, afn, ~(mp_bitcnt_t) 0);
 
-  pwr = 0;
-  mpz_init (fpow[0]);
-  mpz_set (fpow[0], f);
-  mpz_set (dest, src);
+    SIZ (dest) = ((pwr & (fn < 0)) ^ (sn < 0)) ? -dn : dn;
+  } else
+#endif
+  if (afn == (fp0 == 2)) { /* mpz_cmpabs_ui (f, 2) == 0 */
+    pwr = mpz_scan1 (src, 0);
+    mpz_div_2exp (dest, src, pwr);
+    if (pwr & (SIZ (f) < 0)) /*((pwr % 2 == 1) && (SIZ (f) < 0))*/
+      mpz_neg (dest, dest);
+  } else { /* f != +-2 */
 
-  /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k).  */
-  for (p = 0;; p++)
-    {
+    mpz_t fpow[GMP_LIMB_BITS];		/* Really MP_SIZE_T_BITS */
+    mpz_t x, rem;
+    int p;
+
+    /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an
+       upper bound of the result we're seeking.  We could also shift down the
+       operands so that they become odd, to make intermediate values smaller.  */
+
+    mpz_init (rem);
+    mpz_init (x);
+
+    pwr = 0;
+    mpz_init (fpow[0]);
+    mpz_set (fpow[0], f);
+    mpz_set (dest, src);
+
+    /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k).  */
+    for (p = 0;; p++) {
       mpz_tdiv_qr (x, rem, dest, fpow[p]);
       if (SIZ (rem) != 0)
 	break;
@@ -70,14 +89,13 @@
       mpz_set (dest, x);
     }
 
-  pwr = (1L << p) - 1;
+    pwr = (1L << p) - 1;
 
-  mpz_clear (fpow[p]);
+    mpz_clear (fpow[p]);
 
-  /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
-     zero remainder.  */
-  while (--p >= 0)
-    {
+    /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
+       zero remainder.  */
+    while (--p >= 0) {
       mpz_tdiv_qr (x, rem, dest, fpow[p]);
       if (SIZ (rem) == 0)
 	{
@@ -87,7 +105,9 @@
       mpz_clear (fpow[p]);
     }
 
-  mpz_clear (x);
-  mpz_clear (rem);
+    mpz_clear (x);
+    mpz_clear (rem);
+  }
+
   return pwr;
 }


More information about the gmp-commit mailing list