[Gmp-commit] /var/hg/gmp: Update redc_1 and redc_2 to do the add_n but not the conditional sub_n.

mercurial at gmplib.org mercurial at gmplib.org
Sun Feb 19 21:04:19 CET 2012


details:   /var/hg/gmp/rev/0237213c74f5
changeset: 14644:0237213c74f5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Sun Feb 19 21:04:16 2012 +0100
description:
Update redc_1 and redc_2 to do the add_n but not the conditional sub_n.

diffstat:

 ChangeLog              |  21 ++++++++++++++++++++-
 gmp-impl.h             |   4 ++--
 mpn/generic/powm.c     |  26 +++++++++++++++++---------
 mpn/generic/powm_sec.c |   3 +--
 mpn/generic/redc_1.c   |  12 ++++++++----
 mpn/generic/redc_2.c   |  11 ++++++-----
 mpn/generic/redc_n.c   |   2 +-
 mpn/x86_64/redc_1.asm  |  46 ++++++++++++++++++++++++++++++++--------------
 tests/refmpn.c         |   4 +++-
 tune/speed.h           |   6 ++++--
 10 files changed, 94 insertions(+), 41 deletions(-)

diffs (truncated from 395 to 300 lines):

diff -r 13f6af182a66 -r 0237213c74f5 ChangeLog
--- a/ChangeLog	Sat Feb 18 21:16:26 2012 +0100
+++ b/ChangeLog	Sun Feb 19 21:04:16 2012 +0100
@@ -1,3 +1,22 @@
+2012-02-19  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/generic/redc_1.c: Put back mpn_add_n call, return its carry.
+	Reintroduce previously removed RP argument.
+	* mpn/x86_64/redc_1.asm: Likewise.
+
+	* mpn/generic/redc_2.c:  Remove mpn_sub_n call, return carry from
+	mpn_add_n call.
+
+	* gmp-impl.h (mpn_redc_1, mpn_redc_2): Now return an mp_limb_t.
+
+	* tune/speed.h (SPEED_ROUTINE_REDC_1): Adapt to pass RP argument.
+
+	* tests/refmpn.c (refmpn_redc_1): Adapt to the new redc_1 interface.
+
+	* mpn/generic/powm.c (MPN_REDC_1): Pass rp parameter to mpn_redc_1.
+	* mpn/generic/powm_sec.c (MPN_REDC_1_SEC): Likewise.
+	* mpn/generic/powm.c (MPN_REDC_2): New macro, use for mpn_redc_2.
+
 2012-02-18  Marc Glisse  <marc.glisse at inria.fr>
 
 	* gmpxx.h (std::common_type): New partial specialization in C++11.
@@ -645,7 +664,7 @@
 
 2011-11-07  Torbjorn Granlund  <tege at gmplib.org>
 
-	* mpn/generic/redc_1.c: Just reduce U uperand using Hensel norm, but
+	* mpn/generic/redc_1.c: Just reduce U operand using Hensel norm, but
 	not fully canonically; leave add_n and conditional sub_n to caller.
 	Therefore omit R argument.
 
diff -r 13f6af182a66 -r 0237213c74f5 gmp-impl.h
--- a/gmp-impl.h	Sat Feb 18 21:16:26 2012 +0100
+++ b/gmp-impl.h	Sun Feb 19 21:04:16 2012 +0100
@@ -1085,10 +1085,10 @@
 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 
 #define mpn_redc_1 __MPN(redc_1)
-__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 #define mpn_redc_2 __MPN(redc_2)
-__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 #define mpn_redc_n __MPN(redc_n)
 __GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 
diff -r 13f6af182a66 -r 0237213c74f5 mpn/generic/powm.c
--- a/mpn/generic/powm.c	Sat Feb 18 21:16:26 2012 +0100
+++ b/mpn/generic/powm.c	Sun Feb 19 21:04:16 2012 +0100
@@ -78,8 +78,16 @@
 #define MPN_REDC_1(rp, up, mp, n, invm)					\
   do {									\
     mp_limb_t cy;							\
-    mpn_redc_1 (up, mp, n, invm);					\
-    cy = mpn_add_n (rp, up + n, up, n);					\
+    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
+    if (cy != 0)							\
+      mpn_sub_n (rp, rp, mp, n);					\
+  } while (0)
+
+#undef MPN_REDC_2
+#define MPN_REDC_2(rp, up, mp, n, mip)					\
+  do {									\
+    mp_limb_t cy;							\
+    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
     if (cy != 0)							\
       mpn_sub_n (rp, rp, mp, n);					\
   } while (0)
@@ -224,7 +232,7 @@
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    mpn_redc_2 (rp, tp, mp, n, mip);
+    MPN_REDC_2 (rp, tp, mp, n, mip);
 #else
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
@@ -241,7 +249,7 @@
       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-	mpn_redc_2 (this_pp, tp, mp, n, mip);
+	MPN_REDC_2 (this_pp, tp, mp, n, mip);
 #else
       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
@@ -343,7 +351,7 @@
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
 	      INNERLOOP;
 	    }
 	  else
@@ -353,7 +361,7 @@
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
 	      INNERLOOP;
 	    }
 	}
@@ -364,7 +372,7 @@
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
 	  INNERLOOP;
 	}
       else
@@ -421,7 +429,7 @@
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
 	  INNERLOOP;
 	}
       else
@@ -557,7 +565,7 @@
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    mpn_redc_2 (rp, tp, mp, n, mip);
+    MPN_REDC_2 (rp, tp, mp, n, mip);
 #else
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
diff -r 13f6af182a66 -r 0237213c74f5 mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c	Sat Feb 18 21:16:26 2012 +0100
+++ b/mpn/generic/powm_sec.c	Sun Feb 19 21:04:16 2012 +0100
@@ -60,8 +60,7 @@
 #define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
   do {									\
     mp_limb_t cy;							\
-    mpn_redc_1 (up, mp, n, invm);					\
-    cy = mpn_add_n (rp, up + n, up, n);					\
+    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
     mpn_subcnd_n (rp, rp, mp, n, cy);					\
   } while (0)
 
diff -r 13f6af182a66 -r 0237213c74f5 mpn/generic/redc_1.c
--- a/mpn/generic/redc_1.c	Sat Feb 18 21:16:26 2012 +0100
+++ b/mpn/generic/redc_1.c	Sun Feb 19 21:04:16 2012 +0100
@@ -1,10 +1,11 @@
-/* mpn_redc_1.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_1.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
    mp[] is n limbs; up[] is 2n limbs.
 
    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
 
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009, 2012 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -24,8 +25,8 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 
-void
-mpn_redc_1 (mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+mp_limb_t
+mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
 {
   mp_size_t j;
   mp_limb_t cy;
@@ -40,4 +41,7 @@
       up[0] = cy;
       up++;
     }
+
+  cy = mpn_add_n (rp, up, up - n, n);
+  return cy;
 }
diff -r 13f6af182a66 -r 0237213c74f5 mpn/generic/redc_2.c
--- a/mpn/generic/redc_2.c	Sat Feb 18 21:16:26 2012 +0100
+++ b/mpn/generic/redc_2.c	Sun Feb 19 21:04:16 2012 +0100
@@ -1,10 +1,11 @@
-/* mpn_redc_2.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_2.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
    mp[] is n limbs; up[] is 2n limbs.
 
    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
 
-Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2012 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
@@ -66,7 +67,7 @@
   } while (0)
 #endif
 
-void
+mp_limb_t
 mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
 {
   mp_limb_t q[2];
@@ -92,7 +93,7 @@
       up[n] = upn;
       up += 2;
     }
+
   cy = mpn_add_n (rp, up, up - n, n);
-  if (cy != 0)
-    mpn_sub_n (rp, rp, mp, n);
+  return cy;
 }
diff -r 13f6af182a66 -r 0237213c74f5 mpn/generic/redc_n.c
--- a/mpn/generic/redc_n.c	Sat Feb 18 21:16:26 2012 +0100
+++ b/mpn/generic/redc_n.c	Sun Feb 19 21:04:16 2012 +0100
@@ -1,4 +1,4 @@
-/* mpn_redc_n.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_n.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
    mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
 
    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
diff -r 13f6af182a66 -r 0237213c74f5 mpn/x86_64/redc_1.asm
--- a/mpn/x86_64/redc_1.asm	Sat Feb 18 21:16:26 2012 +0100
+++ b/mpn/x86_64/redc_1.asm	Sun Feb 19 21:04:16 2012 +0100
@@ -1,6 +1,6 @@
 dnl  AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
 
-dnl  Copyright 2004, 2008, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2008, 2011, 2012 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -38,14 +38,16 @@
 C    outer loops and before the end of the outer loops.  The last outer
 C    loop iteration would then compute an unneeded product, but it is at
 C    least not a stray read from up[], since it is at up[n].
+C  * Make a tail call to mpn_add_n.
 
 C INPUT PARAMETERS
-define(`up',	  `%rdi')
-define(`mp',	  `%rsi')
-define(`n_param', `%rdx')
-define(`invm',	  `%rcx')
+define(`rp',	  `%rdi')
+define(`up',	  `%rsi')
+define(`mp_param',`%rdx')
+define(`n',	  `%rcx')
+define(`invm',	  `%r8')
 
-define(`n',	  `%r13')
+define(`mp',	  `%r13')
 define(`i',	  `%r11')
 define(`nneg',	  `%r12')
 
@@ -57,17 +59,17 @@
 	ALIGN(32)
 PROLOGUE(mpn_redc_1)
 	DOS64_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
 	push	%rbp
 	push	%rbx
 	push	%r12
 	push	%r13
 	push	%r14
 
-	lea	(mp,n_param,8), mp	C mp += n
-	lea	(up,n_param,8), up	C up += n
+	lea	(mp_param,n,8), mp	C mp += n
+	lea	(up,n,8), up		C up += n
 
-	mov	n_param, nneg
-	mov	n_param, n
+	mov	n, nneg
 	neg	nneg
 
 	mov	R32(n), R32(%rax)
@@ -135,7 +137,8 @@
 	add	$8, up
 	dec	n
 	jnz	L(o1)
-	jmp	L(ret)
+	lea	16(up,nneg,8), up
+	jmp	L(common)
 
 L(b0):	C lea	(mp), mp
 	lea	-16(up), up
@@ -187,7 +190,8 @@
 	add	$8, up
 	dec	n
 	jnz	L(o0)
-	jmp	L(ret)


More information about the gmp-commit mailing list