mpn_cnd_add_n

Niels Möller nisse at lysator.liu.se
Thu Mar 7 10:41:20 CET 2013


nisse at lysator.liu.se (Niels Möller) writes:

> Not much harder, I hope. There are three assembly implementations, as
> far as I see, x86_64, powerpc64/mode64, and arm. x86_64 and powerpc64
> should need just permuting the m4 names for registers, while arm needs
> one or two additional lines changed since the final argument is passed
> on the stack.

Here's a patch that reorders the arguments for mpn_addcnd_n and
mpn_subcnd_n (I think it's best to keep this change separate from the
renaming, since the potential problems are quite different).

It's tested on x86_64, arm, and with --disable-assembly. I've run a
regular make check and tests/devel/try, and tune/speed. I haven't tested
the (apparently trivial) powerpc64 change.

If you think this is ok, I'll check it in, and look into the renaming
next.

Regards,
/Niels

diff -r 293ed286d8cc gmp-impl.h
--- a/gmp-impl.h	Thu Mar 07 07:45:57 2013 +0100
+++ b/gmp-impl.h	Thu Mar 07 10:03:49 2013 +0100
@@ -1556,9 +1556,9 @@
 #define   mpn_tabselect __MPN(tabselect)
 __GMP_DECLSPEC void      mpn_tabselect (volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t);
 #define   mpn_addcnd_n __MPN(addcnd_n)
-__GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+__GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
 #define   mpn_subcnd_n __MPN(subcnd_n)
-__GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+__GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
 
 #define mpn_sb_div_qr_sec __MPN(sb_div_qr_sec)
 __GMP_DECLSPEC void mpn_sb_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
diff -r 293ed286d8cc mpn/arm/aorscnd_n.asm
--- a/mpn/arm/aorscnd_n.asm	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/arm/aorscnd_n.asm	Thu Mar 07 10:03:49 2013 +0100
@@ -26,12 +26,13 @@
 C Cortex-A9	 2.5	slightly fluctuating
 C Cortex-A15	 ?
 
-define(`rp',	`r0')
-define(`up',	`r1')
-define(`vp',	`r2')
-define(`n',	`r3')
+define(`cnd',	`r0')
+define(`rp',	`r1')
+define(`up',	`r2')
+define(`vp',	`r3')
 
-define(`cnd',	`r12')
+define(`n',	`r12')
+
 
 ifdef(`OPERATION_addcnd_n', `
 	define(`ADDSUB',      adds)
@@ -52,7 +53,7 @@
 ASM_START()
 PROLOGUE(func)
 	push	{r4-r11}
-	ldr	cnd, [sp, #32]
+	ldr	n, [sp, #32]
 
 	INITCY				C really only needed for n = 0 (mod 4)
 
diff -r 293ed286d8cc mpn/generic/addcnd_n.c
--- a/mpn/generic/addcnd_n.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/addcnd_n.c	Thu Mar 07 10:03:49 2013 +0100
@@ -28,7 +28,7 @@
 #include "gmp-impl.h"
 
 mp_limb_t
-mpn_addcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+mpn_addcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
 {
   mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
 
diff -r 293ed286d8cc mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/powm_sec.c	Thu Mar 07 10:03:49 2013 +0100
@@ -63,7 +63,7 @@
   do {									\
     mp_limb_t cy;							\
     cy = mpn_redc_1 (rp, up, mp, n, invm);				\
-    mpn_subcnd_n (rp, rp, mp, n, cy);					\
+    mpn_subcnd_n (cy, rp, rp, mp, n);					\
   } while (0)
 
 #undef MPN_REDC_2_SEC
@@ -71,7 +71,7 @@
   do {									\
     mp_limb_t cy;							\
     cy = mpn_redc_2 (rp, up, mp, n, mip);				\
-    mpn_subcnd_n (rp, rp, mp, n, cy);					\
+    mpn_subcnd_n (cy, rp, rp, mp, n);					\
   } while (0)
 
 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
@@ -418,7 +418,7 @@
   MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
 #endif
   cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
-  mpn_subcnd_n (rp, rp, mp, n, !cnd);
+  mpn_subcnd_n (!cnd, rp, rp, mp, n);
 }
 
 mp_size_t
diff -r 293ed286d8cc mpn/generic/sbpi1_div_sec.c
--- a/mpn/generic/sbpi1_div_sec.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/sbpi1_div_sec.c	Thu Mar 07 10:03:49 2013 +0100
@@ -64,7 +64,7 @@
   if (nn == dn)
     {
       cy = mpn_sub_n (np, np, dp, dn);
-      mpn_addcnd_n (np, np, dp, dn, cy);
+      mpn_addcnd_n (cy, np, np, dp, dn);
 #if OPERATION_sbpi1_div_qr_sec
       return 1 - cy;
 #else
@@ -124,7 +124,7 @@
 #if OPERATION_sbpi1_div_qr_sec
   qlp[0] += h;
 #endif
-  h -= mpn_subcnd_n (np, np, dp, dn, h);
+  h -= mpn_subcnd_n (h, np, np, dp, dn);
 
   /* 2nd adjustment depends on remainder/divisor comparision as well as whether
      extra remainder limb was nullified by previous subtract.  */
@@ -133,7 +133,7 @@
 #if OPERATION_sbpi1_div_qr_sec
   qlp[0] += cy;
 #endif
-  mpn_addcnd_n (np, np, dp, dn, 1 - cy);
+  mpn_addcnd_n (1 - cy, np, np, dp, dn);
 
   /* Combine quotient halves into final quotient.  */
 #if OPERATION_sbpi1_div_qr_sec
diff -r 293ed286d8cc mpn/generic/subcnd_n.c
--- a/mpn/generic/subcnd_n.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/subcnd_n.c	Thu Mar 07 10:03:49 2013 +0100
@@ -28,7 +28,7 @@
 #include "gmp-impl.h"
 
 mp_limb_t
-mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+mpn_subcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
 {
   mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
 
diff -r 293ed286d8cc mpn/powerpc64/mode64/aorscnd_n.asm
--- a/mpn/powerpc64/mode64/aorscnd_n.asm	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/powerpc64/mode64/aorscnd_n.asm	Thu Mar 07 10:03:49 2013 +0100
@@ -28,11 +28,11 @@
 C POWER7                 ?
 
 C INPUT PARAMETERS
-define(`rp',   `r3')
-define(`up',   `r4')
-define(`vp',   `r5')
-define(`n',    `r6')
-define(`cnd',  `r7')
+define(`cnd',  `r3')
+define(`rp',   `r4')
+define(`up',   `r5')
+define(`vp',   `r6')
+define(`n',    `r7')
 
 ifdef(`OPERATION_addcnd_n',`
   define(ADDSUBC,	adde)
diff -r 293ed286d8cc mpn/x86_64/aorscnd_n.asm
--- a/mpn/x86_64/aorscnd_n.asm	Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/x86_64/aorscnd_n.asm	Thu Mar 07 10:03:49 2013 +0100
@@ -44,11 +44,11 @@
 C    for any other processor.
 
 C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
-define(`cnd',	`%r8')
+define(`cnd',	`%rdi')
+define(`rp',	`%rsi')
+define(`up',	`%rdx')
+define(`vp',	`%rcx')
+define(`n',	`%r8')
 
 ifdef(`OPERATION_addcnd_n', `
 	define(ADDSUB,	      add)
diff -r 293ed286d8cc tests/devel/try.c
--- a/tests/devel/try.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/tests/devel/try.c	Thu Mar 07 10:03:49 2013 +0100
@@ -2409,10 +2409,13 @@
   case TYPE_RSBLSH2_NC:
   case TYPE_ADD_NC:
   case TYPE_SUB_NC:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
+    break;
   case TYPE_ADDCND_N:
   case TYPE_SUBCND_N:
     e->retval = CALLING_CONVENTIONS (function)
-      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
+      (carry, e->d[0].p, e->s[0].p, e->s[1].p, size);
     break;
   case TYPE_ADD_ERR1_N:
   case TYPE_SUB_ERR1_N:
diff -r 293ed286d8cc tests/refmpn.c
--- a/tests/refmpn.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/tests/refmpn.c	Thu Mar 07 10:03:49 2013 +0100
@@ -597,7 +597,7 @@
 }
 
 mp_limb_t
-refmpn_addcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+refmpn_addcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
 {
   if (cnd != 0)
     return refmpn_add_n (rp, s1p, s2p, size);
@@ -608,7 +608,7 @@
     }
 }
 mp_limb_t
-refmpn_subcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+refmpn_subcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
 {
   if (cnd != 0)
     return refmpn_sub_n (rp, s1p, s2p, size);
diff -r 293ed286d8cc tests/tests.h
--- a/tests/tests.h	Thu Mar 07 07:45:57 2013 +0100
+++ b/tests/tests.h	Thu Mar 07 10:03:49 2013 +0100
@@ -162,8 +162,8 @@
 int refmpf_validate_division (const char *, mpf_srcptr, mpf_srcptr, mpf_srcptr);
 
 
-mp_limb_t refmpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t refmpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_subcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
 
 mp_limb_t refmpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
 mp_limb_t refmpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
diff -r 293ed286d8cc tune/common.c
--- a/tune/common.c	Thu Mar 07 07:45:57 2013 +0100
+++ b/tune/common.c	Thu Mar 07 10:03:49 2013 +0100
@@ -1133,12 +1133,12 @@
 double
 speed_mpn_addcnd_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addcnd_n (wp, xp, yp, s->size, 1));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addcnd_n (1, wp, xp, yp, s->size));
 }
 double
 speed_mpn_subcnd_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_subcnd_n (wp, xp, yp, s->size, 1));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_subcnd_n (1, wp, xp, yp, s->size));
 }
 
 /* mpn_and_n etc can be macros and so have to be handled with



-- 
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list