mpn_cnd_add_n
Niels Möller
nisse at lysator.liu.se
Thu Mar 7 10:41:20 CET 2013
nisse at lysator.liu.se (Niels Möller) writes:
> Not much harder, I hope. There are three assembly implementations, as
> far as I see, x86_64, powerpc64/mode64, and arm. x86_64 and powerpc64
> should need just permuting the m4 names for registers, while arm needs
> one or two additional lines changed since the final argument is passed
> on the stack.
Here's a patch that reorders the arguments for mpn_addcnd_n and
mpn_subcnd_n (I think it's best to keep this change separate from the
renaming, since the potential problems are quite different).
It's tested on x86_64, arm, and with --disable-assembly. I've run a
regular make check and tests/devel/try, and tune/speed. I haven't tested
the (apparently trivial) powerpc64 change.
If you think this is ok, I'll check it in, and look into the renaming
next.
Regards,
/Niels
diff -r 293ed286d8cc gmp-impl.h
--- a/gmp-impl.h Thu Mar 07 07:45:57 2013 +0100
+++ b/gmp-impl.h Thu Mar 07 10:03:49 2013 +0100
@@ -1556,9 +1556,9 @@
#define mpn_tabselect __MPN(tabselect)
__GMP_DECLSPEC void mpn_tabselect (volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t);
#define mpn_addcnd_n __MPN(addcnd_n)
-__GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+__GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_subcnd_n __MPN(subcnd_n)
-__GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+__GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_sb_div_qr_sec __MPN(sb_div_qr_sec)
__GMP_DECLSPEC void mpn_sb_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
diff -r 293ed286d8cc mpn/arm/aorscnd_n.asm
--- a/mpn/arm/aorscnd_n.asm Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/arm/aorscnd_n.asm Thu Mar 07 10:03:49 2013 +0100
@@ -26,12 +26,13 @@
C Cortex-A9 2.5 slightly fluctuating
C Cortex-A15 ?
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n', `r3')
+define(`cnd', `r0')
+define(`rp', `r1')
+define(`up', `r2')
+define(`vp', `r3')
-define(`cnd', `r12')
+define(`n', `r12')
+
ifdef(`OPERATION_addcnd_n', `
define(`ADDSUB', adds)
@@ -52,7 +53,7 @@
ASM_START()
PROLOGUE(func)
push {r4-r11}
- ldr cnd, [sp, #32]
+ ldr n, [sp, #32]
INITCY C really only needed for n = 0 (mod 4)
diff -r 293ed286d8cc mpn/generic/addcnd_n.c
--- a/mpn/generic/addcnd_n.c Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/addcnd_n.c Thu Mar 07 10:03:49 2013 +0100
@@ -28,7 +28,7 @@
#include "gmp-impl.h"
mp_limb_t
-mpn_addcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+mpn_addcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
diff -r 293ed286d8cc mpn/generic/powm_sec.c
--- a/mpn/generic/powm_sec.c Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/powm_sec.c Thu Mar 07 10:03:49 2013 +0100
@@ -63,7 +63,7 @@
do { \
mp_limb_t cy; \
cy = mpn_redc_1 (rp, up, mp, n, invm); \
- mpn_subcnd_n (rp, rp, mp, n, cy); \
+ mpn_subcnd_n (cy, rp, rp, mp, n); \
} while (0)
#undef MPN_REDC_2_SEC
@@ -71,7 +71,7 @@
do { \
mp_limb_t cy; \
cy = mpn_redc_2 (rp, up, mp, n, mip); \
- mpn_subcnd_n (rp, rp, mp, n, cy); \
+ mpn_subcnd_n (cy, rp, rp, mp, n); \
} while (0)
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
@@ -418,7 +418,7 @@
MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
#endif
cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
- mpn_subcnd_n (rp, rp, mp, n, !cnd);
+ mpn_subcnd_n (!cnd, rp, rp, mp, n);
}
mp_size_t
diff -r 293ed286d8cc mpn/generic/sbpi1_div_sec.c
--- a/mpn/generic/sbpi1_div_sec.c Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/sbpi1_div_sec.c Thu Mar 07 10:03:49 2013 +0100
@@ -64,7 +64,7 @@
if (nn == dn)
{
cy = mpn_sub_n (np, np, dp, dn);
- mpn_addcnd_n (np, np, dp, dn, cy);
+ mpn_addcnd_n (cy, np, np, dp, dn);
#if OPERATION_sbpi1_div_qr_sec
return 1 - cy;
#else
@@ -124,7 +124,7 @@
#if OPERATION_sbpi1_div_qr_sec
qlp[0] += h;
#endif
- h -= mpn_subcnd_n (np, np, dp, dn, h);
+ h -= mpn_subcnd_n (h, np, np, dp, dn);
/* 2nd adjustment depends on remainder/divisor comparision as well as whether
extra remainder limb was nullified by previous subtract. */
@@ -133,7 +133,7 @@
#if OPERATION_sbpi1_div_qr_sec
qlp[0] += cy;
#endif
- mpn_addcnd_n (np, np, dp, dn, 1 - cy);
+ mpn_addcnd_n (1 - cy, np, np, dp, dn);
/* Combine quotient halves into final quotient. */
#if OPERATION_sbpi1_div_qr_sec
diff -r 293ed286d8cc mpn/generic/subcnd_n.c
--- a/mpn/generic/subcnd_n.c Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/generic/subcnd_n.c Thu Mar 07 10:03:49 2013 +0100
@@ -28,7 +28,7 @@
#include "gmp-impl.h"
mp_limb_t
-mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+mpn_subcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
diff -r 293ed286d8cc mpn/powerpc64/mode64/aorscnd_n.asm
--- a/mpn/powerpc64/mode64/aorscnd_n.asm Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/powerpc64/mode64/aorscnd_n.asm Thu Mar 07 10:03:49 2013 +0100
@@ -28,11 +28,11 @@
C POWER7 ?
C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`vp', `r5')
-define(`n', `r6')
-define(`cnd', `r7')
+define(`cnd', `r3')
+define(`rp', `r4')
+define(`up', `r5')
+define(`vp', `r6')
+define(`n', `r7')
ifdef(`OPERATION_addcnd_n',`
define(ADDSUBC, adde)
diff -r 293ed286d8cc mpn/x86_64/aorscnd_n.asm
--- a/mpn/x86_64/aorscnd_n.asm Thu Mar 07 07:45:57 2013 +0100
+++ b/mpn/x86_64/aorscnd_n.asm Thu Mar 07 10:03:49 2013 +0100
@@ -44,11 +44,11 @@
C for any other processor.
C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n', `%rcx')
-define(`cnd', `%r8')
+define(`cnd', `%rdi')
+define(`rp', `%rsi')
+define(`up', `%rdx')
+define(`vp', `%rcx')
+define(`n', `%r8')
ifdef(`OPERATION_addcnd_n', `
define(ADDSUB, add)
diff -r 293ed286d8cc tests/devel/try.c
--- a/tests/devel/try.c Thu Mar 07 07:45:57 2013 +0100
+++ b/tests/devel/try.c Thu Mar 07 10:03:49 2013 +0100
@@ -2409,10 +2409,13 @@
case TYPE_RSBLSH2_NC:
case TYPE_ADD_NC:
case TYPE_SUB_NC:
+ e->retval = CALLING_CONVENTIONS (function)
+ (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
+ break;
case TYPE_ADDCND_N:
case TYPE_SUBCND_N:
e->retval = CALLING_CONVENTIONS (function)
- (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
+ (carry, e->d[0].p, e->s[0].p, e->s[1].p, size);
break;
case TYPE_ADD_ERR1_N:
case TYPE_SUB_ERR1_N:
diff -r 293ed286d8cc tests/refmpn.c
--- a/tests/refmpn.c Thu Mar 07 07:45:57 2013 +0100
+++ b/tests/refmpn.c Thu Mar 07 10:03:49 2013 +0100
@@ -597,7 +597,7 @@
}
mp_limb_t
-refmpn_addcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+refmpn_addcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
if (cnd != 0)
return refmpn_add_n (rp, s1p, s2p, size);
@@ -608,7 +608,7 @@
}
}
mp_limb_t
-refmpn_subcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+refmpn_subcnd_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
if (cnd != 0)
return refmpn_sub_n (rp, s1p, s2p, size);
diff -r 293ed286d8cc tests/tests.h
--- a/tests/tests.h Thu Mar 07 07:45:57 2013 +0100
+++ b/tests/tests.h Thu Mar 07 10:03:49 2013 +0100
@@ -162,8 +162,8 @@
int refmpf_validate_division (const char *, mpf_srcptr, mpf_srcptr, mpf_srcptr);
-mp_limb_t refmpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t refmpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_subcnd_n (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
mp_limb_t refmpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
mp_limb_t refmpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
diff -r 293ed286d8cc tune/common.c
--- a/tune/common.c Thu Mar 07 07:45:57 2013 +0100
+++ b/tune/common.c Thu Mar 07 10:03:49 2013 +0100
@@ -1133,12 +1133,12 @@
double
speed_mpn_addcnd_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addcnd_n (wp, xp, yp, s->size, 1));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addcnd_n (1, wp, xp, yp, s->size));
}
double
speed_mpn_subcnd_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_subcnd_n (wp, xp, yp, s->size, 1));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_subcnd_n (1, wp, xp, yp, s->size));
}
/* mpn_and_n etc can be macros and so have to be handled with
--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list