C implementation of mod_1_1

Niels Möller nisse at lysator.liu.se
Wed Mar 2 22:24:16 CET 2011


Torbjorn Granlund <tg at gmplib.org> writes:

> nisse at lysator.liu.se (Niels Möller) writes:

>   #if defined (__sparc__) && W_TYPE_SIZE == 32
>   #define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
>     __asm__ ("addcc %r5,%6,%2\n\taddxcc %r3,%4,%1\n\tsubx %%g0,%%g0,%0"	\
>   	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
>   	   : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl)			\
>   	   __CLOBBER_CC)
>   #endif
>
> This looks OK.

I've looked a bit further. It took me a good while before I found the
doc on the % modifier in constraints. BTW, the definition of add_ssaaaa
for sparc64 (line 1567) violates the gcc restriction that multiple %
modifiers are not supported.

> There is also a movcc, conditional move.  It can take xcc and icc as
> input.  I don't recall if it takes immediate operands as input.

It can take an immediate operand. So it's useful here.

It would be great if you could have a look at the definitions below,
paying particular attention to

  1. Use of = vs =& in output operands (I think I have understood the
     meaning, but I'm not used to inline asm).

  2. The actual assembler code for sparc64 and powerpc64. 

  3. For powerpc, I have a strange matching constraint between s0 and a0
     (inherited from the add_sssaaaa you gave me). I can guess what the
     %I6c is intended to do, is that related? The add_ssaaaa in longlong.h
     uses some different tricks, but no matching constraint.

For x86 and x86_64, the main change is that m is now an output only, in
earlier versions derived from add_sssaaaa it was an input/output
operand.

/nisse

/* add_mssaaaa for 32-bit x86: two-word addition with a carry mask.

   Adds the two-word values (a1,a0) and (b1,b0), storing the high and
   low words of the sum in s1 and s0.  The final "sbb %0, %0" subtracts
   the register from itself with borrow, so m ends up as minus the
   carry out of the high-word add: all ones on carry, zero otherwise.

   Operands: %0 = m, %1 = s1, %2 = s0, %3 = a1, %4 = b1, %5 = a0,
   %6 = b0.  a1 is tied to the register of s1 ("1") and a0 to the
   register of s0 ("%2"; the % marks a0 and b0 as commutative).  s0 is
   earlyclobber ("=&r") because it is written by the first instruction,
   before b1 (%4) is read.  m is an output only.  */
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ ("add\t%6, %k2\n\tadc\t%4, %k1\n\tsbb\t%k0, %k0"		\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
#endif

/* add_mssaaaa for x86_64: two-word addition with a carry mask.

   Adds the two-word values (a1,a0) and (b1,b0), storing the high and
   low words of the sum in s1 and s0.  The final "sbb %0, %0" subtracts
   the register from itself with borrow, so m ends up as minus the
   carry out of the high-word add: all ones on carry, zero otherwise.

   Operands: %0 = m, %1 = s1, %2 = s0, %3 = a1, %4 = b1, %5 = a0,
   %6 = b0.  a1 is tied to the register of s1 ("1") and a0 to the
   register of s0 ("%2"; the % marks a0 and b0 as commutative).  b1 and
   b0 may be a register, memory, or an immediate accepted by the "e"
   constraint.  s0 is earlyclobber ("=&r") because it is written by the
   first instruction, before b1 (%4) is read.  m is an output only.  */
#if defined (__amd64__) && W_TYPE_SIZE == 64
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ ("add\t%6, %q2\n\tadc\t%4, %q1\n\tsbb\t%q0, %q0"		\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif

/* add_mssaaaa for 32-bit sparc: two-word addition with a carry mask.

   Adds the two-word values (ah,al) and (bh,bl), storing the high and
   low words of the sum in sh and sl.  addcc adds the low words and sets
   the carry, addxcc adds the high words plus that carry, and
   "subx %g0,%g0,%0" computes 0 - 0 - carry, so m is all ones if the
   high-word add carried out and zero otherwise.

   Operands: %0 = m, %1 = sh, %2 = sl, %3 = ah, %4 = bh, %5 = al,
   %6 = bl.  The % on al marks al and bl as commutative.  sl is
   earlyclobber ("=&r") because it is written by the first instruction,
   before ah and bh are read.  The condition codes are declared
   clobbered through __CLOBBER_CC (defined elsewhere).  */
#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)			      \
  __asm__ ("addcc %r5,%6,%2\n\taddxcc %r3,%4,%1\n\tsubx %%g0,%%g0,%0" \
         : "=r" (m), "=r" (sh), "=&r" (sl)                            \
         : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl)                 \
         __CLOBBER_CC)
#endif

/* FIXME: Needs testing on sparc64 hardware; still disabled.

   add_mssaaaa for 64-bit sparc: adds the two-word values (ah,al) and
   (bh,bl), storing the high and low words of the sum in sh and sl, and
   sets m to all ones if the high-word add carries out, zero otherwise.

   The add-with-carry instructions take their carry-in from icc, i.e.
   the carry out of bit 31.  The carry out of the full 64-bit low-word
   add is therefore regenerated in icc by adding the upper halves of al
   and bl (operands %7 and %8) together with the 32-bit carry left by
   the first addcc; that sum itself is discarded into %g0.

   The high-word add must set the condition codes (addccc rather than
   addc): the final movcs tests the 64-bit carry (xcc), and without it
   xcc would still hold the carry of the upper-half helper add, which is
   always clear because both of its operands are below 2^32.

   Operands: %0 = m, %1 = sh, %2 = sl, %3 = ah, %4 = bh, %5 = al,
   %6 = bl, %7 = al >> 32, %8 = bl >> 32.  sl is earlyclobber because it
   is written before the remaining inputs are read.  No comma may
   follow the last input operand: __CLOBBER_CC (defined elsewhere) is
   appended directly, as in the 32-bit sparc variant.  */
#if 0 && defined (__sparc__) && W_TYPE_SIZE == 64
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (								\
      "addcc	%r5,%6,%2\n"						\
      "	addccc	%r7,%8,%%g0\n"						\
      "	addccc	%r3,%4,%1\n"						\
      "	clr	%0\n"							\
      "	movcs	%%xcc, -1, %0"						\
       : "=r" (m),"=r" (sh), "=&r" (sl)					\
       : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),			\
	 "rJ" ((al) >> 32), "rI" ((bl) >> 32)				\
	 __CLOBBER_CC)
#endif

/* FIXME: Needs testing on powerpc64 hardware; still disabled.

   add_mssaaaa for 64-bit powerpc: adds the two-word values (a1,a0) and
   (b1,b0), storing the high and low words of the sum in s1 and s0, and
   sets m to all ones if the high-word add carries out, zero otherwise.

   addc (or addic when b0 is an immediate; the %I6 modifier emits the
   "i") adds the low words and sets the carry bit CA; adde adds the high
   words plus CA.  "subfe %0,%0,%0" computes ~x + x + CA = CA - 1, i.e.
   0 on carry and -1 otherwise; the bitwise complement (nor) then turns
   that into the wanted mask, -1 on carry and 0 otherwise.  A plain neg
   would instead give 1 - CA, which is 1 when there is NO carry.

   Operands: %0 = m, %1 = s1, %2 = s0, %3 = a1, %4 = b1, %5 = a0,
   %6 = b0.  The add takes a separate destination register, so a0 does
   not need to be tied to the register of s0; it is a plain register
   operand, commutative with b0 ("%r").  s0 is earlyclobber because it
   is written before a1 and b1 are read.  */
#if 0 && HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
  __asm__ (								\
       "add%I6c	%2,%5,%6\n"						\
      "	adde	%1,%3,%4\n"						\
      "	subfe	%0,%0,%0\n"						\
      "	nor	%0,%0,%0"						\
	   : "=r" (m), "=r" (s1), "=&r" (s0)                          \
	   : "r"  ((UDItype)(a1)), "r" ((UDItype)(b1)),                 \
	     "%r" ((UDItype)(a0)), "rI" ((UDItype)(b0)))
#endif


-- 
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list