[Gmp-commit] /var/hg/gmp: Improved add_mssaaaa.

mercurial at gmplib.org mercurial at gmplib.org
Thu Mar 3 12:33:02 CET 2011


details:   /var/hg/gmp/rev/439c4cd236d7
changeset: 13989:439c4cd236d7
user:      Niels Möller <nisse at lysator.liu.se>
date:      Thu Mar 03 12:32:58 2011 +0100
description:
Improved add_mssaaaa.

diffstat:

 ChangeLog             |   4 +++
 mpn/generic/mod_1_1.c |  61 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 47 insertions(+), 18 deletions(-)

diffs (104 lines):

diff -r 1b7872594f9b -r 439c4cd236d7 ChangeLog
--- a/ChangeLog	Thu Mar 03 11:47:56 2011 +0100
+++ b/ChangeLog	Thu Mar 03 12:32:58 2011 +0100
@@ -1,5 +1,9 @@
 2011-03-03  Niels Möller  <nisse at lysator.liu.se>
 
+	* mpn/generic/mod_1_1.c (add_mssaaaa): For x86 and x86_64, treat m
+	as an output operand only. Added sparc32 implementation. Also
+	added #if:ed out attempts at sparc64 and powerpc64.
+
 	* tune/tuneup.c (tune_mod_1): Record result of MOD_1_1P_METHOD
 	measurement for use by mpn_mod_1_tune. And omit measurement if
 	mpn_mod_1_1p is native assebly code.
diff -r 1b7872594f9b -r 439c4cd236d7 mpn/generic/mod_1_1.c
--- a/mpn/generic/mod_1_1.c	Thu Mar 03 11:47:56 2011 +0100
+++ b/mpn/generic/mod_1_1.c	Thu Mar 03 12:32:58 2011 +0100
@@ -40,36 +40,61 @@
 #if defined (__GNUC__)
 
 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
-#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
-  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tsbb\t%k0, %k0"               \
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
-	   : "0"  ((USItype)(s2)),                                      \
-	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%6, %k2\n\tadc\t%4, %k1\n\tsbb\t%k0, %k0"		\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
 	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
 #endif
 
 #if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
-  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tsbb\t%q0, %q0"               \
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
-	   : "0"  ((UDItype)(s2)),                                      \
-	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
+  __asm__ ("add\t%6, %q2\n\tadc\t%4, %q1\n\tsbb\t%q0, %q0"		\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
+	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
 	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
 #endif
 
-/* FIXME: How to do carry -> mask on powerpc? */
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)			      \
+  __asm__ ("addcc %r5,%6,%2\n\taddxcc %r3,%4,%1\n\tsubx %%g0,%%g0,%0" \
+         : "=r" (m), "=r" (sh), "=&r" (sl)                            \
+         : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl)                 \
+         __CLOBBER_CC)
+#endif
+
+/* FIXME: Needs review and/or testing. */
+#if 0 && defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
+  __asm__ (								\
+      "addcc	%r5,%6,%2\n"						\
+      "	addccc	%r7,%8,%%g0\n"						\
+      "	addccc	%r3,%4,%1\n"						\
+      "	clr	%0\n"							\
+      "	movcs	%%xcc, -1, %0\n"					\
+       : "=r" (m),"=r" (sh), "=&r" (sl)					\
+       : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),			\
+	 "rJ" ((al) >> 32), "rI" ((bl) >> 32),				\
+	 __CLOBBER_CC)
+#endif
+
+/* FIXME: Needs review and/or testing. I don't understand why the
+   constraints say s0 (%2) and a0 (%5) must share a register. */
 #if 0 && HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
-  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"        \
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
-	   : "0"  ((UDItype)(s2)),                                      \
-	     "r"  ((UDItype)(a1)), "r" ((UDItype)(b1)),                 \
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ (								\
+       "add%I6c	%2,%5,%6\n"						\
+      "	adde	%1,%3,%4\n"						\
+      "	subfe	%0,%0,%0\n"						\
+      "	subfic	%0, %0, -1"							\
+	   : "=r" (m), "=r" (s1), "=&r" (s0)                          \
+	   : "r"  ((UDItype)(a1)), "r" ((UDItype)(b1)),                 \
 	     "%2" ((UDItype)(a0)), "rI" ((UDItype)(b0)))
 #endif
 #endif /* defined (__GNUC__) */
 
 #ifndef add_mssaaaa
-#define add_mssaaaa(s2, s1, s0, a1, a0, b1, b0)                         \
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
   do {                                                                  \
     UWtype __s0, __s1, __c0, __c1;                                      \
     __s0 = (a0) + (b0);                                                 \
@@ -79,7 +104,7 @@
     (s0) = __s0;                                                        \
     __s1 = __s1 + __c0;                                                 \
     (s1) = __s1;                                                        \
-    (s2) = - (__c1 + (__s1 < __c0));					\
+    (m) = - (__c1 + (__s1 < __c0));					\
   } while (0)
 #endif
 


More information about the gmp-commit mailing list