[Gmp-commit] /var/hg/gmp: 10 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Tue Aug 25 18:15:14 UTC 2015


details:   /var/hg/gmp/rev/3e1fa519fe2f
changeset: 16779:3e1fa519fe2f
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 19:36:14 2015 +0200
description:
configure.ac (AH_VERBATIM): Add HAVE_NATIVE_mpn_mullo_basecase.

details:   /var/hg/gmp/rev/b7a730f40961
changeset: 16780:b7a730f40961
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 19:48:44 2015 +0200
description:
mpn/generic/sqrlo.c (mpn_sqrlo): Use mullo_basecase when faster.

details:   /var/hg/gmp/rev/765e281e7c8b
changeset: 16781:765e281e7c8b
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 19:54:07 2015 +0200
description:
mpn/generic/sqrlo_basecase.c: More readable #defines.

details:   /var/hg/gmp/rev/9731557a574c
changeset: 16782:9731557a574c
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 19:59:27 2015 +0200
description:
Tuning code for SQRLO_*_THRESHOLDs.

details:   /var/hg/gmp/rev/a6f295869834
changeset: 16783:a6f295869834
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 20:00:43 2015 +0200
description:
SQRLO_*_THRESHOLDs values.

details:   /var/hg/gmp/rev/382af20d8bb9
changeset: 16784:382af20d8bb9
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 20:05:04 2015 +0200
description:
tune/tuneup.c (tune_mullo): Set MULLO_MUL_N_THRESHOLD to never
whenever the FFT threshold does not exist.

details:   /var/hg/gmp/rev/5558f4a25a00
changeset: 16785:5558f4a25a00
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 20:05:14 2015 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/196fd0baa17d
changeset: 16786:196fd0baa17d
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 20:10:51 2015 +0200
description:
mpn/minithres/gmp-mparam.h: New SQRLO_*_THRESHOLDs.

details:   /var/hg/gmp/rev/c1deadaeb6fe
changeset: 16787:c1deadaeb6fe
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 20:11:30 2015 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/7121e1053956
changeset: 16788:7121e1053956
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Tue Aug 25 20:15:04 2015 +0200
description:
Trivial merge

diffstat:

 ChangeLog                        |  24 ++++++++++++-
 configure.ac                     |   1 +
 gmp-impl.h                       |  34 ++++++++++++++++++
 mpn/generic/sqrlo.c              |  20 +++-------
 mpn/generic/sqrlo_basecase.c     |  35 +++++++++++-------
 mpn/minithres/gmp-mparam.h       |   4 ++
 mpn/x86_64/coreisbr/gmp-mparam.h |   3 +
 mpn/x86_64/x86_64-defs.m4        |  22 ++++++------
 tune/Makefile.am                 |   2 +-
 tune/tuneup.c                    |  73 +++++++++++++++++++++++++++++++++------
 10 files changed, 167 insertions(+), 51 deletions(-)

diffs (truncated from 430 to 300 lines):

diff -r 0fe018bd50ee -r 7121e1053956 ChangeLog
--- a/ChangeLog	Sun Aug 23 21:30:13 2015 +0200
+++ b/ChangeLog	Tue Aug 25 20:15:04 2015 +0200
@@ -1,3 +1,25 @@
+2015-08-25 Marco Bodrato <bodrato at mail.dm.unipi.it>
+
+	* configure.ac (AH_VERBATIM): Add HAVE_NATIVE_mpn_mullo_basecase.
+	* mpn/generic/sqrlo.c (mpn_sqrlo): Use mullo_basecase when faster.
+	* mpn/generic/sqrlo_basecase.c: More readable #defines.
+	
+	* tune/tuneup.c (tune_sqrlo): New function to tune sqrlo thresholds.
+	(all): Call it, after multiplication and FFT.
+	* tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add sqrlo{,_basecase}.c .
+	* gmp-impl.h: Add all SQRLO_*_THRESHOLD* defs, for tuning and default.
+	* mpn/generic/sqrlo.c: Remove default threshold definitions.
+	* mpn/generic/sqrlo_basecase.c: Use SQRLO_DC_THRESHOLD_LIMIT.
+	* mpn/minithres/gmp-mparam.h: New SQRLO_*_THRESHOLDs.
+
+	* tune/tuneup.c (tune_mullo): Set MULLO_MUL_N_THRESHOLD to never
+	whenever the FFT threshold does not exist.
+
+2015-08-25  Torbjörn Granlund  <torbjorng at google.com>
+
+	* mpn/x86_64/x86_64-defs.m4: Output computed numbers in base-10 instead
+	of base-16 to avoid bugs on Solaris, FreeBSD, and old NetBSD.
+
 2015-08-23  Torbjörn Granlund  <torbjorng at google.com>
 
 	* mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Add more entries, handle
@@ -19,7 +41,7 @@
 	* config.sub: Corresponding changes.
 	* configure.ac: Corresponding changes.
 	* mpn/x86_64/skylake/gmp-mparam.h: New file.
-	
+
 2015-08-15  Torbjörn Granlund  <torbjorng at google.com>
 
 	* mpn/generic/mullo_basecase.c: Provide alternative code, make default.
diff -r 0fe018bd50ee -r 7121e1053956 configure.ac
--- a/configure.ac	Sun Aug 23 21:30:13 2015 +0200
+++ b/configure.ac	Tue Aug 25 20:15:04 2015 +0200
@@ -3473,6 +3473,7 @@
 #undef HAVE_NATIVE_mpn_mul_5
 #undef HAVE_NATIVE_mpn_mul_6
 #undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_mullo_basecase
 #undef HAVE_NATIVE_mpn_nand_n
 #undef HAVE_NATIVE_mpn_nior_n
 #undef HAVE_NATIVE_mpn_popcount
diff -r 0fe018bd50ee -r 7121e1053956 gmp-impl.h
--- a/gmp-impl.h	Sun Aug 23 21:30:13 2015 +0200
+++ b/gmp-impl.h	Tue Aug 25 20:15:04 2015 +0200
@@ -2090,6 +2090,12 @@
 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
 #define MULLO_BASECASE_THRESHOLD_LIMIT  MULLO_BASECASE_THRESHOLD
 #endif
+#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
+#define SQRLO_BASECASE_THRESHOLD_LIMIT  SQRLO_BASECASE_THRESHOLD
+#endif
+#ifndef SQRLO_DC_THRESHOLD_LIMIT
+#define SQRLO_DC_THRESHOLD_LIMIT  SQRLO_DC_THRESHOLD
+#endif
 
 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
    mpn_mul_basecase.  Default is to use mpn_sqr_basecase from 0.  (Note that we
@@ -2138,6 +2144,18 @@
 #define MULLO_MUL_N_THRESHOLD      (2*MUL_FFT_THRESHOLD)
 #endif
 
+#ifndef SQRLO_BASECASE_THRESHOLD
+#define SQRLO_BASECASE_THRESHOLD          0  /* never use mpn_sqr_basecase */
+#endif
+
+#ifndef SQRLO_DC_THRESHOLD
+#define SQRLO_DC_THRESHOLD         (MULLO_DC_THRESHOLD)
+#endif
+
+#ifndef SQRLO_SQR_THRESHOLD
+#define SQRLO_SQR_THRESHOLD        (MULLO_MUL_N_THRESHOLD)
+#endif
+
 #ifndef DC_DIV_QR_THRESHOLD
 #define DC_DIV_QR_THRESHOLD        (2*MUL_TOOM22_THRESHOLD)
 #endif
@@ -4789,6 +4807,18 @@
 #define MULLO_MUL_N_THRESHOLD		mullo_mul_n_threshold
 extern mp_size_t			mullo_mul_n_threshold;
 
+#undef	SQRLO_BASECASE_THRESHOLD
+#define SQRLO_BASECASE_THRESHOLD	sqrlo_basecase_threshold
+extern mp_size_t			sqrlo_basecase_threshold;
+
+#undef	SQRLO_DC_THRESHOLD
+#define SQRLO_DC_THRESHOLD		sqrlo_dc_threshold
+extern mp_size_t			sqrlo_dc_threshold;
+
+#undef	SQRLO_SQR_THRESHOLD
+#define SQRLO_SQR_THRESHOLD		sqrlo_sqr_threshold
+extern mp_size_t			sqrlo_sqr_threshold;
+
 #undef	MULMID_TOOM42_THRESHOLD
 #define MULMID_TOOM42_THRESHOLD		mulmid_toom42_threshold
 extern mp_size_t			mulmid_toom42_threshold;
@@ -4985,6 +5015,8 @@
 #undef MUL_TOOM22_THRESHOLD_LIMIT
 #undef MUL_TOOM33_THRESHOLD_LIMIT
 #undef MULLO_BASECASE_THRESHOLD_LIMIT
+#undef SQRLO_BASECASE_THRESHOLD_LIMIT
+#undef SQRLO_DC_THRESHOLD_LIMIT
 #undef SQR_TOOM3_THRESHOLD_LIMIT
 #define SQR_TOOM2_MAX_GENERIC           200
 #define MUL_TOOM22_THRESHOLD_LIMIT      700
@@ -4997,6 +5029,8 @@
 #define MUL_TOOM8H_THRESHOLD_LIMIT     1200
 #define SQR_TOOM8_THRESHOLD_LIMIT      1200
 #define MULLO_BASECASE_THRESHOLD_LIMIT  200
+#define SQRLO_BASECASE_THRESHOLD_LIMIT  200
+#define SQRLO_DC_THRESHOLD_LIMIT        400
 #define GET_STR_THRESHOLD_LIMIT         150
 #define FAC_DSC_THRESHOLD_LIMIT        2048
 
diff -r 0fe018bd50ee -r 7121e1053956 mpn/generic/sqrlo.c
--- a/mpn/generic/sqrlo.c	Sun Aug 23 21:30:13 2015 +0200
+++ b/mpn/generic/sqrlo.c	Tue Aug 25 20:15:04 2015 +0200
@@ -38,19 +38,6 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 
-#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
-#define SQRLO_BASECASE_THRESHOLD_LIMIT	200
-#endif
-#ifndef SQRLO_BASECASE_THRESHOLD
-#define SQRLO_BASECASE_THRESHOLD	0
-#endif
-#ifndef SQRLO_DC_THRESHOLD
-#define SQRLO_DC_THRESHOLD		(2*SQR_TOOM2_THRESHOLD)
-#endif
-#ifndef SQRLO_SQR_THRESHOLD
-#define SQRLO_SQR_THRESHOLD		(2*SQR_FFT_THRESHOLD)
-#endif
-
 #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
 #define MAYBE_range_basecase 1
 #define MAYBE_range_toom22   1
@@ -211,10 +198,17 @@
 
   if (BELOW_THRESHOLD (n, SQRLO_BASECASE_THRESHOLD))
     {
+      /* FIXME: smarter criteria? */
+#if HAVE_NATIVE_mpn_mullo_basecase || ! HAVE_NATIVE_mpn_sqr_basecase
+      /* mullo computes as many products as sqr, but directly writes
+	 on the result area. */
+      mpn_mullo_basecase (rp, xp, xp, n);      
+#else
       /* Allocate workspace of fixed size on stack: fast! */
       mp_limb_t tp[SQR_BASECASE_ALLOC];
       mpn_sqr_basecase (tp, xp, n);
       MPN_COPY (rp, tp, n);
+#endif
     }
   else if (BELOW_THRESHOLD (n, SQRLO_DC_THRESHOLD))
     {
diff -r 0fe018bd50ee -r 7121e1053956 mpn/generic/sqrlo_basecase.c
--- a/mpn/generic/sqrlo_basecase.c	Sun Aug 23 21:30:13 2015 +0200
+++ b/mpn/generic/sqrlo_basecase.c	Tue Aug 25 20:15:04 2015 +0200
@@ -38,6 +38,13 @@
 #include "gmp-impl.h"
 #include "longlong.h"
 
+#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
+#if HAVE_NATIVE_mpn_addmul_1
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
+#else
+#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
+#endif
+#endif
 
 #if HAVE_NATIVE_mpn_sqr_diagonal
 #define MPN_SQR_DIAGONAL(rp, up, n)					\
@@ -56,8 +63,7 @@
   } while (0)
 #endif
 
-#if HAVE_NATIVE_mpn_addlsh1_n_ip1
-#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
+#define MPN_SQRLO_DIAGONAL(rp, up, n)					\
   do {									\
     mp_size_t nhalf;							\
     nhalf = (n) >> 1;							\
@@ -68,25 +74,26 @@
 	op = (up)[nhalf];						\
 	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
       }									\
+  } while (0)
+
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
+  do {									\
+    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
     mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
   } while (0)
 #else
 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
   do {									\
-    mp_size_t nhalf;							\
-    nhalf = (n) >> 1;							\
-    MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
-    if (((n) & 1) != 0)							\
-      {									\
-	mp_limb_t op;							\
-	op = (up)[nhalf];						\
-	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
-      }									\
+    MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
     mpn_lshift ((tp), (tp), (n) - 1, 1);				\
     mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
   } while (0)
 #endif
 
+/* Avoid a zero-size allocation when SQRLO_DC_THRESHOLD_LIMIT is below 2 (this code is not used then). */
+#define SQRLO_BASECASE_ALLOC						\
+  (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
 
 /* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
 #ifndef SQRLO_SPECIAL_CASES
@@ -143,14 +150,14 @@
     }
   else
     {
-      mp_limb_t tp[2 * SQR_TOOM2_THRESHOLD - 1];
+      mp_limb_t tp[SQRLO_BASECASE_ALLOC];
       mp_size_t i;
 
       /* must fit n-1 limbs in tp */
-      ASSERT (n <= 2 * SQR_TOOM2_THRESHOLD);
+      ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
 
       --n;
-#ifdef SQRLO_SHORTCUT_MULTIPLICATIONS
+#if SQRLO_SHORTCUT_MULTIPLICATIONS
       {
 	mp_limb_t cy;
 
diff -r 0fe018bd50ee -r 7121e1053956 mpn/minithres/gmp-mparam.h
--- a/mpn/minithres/gmp-mparam.h	Sun Aug 23 21:30:13 2015 +0200
+++ b/mpn/minithres/gmp-mparam.h	Tue Aug 25 20:15:04 2015 +0200
@@ -75,6 +75,10 @@
 #define MULLO_BASECASE_THRESHOLD             0
 #define MULLO_DC_THRESHOLD                   2
 #define MULLO_MUL_N_THRESHOLD                4
+#define SQRLO_BASECASE_THRESHOLD             0
+#define SQRLO_DC_THRESHOLD                   2
+#define SQRLO_SQR_THRESHOLD                  4
+
 
 #define DC_DIV_QR_THRESHOLD                  6
 #define DC_DIVAPPR_Q_THRESHOLD               6
diff -r 0fe018bd50ee -r 7121e1053956 mpn/x86_64/coreisbr/gmp-mparam.h
--- a/mpn/x86_64/coreisbr/gmp-mparam.h	Sun Aug 23 21:30:13 2015 +0200
+++ b/mpn/x86_64/coreisbr/gmp-mparam.h	Tue Aug 25 20:15:04 2015 +0200
@@ -185,6 +185,9 @@
 #define MULLO_BASECASE_THRESHOLD             0  /* always */
 #define MULLO_DC_THRESHOLD                  62
 #define MULLO_MUL_N_THRESHOLD             8907
+#define SQRLO_BASECASE_THRESHOLD            11
+#define SQRLO_DC_THRESHOLD                 124
+#define SQRLO_SQR_THRESHOLD               6526
 
 #define DC_DIV_QR_THRESHOLD                 52
 #define DC_DIVAPPR_Q_THRESHOLD             166
diff -r 0fe018bd50ee -r 7121e1053956 mpn/x86_64/x86_64-defs.m4
--- a/mpn/x86_64/x86_64-defs.m4	Sun Aug 23 21:30:13 2015 +0200
+++ b/mpn/x86_64/x86_64-defs.m4	Tue Aug 25 20:15:04 2015 +0200
@@ -338,16 +338,16 @@
 define(`mulx',`dnl
 .byte	0xc4`'dnl
 ifelse(`$#',3,`dnl
-,0x`'eval(0xe2^32*regnumh($1)^128*regnumh($3),16)`'dnl
-,0x`'eval(0xfb-8*regnum($2),16)`'dnl
+,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
+,eval(0xfb-8*regnum($2))`'dnl
 ,0xf6`'dnl
-,0x`'eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1),16)`'dnl
+,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
 ',`$#',4,`dnl
-,0x`'eval(0xe2^32*regnumh($2)^128*regnumh($4),16)`'dnl
-,0x`'eval(0xfb-8*regnum($3),16)`'dnl
+,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
+,eval(0xfb-8*regnum($3))`'dnl
 ,0xf6`'dnl
-,0x`'eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)),16)`'dnl
-,0x`'eval(($1 + 256) % 256,16)`'dnl
+,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
+,eval(($1 + 256) % 256)`'dnl
 ')')
 
 dnl  Usage
@@ -375,7 +375,7 @@
 dnl  detected.  Offsets that don't fit one byte are not handled correctly.
 


More information about the gmp-commit mailing list