[Gmp-commit] /home/hgfiles/gmp: Add squaring capabilities to nussbaumer_mul.

mercurial at gmplib.org mercurial at gmplib.org
Thu Dec 17 17:35:49 CET 2009


details:   /home/hgfiles/gmp/rev/c58defebaeee
changeset: 13117:c58defebaeee
user:      Marco Bodrato <bodrato at mail.dm.unipi.it>
date:      Thu Dec 17 17:35:44 2009 +0100
description:
Add squaring capabilities to nussbaumer_mul.

diffstat:

 ChangeLog                    |  6 ++++++
 mpn/generic/nussbaumer_mul.c |  5 ++++-
 mpn/generic/sqrmod_bnm1.c    |  2 +-
 tune/common.c                |  6 ++++++
 tune/speed.c                 |  1 +
 tune/speed.h                 |  1 +
 6 files changed, 19 insertions(+), 2 deletions(-)

diffs (81 lines):

diff -r a43049d48245 -r c58defebaeee ChangeLog
--- a/ChangeLog	Thu Dec 17 17:26:42 2009 +0100
+++ b/ChangeLog	Thu Dec 17 17:35:44 2009 +0100
@@ -53,6 +53,12 @@
 
 	* tune/tuneup.c: Tune SQRMOD_BNM1_THRESHOLD.
 
+	* mpn/generic/nussbaumer_mul.c (mpn_nussbaumer_mul): Mimic fft_mul,
+	use squaring if operands coincide.
+	* tune/speed.h (speed_mpn_nussbaumer_mul_sqr): Declare function.
+	* tune/common.c (speed_mpn_nussbaumer_mul_sqr): New function.
+	* tune/speed.c (routine): Add speed_mpn_nussbaumer_mul_sqr.
+
 2009-12-17  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/generic/bdiv_q.c (mpn_bdiv_q_itch): Rewrite.
diff -r a43049d48245 -r c58defebaeee mpn/generic/nussbaumer_mul.c
--- a/mpn/generic/nussbaumer_mul.c	Thu Dec 17 17:26:42 2009 +0100
+++ b/mpn/generic/nussbaumer_mul.c	Thu Dec 17 17:35:44 2009 +0100
@@ -46,7 +46,10 @@
   TMP_MARK;
   TMP_ALLOC_LIMBS_2(rp, rn, tp, mpn_mulmod_bnm1_itch (rn));
 
-  mpn_mulmod_bnm1 (rp, rn, ap, an, bp, bn, tp);
+  if ((ap == bp) && (an == bn))
+    mpn_sqrmod_bnm1 (rp, rn, ap, an, tp);
+  else
+    mpn_mulmod_bnm1 (rp, rn, ap, an, bp, bn, tp);
 
   MPN_COPY (pp, rp, an + bn);
   TMP_FREE;
diff -r a43049d48245 -r c58defebaeee mpn/generic/sqrmod_bnm1.c
--- a/mpn/generic/sqrmod_bnm1.c	Thu Dec 17 17:26:42 2009 +0100
+++ b/mpn/generic/sqrmod_bnm1.c	Thu Dec 17 17:35:44 2009 +0100
@@ -167,7 +167,7 @@
 	else
 	  {
 	    int mask;
-	    k = mpn_fft_best_k (n, 0);
+	    k = mpn_fft_best_k (n, 1);
 	    mask = (1<<k) -1;
 	    while (n & mask) {k--; mask >>=1;};
 	  }
diff -r a43049d48245 -r c58defebaeee tune/common.c
--- a/tune/common.c	Thu Dec 17 17:26:42 2009 +0100
+++ b/tune/common.c	Thu Dec 17 17:35:44 2009 +0100
@@ -1041,6 +1041,12 @@
   SPEED_ROUTINE_MPN_MUL_N_CALL
     (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
 }
+double
+speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR_CALL
+    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
+}
 
 double
 speed_mpn_mul_fft_full (struct speed_params *s)
diff -r a43049d48245 -r c58defebaeee tune/speed.c
--- a/tune/speed.c	Thu Dec 17 17:26:42 2009 +0100
+++ b/tune/speed.c	Thu Dec 17 17:35:44 2009 +0100
@@ -300,6 +300,7 @@
   { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
   { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
   { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
+  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
   { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
   { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
 
diff -r a43049d48245 -r c58defebaeee tune/speed.h
--- a/tune/speed.h	Thu Dec 17 17:26:42 2009 +0100
+++ b/tune/speed.h	Thu Dec 17 17:35:44 2009 +0100
@@ -224,6 +224,7 @@
 double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_nussbaumer_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nussbaumer_mul_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
 double speed_mpn_mullo_n __GMP_PROTO ((struct speed_params *s));


More information about the gmp-commit mailing list