gmp
view mpn/generic/toom32_mul.c @ 12623:b266808afaf4
*** empty log message ***
| author | Torbjorn Granlund <tege@gmplib.org> |
|---|---|
| date | Thu, 11 Jun 2009 00:09:18 +0200 |
| parents | 8ad9d3280dd2 |
| children | 7c15d5afcfb6 |
line source
1 /* mpn_toom32_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 1.5
2 times as large as bn. Or more accurately, bn < an < 3bn.
4 Contributed to the GNU project by Torbjorn Granlund.
5 Improvements by Marco Bodrato.
7 The idea of applying toom to unbalanced multiplication is due to Marco
8 Bodrato and Alberto Zanoni.
10 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
11 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
12 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
14 Copyright 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
16 This file is part of the GNU MP Library.
18 The GNU MP Library is free software; you can redistribute it and/or modify
19 it under the terms of the GNU Lesser General Public License as published by
20 the Free Software Foundation; either version 3 of the License, or (at your
21 option) any later version.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
26 License for more details.
28 You should have received a copy of the GNU Lesser General Public License
29 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
32 #include "gmp.h"
33 #include "gmp-impl.h"
35 /* Evaluate in: -1, 0, +1, +inf
37 <-s-><--n--><--n-->
38 ___ ______ ______
39 |a2_|___a1_|___a0_|
40 |_b1_|___b0_|
41 <-t--><--n-->
43 v0 = a0 * b0 # A(0)*B(0)
44 v1 = (a0+ a1+ a2)*(b0+ b1) # A(1)*B(1) ah <= 2 bh <= 1
45 vm1 = (a0- a1+ a2)*(b0- b1) # A(-1)*B(-1) |ah| <= 1 bh = 0
46 vinf= a2 * b1 # A(inf)*B(inf)
47 */
49 #if TUNE_PROGRAM_BUILD
50 #define MAYBE_mul_toom22 1
51 #else
52 #define MAYBE_mul_toom22 \
53 (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
54 #endif
56 #define TOOM22_MUL_N_REC(p, a, b, n, ws) \
57 do { \
58 if (! MAYBE_mul_toom22 \
59 || BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
60 mpn_mul_basecase (p, a, n, b, n); \
61 else \
62 mpn_toom22_mul (p, a, n, b, n, ws); \
63 } while (0)
65 void
66 mpn_toom32_mul (mp_ptr pp,
67 mp_srcptr ap, mp_size_t an,
68 mp_srcptr bp, mp_size_t bn,
69 mp_ptr scratch)
70 {
71 mp_size_t n, s, t;
72 int vm1_neg;
73 mp_limb_t cy;
75 #define a0 ap
76 #define a1 (ap + n)
77 #define a2 (ap + 2 * n)
78 #define b0 bp
79 #define b1 (bp + n)
81 n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
83 s = an - 2 * n;
84 t = bn - n;
86 ASSERT (0 < s && s <= n);
87 ASSERT (0 < t && t <= n);
89 #define as1 (pp + n + 1) /* n+1 */
90 #define asm1 (scratch + n) /* n+1 */
91 #define bs1 (pp) /* n+1 */
92 #define bsm1 (scratch) /* n */
93 #define a0_a2 (scratch) /* n */
95 /* Compute as1 and asm1. */
96 asm1[n] = mpn_add (a0_a2, a0, n, a2, s);
97 #if HAVE_NATIVE_mpn_addsub_n
98 if (asm1[n] == 0 && mpn_cmp (a0_a2, a1, n) < 0)
99 {
100 cy = mpn_addsub_n (as1, asm1, a1, a0_a2, n);
101 as1[n] = cy >> 1;
102 vm1_neg = 1;
103 }
104 else
105 {
106 cy = mpn_addsub_n (as1, asm1, a0_a2, a1, n);
107 as1[n] = asm1[n] + (cy >> 1);
108 asm1[n]-= cy & 1;
109 vm1_neg = 0;
110 }
111 #else
112 as1[n] = asm1[n] + mpn_add_n (as1, a0_a2, a1, n);
113 if (asm1[n] == 0 && mpn_cmp (a0_a2, a1, n) < 0)
114 {
115 mpn_sub_n (asm1, a1, a0_a2, n);
116 vm1_neg = 1;
117 }
118 else
119 {
120 cy = mpn_sub_n (asm1, a0_a2, a1, n);
121 asm1[n]-= cy;
122 vm1_neg = 0;
123 }
124 #endif
126 /* Compute bs1 and bsm1. */
127 if (t == n)
128 {
129 #if HAVE_NATIVE_mpn_addsub_n
130 if (mpn_cmp (b0, b1, n) < 0)
131 {
132 cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
133 vm1_neg ^= 1;
134 }
135 else
136 {
137 cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
138 }
139 bs1[n] = cy >> 1;
140 #else
141 bs1[n] = mpn_add_n (bs1, b0, b1, n);
143 if (mpn_cmp (b0, b1, n) < 0)
144 {
145 mpn_sub_n (bsm1, b1, b0, n);
146 vm1_neg ^= 1;
147 }
148 else
149 {
150 mpn_sub_n (bsm1, b0, b1, n);
151 }
152 #endif
153 }
154 else
155 {
156 bs1[n] = mpn_add (bs1, b0, n, b1, t);
158 if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
159 {
160 mpn_sub_n (bsm1, b1, b0, t);
161 MPN_ZERO (bsm1 + t, n - t);
162 vm1_neg ^= 1;
163 }
164 else
165 {
166 mpn_sub (bsm1, b0, n, b1, t);
167 }
168 }
170 ASSERT (as1[n] <= 2);
171 ASSERT (bs1[n] <= 1);
172 ASSERT (asm1[n] <= 1);
173 /*ASSERT (bsm1[n] == 0); */
175 #define v0 pp /* 2n */
176 #define v1 (scratch) /* 2n+1 */
177 #define vinf (pp + 3 * n) /* s+t */
178 #define vm1 (scratch + 2 * n + 1) /* 2n+1 */
179 #define scratch_out scratch + 4 * n + 2
181 /* vm1, 2n+1 limbs */
182 TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
183 cy = 0;
184 if (asm1[n] != 0)
185 cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
186 vm1[2 * n] = cy;
188 /* v1, 2n+1 limbs */
189 TOOM22_MUL_N_REC (v1, as1, bs1, n, scratch_out);
190 if (as1[n] == 1)
191 {
192 cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
193 }
194 else if (as1[n] == 2)
195 {
196 #if HAVE_NATIVE_mpn_addlsh1_n
197 cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
198 #else
199 cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
200 #endif
201 }
202 else
203 cy = 0;
204 if (bs1[n] != 0)
205 cy += mpn_add_n (v1 + n, v1 + n, as1, n);
206 v1[2 * n] = cy;
208 /* vinf, s+t limbs. Use mpn_mul for now, to handle unbalanced operands */
209 if (s > t) mpn_mul (vinf, a2, s, b1, t);
210 else mpn_mul (vinf, b1, t, a2, s);
212 /* v0, 2n limbs */
213 TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
215 /* Interpolate */
217 if (vm1_neg)
218 {
219 #if HAVE_NATIVE_mpn_rsh1add_n
220 mpn_rsh1add_n (vm1, v1, vm1, 2 * n + 1);
221 #else
222 mpn_add_n (vm1, v1, vm1, 2 * n + 1);
223 mpn_rshift (vm1, vm1, 2 * n + 1, 1);
224 #endif
225 }
226 else
227 {
228 #if HAVE_NATIVE_mpn_rsh1sub_n
229 mpn_rsh1sub_n (vm1, v1, vm1, 2 * n + 1);
230 #else
231 mpn_sub_n (vm1, v1, vm1, 2 * n + 1);
232 mpn_rshift (vm1, vm1, 2 * n + 1, 1);
233 #endif
234 }
236 t += s; /* limbs in vinf */
237 if (LIKELY(t>=n)) /* We should consider this branch only, even better t>n */
238 s = n; /* limbs in Lvinf */
239 else {
240 s = t; /* limbs in Lvinf */
241 v1[2 * n]=0;
242 }
244 mpn_sub_n (v1, v1, vm1, s + n + 1);
246 /*
247 pp[] prior to operations:
248 |_H vinf|_L vinf|_______|_H v0__|_L v0__|
250 summation scheme for remaining operations:
251 |______4|n_____3|n_____2|n______|n______|pp
252 |_Hvinf_|_L*vinf| |_H*v0__|_L v0__|
253 || H vm1 | L vm1 |
254 |-H vinf|-L*vinf|
255 || H v1 | L v1 |
256 |-H*v0 |-L v0 |
258 We will avoid double computation of Hv0-Lvinf. Be careful! This
259 operation can give a negative result!
260 */
262 cy = mpn_sub_n (v0 + n, v0 + n, vinf, s); /* Hv0-Lvinf*/
263 v1[ n + s ] += cy - mpn_sub_n(pp + 2 * n, v1, v0, n + s); /* v1-v0+Lvinf */
264 cy -= mpn_add_n (pp + n, pp + n, vm1, s); /* (Hv0-Lvinf)+Lvm1 */
265 if (cy != 0) { /* carry and borrow did not cancel one another, apply the right one */
266 if (cy == 1)
267 MPN_DECR_U (vm1 + s, 2 * n + 1 - s, 1);
268 else /* (cy == -1) */
269 MPN_INCR_U (vm1 + s, 2 * n + 1 - s, 1);
270 }
271 if (LIKELY(t>n)) { /* It works also without this "if", t<=n implies t-s==0,v1[2*n]==0 */
272 mpn_sub (vm1 + n, vm1 + n, n + 1, vinf + n, t - s); /* Hvm1-Hvinf */
273 MPN_INCR_U (vinf + s, t - s, v1[2 * n]);
274 }
275 cy = mpn_add_n (pp + n + s, pp + n + s, vm1 + s, 2 * n + 1 - s);
276 MPN_INCR_U (vinf + 1, t - 1, cy);
278 #undef a0
279 #undef a1
280 #undef a2
281 #undef b0
282 #undef b1
283 #undef v0
284 #undef v1
285 #undef vinf
286 #undef vm1
287 #undef a0_a2
288 #undef bsm1
289 #undef asm1
290 #undef bs1
291 #undef as1
292 }
