mpz_prevprime
Niels Möller
nisse at lysator.liu.se
Wed Feb 12 18:25:42 UTC 2020
"Marco Bodrato" <bodrato at mail.dm.unipi.it> writes:
> Ciao,
>
> Il Mer, 12 Febbraio 2020 7:26 am, Niels Möller ha scritto:
>> And for a small prime gap (common case), this cost should be the main
>> part of the sieving. So it would make sense to optimize it. If we also
>> use the ptab, we could compute modulo single-limb prime products using
>> precomputed constants.
>
> Yes, of course.
Another version below, using ptab. Timing:
$ ./tune/speed -s 100-300,500,1000 -f 1.1 mpz_nextprime
mpz_nextprime
100 0.000116762
110 0.000126770
121 0.000147203
133 0.000257994
146 0.000291051
160 0.000343168
176 0.000386605
193 0.000625994
212 0.000670035
233 0.000763240
256 0.000942154
281 0.001418639
500 0.006632588
1000 0.073935992
So also close to 15% speedup for 100 bits, and the same as previous
version, almost 25%, for 1000 bits.
I used the very crude tuning
limit = (nbits < 200) ? nbits / 2 : PTAB_SIZE;
to avoid regressing for smaller sizes.
> And the next question will be: should we generate on the fly an even
> larger table of primes when the required size grows beyond the
> precomputed table?
Don't know. I don't quite understand how large a table could be
beneficial, but I guess it might make sense to at least limit it to size
of L1 cache.
Regards,
/Niels
/* mpz_nextprime(p,t) - compute the next prime > t and store that in p.
Copyright 1999-2001, 2008, 2009, 2012 Free Software Foundation, Inc.
Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:
* the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your
option) any later version.
or
* the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any
later version.
or both in parallel, as here.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library. If not,
see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#include "longlong.h"
#if 1
struct gmp_primes_dtab {
mp_limb_t p;
mp_limb_t binv;
mp_limb_t lim;
};
struct gmp_primes_ptab {
mp_limb_t ppp; /* primes, multiplied together */
mp_limb_t cps[7]; /* ppp values pre-computed for mpn_mod_1s_4p */
gmp_uint_least32_t idx:24; /* index of first primes in dtab */
gmp_uint_least32_t np :8; /* number of primes related to this entry */
};
static const struct gmp_primes_dtab dtab[] =
{
#define WANT_dtab
#define P(p,inv,lim) {p, inv,lim}
#include "trialdivtab.h"
#undef WANT_dtab
#undef P
};
#define DTAB_SIZE (sizeof (dtab) / sizeof (dtab[0]))
static const struct gmp_primes_ptab ptab[] =
{
#define WANT_ptab
#include "trialdivtab.h"
#undef WANT_ptab
};
#define PTAB_SIZE (sizeof (ptab) / sizeof (ptab[0]))
void
mpz_nextprime (mpz_ptr p, mpz_srcptr n)
{
mp_limb_t *moduli;
unsigned long difference;
int i;
unsigned prime_limit;
mp_bitcnt_t nbits;
unsigned incr;
TMP_DECL;
/* First handle tiny numbers */
if (mpz_cmp_ui (n, 2) < 0)
{
mpz_set_ui (p, 2);
return;
}
mpz_add_ui (p, n, 1);
mpz_setbit (p, 0);
if (mpz_cmp_ui (p, 7) <= 0)
return;
MPN_SIZEINBASE_2EXP(nbits, PTR(p), SIZ(p), 1);
if (nbits < 200)
{
prime_limit = nbits / 2;
if (SIZ(p) == 1)
{
/* If the prime list includes any prime >= p, do plain
search. */
mp_limb_t limb = PTR(p)[0];
unsigned last = ptab[prime_limit-1].idx + ptab[prime_limit-1].np - 1;
if (dtab[last].p >= limb)
{
while (dtab[last-1].p >= limb)
last--;
PTR(p)[0] = dtab[last].p;
return;
}
}
}
else
prime_limit = PTAB_SIZE;
TMP_MARK;
/* Compute residues modulo small odd primes */
moduli = TMP_ALLOC_TYPE (prime_limit, mp_limb_t);
for (;;)
{
for (i = 0; i < prime_limit; i++)
{
mp_limb_t ppp = ptab[i].ppp;
const mp_limb_t *cps = ptab[i].cps;
moduli[i] = mpn_mod_1s_4p (PTR(p), SIZ(p), ppp << cps[1], cps);
}
#define INCR_LIMIT 0x10000 /* deep science */
for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
{
/* First check divisibility based on prime list */
for (i = 0; i < prime_limit; i++)
{
/* FIXME: Ensure that this addition can't overflow */
mp_limb_t m = moduli[i] + incr;
int idx = ptab[i].idx;
int np = ptab[i].np;
int j;
for (j = 0; j < np; j++)
if (m * dtab[idx+j].binv <= dtab[idx+j].lim)
goto next;
}
mpz_add_ui (p, p, difference);
difference = 0;
/* Miller-Rabin test */
if (mpz_millerrabin (p, 25))
goto done;
next:;
incr += 2;
}
mpz_add_ui (p, p, difference);
difference = 0;
}
done:
TMP_FREE;
}
#else
static const unsigned char primegap[] =
{
2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6,
2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2,
4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6,
12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8,
6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6,
6,14,4,6,6,8,6,12
};
#define NUMBER_OF_PRIMES 167
void
mpz_nextprime (mpz_ptr p, mpz_srcptr n)
{
unsigned short *moduli;
unsigned long difference;
int i;
unsigned prime_limit;
unsigned long prime;
mp_size_t pn;
mp_bitcnt_t nbits;
unsigned incr;
TMP_SDECL;
/* First handle tiny numbers */
if (mpz_cmp_ui (n, 2) < 0)
{
mpz_set_ui (p, 2);
return;
}
mpz_add_ui (p, n, 1);
mpz_setbit (p, 0);
if (mpz_cmp_ui (p, 7) <= 0)
return;
pn = SIZ(p);
MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
if (nbits / 2 >= NUMBER_OF_PRIMES)
prime_limit = NUMBER_OF_PRIMES - 1;
else
prime_limit = nbits / 2;
TMP_SMARK;
/* Compute residues modulo small odd primes */
moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short);
for (;;)
{
/* FIXME: Compute lazily? */
prime = 3;
for (i = 0; i < prime_limit; i++)
{
moduli[i] = mpz_tdiv_ui (p, prime);
prime += primegap[i];
}
#define INCR_LIMIT 0x10000 /* deep science */
for (difference = incr = 0; incr < INCR_LIMIT; difference += 2)
{
/* First check residues */
prime = 3;
for (i = 0; i < prime_limit; i++)
{
unsigned r;
/* FIXME: Reduce moduli + incr and store back, to allow for
division-free reductions. Alternatively, table primes[]'s
inverses (mod 2^16). */
r = (moduli[i] + incr) % prime;
prime += primegap[i];
if (r == 0)
goto next;
}
mpz_add_ui (p, p, difference);
difference = 0;
/* Miller-Rabin test */
if (mpz_millerrabin (p, 25))
goto done;
next:;
incr += 2;
}
mpz_add_ui (p, p, difference);
difference = 0;
}
done:
TMP_SFREE;
}
#endif
--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
More information about the gmp-devel
mailing list