29 #ifndef RANDOM_DSFMT_H
30 #define RANDOM_DSFMT_H
39 # include <emmintrin.h>
92 template <
int MEXP,
int POS1,
int SL1, uint64_t MSK1, uint64_t MSK2,
93 uint32_t MSK32_1, uint32_t MSK32_2,
94 uint32_t MSK32_3, uint32_t MSK32_4,
95 uint64_t
FIX1, uint64_t FIX2, uint64_t PCV1, uint64_t PCV2>
121 int size = (N + 1) * 4;
122 uint32_t *psfmt = &status[0].u32[0];
123 ivec state(size + 1);
124 for (
int i = 0; i <
size; ++i) {
133 int size = (N + 1) * 4;
134 it_assert(state.size() == size + 1,
"Random_Generator::set_state(): "
135 "Invalid state initialization vector");
136 uint32_t *psfmt = &status[0].u32[0];
137 for (
int i = 0; i <
size; ++i) {
151 uint32_t *psfmt = &status[0].u32[0];
152 psfmt[idxof(0)] = seed;
153 for (
int i = 1; i < (N + 1) * 4; i++) {
154 psfmt[idxof(i)] = 1812433253UL
155 * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
158 period_certification();
160 #if defined(__SSE2__)
161 sse2_param_mask = _mm_set_epi32(MSK32_3, MSK32_4, MSK32_1, MSK32_2);
169 uint64_t *psfmt64 = &status[0].u[0];
171 dsfmt_gen_rand_all();
174 return psfmt64[idx++] & 0xffffffffU;
187 double *psfmt64 = &status[0].d[0];
189 dsfmt_gen_rand_all();
192 return psfmt64[idx++];
224 double *dsfmt64 = &status[0].d[0];
231 dsfmt_gen_rand_all();
234 r.d = dsfmt64[idx++];
241 static const int N = (MEXP - 128) / 104 + 1;
242 static const int Nx2 = N * 2;
243 static const uint64_t LOW_MASK = 0x000fffffffffffffULL;
244 static const uint64_t HIGH_CONST = 0x3ff0000000000000ULL;
245 static const unsigned int SR = 12U;
246 #if defined(__SSE2__)
247 static const unsigned int SSE2_SHUFF = 0x1bU;
252 #if defined(__SSE2__)
261 typedef union W128_T w128_t;
263 static w128_t status[N + 1];
267 static unsigned int last_seed;
270 static bool initialized;
272 static bool bigendian;
274 #if defined(__SSE2__)
276 static __m128i sse2_param_mask;
285 static unsigned int hash(time_t t, clock_t c)
287 static unsigned int differ = 0;
290 unsigned char *p = (
unsigned char *) &t;
291 for (
size_t i = 0; i <
sizeof(t); ++i) {
296 p = (
unsigned char *) &c;
297 for (
size_t j = 0; j <
sizeof(c); ++j) {
301 return (h1 + differ++) ^ h2;
308 static int idxof(
int i) {
return (bigendian ? (i ^ 1) : i); }
314 static void initial_mask() {
315 uint64_t *psfmt = &status[0].u[0];
316 for (
int i = 0; i < Nx2; i++) {
317 psfmt[i] = (psfmt[i] & LOW_MASK) | HIGH_CONST;
322 static void period_certification() {
323 uint64_t pcv[2] = {PCV1, PCV2};
327 tmp[0] = (status[N].u[0] ^
FIX1);
328 tmp[1] = (status[N].u[1] ^ FIX2);
330 inner = tmp[0] & pcv[0];
331 inner ^= tmp[1] & pcv[1];
332 for (
int i = 32; i > 0; i >>= 1) {
345 for (
int i = 1; i >= 0; i--) {
347 for (
int j = 0; j < 64; j++) {
348 if ((work & pcv[i]) != 0) {
349 status[N].u[i] ^= work;
355 #endif // (PCV2 & 1) == 1
366 static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *lung) {
367 #if defined(__SSE2__)
369 __m128i z = _mm_slli_epi64(x, SL1);
370 __m128i y = _mm_shuffle_epi32(lung->si, SSE2_SHUFF);
371 z = _mm_xor_si128(z, b->si);
372 y = _mm_xor_si128(y, z);
374 __m128i v = _mm_srli_epi64(y, SR);
375 __m128i w = _mm_and_si128(y, sse2_param_mask);
376 v = _mm_xor_si128(v, x);
377 v = _mm_xor_si128(v, w);
380 #else // standard C++
381 uint64_t t0 = a->u[0];
382 uint64_t t1 = a->u[1];
383 uint64_t L0 = lung->u[0];
384 uint64_t L1 = lung->u[1];
385 lung->u[0] = (t0 << SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
386 lung->u[1] = (t1 << SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
387 r->u[0] = (lung->u[0] >> SR) ^ (lung->u[0] & MSK1) ^ t0;
388 r->u[1] = (lung->u[1] >> SR) ^ (lung->u[1] & MSK2) ^ t1;
396 static void dsfmt_gen_rand_all() {
398 w128_t lung = status[N];
399 do_recursion(&status[0], &status[0], &status[POS1], &lung);
400 for (i = 1; i < N - POS1; i++) {
401 do_recursion(&status[i], &status[i], &status[i + POS1], &lung);
404 do_recursion(&status[i], &status[i], &status[i + POS1 - N], &lung);
416 typedef DSFMT<521, 3, 25,
417 0x000fbfefff77efffULL, 0x000ffeebfbdfbfdfULL,
418 0x000fbfefU, 0xff77efffU, 0x000ffeebU, 0xfbdfbfdfU,
419 0xcfb393d661638469ULL, 0xc166867883ae2adbULL,
420 0xccaa588000000000ULL, 0x0000000000000001ULL> DSFMT_521_RNG;
422 typedef DSFMT<1279, 9, 19,
423 0x000efff7ffddffeeULL, 0x000fbffffff77fffULL,
424 0x000efff7U, 0xffddffeeU, 0x000fbfffU, 0xfff77fffU,
425 0xb66627623d1a31beULL, 0x04b6c51147b6109bULL,
426 0x7049f2da382a6aebULL, 0xde4ca84a40000001ULL> DSFMT_1279_RNG;
428 typedef DSFMT<2203, 7, 19,
429 0x000fdffff5edbfffULL, 0x000f77fffffffbfeULL,
430 0x000fdfffU, 0xf5edbfffU, 0x000f77ffU, 0xfffffbfeU,
431 0xb14e907a39338485ULL, 0xf98f0735c637ef90ULL,
432 0x8000000000000000ULL, 0x0000000000000001ULL> DSFMT_2203_RNG;
434 typedef DSFMT<4253, 19, 19,
435 0x0007b7fffef5feffULL, 0x000ffdffeffefbfcULL,
436 0x0007b7ffU, 0xfef5feffU, 0x000ffdffU, 0xeffefbfcU,
437 0x80901b5fd7a11c65ULL, 0x5a63ff0e7cb0ba74ULL,
438 0x1ad277be12000000ULL, 0x0000000000000001ULL> DSFMT_4253_RNG;
440 typedef DSFMT<11213, 37, 19,
441 0x000ffffffdf7fffdULL, 0x000ffffffdf7fffdULL,
442 0x000fffffU, 0xfdf7fffdU, 0x000dffffU, 0xfff6bfffU,
443 0xd0ef7b7c75b06793ULL, 0x9c50ff4caae0a641ULL,
444 0x8234c51207c80000ULL, 0x0000000000000001ULL> DSFMT_11213_RNG;
446 typedef DSFMT<19937, 117, 19,
447 0x000ffafffffffb3fULL, 0x000ffdfffc90fffdULL,
448 0x000ffaffU, 0xfffffb3fU, 0x000ffdffU, 0xfc90fffdU,
449 0x90014964b32f4329ULL, 0x3b8d12ac548a7c7aULL,
450 0x3d84e1ac0dc82880ULL, 0x0000000000000001ULL> DSFMT_19937_RNG;
455 #endif // #ifndef RANDOM_DSFMT_H