xoshiro.hpp 14 KB


  1. /*
  2. * Copyright Matt Borland 2022 - 2025.
  3. * Distributed under the Boost Software License, Version 1.0. (See
  4. * accompanying file LICENSE_1_0.txt or copy at
  5. * http://www.boost.org/LICENSE_1_0.txt)
  6. *
  7. * See http://www.boost.org for most recent version including documentation.
  8. *
  9. * $Id$
  10. */
  11. #ifndef BOOST_RANDOM_XOSHIRO_HPP
  12. #define BOOST_RANDOM_XOSHIRO_HPP
  13. #include <boost/random/detail/config.hpp>
  14. #include <boost/random/detail/xoshiro_base.hpp>
  15. #include <boost/core/bit.hpp>
  16. #include <array>
  17. #include <cstdint>
  18. namespace boost {
  19. namespace random {
  20. /**
  21. * This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.
  22. * It has excellent (sub-ns) speed, a state (256 bits) that is large
  23. * enough for any parallel application, and it passes all tests we are
  24. * aware of.
  25. *
  26. * For generating just floating-point numbers, xoshiro256+ is even faster.
  27. */
  28. class xoshiro256pp final : public detail::xoshiro_base<xoshiro256pp, 4>
  29. {
  30. private:
  31. using Base = detail::xoshiro_base<xoshiro256pp, 4>;
  32. public:
  33. using Base::Base;
  34. inline result_type next() noexcept
  35. {
  36. const std::uint64_t result = boost::core::rotl(state_[0] + state_[3], 23) + state_[0];
  37. const std::uint64_t t = state_[1] << 17;
  38. state_[2] ^= state_[0];
  39. state_[3] ^= state_[1];
  40. state_[1] ^= state_[2];
  41. state_[0] ^= state_[3];
  42. state_[2] ^= t;
  43. state_[3] = boost::core::rotl(state_[3], 45);
  44. return result;
  45. }
  46. };
  47. /**
  48. * This is xoshiro256+ 1.0, our best and fastest generator for floating-point
  49. * numbers. We suggest to use its upper bits for floating-point
  50. * generation, as it is slightly faster than xoshiro256++/xoshiro256**. It
  51. * passes all tests we are aware of except for the lowest three bits,
  52. * which might fail linearity tests (and just those), so if low linear
  53. * complexity is not considered an issue (as it is usually the case) it
  54. * can be used to generate 64-bit outputs, too.
  55. */
  56. class xoshiro256d final : public detail::xoshiro_base<xoshiro256d, 4, double>
  57. {
  58. private:
  59. using Base = detail::xoshiro_base<xoshiro256d, 4, double>;
  60. public:
  61. using Base::Base;
  62. inline std::uint64_t next_int() noexcept
  63. {
  64. const std::uint64_t result = state_[0] + state_[3];
  65. const std::uint64_t t = state_[1] << 17;
  66. state_[2] ^= state_[0];
  67. state_[3] ^= state_[1];
  68. state_[1] ^= state_[2];
  69. state_[0] ^= state_[3];
  70. state_[2] ^= t;
  71. state_[3] = boost::core::rotl(state_[3], 45);
  72. return result;
  73. }
  74. inline result_type next() noexcept
  75. {
  76. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  77. return static_cast<double>((next_int() >> 11)) * 0x1.0p-53;
  78. #else
  79. return static_cast<double>((next_int() >> 11)) * 1.11022302462515654e-16;
  80. #endif
  81. }
  82. static constexpr result_type (min)() noexcept
  83. {
  84. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  85. return static_cast<double>((std::numeric_limits<std::uint64_t>::min)() >> 11) * 0x1.0p-53;
  86. #else
  87. return static_cast<double>((std::numeric_limits<std::uint64_t>::min)() >> 11) * 1.11022302462515654e-16;
  88. #endif
  89. }
  90. static constexpr result_type (max)() noexcept
  91. {
  92. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  93. return static_cast<double>((std::numeric_limits<std::uint64_t>::max)()) * 0x1.0p-53;
  94. #else
  95. return static_cast<double>((std::numeric_limits<std::uint64_t>::max)()) * 1.11022302462515654e-16;
  96. #endif
  97. }
  98. };
  99. /**
  100. * This is xoshiro256** 1.0, one of our all-purpose, rock-solid
  101. * generators. It has excellent (sub-ns) speed, a state (256 bits) that is
  102. * large enough for any parallel application, and it passes all tests we
  103. * are aware of.
  104. *
  105. * For generating just floating-point numbers, xoshiro256+ is even faster.
  106. */
  107. class xoshiro256mm final : public detail::xoshiro_base<xoshiro256mm, 4>
  108. {
  109. private:
  110. using Base = detail::xoshiro_base<xoshiro256mm, 4>;
  111. public:
  112. using Base::Base;
  113. inline result_type next() noexcept
  114. {
  115. const std::uint64_t result = boost::core::rotl(state_[1] * 5, 7) * 9U;
  116. const std::uint64_t t = state_[1] << 17;
  117. state_[2] ^= state_[0];
  118. state_[3] ^= state_[1];
  119. state_[1] ^= state_[2];
  120. state_[0] ^= state_[3];
  121. state_[2] ^= t;
  122. state_[3] = boost::core::rotl(state_[3], 45);
  123. return result;
  124. }
  125. };
  126. /**
  127. * This is xoshiro512++ 1.0, one of our all-purpose, rock-solid
  128. * generators. It has excellent (about 1ns) speed, a state (512 bits) that
  129. * is large enough for any parallel application, and it passes all tests
  130. * we are aware of.
  131. *
  132. * For generating just floating-point numbers, xoshiro512+ is even faster.
  133. *
  * The state must be seeded so that it is not everywhere zero. If you have
  * a 64-bit seed, we suggest seeding a splitmix64 generator and using its
  * output to fill the state.
  137. */
  138. class xoshiro512pp final : public detail::xoshiro_base<xoshiro512pp, 8>
  139. {
  140. private:
  141. using Base = detail::xoshiro_base<xoshiro512pp, 8>;
  142. public:
  143. using Base::Base;
  144. inline result_type next() noexcept
  145. {
  146. const std::uint64_t result = boost::core::rotl(state_[0] + state_[2], 17) + state_[2];
  147. const std::uint64_t t = state_[1] << 11;
  148. state_[2] ^= state_[0];
  149. state_[5] ^= state_[1];
  150. state_[1] ^= state_[2];
  151. state_[7] ^= state_[3];
  152. state_[3] ^= state_[4];
  153. state_[4] ^= state_[5];
  154. state_[0] ^= state_[6];
  155. state_[6] ^= state_[7];
  156. state_[6] ^= t;
  157. state_[7] = boost::core::rotl(state_[7], 21);
  158. return result;
  159. }
  160. };
  161. /**
  162. * This is xoshiro512** 1.0, one of our all-purpose, rock-solid generators
  163. * with increased state size. It has excellent (about 1ns) speed, a state
  164. * (512 bits) that is large enough for any parallel application, and it
  165. * passes all tests we are aware of.
  166. *
  167. * For generating just floating-point numbers, xoshiro512+ is even faster.
  168. *
  * The state must be seeded so that it is not everywhere zero. If you have
  * a 64-bit seed, we suggest seeding a splitmix64 generator and using its
  * output to fill the state.
  172. */
  173. class xoshiro512mm final : public detail::xoshiro_base<xoshiro512mm, 8>
  174. {
  175. private:
  176. using Base = detail::xoshiro_base<xoshiro512mm, 8>;
  177. public:
  178. using Base::Base;
  179. inline result_type next() noexcept
  180. {
  181. const std::uint64_t result = boost::core::rotl(state_[1] * 5, 7) * 9;
  182. const std::uint64_t t = state_[1] << 11;
  183. state_[2] ^= state_[0];
  184. state_[5] ^= state_[1];
  185. state_[1] ^= state_[2];
  186. state_[7] ^= state_[3];
  187. state_[3] ^= state_[4];
  188. state_[4] ^= state_[5];
  189. state_[0] ^= state_[6];
  190. state_[6] ^= state_[7];
  191. state_[6] ^= t;
  192. state_[7] = boost::core::rotl(state_[7], 21);
  193. return result;
  194. }
  195. };
  196. /**
  197. * This is xoshiro512+ 1.0, our generator for floating-point numbers with
  198. * increased state size. We suggest to use its upper bits for
  199. * floating-point generation, as it is slightly faster than xoshiro512**.
  200. * It passes all tests we are aware of except for the lowest three bits,
  201. * which might fail linearity tests (and just those), so if low linear
  202. * complexity is not considered an issue (as it is usually the case) it
  203. * can be used to generate 64-bit outputs, too.
  204. *
  205. * We suggest to use a sign test to extract a random Boolean value, and
  206. * right shifts to extract subsets of bits.
  207. *
  * The state must be seeded so that it is not everywhere zero. If you have
  * a 64-bit seed, we suggest seeding a splitmix64 generator and using its
  * output to fill the state.
  211. */
  212. class xoshiro512d final : public detail::xoshiro_base<xoshiro512d, 8, double>
  213. {
  214. private:
  215. using Base = detail::xoshiro_base<xoshiro512d, 8, double>;
  216. public:
  217. using Base::Base;
  218. inline std::uint64_t next_int() noexcept
  219. {
  220. const std::uint64_t result = state_[0] + state_[2];
  221. const std::uint64_t t = state_[1] << 11;
  222. state_[2] ^= state_[0];
  223. state_[5] ^= state_[1];
  224. state_[1] ^= state_[2];
  225. state_[7] ^= state_[3];
  226. state_[3] ^= state_[4];
  227. state_[4] ^= state_[5];
  228. state_[0] ^= state_[6];
  229. state_[6] ^= state_[7];
  230. state_[6] ^= t;
  231. state_[7] = boost::core::rotl(state_[7], 21);
  232. return result;
  233. }
  234. inline result_type next() noexcept
  235. {
  236. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  237. return static_cast<double>((next_int() >> 11)) * 0x1.0p-53;
  238. #else
  239. return static_cast<double>((next_int() >> 11)) * 1.11022302462515654e-16;
  240. #endif
  241. }
  242. static constexpr result_type (min)() noexcept
  243. {
  244. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  245. return static_cast<double>((std::numeric_limits<std::uint64_t>::min)() >> 11) * 0x1.0p-53;
  246. #else
  247. return static_cast<double>((std::numeric_limits<std::uint64_t>::min)() >> 11) * 1.11022302462515654e-16;
  248. #endif
  249. }
  250. static constexpr result_type (max)() noexcept
  251. {
  252. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  253. return static_cast<double>((std::numeric_limits<std::uint64_t>::max)() >> 11) * 0x1.0p-53;
  254. #else
  255. return static_cast<double>((std::numeric_limits<std::uint64_t>::max)() >> 11) * 1.11022302462515654e-16;
  256. #endif
  257. }
  258. };
  259. /**
  260. * This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid
  261. * generators. It has excellent speed, a state size (128 bits) that is
  262. * large enough for mild parallelism, and it passes all tests we are aware
  263. * of.
  264. *
  265. * For generating just single-precision (i.e., 32-bit) floating-point
  266. * numbers, xoshiro128+ is even faster.
  267. *
  268. * The state must be seeded so that it is not everywhere zero.
  269. */
  270. class xoshiro128pp final : public detail::xoshiro_base<xoshiro128pp, 4, std::uint32_t, std::uint32_t>
  271. {
  272. private:
  273. using Base = detail::xoshiro_base<xoshiro128pp, 4, std::uint32_t, std::uint32_t>;
  274. public:
  275. using Base::Base;
  276. inline result_type next() noexcept
  277. {
  278. const std::uint32_t result = boost::core::rotl(state_[0] + state_[3], 7) + state_[0];
  279. const std::uint32_t t = state_[1] << 9;
  280. state_[2] ^= state_[0];
  281. state_[3] ^= state_[1];
  282. state_[1] ^= state_[2];
  283. state_[0] ^= state_[3];
  284. state_[2] ^= t;
  285. state_[3] = boost::core::rotl(state_[3], 11);
  286. return result;
  287. }
  288. };
  289. /**
  290. * This is xoshiro128** 1.1, one of our 32-bit all-purpose, rock-solid
  291. * generators. It has excellent speed, a state size (128 bits) that is
  292. * large enough for mild parallelism, and it passes all tests we are aware
  293. * of.
  294. *
  295. * Note that version 1.0 had mistakenly state_[0] instead of state_[1] as state
  296. * word passed to the scrambler.
  297. *
  298. * For generating just single-precision (i.e., 32-bit) floating-point
  299. * numbers, xoshiro128+ is even faster.
  300. *
  301. * The state must be seeded so that it is not everywhere zero.
  302. */
  303. class xoshiro128mm final : public detail::xoshiro_base<xoshiro128mm, 4, std::uint32_t, std::uint32_t>
  304. {
  305. private:
  306. using Base = detail::xoshiro_base<xoshiro128mm, 4, std::uint32_t, std::uint32_t>;
  307. public:
  308. using Base::Base;
  309. inline result_type next() noexcept
  310. {
  311. const std::uint32_t result = boost::core::rotl(state_[1] * 5, 7) * 9;
  312. const std::uint32_t t = state_[1] << 9;
  313. state_[2] ^= state_[0];
  314. state_[3] ^= state_[1];
  315. state_[1] ^= state_[2];
  316. state_[0] ^= state_[3];
  317. state_[2] ^= t;
  318. state_[3] = boost::core::rotl(state_[3], 11);
  319. return result;
  320. }
  321. };
  322. /**
  323. * This is xoshiro128+ 1.0, our best and fastest 32-bit generator for 32-bit
  324. * floating-point numbers. We suggest to use its upper bits for
  325. * floating-point generation, as it is slightly faster than xoshiro128**.
  326. * It passes all tests we are aware of except for
  327. * linearity tests, as the lowest four bits have low linear complexity, so
  328. * if low linear complexity is not considered an issue (as it is usually
  329. * the case) it can be used to generate 32-bit outputs, too.
  330. *
  331. * We suggest to use a sign test to extract a random Boolean value, and
  332. * right shifts to extract subsets of bits.
  333. *
  334. * The state must be seeded so that it is not everywhere zero.
  335. */
  336. class xoshiro128f final : public detail::xoshiro_base<xoshiro128f, 4, float, std::uint32_t>
  337. {
  338. private:
  339. using Base = detail::xoshiro_base<xoshiro128f, 4, float, std::uint32_t>;
  340. public:
  341. using Base::Base;
  342. inline std::uint32_t next_int() noexcept
  343. {
  344. const std::uint32_t result = state_[0] + state_[3];
  345. const std::uint32_t t = state_[1] << 9;
  346. state_[2] ^= state_[0];
  347. state_[3] ^= state_[1];
  348. state_[1] ^= state_[2];
  349. state_[0] ^= state_[3];
  350. state_[2] ^= t;
  351. state_[3] = boost::core::rotl(state_[3], 11);
  352. return result;
  353. }
  354. inline result_type next() noexcept
  355. {
  356. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  357. return static_cast<float>((next_int() >> 8)) * 0x1.0p-24f;
  358. #else
  359. return static_cast<float>((next_int() >> 8)) * 5.9604645e-08f;
  360. #endif
  361. }
  362. static constexpr result_type (min)() noexcept
  363. {
  364. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  365. return static_cast<float>((std::numeric_limits<std::uint32_t>::min)() >> 8) * 0x1.0p-24f;
  366. #else
  367. return static_cast<float>((std::numeric_limits<std::uint64_t>::min)() >> 8) * 5.9604645e-08f;
  368. #endif
  369. }
  370. static constexpr result_type (max)() noexcept
  371. {
  372. #ifdef BOOST_RANDOM_HAS_HEX_FLOAT
  373. return static_cast<float>((std::numeric_limits<std::uint32_t>::max)() >> 8) * 0x1.0p-24f;
  374. #else
  375. return static_cast<float>((std::numeric_limits<std::uint64_t>::max)() >> 8) * 5.9604645e-08f;
  376. #endif
  377. }
  378. };
  379. } // namespace random
  380. } // namespace boost
  381. #endif //BOOST_RANDOM_XOSHIRO_HPP