// naive_monte_carlo.hpp
  1. /*
  2. * Copyright Nick Thompson, 2018
  3. * Use, modification and distribution are subject to the
  4. * Boost Software License, Version 1.0. (See accompanying file
  5. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. */
  7. #ifndef BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
  8. #define BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <functional>
#include <future>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <random>
#include <sstream>
#include <thread>
#include <type_traits>
#include <utility>
#include <vector>
#include <boost/math/policies/error_handling.hpp>
#include <boost/math/special_functions/fpclassify.hpp>
#ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES
#  include <iostream>
#endif
  29. namespace boost { namespace math { namespace quadrature {
  30. namespace detail {
  31. enum class limit_classification {FINITE,
  32. LOWER_BOUND_INFINITE,
  33. UPPER_BOUND_INFINITE,
  34. DOUBLE_INFINITE};
  35. }
  36. template<class Real, class F, class RandomNumberGenerator = std::mt19937_64, class Policy = boost::math::policies::policy<>,
  37. typename std::enable_if<std::is_trivially_copyable<Real>::value, bool>::type = true>
  38. class naive_monte_carlo
  39. {
  40. public:
  41. naive_monte_carlo(const F& integrand,
  42. std::vector<std::pair<Real, Real>> const & bounds,
  43. Real error_goal,
  44. bool singular = true,
  45. std::uint64_t threads = std::thread::hardware_concurrency(),
  46. std::uint64_t seed = 0) noexcept : m_num_threads{threads}, m_seed{seed}, m_volume(1)
  47. {
  48. using std::numeric_limits;
  49. using std::sqrt;
  50. using boost::math::isinf;
  51. std::uint64_t n = bounds.size();
  52. m_lbs.resize(n);
  53. m_dxs.resize(n);
  54. m_limit_types.resize(n);
  55. static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
  56. for (std::uint64_t i = 0; i < n; ++i)
  57. {
  58. if (bounds[i].second <= bounds[i].first)
  59. {
  60. boost::math::policies::raise_domain_error(function, "The upper bound is <= the lower bound.\n", bounds[i].second, Policy());
  61. return;
  62. }
  63. if (isinf(bounds[i].first))
  64. {
  65. if (isinf(bounds[i].second))
  66. {
  67. m_limit_types[i] = detail::limit_classification::DOUBLE_INFINITE;
  68. }
  69. else
  70. {
  71. m_limit_types[i] = detail::limit_classification::LOWER_BOUND_INFINITE;
  72. // Ok ok this is bad to use the second bound as the lower limit and then reflect.
  73. m_lbs[i] = bounds[i].second;
  74. m_dxs[i] = numeric_limits<Real>::quiet_NaN();
  75. }
  76. }
  77. else if (isinf(bounds[i].second))
  78. {
  79. m_limit_types[i] = detail::limit_classification::UPPER_BOUND_INFINITE;
  80. if (singular)
  81. {
  82. // I've found that it's easier to sample on a closed set and perturb the boundary
  83. // than to try to sample very close to the boundary.
  84. m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
  85. }
  86. else
  87. {
  88. m_lbs[i] = bounds[i].first;
  89. }
  90. m_dxs[i] = numeric_limits<Real>::quiet_NaN();
  91. }
  92. else
  93. {
  94. m_limit_types[i] = detail::limit_classification::FINITE;
  95. if (singular)
  96. {
  97. if (bounds[i].first == 0)
  98. {
  99. m_lbs[i] = std::numeric_limits<Real>::epsilon();
  100. }
  101. else
  102. {
  103. m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
  104. }
  105. m_dxs[i] = std::nextafter(bounds[i].second, std::numeric_limits<Real>::lowest()) - m_lbs[i];
  106. }
  107. else
  108. {
  109. m_lbs[i] = bounds[i].first;
  110. m_dxs[i] = bounds[i].second - bounds[i].first;
  111. }
  112. m_volume *= m_dxs[i];
  113. }
  114. }
  115. m_integrand = [this, &integrand](std::vector<Real> & x)->Real
  116. {
  117. Real coeff = m_volume;
  118. for (std::uint64_t i = 0; i < x.size(); ++i)
  119. {
  120. // Variable transformation are listed at:
  121. // https://en.wikipedia.org/wiki/Numerical_integration
  122. // However, we've made some changes to these so that we can evaluate on a compact domain.
  123. if (m_limit_types[i] == detail::limit_classification::FINITE)
  124. {
  125. x[i] = m_lbs[i] + x[i]*m_dxs[i];
  126. }
  127. else if (m_limit_types[i] == detail::limit_classification::UPPER_BOUND_INFINITE)
  128. {
  129. Real t = x[i];
  130. Real z = 1/(1 + numeric_limits<Real>::epsilon() - t);
  131. coeff *= (z*z)*(1 + numeric_limits<Real>::epsilon());
  132. x[i] = m_lbs[i] + t*z;
  133. }
  134. else if (m_limit_types[i] == detail::limit_classification::LOWER_BOUND_INFINITE)
  135. {
  136. Real t = x[i];
  137. Real z = 1/(t+sqrt((numeric_limits<Real>::min)()));
  138. coeff *= (z*z);
  139. x[i] = m_lbs[i] + (t-1)*z;
  140. }
  141. else
  142. {
  143. Real t1 = 1/(1+numeric_limits<Real>::epsilon() - x[i]);
  144. Real t2 = 1/(x[i]+numeric_limits<Real>::epsilon());
  145. x[i] = (2*x[i]-1)*t1*t2/4;
  146. coeff *= (t1*t1+t2*t2)/4;
  147. }
  148. }
  149. return coeff*integrand(x);
  150. };
  151. // If we don't do a single function call in the constructor,
  152. // we can't do a restart.
  153. std::vector<Real> x(m_lbs.size());
  154. // If the seed is zero, that tells us to choose a random seed for the user:
  155. if (seed == 0)
  156. {
  157. std::random_device rd;
  158. seed = rd();
  159. }
  160. RandomNumberGenerator gen(seed);
  161. Real inv_denom = 1/static_cast<Real>(((gen.max)()-(gen.min)()));
  162. m_num_threads = (std::max)(m_num_threads, static_cast<std::uint64_t>(1));
  163. m_thread_calls.reset(new std::atomic<std::uint64_t>[threads]);
  164. m_thread_Ss.reset(new std::atomic<Real>[threads]);
  165. m_thread_averages.reset(new std::atomic<Real>[threads]);
  166. Real avg = 0;
  167. for (std::uint64_t i = 0; i < m_num_threads; ++i)
  168. {
  169. for (std::uint64_t j = 0; j < m_lbs.size(); ++j)
  170. {
  171. x[j] = (gen()-(gen.min)())*inv_denom;
  172. }
  173. Real y = m_integrand(x);
  174. m_thread_averages[i] = y; // relaxed store
  175. m_thread_calls[i] = 1;
  176. m_thread_Ss[i] = 0;
  177. avg += y;
  178. }
  179. avg /= m_num_threads;
  180. m_avg = avg; // relaxed store
  181. m_error_goal = error_goal; // relaxed store
  182. m_start = std::chrono::system_clock::now();
  183. m_done = false; // relaxed store
  184. m_total_calls = m_num_threads; // relaxed store
  185. m_variance = (numeric_limits<Real>::max)();
  186. }
  187. std::future<Real> integrate()
  188. {
  189. // Set done to false in case we wish to restart:
  190. m_done.store(false); // relaxed store, no worker threads yet
  191. m_start = std::chrono::system_clock::now();
  192. return std::async(std::launch::async,
  193. &naive_monte_carlo::m_integrate, this);
  194. }
  195. void cancel()
  196. {
  197. // If seed = 0 (meaning have the routine pick the seed), this leaves the seed the same.
  198. // If seed != 0, then the seed is changed, so a restart doesn't do the exact same thing.
  199. m_seed = m_seed*m_seed;
  200. m_done = true; // relaxed store, worker threads will get the message eventually
  201. // Make sure the error goal is infinite, because otherwise we'll loop when we do the final error goal check:
  202. m_error_goal = (std::numeric_limits<Real>::max)();
  203. }
  204. Real variance() const
  205. {
  206. return m_variance.load();
  207. }
  208. Real current_error_estimate() const
  209. {
  210. using std::sqrt;
  211. //
  212. // There is a bug here: m_variance and m_total_calls get updated asynchronously
  213. // and may be out of synch when we compute the error estimate, not sure if it matters though...
  214. //
  215. return sqrt(m_variance.load()/m_total_calls.load());
  216. }
  217. std::chrono::duration<Real> estimated_time_to_completion() const
  218. {
  219. auto now = std::chrono::system_clock::now();
  220. std::chrono::duration<Real> elapsed_seconds = now - m_start;
  221. Real r = this->current_error_estimate()/m_error_goal.load(); // relaxed load
  222. if (r*r <= 1) {
  223. return 0*elapsed_seconds;
  224. }
  225. return (r*r - 1)*elapsed_seconds;
  226. }
  227. void update_target_error(Real new_target_error)
  228. {
  229. m_error_goal = new_target_error; // relaxed store
  230. }
  231. Real progress() const
  232. {
  233. Real r = m_error_goal.load()/this->current_error_estimate(); // relaxed load
  234. if (r*r >= 1)
  235. {
  236. return 1;
  237. }
  238. return r*r;
  239. }
  240. Real current_estimate() const
  241. {
  242. return m_avg.load();
  243. }
  244. std::uint64_t calls() const
  245. {
  246. return m_total_calls.load(); // relaxed load
  247. }
  248. private:
    // Controller loop, run on the std::async task launched by integrate():
    // spawns the worker threads, periodically pools their statistics into
    // m_avg / m_variance / m_total_calls, and signals them to stop once the
    // error goal is met, the run is cancelled, or a worker throws.  If the
    // final pooled error estimate still exceeds the goal, the whole process
    // is retried (up to 5 times in total).
    Real m_integrate()
    {
        std::uint64_t seed;
        // If the user tells us to pick a seed, pick a seed:
        if (m_seed == 0)
        {
            std::random_device rd;
            seed = rd();
        }
        else // use the seed we are given:
        {
            seed = m_seed;
        }
        RandomNumberGenerator gen(seed);
        int max_repeat_tries = 5;
        do{
            if (max_repeat_tries < 5)
            {
                // We are on a retry: clear the stop flag so workers run again.
                m_done = false;
#ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES
                std::cerr << "Failed to achieve required tolerance first time through..\n";
                std::cerr << " variance = " << m_variance << std::endl;
                std::cerr << " average = " << m_avg << std::endl;
                std::cerr << " total calls = " << m_total_calls << std::endl;
                for (std::size_t i = 0; i < m_num_threads; ++i)
                    std::cerr << " thread_calls[" << i << "] = " << m_thread_calls[i] << std::endl;
                for (std::size_t i = 0; i < m_num_threads; ++i)
                    std::cerr << " thread_averages[" << i << "] = " << m_thread_averages[i] << std::endl;
                for (std::size_t i = 0; i < m_num_threads; ++i)
                    std::cerr << " thread_Ss[" << i << "] = " << m_thread_Ss[i] << std::endl;
#endif
            }
            // One worker per thread; each gets its own RNG seed drawn from gen.
            std::vector<std::thread> threads(m_num_threads);
            for (std::uint64_t i = 0; i < threads.size(); ++i)
            {
                threads[i] = std::thread(&naive_monte_carlo::m_thread_monte, this, i, gen());
            }
            // Monitoring loop: wake up every 100ms, pool the per-thread
            // statistics, and check for convergence or cancellation.
            do {
                std::this_thread::sleep_for(std::chrono::milliseconds(100));
                std::uint64_t total_calls = 0;
                for (std::uint64_t i = 0; i < m_num_threads; ++i)
                {
                    std::uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
                    total_calls += t_calls;
                }
                Real variance = 0;
                Real avg = 0;
                for (std::uint64_t i = 0; i < m_num_threads; ++i)
                {
                    std::uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
                    // Will this overflow? Not hard to remove . . .
                    // Averages weighted by each thread's share of the calls:
                    avg += m_thread_averages[i].load(std::memory_order_relaxed)*(static_cast<Real>(t_calls) / static_cast<Real>(total_calls));
                    variance += m_thread_Ss[i].load(std::memory_order_relaxed);
                }
                m_avg.store(avg, std::memory_order_release);
                // Pooled S divided by (N-1): the unbiased sample variance.
                m_variance.store(variance / (total_calls - 1), std::memory_order_release);
                m_total_calls = total_calls; // relaxed store, it's just for user feedback
                // Allow cancellation:
                if (m_done) // relaxed load
                {
                    break;
                }
            } while (m_total_calls < 2048 || this->current_error_estimate() > m_error_goal.load(std::memory_order_consume));
            // Error bound met; signal the threads:
            m_done = true; // relaxed store, threads will get the message in the end
            std::for_each(threads.begin(), threads.end(),
                          std::mem_fn(&std::thread::join));
            if (m_exception)
            {
                // A worker failed; propagate its exception to the caller of the future.
                std::rethrow_exception(m_exception);
            }
            // Incorporate their work into the final estimate
            // (the workers kept sampling between our last poll and the join):
            std::uint64_t total_calls = 0;
            for (std::uint64_t i = 0; i < m_num_threads; ++i)
            {
                std::uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
                total_calls += t_calls;
            }
            Real variance = 0;
            Real avg = 0;
            for (std::uint64_t i = 0; i < m_num_threads; ++i)
            {
                std::uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
                // Averages weighted by the number of calls the thread made:
                avg += m_thread_averages[i].load(std::memory_order_relaxed)*(static_cast<Real>(t_calls) / static_cast<Real>(total_calls));
                variance += m_thread_Ss[i].load(std::memory_order_relaxed);
            }
            m_avg.store(avg, std::memory_order_release);
            m_variance.store(variance / (total_calls - 1), std::memory_order_release);
            m_total_calls = total_calls; // relaxed store, this is just user feedback
            // Sometimes, the master will observe the variance at a very "good" (or bad?) moment,
            // Then the threads proceed to find the variance is much greater by the time they hear the message to stop.
            // This *WOULD* make sure that the final error estimate is within the error bounds.
        }
        while ((--max_repeat_tries >= 0) && (this->current_error_estimate() > m_error_goal));
        return m_avg.load(std::memory_order_consume);
    }
    // Worker-thread body: repeatedly samples the unit cube, evaluates the
    // transformed integrand, and maintains this thread's running mean (M1)
    // and Welford sum-of-squares (S), publishing both (plus the call count)
    // every 'magic_calls_before_update' evaluations.  Runs until m_done is
    // observed true.  Any exception (e.g. a domain error from a non-finite
    // integrand value) is stashed in m_exception and stops all threads.
    void m_thread_monte(std::uint64_t thread_index, std::uint64_t seed)
    {
        using std::numeric_limits;
        try
        {
            std::vector<Real> x(m_lbs.size());
            RandomNumberGenerator gen(seed);
            // Scale raw generator output onto [0, 1]:
            Real inv_denom = static_cast<Real>(1) / static_cast<Real>(( (gen.max)() - (gen.min)() ));
            // Resume from the state published by the constructor / a previous round:
            Real M1 = m_thread_averages[thread_index].load(std::memory_order_consume);
            Real S = m_thread_Ss[thread_index].load(std::memory_order_consume);
            // Kahan summation is required or the value of the integrand will go on a random walk during long computations.
            // See the implementation discussion.
            // The idea is that the unstabilized additions have error sigma(f)/sqrt(N) + epsilon*N, which diverges faster than it converges!
            // Kahan summation turns this to sigma(f)/sqrt(N) + epsilon^2*N, and the random walk occurs on a timescale of 10^14 years (on current hardware)
            Real compensator = 0;
            std::uint64_t k = m_thread_calls[thread_index].load(std::memory_order_consume);
            while (!m_done) // relaxed load
            {
                int j = 0;
                // If we don't have a certain number of calls before an update, we can easily terminate prematurely
                // because the variance estimate is way too low. This magic number is a reasonable compromise, as 1/sqrt(2048) = 0.02,
                // so it should recover 2 digits if the integrand isn't poorly behaved, and if it is, it should discover that before premature termination.
                // Of course if the user has 64 threads, then this number is probably excessive.
                int magic_calls_before_update = 2048;
                while (j++ < magic_calls_before_update)
                {
                    // Draw a uniform sample from the unit cube:
                    for (std::uint64_t i = 0; i < m_lbs.size(); ++i)
                    {
                        x[i] = (gen() - (gen.min)())*inv_denom;
                    }
                    Real f = m_integrand(x);
                    using std::isfinite;
                    if (!isfinite(f))
                    {
                        // The call to m_integrand transform x, so this error message states the correct node.
                        std::stringstream os;
                        os << "Your integrand was evaluated at {";
                        for (std::uint64_t i = 0; i < x.size() -1; ++i)
                        {
                            os << x[i] << ", ";
                        }
                        os << x[x.size() -1] << "}, and returned " << f << std::endl;
                        static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
                        boost::math::policies::raise_domain_error(function, os.str().c_str(), /*this is a dummy arg to make it compile*/ 7.2, Policy());
                    }
                    ++k;
                    // Welford update of the running mean, with Kahan compensation:
                    Real term = (f - M1)/k;
                    Real y1 = term - compensator;
                    Real M2 = M1 + y1;
                    compensator = (M2 - M1) - y1;
                    // Welford update of the sum of squared deviations:
                    S += (f - M1)*(f - M2);
                    M1 = M2;
                }
                // Publish this batch's statistics for the controller to pool:
                m_thread_averages[thread_index].store(M1, std::memory_order_release);
                m_thread_Ss[thread_index].store(S, std::memory_order_release);
                m_thread_calls[thread_index].store(k, std::memory_order_release);
            }
        }
        catch (...)
        {
            // Signal the other threads that the computation is ruined:
            m_done = true; // relaxed store
            std::lock_guard<std::mutex> lock(m_exception_mutex); // Scoped lock to prevent race writing to m_exception
            m_exception = std::current_exception();
        }
    }
    // Wrapped integrand: maps a unit-cube point (in place) onto the domain
    // and returns the Jacobian-weighted integrand value.
    std::function<Real(std::vector<Real> &)> m_integrand;
    // Number of worker threads; clamped to >= 1 in the constructor.
    std::uint64_t m_num_threads;
    // RNG seed; 0 means "choose one from std::random_device".
    std::atomic<std::uint64_t> m_seed;
    // Target standard error of the estimate.
    std::atomic<Real> m_error_goal;
    // Stop flag polled by the workers; set on convergence, cancel(), or exception.
    std::atomic<bool> m_done{};
    // Per-dimension lower bounds (possibly perturbed off a singular boundary;
    // for LOWER_BOUND_INFINITE this holds the *upper* bound).
    std::vector<Real> m_lbs;
    // Per-dimension widths; NaN for dimensions with an infinite limit.
    std::vector<Real> m_dxs;
    // Which variable transformation applies in each dimension.
    std::vector<detail::limit_classification> m_limit_types;
    // Product of the widths of the finite dimensions.
    Real m_volume;
    // Total integrand evaluations so far (user feedback only).
    std::atomic<std::uint64_t> m_total_calls{};
    // I wanted these to be vectors rather than maps,
    // but you can't resize a vector of atomics.
    std::unique_ptr<std::atomic<std::uint64_t>[]> m_thread_calls;
    // Pooled sample variance of the evaluations.
    std::atomic<Real> m_variance;
    // Per-thread Welford sum-of-squared-deviations accumulators.
    std::unique_ptr<std::atomic<Real>[]> m_thread_Ss;
    // Current estimate of the integral.
    std::atomic<Real> m_avg;
    // Per-thread running means.
    std::unique_ptr<std::atomic<Real>[]> m_thread_averages;
    // Start time of the current run, for time-to-completion estimates.
    std::chrono::time_point<std::chrono::system_clock> m_start;
    // First exception thrown by a worker, rethrown by m_integrate().
    std::exception_ptr m_exception;
    // Guards writes to m_exception.
    std::mutex m_exception_mutex;
  432. };
  433. }}}
  434. #endif