fast_perfect_hash.hpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. // Copyright (c) 2018-2025 Jean-Louis Leroy
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // See accompanying file LICENSE_1_0.txt
  4. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #ifndef BOOST_OPENMETHOD_POLICY_FAST_PERFECT_HASH_HPP
  6. #define BOOST_OPENMETHOD_POLICY_FAST_PERFECT_HASH_HPP
  7. #include <boost/openmethod/preamble.hpp>
  8. #include <limits>
  9. #include <random>
  10. #ifdef _MSC_VER
  11. #pragma warning(push)
  12. #pragma warning(disable : 4702) // unreachable code
  13. #endif
  14. namespace boost::openmethod {
  15. namespace detail {
  16. #if defined(UINTPTR_MAX)
  17. using uintptr = std::uintptr_t;
  18. constexpr uintptr uintptr_max = UINTPTR_MAX;
  19. #else
  20. static_assert(
  21. sizeof(std::size_t) == sizeof(void*),
  22. "This implementation requires that size_t and void* have the same size.");
  23. using uintptr = std::size_t;
  24. constexpr uintptr uintptr_max = (std::numeric_limits<std::size_t>::max)();
  25. #endif
  26. template<class Registry>
  27. std::vector<type_id> fast_perfect_hash_control;
  28. } // namespace detail
  29. namespace policies {
  30. //! Hash type ids using a fast, perfect hash function.
  31. //!
  32. //! `fast_perfect_hash` implements the @ref type_hash policy using a hash
  33. //! function in the form `H(x)=(M*x)>>S`. It attempts to determine values for
  34. //! `M` and `S` that do not result in collisions for the set of registered
  35. //! type_ids. This may fail for certain sets of inputs, although it is very
  36. //! likely to succeed for addresses of `std::type_info` objects.
  37. //!
  38. //! There is no guarantee that every value in the codomain of the function
  39. //! corresponds to a value in the domain, or even that the codomain is a dense
  40. //! range of integers. In other words, a lot of space may be wasted in presence
  41. //! of large sets of type_ids.
  42. struct fast_perfect_hash : type_hash {
  43. //! Cannot find hash factors
  44. struct search_error : openmethod_error {
  45. //! Number of attempts to find hash factors
  46. std::size_t attempts;
  47. //! Number of buckets used in the last attempt
  48. std::size_t buckets;
  49. //! Write a short description to an output stream
  50. //! @param os The output stream
  51. //! @tparam Registry The registry
  52. //! @tparam Stream A @ref LightweightOutputStream
  53. template<class Registry, class Stream>
  54. auto write(Stream& os) const -> void;
  55. };
  56. using errors = std::variant<search_error>;
  57. //! A TypeHashFn metafunction.
  58. //!
  59. //! @tparam Registry The registry containing this policy
  60. template<class Registry>
  61. class fn {
  62. static std::size_t mult;
  63. static std::size_t shift;
  64. static std::size_t min_value;
  65. static std::size_t max_value;
  66. static void check(std::size_t index, type_id type);
  67. template<class InitializeContext, class... Options>
  68. static void initialize(
  69. const InitializeContext& ctx, std::vector<type_id>& buckets,
  70. const std::tuple<Options...>& options);
  71. public:
  72. //! Find the hash factors
  73. //!
  74. //! Attempts to find suitable values for the multiplication factor `M`
  75. //! and the shift amount `S` to that do not result in collisions for the
  76. //! specified input values.
  77. //!
  78. //! If no suitable values are found, calls the error handler with
  79. //! a @ref hash_error object then calls `abort`.
  80. //!
  81. //! @tparam Context An @ref InitializeContext.
  82. //! @param ctx A Context object.
  83. //! @return A pair containing the minimum and maximum hash values.
  84. template<class Context, class... Options>
  85. static auto
  86. initialize(const Context& ctx, const std::tuple<Options...>& options) {
  87. if constexpr (Registry::has_runtime_checks) {
  88. initialize(
  89. ctx, detail::fast_perfect_hash_control<Registry>, options);
  90. } else {
  91. std::vector<type_id> buckets;
  92. initialize(ctx, buckets, options);
  93. }
  94. return std::pair{min_value, max_value};
  95. }
  96. //! Hash a type id
  97. //!
  98. //! Hash a type id.
  99. //!
  100. //! If `Registry` contains the @ref runtime_checks policy, checks that
  101. //! the type id is valid, i.e. if it was present in the set passed to
  102. //! @ref initialize. Its absence indicates that a class involved in a
  103. //! method definition, method overrider, or method call was not
  104. //! registered. In this case, signal a @ref missing_class using
  105. //! the registry's @ref error_handler if present; then calls `abort`.
  106. //!
  107. //! @param type The type_id to hash
  108. //! @return The hash value
  109. BOOST_FORCEINLINE
  110. static auto hash(type_id type) -> std::size_t {
  111. auto index =
  112. (mult * reinterpret_cast<detail::uintptr>(type)) >> shift;
  113. if constexpr (Registry::has_runtime_checks) {
  114. check(index, type);
  115. }
  116. return index;
  117. }
  118. //! Releases the memory allocated by `initialize`.
  119. //!
  120. //! @tparam Options... Zero or more option types, deduced from the function
  121. //! arguments.
  122. //! @param options Zero or more option objects.
  123. template<class... Options>
  124. static auto finalize(const std::tuple<Options...>&) -> void {
  125. detail::fast_perfect_hash_control<Registry>.clear();
  126. }
  127. };
  128. };
  129. template<class Registry>
  130. std::size_t fast_perfect_hash::fn<Registry>::mult;
  131. template<class Registry>
  132. std::size_t fast_perfect_hash::fn<Registry>::shift;
  133. template<class Registry>
  134. std::size_t fast_perfect_hash::fn<Registry>::min_value;
  135. template<class Registry>
  136. std::size_t fast_perfect_hash::fn<Registry>::max_value;
  137. template<class Registry>
  138. template<class InitializeContext, class... Options>
  139. void fast_perfect_hash::fn<Registry>::initialize(
  140. const InitializeContext& ctx, std::vector<type_id>& buckets,
  141. const std::tuple<Options...>& options) {
  142. (void)options;
  143. const auto N = std::distance(ctx.classes_begin(), ctx.classes_end());
  144. if constexpr (mp11::mp_contains<mp11::mp_list<Options...>, trace>::value) {
  145. Registry::output::os << "Finding hash factor for " << N << " types\n";
  146. }
  147. std::default_random_engine rnd(13081963);
  148. std::size_t total_attempts = 0;
  149. std::size_t M = 1;
  150. for (auto size = N * 5 / 4; size >>= 1;) {
  151. ++M;
  152. }
  153. std::uniform_int_distribution<std::size_t> uniform_dist;
  154. for (std::size_t pass = 0; pass < 4; ++pass, ++M) {
  155. shift = 8 * sizeof(type_id) - M;
  156. auto hash_size = 1 << M;
  157. min_value = (std::numeric_limits<std::size_t>::max)();
  158. max_value = (std::numeric_limits<std::size_t>::min)();
  159. if constexpr (InitializeContext::template has_option<trace>) {
  160. ctx.tr << " trying with M = " << M << ", " << hash_size
  161. << " buckets\n";
  162. }
  163. std::size_t attempts = 0;
  164. buckets.resize(hash_size);
  165. while (attempts < 100000) {
  166. std::fill(
  167. buckets.begin(), buckets.end(), type_id(detail::uintptr_max));
  168. ++attempts;
  169. ++total_attempts;
  170. mult = uniform_dist(rnd) | 1;
  171. for (auto iter = ctx.classes_begin(); iter != ctx.classes_end();
  172. ++iter) {
  173. for (auto type_iter = iter->type_id_begin();
  174. type_iter != iter->type_id_end(); ++type_iter) {
  175. auto type = *type_iter;
  176. auto index = (detail::uintptr(type) * mult) >> shift;
  177. min_value = (std::min)(min_value, index);
  178. max_value = (std::max)(max_value, index);
  179. if (detail::uintptr(buckets[index]) !=
  180. detail::uintptr_max) {
  181. goto collision;
  182. }
  183. buckets[index] = type;
  184. }
  185. }
  186. if constexpr (InitializeContext::template has_option<trace>) {
  187. ctx.tr << " found " << mult << " after " << total_attempts
  188. << " attempts; span = [" << min_value << ", "
  189. << max_value << "]\n";
  190. }
  191. return;
  192. collision: {}
  193. }
  194. }
  195. search_error error;
  196. error.attempts = total_attempts;
  197. error.buckets = std::size_t(1) << M;
  198. if constexpr (Registry::has_error_handler) {
  199. Registry::error_handler::error(error);
  200. }
  201. abort();
  202. }
  203. template<class Registry>
  204. void fast_perfect_hash::fn<Registry>::check(std::size_t index, type_id type) {
  205. if (index < min_value || index > max_value ||
  206. detail::fast_perfect_hash_control<Registry>[index] != type) {
  207. if constexpr (Registry::has_error_handler) {
  208. missing_class error;
  209. error.type = type;
  210. Registry::error_handler::error(error);
  211. }
  212. abort();
  213. }
  214. }
  215. template<class Registry, class Stream>
  216. auto fast_perfect_hash::search_error::write(Stream& os) const -> void {
  217. os << "could not find hash factors after " << attempts << "s using "
  218. << buckets << " buckets\n";
  219. }
  220. } // namespace policies
  221. } // namespace boost::openmethod
  222. #endif