fast_multiblock64_avx2.hpp 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. /* Copyright 2025 Joaquin M Lopez Munoz.
  2. * Distributed under the Boost Software License, Version 1.0.
  3. * (See accompanying file LICENSE_1_0.txt or copy at
  4. * http://www.boost.org/LICENSE_1_0.txt)
  5. *
  6. * See https://www.boost.org/libs/bloom for library home page.
  7. */
  8. #ifndef BOOST_BLOOM_DETAIL_FAST_MULTIBLOCK64_AVX2_HPP
  9. #define BOOST_BLOOM_DETAIL_FAST_MULTIBLOCK64_AVX2_HPP
  10. #include <boost/bloom/detail/avx2.hpp>
  11. #include <boost/bloom/detail/multiblock_fpr_base.hpp>
  12. #include <boost/bloom/detail/mulx64.hpp>
  13. #include <boost/config.hpp>
  14. #include <boost/config/workaround.hpp>
  15. #include <cstddef>
  16. #include <cstdint>
  17. namespace boost{
  18. namespace bloom{
  19. #if defined(BOOST_MSVC)
  20. #pragma warning(push)
  21. #pragma warning(disable:4714) /* marked as __forceinline not inlined */
  22. #endif
  23. namespace detail{
  24. struct m256ix2
  25. {
  26. __m256i lo,hi;
  27. };
  28. } /* namespace detail */
  29. template<std::size_t K>
  30. struct fast_multiblock64:detail::multiblock_fpr_base<K>
  31. {
  32. static constexpr std::size_t k=K;
  33. using value_type=detail::m256ix2[(k+7)/8];
  34. static constexpr std::size_t used_value_size=sizeof(std::uint64_t)*k;
  35. static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  36. {
  37. for(int i=0;i<k/8;++i){
  38. mark_m256ix2(x[i],hash,8);
  39. hash=detail::mulx64(hash);
  40. }
  41. if(k%8){
  42. mark_m256ix2(x[k/8],hash,k%8);
  43. }
  44. }
  45. static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  46. {
  47. bool res=true;
  48. for(int i=0;i<k/8;++i){
  49. res&=check_m256ix2(x[i],hash,8);
  50. hash=detail::mulx64(hash);
  51. }
  52. if(k%8){
  53. res&=check_m256ix2(x[k/8],hash,k%8);
  54. }
  55. return res;
  56. }
  57. private:
  58. static BOOST_FORCEINLINE detail::m256ix2 make_m256ix2(
  59. std::uint64_t hash,std::size_t kp)
  60. {
  61. const detail::m256ix2 ones[8]={
  62. {_mm256_set_epi64x(0,0,0,1),_mm256_set_epi64x(0,0,0,0)},
  63. {_mm256_set_epi64x(0,0,1,1),_mm256_set_epi64x(0,0,0,0)},
  64. {_mm256_set_epi64x(0,1,1,1),_mm256_set_epi64x(0,0,0,0)},
  65. {_mm256_set_epi64x(1,1,1,1),_mm256_set_epi64x(0,0,0,0)},
  66. {_mm256_set_epi64x(1,1,1,1),_mm256_set_epi64x(0,0,0,1)},
  67. {_mm256_set_epi64x(1,1,1,1),_mm256_set_epi64x(0,0,1,1)},
  68. {_mm256_set_epi64x(1,1,1,1),_mm256_set_epi64x(0,1,1,1)},
  69. {_mm256_set_epi64x(1,1,1,1),_mm256_set_epi64x(1,1,1,1)},
  70. };
  71. __m256i h=_mm256_set1_epi64x(hash);
  72. h=_mm256_sllv_epi64(h,_mm256_set_epi64x(18,12,6,0));
  73. h=_mm256_srli_epi32(h,32-6);
  74. return {
  75. _mm256_sllv_epi64(
  76. ones[kp-1].lo,_mm256_cvtepu32_epi64(_mm256_extracti128_si256(h,0))),
  77. kp<=4?
  78. _mm256_set1_epi64x(0):
  79. _mm256_sllv_epi64(
  80. ones[kp-1].hi,_mm256_cvtepu32_epi64(_mm256_extracti128_si256(h,1)))
  81. };
  82. }
  83. static BOOST_FORCEINLINE void mark_m256ix2(
  84. detail::m256ix2& x,std::uint64_t hash,std::size_t kp)
  85. {
  86. detail::m256ix2 h=make_m256ix2(hash,kp);
  87. x.lo=_mm256_or_si256(x.lo,h.lo);
  88. if(kp>4)x.hi=_mm256_or_si256(x.hi,h.hi);
  89. }
  90. #if BOOST_WORKAROUND(BOOST_MSVC,<=1900)
  91. /* 'int': forcing value to bool 'true' or 'false' */
  92. #pragma warning(push)
  93. #pragma warning(disable:4800)
  94. #endif
  95. static BOOST_FORCEINLINE bool check_m256ix2(
  96. const detail::m256ix2& x,std::uint64_t hash,std::size_t kp)
  97. {
  98. detail::m256ix2 h=make_m256ix2(hash,kp);
  99. auto res=_mm256_testc_si256(x.lo,h.lo);
  100. if(kp>4)res&=_mm256_testc_si256(x.hi,h.hi);
  101. return res;
  102. }
  103. #if BOOST_WORKAROUND(BOOST_MSVC,<=1900)
  104. #pragma warning(pop) /* C4800 */
  105. #endif
  106. };
  107. #if defined(BOOST_MSVC)
  108. #pragma warning(pop) /* C4714 */
  109. #endif
  110. } /* namespace bloom */
  111. } /* namespace boost */
  112. #endif