fast_multiblock32_sse2.hpp 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /* Copyright 2025 Joaquin M Lopez Munoz.
  2. * Distributed under the Boost Software License, Version 1.0.
  3. * (See accompanying file LICENSE_1_0.txt or copy at
  4. * http://www.boost.org/LICENSE_1_0.txt)
  5. *
  6. * See https://www.boost.org/libs/bloom for library home page.
  7. */
  8. #ifndef BOOST_BLOOM_DETAIL_FAST_MULTIBLOCK32_SSE2_HPP
  9. #define BOOST_BLOOM_DETAIL_FAST_MULTIBLOCK32_SSE2_HPP
  10. #include <boost/bloom/detail/multiblock_fpr_base.hpp>
  11. #include <boost/bloom/detail/mulx64.hpp>
  12. #include <boost/bloom/detail/sse2.hpp>
  13. #include <boost/config.hpp>
  14. #include <boost/config/workaround.hpp>
  15. #include <cstddef>
  16. #include <cstdint>
  17. #ifdef __SSE4_1__
  18. #include <smmintrin.h>
  19. #endif
  20. namespace boost{
  21. namespace bloom{
  22. #if defined(BOOST_MSVC)
  23. #pragma warning(push)
  24. #pragma warning(disable:4714) /* marked as __forceinline not inlined */
  25. #endif
  26. namespace detail{
  27. struct m128ix2
  28. {
  29. __m128i lo,hi;
  30. };
  31. /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
  32. static inline int mm_testc_si128(__m128i x,__m128i y)
  33. {
  34. #ifdef __SSE4_1__
  35. return _mm_testc_si128(x,y);
  36. #else
  37. return _mm_movemask_epi8(_mm_cmpeq_epi32(_mm_and_si128(x,y),y))==0xFFFF;
  38. #endif
  39. }
  40. } /* namespace detail */
  41. template<std::size_t K>
  42. struct fast_multiblock32:detail::multiblock_fpr_base<K>
  43. {
  44. static constexpr std::size_t k=K;
  45. using value_type=detail::m128ix2[(k+7)/8];
  46. static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;
  47. static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  48. {
  49. for(std::size_t i=0;i<k/8;++i){
  50. mark_m128ix2(x[i],hash,8);
  51. hash=detail::mulx64(hash);
  52. }
  53. if(k%8){
  54. mark_m128ix2(x[k/8],hash,k%8);
  55. }
  56. }
  57. static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  58. {
  59. bool res=true;
  60. for(std::size_t i=0;i<k/8;++i){
  61. res&=check_m128ix2(x[i],hash,8);
  62. hash=detail::mulx64(hash);
  63. }
  64. if(k%8){
  65. res&=check_m128ix2(x[k/8],hash,k%8);
  66. }
  67. return res;
  68. }
  69. private:
  70. static BOOST_FORCEINLINE detail::m128ix2 make_m128ix2(
  71. std::uint64_t hash,std::size_t kp)
  72. {
  73. const std::uint32_t mask=std::uint32_t(31)<<23,
  74. exp=std::uint32_t(127)<<23;
  75. const __m128i exps[4]={
  76. _mm_set_epi32( 0 , 0 , 0 ,exp),
  77. _mm_set_epi32( 0 , 0 ,exp,exp),
  78. _mm_set_epi32( 0 ,exp,exp,exp),
  79. _mm_set_epi32(exp,exp,exp,exp),
  80. };
  81. if(kp<=4){
  82. __m128i h_lo=_mm_set_epi64x(hash<<5,hash);
  83. h_lo=_mm_and_si128(h_lo,_mm_set1_epi32(mask));
  84. h_lo=_mm_add_epi32(h_lo,exps[kp-1]);
  85. return {
  86. _mm_cvttps_epi32(*(__m128*)&h_lo),
  87. _mm_set1_epi32(0)
  88. };
  89. }
  90. else{
  91. __m128i h_lo=_mm_set_epi64x(hash<<5,hash),
  92. h_hi=_mm_slli_si128(h_lo,2);
  93. h_lo=_mm_and_si128(h_lo,_mm_set1_epi32(mask));
  94. h_hi=_mm_and_si128(h_hi,_mm_set1_epi32(mask));
  95. h_lo=_mm_add_epi32(h_lo,_mm_set1_epi32(exp));
  96. h_hi=_mm_add_epi32(h_hi,exps[kp-5]);
  97. return {
  98. _mm_cvttps_epi32(*(__m128*)&h_lo),
  99. _mm_cvttps_epi32(*(__m128*)&h_hi)
  100. };
  101. }
  102. }
  103. static BOOST_FORCEINLINE void mark_m128ix2(
  104. detail::m128ix2& x,std::uint64_t hash,std::size_t kp)
  105. {
  106. detail::m128ix2 h=make_m128ix2(hash,kp);
  107. x.lo=_mm_or_si128(x.lo,h.lo);
  108. if(kp>4)x.hi=_mm_or_si128(x.hi,h.hi);
  109. }
  110. #if BOOST_WORKAROUND(BOOST_MSVC,<=1900)
  111. /* 'int': forcing value to bool 'true' or 'false' */
  112. #pragma warning(push)
  113. #pragma warning(disable:4800)
  114. #endif
  115. static BOOST_FORCEINLINE bool check_m128ix2(
  116. const detail::m128ix2& x,std::uint64_t hash,std::size_t kp)
  117. {
  118. detail::m128ix2 h=make_m128ix2(hash,kp);
  119. auto res=detail::mm_testc_si128(x.lo,h.lo);
  120. if(kp>4)res&=detail::mm_testc_si128(x.hi,h.hi);
  121. return res;
  122. }
  123. #if BOOST_WORKAROUND(BOOST_MSVC,<=1900)
  124. #pragma warning(pop) /* C4800 */
  125. #endif
  126. };
  127. #if defined(BOOST_MSVC)
  128. #pragma warning(pop) /* C4714 */
  129. #endif
  130. } /* namespace bloom */
  131. } /* namespace boost */
  132. #endif