fast_multiblock32_neon.hpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /* Copyright 2025 Joaquin M Lopez Munoz.
  2. * Distributed under the Boost Software License, Version 1.0.
  3. * (See accompanying file LICENSE_1_0.txt or copy at
  4. * http://www.boost.org/LICENSE_1_0.txt)
  5. *
  6. * See https://www.boost.org/libs/bloom for library home page.
  7. */
  8. #ifndef BOOST_BLOOM_DETAIL_FAST_MULTIBLOCK32_NEON_HPP
  9. #define BOOST_BLOOM_DETAIL_FAST_MULTIBLOCK32_NEON_HPP
  10. #include <boost/bloom/detail/multiblock_fpr_base.hpp>
  11. #include <boost/bloom/detail/mulx64.hpp>
  12. #include <boost/bloom/detail/neon.hpp>
  13. #include <boost/config.hpp>
  14. #include <cstddef>
  15. #include <cstdint>
  16. namespace boost{
  17. namespace bloom{
  18. #if defined(BOOST_MSVC)
  19. #pragma warning(push)
  20. #pragma warning(disable:4714) /* marked as __forceinline not inlined */
  21. #endif
  /* Braced initializers for constant NEON vectors.
   *
   * BOOST_BLOOM_INIT_U32X4(w,x,y,z) expands to an initializer for a
   * uint32x4_t whose four 32-bit values are w,x,y,z, each converted to
   * std::uint32_t (so -1 yields an all-ones value).
   * BOOST_BLOOM_INIT_U32X4X2 wraps two such initializers for the val[0] and
   * val[1] members of a uint32x4x2_t.
   *
   * With Microsoft's compiler the initializer is written as two 64-bit
   * values instead, each packing a pair of 32-bit values (first one in the
   * low half): see https://stackoverflow.com/a/54018882/213114
   * clang-cl also defines _MSC_VER but is deliberately routed to the
   * element-wise form.
   * NOTE(review): this assumes clang-cl uses Clang's own <arm_neon.h>
   * vector types -- confirm on a clang-cl/ARM64 toolchain.
   *
   * std::uint64_t(x) is used rather than "unsigned long long(x)", which is
   * not a valid functional cast in standard C++.
   */
  #if defined(_MSC_VER)&&!defined(__clang__)
  #define BOOST_BLOOM_INIT_U32X4(w,x,y,z)               \
  {(std::uint32_t(w)+(std::uint64_t(x)<<32)),           \
   (std::uint32_t(y)+(std::uint64_t(z)<<32))}
  #else
  #define BOOST_BLOOM_INIT_U32X4(w,x,y,z)               \
  {std::uint32_t(w),std::uint32_t(x),std::uint32_t(y),std::uint32_t(z)}
  #endif

  #define BOOST_BLOOM_INIT_U32X4X2(w0,x0,y0,z0,w1,x1,y1,z1) \
  {{BOOST_BLOOM_INIT_U32X4(w0,x0,y0,z0),BOOST_BLOOM_INIT_U32X4(w1,x1,y1,z1)}}
  33. template<std::size_t K>
  34. struct fast_multiblock32:detail::multiblock_fpr_base<K>
  35. {
  36. static constexpr std::size_t k=K;
  37. using value_type=uint32x4x2_t[(k+7)/8];
  38. static constexpr std::size_t used_value_size=sizeof(std::uint32_t)*k;
  39. static BOOST_FORCEINLINE void mark(value_type& x,std::uint64_t hash)
  40. {
  41. for(std::size_t i=0;i<k/8;++i){
  42. mark_uint32x4x2_t(x[i],hash,8);
  43. hash=detail::mulx64(hash);
  44. }
  45. if(k%8){
  46. mark_uint32x4x2_t(x[k/8],hash,k%8);
  47. }
  48. }
  49. static BOOST_FORCEINLINE bool check(const value_type& x,std::uint64_t hash)
  50. {
  51. bool res=true;
  52. for(std::size_t i=0;i<k/8;++i){
  53. res&=check_uint32x4x2_t(x[i],hash,8);
  54. hash=detail::mulx64(hash);
  55. }
  56. if(k%8){
  57. res&=check_uint32x4x2_t(x[k/8],hash,k%8);
  58. }
  59. return res;
  60. }
  61. private:
  62. static BOOST_FORCEINLINE uint32x4x2_t make_uint32x4x2_t(
  63. std::uint64_t hash,std::size_t kp)
  64. {
  65. static const uint32x4x2_t ones[8]={
  66. BOOST_BLOOM_INIT_U32X4X2(1,0,0,0,0,0,0,0),
  67. BOOST_BLOOM_INIT_U32X4X2(1,1,0,0,0,0,0,0),
  68. BOOST_BLOOM_INIT_U32X4X2(1,1,1,0,0,0,0,0),
  69. BOOST_BLOOM_INIT_U32X4X2(1,1,1,1,0,0,0,0),
  70. BOOST_BLOOM_INIT_U32X4X2(1,1,1,1,1,0,0,0),
  71. BOOST_BLOOM_INIT_U32X4X2(1,1,1,1,1,1,0,0),
  72. BOOST_BLOOM_INIT_U32X4X2(1,1,1,1,1,1,1,0),
  73. BOOST_BLOOM_INIT_U32X4X2(1,1,1,1,1,1,1,1)
  74. };
  75. uint32x4_t h_lo=vreinterpretq_u32_u64(vdupq_n_u64(hash)),
  76. h_hi=h_lo;
  77. h_lo=vreinterpretq_u32_u64(
  78. vshlq_u64(vreinterpretq_u64_u32(h_lo),(int64x2_t{0,5})));
  79. h_hi=vreinterpretq_u32_u64(
  80. vshlq_u64(vreinterpretq_u64_u32(h_hi),(int64x2_t{10,15})));
  81. h_lo=vshrq_n_u32(h_lo,32-5);
  82. h_hi=vshrq_n_u32(h_hi,32-5);
  83. return {
  84. vshlq_u32(ones[kp-1].val[0],vreinterpretq_s32_u32(h_lo)),
  85. vshlq_u32(ones[kp-1].val[1],vreinterpretq_s32_u32(h_hi))
  86. };
  87. }
  88. static BOOST_FORCEINLINE void mark_uint32x4x2_t(
  89. uint32x4x2_t& x,std::uint64_t hash,std::size_t kp)
  90. {
  91. uint32x4x2_t h=make_uint32x4x2_t(hash,kp);
  92. x.val[0]=vorrq_u32(x.val[0],h.val[0]);
  93. x.val[1]=vorrq_u32(x.val[1],h.val[1]);
  94. }
  95. static BOOST_FORCEINLINE bool check_uint32x4x2_t(
  96. const uint32x4x2_t& x,std::uint64_t hash,std::size_t kp)
  97. {
  98. uint32x4x2_t h=make_uint32x4x2_t(hash,kp);
  99. uint32x4_t lo=vtstq_u32(x.val[0],h.val[0]);
  100. uint32x4_t hi=vtstq_u32(x.val[1],h.val[1]);
  101. if(kp!=8){
  102. static const uint32x4x2_t masks[7]={
  103. BOOST_BLOOM_INIT_U32X4X2( 0,-1,-1,-1,-1,-1,-1,-1),
  104. BOOST_BLOOM_INIT_U32X4X2( 0, 0,-1,-1,-1,-1,-1,-1),
  105. BOOST_BLOOM_INIT_U32X4X2( 0, 0, 0,-1,-1,-1,-1,-1),
  106. BOOST_BLOOM_INIT_U32X4X2( 0, 0, 0, 0,-1,-1,-1,-1),
  107. BOOST_BLOOM_INIT_U32X4X2( 0, 0, 0, 0, 0,-1,-1,-1),
  108. BOOST_BLOOM_INIT_U32X4X2( 0, 0, 0, 0, 0, 0,-1,-1),
  109. BOOST_BLOOM_INIT_U32X4X2( 0, 0, 0, 0, 0, 0, 0,-1)
  110. };
  111. lo=vorrq_u32(lo,masks[kp-1].val[0]);
  112. hi=vorrq_u32(hi,masks[kp-1].val[1]);
  113. }
  114. int64x2_t res=vreinterpretq_s64_u32(vandq_u32(lo,hi));
  115. return (vgetq_lane_s64(res,0)&vgetq_lane_s64(res,1))==-1;
  116. }
  117. };
  118. #undef BOOST_BLOOM_INIT_U32X4X2
  119. #undef BOOST_BLOOM_INIT_U32X4
  120. #if defined(BOOST_MSVC)
  121. #pragma warning(pop) /* C4714 */
  122. #endif
  123. } /* namespace bloom */
  124. } /* namespace boost */
  125. #endif