/* Common base for all boost::bloom::filter instantiations.
 *
 * Copyright 2025 Joaquin M Lopez Munoz.
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 *
 * See https://www.boost.org/libs/bloom for library home page.
 */

#ifndef BOOST_BLOOM_DETAIL_CORE_HPP
#define BOOST_BLOOM_DETAIL_CORE_HPP

#include <algorithm>
#include <boost/assert.hpp>
#include <boost/bloom/detail/mulx64.hpp>
#include <boost/bloom/detail/sse2.hpp>
#include <boost/config.hpp>
#include <boost/core/allocator_traits.hpp>
#include <boost/core/bit.hpp>
#include <boost/core/empty_value.hpp>
#include <boost/core/span.hpp>
#include <boost/throw_exception.hpp>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>

#ifdef __has_builtin
#define BOOST_BLOOM_HAS_BUILTIN(x) __has_builtin(x)
#else
#define BOOST_BLOOM_HAS_BUILTIN(x) 0
#endif

#if !defined(NDEBUG)
#define BOOST_BLOOM_ASSUME(cond) BOOST_ASSERT(cond)
#elif BOOST_BLOOM_HAS_BUILTIN(__builtin_assume)
#define BOOST_BLOOM_ASSUME(cond) __builtin_assume(cond)
#elif defined(__GNUC__) || BOOST_BLOOM_HAS_BUILTIN(__builtin_unreachable)
#define BOOST_BLOOM_ASSUME(cond) \
  do{ \
    if(!(cond))__builtin_unreachable(); \
  }while(0)
#elif defined(_MSC_VER)
#define BOOST_BLOOM_ASSUME(cond) __assume(cond)
#else
#define BOOST_BLOOM_ASSUME(cond) \
  do{ \
    static_cast<void>(false&&(cond)); \
  }while(0)
#endif

/* We use BOOST_BLOOM_PREFETCH[_WRITE] macros rather than proper
 * functions because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109985
 */

#if defined(BOOST_GCC)||defined(BOOST_CLANG)
#define BOOST_BLOOM_PREFETCH(p) __builtin_prefetch((const char*)(p))
#define BOOST_BLOOM_PREFETCH_WRITE(p) __builtin_prefetch((const char*)(p),1)
#elif defined(BOOST_BLOOM_SSE2)
#define BOOST_BLOOM_PREFETCH(p) _mm_prefetch((const char*)(p),_MM_HINT_T0)
#if defined(_MM_HINT_ET0)
#define BOOST_BLOOM_PREFETCH_WRITE(p) \
  _mm_prefetch((const char*)(p),_MM_HINT_ET0)
#else
#define BOOST_BLOOM_PREFETCH_WRITE(p) \
  _mm_prefetch((const char*)(p),_MM_HINT_T0)
#endif
#else
#define BOOST_BLOOM_PREFETCH(p) ((void)(p))
#define BOOST_BLOOM_PREFETCH_WRITE(p) ((void)(p))
#endif

namespace boost{
namespace bloom{
namespace detail{

#if defined(BOOST_MSVC)
#pragma warning(push)
#pragma warning(disable:4714) /* marked as __forceinline not inlined */
#endif

/* fastrange_and_mcg produces (pos,hash') from hash as follows:
 *   - pos=high(mulx64(hash,range))
 *   - hash'=c*hash
 * pos is uniformly distributed in [0,range) (see Lemire 2018,
 * https://arxiv.org/pdf/1805.10941), whereas hash'<-hash is a multiplicative
 * congruential generator using well-behaved multipliers c from Steele and
 * Vigna 2021, https://arxiv.org/pdf/2001.05304 . To ensure the MCG generates
 * long cycles, the initial value of hash is adjusted to be odd, which implies
 * that the least significant bit of hash' is always one. In general, the low
 * bits of MCG-produced values are of low quality and we don't use them
 * downstream.
 */

struct fastrange_and_mcg
{
  constexpr fastrange_and_mcg(std::size_t m)noexcept:rng{m}{}

  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
  inline constexpr std::size_t range()const noexcept{return (std::size_t)rng;}

  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
  inline void prepare_hash(std::uint64_t& hash)const noexcept
  {
    hash|=1u;
  }

  /* NOLINTNEXTLINE(readability-redundant-inline-specifier) */
  inline std::size_t next_position(std::uint64_t& hash)const noexcept
  {
    std::uint64_t hi;
    umul128(hash,rng,hi);
#if ((((SIZE_MAX>>16)>>16)>>16)>>15)!=0 /* 64-bit mode (or higher) */
    hash*=0xf1357aea2e62a9c5ull;
#else /* 32-bit mode */
    hash*=0xe817fb2d;
#endif
    return (std::size_t)hi;
  }

  std::uint64_t rng;
};
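
/* Illustration: next_position returns floor(range*hash/2^64), the high
 * half of the 64x64->128-bit product, so e.g. with range=100 and
 * hash=2^63 (one half in fixed-point terms), hash*100=50*2^64 and pos=50.
 * The multiply that follows advances hash along the MCG cycle for the
 * next position.
 */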

/* used_value_size<Subfilter>::value is Subfilter::used_value_size if it
 * exists, or sizeof(Subfilter::value_type) otherwise. This covers the
 * case where a subfilter only operates on the first used_value_size bytes
 * of its value_type (e.g. fast_multiblock32<K> with K<8).
 */

template<typename Subfilter,typename=void>
struct used_value_size
{
  static constexpr std::size_t value=sizeof(typename Subfilter::value_type);
};

template<typename Subfilter>
struct used_value_size<
  Subfilter,
  typename std::enable_if<Subfilter::used_value_size!=0>::type
>
{
  static constexpr std::size_t value=Subfilter::used_value_size;
};
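
/* Following the fast_multiblock32<K> example above: with K=5, only the
 * first 5*4=20 bytes of its (larger) SIMD value_type are used, so
 * used_value_size<fast_multiblock32<5>>::value is 20.
 */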

/* GCD with x,p > 1, p a power of two */

constexpr std::size_t gcd_pow2(std::size_t x,std::size_t p)
{
  /* x&-x: maximum power of two dividing x */
  return (x&(0-x))<p?(x&(0-x)):p;
}
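
/* E.g. gcd_pow2(24,64): 24&-24=8, the largest power of two dividing 24,
 * and 8<64, so the result is 8=gcd(24,64).
 */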

/* std::ldexp is not constexpr in C++11 */

constexpr double constexpr_ldexp_1_positive(int exp)
{
  return exp==0?1.0:2.0*constexpr_ldexp_1_positive(exp-1);
}

inline unsigned int unchecked_countr_zero(std::uint64_t x)
{
#if defined(BOOST_MSVC)&&(defined(_M_X64)||defined(_M_ARM64))
  unsigned long r;
  _BitScanForward64(&r,x);
  return (unsigned int)r;
#elif defined(BOOST_GCC)||defined(BOOST_CLANG)
  return (unsigned int)__builtin_ctzll(x);
#else
  BOOST_BLOOM_ASSUME(x!=0);
  return (unsigned int)boost::core::countr_zero(x);
#endif
}

struct filter_array
{
  unsigned char* data;
  unsigned char* array; /* adjusted from data for proper alignment */
};
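
/* C++11 substitute for if constexpr: exactly one of f (B true) or g
 * (B false) is invoked, selected at compile time via tuple indexing.
 */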

struct if_constexpr_void_else{void operator()()const{}};

template<bool B,typename F,typename G=if_constexpr_void_else>
void if_constexpr(F f,G g={})
{
  std::get<B?0:1>(std::forward_as_tuple(f,g))();
}

template<bool B,typename T,typename std::enable_if<B>::type* =nullptr>
void copy_assign_if(T& x,const T& y){x=y;}

template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr>
void copy_assign_if(T&,const T&){}

template<bool B,typename T,typename std::enable_if<B>::type* =nullptr>
void move_assign_if(T& x,T& y){x=std::move(y);}

template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr>
void move_assign_if(T&,T&){}

template<bool B,typename T,typename std::enable_if<B>::type* =nullptr>
void swap_if(T& x,T& y){using std::swap;swap(x,y);}

template<bool B,typename T,typename std::enable_if<!B>::type* =nullptr>
void swap_if(T&,T&){}

template<
  std::size_t K,typename Subfilter,std::size_t Stride,typename Allocator
>
class filter_core:empty_value<Allocator,0>
{
  static_assert(K>0,"K must be >= 1");
  static_assert(
    std::is_same<allocator_value_type_t<Allocator>,unsigned char>::value,
    "Allocator value_type must be unsigned char");

public:
  static constexpr std::size_t k=K;
  using subfilter=Subfilter;

private:
  static constexpr std::size_t kp=subfilter::k;
  static constexpr std::size_t k_total=k*kp;
  using block_type=typename subfilter::value_type;
  static constexpr std::size_t block_size=sizeof(block_type);
  static constexpr std::size_t used_value_size=
    detail::used_value_size<subfilter>::value;

public:
  static constexpr std::size_t stride=Stride?Stride:used_value_size;
  static_assert(
    stride<=used_value_size,"Stride can't exceed the block size");

private:
  static constexpr std::size_t tail_size=sizeof(block_type)-stride;
  static constexpr bool are_blocks_aligned=
    (stride%alignof(block_type)==0);
  static constexpr std::size_t cacheline=64; /* assumed; actual size unknown
                                              * at compile time */
  static constexpr std::size_t initial_alignment=
    are_blocks_aligned?
    alignof(block_type)>cacheline?alignof(block_type):cacheline:
    1;
  static constexpr std::size_t prefetched_cachelines=
    1+(block_size+cacheline-1-gcd_pow2(stride,cacheline))/cacheline;
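  /* prefetched_cachelines is the maximum number of cache lines a block can
   * straddle given that blocks start at multiples of stride. E.g.
   * block_size=32, stride=32: 1+(32+63-32)/64=1 (never crosses a line);
   * block_size=64, stride=8: gcd_pow2(8,64)=8, so 1+(64+63-8)/64=2.
   */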
  using hash_strategy=detail::fastrange_and_mcg;

public:
  using allocator_type=Allocator;
  using size_type=std::size_t;
  using difference_type=std::ptrdiff_t;
  using pointer=unsigned char*;
  using const_pointer=const unsigned char*;

  static constexpr std::size_t bulk_insert_size=
    (64+prefetched_cachelines-1)/prefetched_cachelines;
  static constexpr std::size_t bulk_may_contain_size=
    (64+prefetched_cachelines-1)/prefetched_cachelines;
  static_assert(
    bulk_may_contain_size<=64, /* see results in bulk_may_contain */
    "internal check, bulk_may_contain_size must be <= 64");

  explicit filter_core(std::size_t m=0):filter_core{m,allocator_type{}}{}

  filter_core(std::size_t m,const allocator_type& al_):
    allocator_base{empty_init,al_},
    hs{requested_range(m)},
    ar(new_array(al(),m?hs.range():0))
  {
    clear_bytes();
  }

  filter_core(std::size_t n,double fpr,const allocator_type& al_):
    filter_core(unadjusted_capacity_for(n,fpr),al_){}

  filter_core(const filter_core& x):
    filter_core{x,allocator_select_on_container_copy_construction(x.al())}{}

  filter_core(filter_core&& x)noexcept:
    filter_core{std::move(x),allocator_type(std::move(x.al()))}{}

  filter_core(const filter_core& x,const allocator_type& al_):
    allocator_base{empty_init,al_},
    hs{x.hs},
    ar(new_array(al(),x.range()))
  {
    copy_bytes(x);
  }

  filter_core(filter_core&& x,const allocator_type& al_):
    allocator_base{empty_init,al_},
    hs{x.hs}
  {
    auto empty_ar=new_array(x.al(),0); /* we're relying on this not throwing */
    if(al()==x.al()){
      ar=x.ar;
    }
    else{
      ar=new_array(al(),x.range());
      copy_bytes(x);
      x.delete_array();
    }
    x.hs=hash_strategy{0};
    x.ar=empty_ar;
  }

  ~filter_core()noexcept
  {
    delete_array();
  }

  filter_core& operator=(const filter_core& x)
  {
    static constexpr auto pocca=
      allocator_propagate_on_container_copy_assignment_t<allocator_type>::
      value;

    if(this!=&x){
      if_constexpr<pocca>([&,this]{
        if(al()!=x.al()||range()!=x.range()){
          auto x_al=x.al();
          auto new_ar=new_array(x_al,x.range());
          delete_array();
          hs=x.hs;
          ar=new_ar;
        }
        copy_assign_if<pocca>(al(),x.al());
      },
      [&,this]{ /* else */
        if(range()!=x.range()){
          auto new_ar=new_array(al(),x.range());
          delete_array();
          hs=x.hs;
          ar=new_ar;
        }
      });
      copy_bytes(x);
    }
    return *this;
  }

#if defined(BOOST_MSVC)
#pragma warning(push)
#pragma warning(disable:4127) /* conditional expression is constant */
#endif

  filter_core& operator=(filter_core&& x)noexcept(
    allocator_propagate_on_container_move_assignment_t<allocator_type>::value||
    allocator_is_always_equal_t<allocator_type>::value)
  {
    static constexpr auto pocma=
      allocator_propagate_on_container_move_assignment_t<allocator_type>::
      value;

    if(this!=&x){
      auto empty_ar=new_array(x.al(),0); /* relying on this not throwing */
      if(pocma||al()==x.al()){
        delete_array();
        move_assign_if<pocma>(al(),x.al());
        hs=x.hs;
        ar=x.ar;
      }
      else{
        if(range()!=x.range()){
          auto new_ar=new_array(al(),x.range());
          delete_array();
          hs=x.hs;
          ar=new_ar;
        }
        copy_bytes(x);
        x.delete_array();
      }
      x.hs=hash_strategy{0};
      x.ar=empty_ar;
    }
    return *this;
  }

#if defined(BOOST_MSVC)
#pragma warning(pop) /* C4127 */
#endif

  allocator_type get_allocator()const noexcept
  {
    return al();
  }

  std::size_t capacity()const noexcept
  {
    return used_array_size()*CHAR_BIT;
  }

  static std::size_t capacity_for(std::size_t n,double fpr)
  {
    auto m=unadjusted_capacity_for(n,fpr);
    if(m==0)return 0;
    auto rng=hash_strategy{requested_range(m)}.range();
    return used_array_size(rng)*CHAR_BIT;
  }

  static double fpr_for(std::size_t n,std::size_t m)
  {
    return m==0?1.0:n==0?0.0:fpr_for_c((double)m/n);
  }

  boost::span<unsigned char> array()noexcept
  {
    return {ar.data?ar.array:nullptr,capacity()/CHAR_BIT};
  }

  boost::span<const unsigned char> array()const noexcept
  {
    return {ar.data?ar.array:nullptr,capacity()/CHAR_BIT};
  }

  BOOST_FORCEINLINE void insert(std::uint64_t hash)
  {
    hs.prepare_hash(hash);
    for(auto n=k;n--;){
      auto p=next_element(hash); /* modifies hash */
      /* We do the unhappy-path null check here rather than at the beginning
       * of the function because prefetch completion wait gives us free CPU
       * cycles to spare.
       */
      if(BOOST_UNLIKELY(n==k-1&&ar.data==nullptr))return;
      set(p,hash);
    }
  }
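
  /* bulk_insert software-pipelines the work: while the bits for one step
   * of an element are being set, the position for its next step has
   * already been computed and its cache line(s) prefetched by
   * next_element, with bulk_insert_size elements in flight to hide memory
   * latency. Remaining elements fall back to plain insert.
   */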
  template<typename HashStream>
  void bulk_insert(HashStream h,std::size_t n)
  {
    std::uint64_t hashes[bulk_insert_size];
    unsigned char* positions[bulk_insert_size];

    if(n>=2*bulk_insert_size){
      for(auto i=bulk_insert_size;i--;){
        auto& hash=hashes[i]=h();
        auto& p=positions[i];
        hs.prepare_hash(hash);
        p=next_element(hash);
      }
      if(BOOST_UNLIKELY(ar.data==nullptr))return;
      do{
        for(auto j=k-1;j--;){
          for(auto i=bulk_insert_size;i--;){
            auto& hash=hashes[i];
            auto& p=positions[i];
            auto hash0=hash;
            auto p0=p;
            p=next_element(hash);
            set(p0,hash0);
          }
        }
        for(auto i=bulk_insert_size;i--;){
          auto& hash=hashes[i];
          auto& p=positions[i];
          auto hash0=hash;
          auto p0=p;
          hash=h();
          hs.prepare_hash(hash);
          p=next_element(hash);
          set(p0,hash0);
        }
        n-=bulk_insert_size;
      }while(n>=2*bulk_insert_size);
      for(auto j=k-1;j--;){
        for(auto i=bulk_insert_size;i--;){
          auto& hash=hashes[i];
          auto& p=positions[i];
          auto hash0=hash;
          auto p0=p;
          p=next_element(hash);
          set(p0,hash0);
        }
      }
      for(auto i=bulk_insert_size;i--;){
        auto& hash=hashes[i];
        auto& p=positions[i];
        set(p,hash);
      }
      n-=bulk_insert_size;
    }
    while(n--)insert(h());
  }

  void swap(filter_core& x)noexcept(
    allocator_propagate_on_container_swap_t<allocator_type>::value||
    allocator_is_always_equal_t<allocator_type>::value)
  {
    static constexpr auto pocs=
      allocator_propagate_on_container_swap_t<allocator_type>::value;

    if_constexpr<pocs>([&,this]{
      swap_if<pocs>(al(),x.al());
    },
    [&,this]{ /* else */
      BOOST_ASSERT(al()==x.al());
      (void)this; /* makes sure captured this is used */
    });
    std::swap(hs,x.hs);
    std::swap(ar,x.ar);
  }

  void clear()noexcept
  {
    clear_bytes();
  }

  void reset(std::size_t m=0)
  {
    hash_strategy new_hs{requested_range(m)};
    std::size_t rng=m?new_hs.range():0;
    if(rng!=range()){
      auto new_ar=new_array(al(),rng);
      delete_array();
      hs=new_hs;
      ar=new_ar;
    }
    clear_bytes();
  }

  void reset(std::size_t n,double fpr)
  {
    reset(capacity_for(n,fpr));
  }

  filter_core& operator&=(const filter_core& x)
  {
    combine(x,[](unsigned char& a,unsigned char b){a&=b;});
    return *this;
  }

  filter_core& operator|=(const filter_core& x)
  {
    combine(x,[](unsigned char& a,unsigned char b){a|=b;});
    return *this;
  }

  BOOST_FORCEINLINE bool may_contain(std::uint64_t hash)const
  {
    hs.prepare_hash(hash);
#if 1
    auto p0=next_element(hash);
    for(std::size_t n=k-1;n--;){
      auto p=p0;
      auto hash0=hash;
      p0=next_element(hash);
      if(!get(p,hash0))return false;
    }
    if(!get(p0,hash))return false;
    return true;
#else
    for(auto n=k;n--;){
      auto p=next_element(hash); /* modifies hash */
      if(!get(p,hash))return false;
    }
    return true;
#endif
  }
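
  /* bulk_may_contain keeps one result bit per in-flight element. For k==1
   * the structure mirrors bulk_insert; for k>1, each pass over j clears
   * the bits of elements already known absent, and unchecked_countr_zero
   * visits only the still-plausible ones. Note that initial_result_mask is
   * built with two shifts so that it stays well defined even when
   * bulk_may_contain_size==64 (a single shift by 64 would be UB).
   */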
  template<typename HashStream,typename F>
  void bulk_may_contain(HashStream h,std::size_t n,F f)const
  {
    if(k==1){
      std::uint64_t hashes[bulk_may_contain_size];
      const unsigned char* positions[bulk_may_contain_size];

      if(n>=2*bulk_may_contain_size){
        for(auto i=bulk_may_contain_size;i--;){
          auto& hash=hashes[i]=h();
          auto& p=positions[i];
          hs.prepare_hash(hash);
          p=next_element(hash);
        }
        do{
          for(auto i=bulk_may_contain_size;i--;){
            auto& hash=hashes[i];
            auto& p=positions[i];
            auto hash0=hash;
            auto p0=p;
            hash=h();
            hs.prepare_hash(hash);
            p=next_element(hash);
            f(get(p0,hash0));
          }
          n-=bulk_may_contain_size;
        }while(n>=2*bulk_may_contain_size);
        for(auto i=bulk_may_contain_size;i--;){
          auto& hash=hashes[i];
          auto& p=positions[i];
          f(get(p,hash));
        }
        n-=bulk_may_contain_size;
      }
      while(n--)f(may_contain(h()));
    }
    else{
      static constexpr std::uint64_t initial_result_mask=
        ((std::uint64_t(1)<<(bulk_may_contain_size/2))<<
         ((bulk_may_contain_size+1)/2))-1;
      std::uint64_t hashes[bulk_may_contain_size];
      const unsigned char* positions[bulk_may_contain_size];
      std::uint64_t results=initial_result_mask;

      if(n>=2*bulk_may_contain_size){
        for(std::size_t i=0;i<bulk_may_contain_size;++i){
          auto& hash=hashes[i]=h();
          auto& p=positions[i];
          hs.prepare_hash(hash);
          p=next_element(hash);
        }
        do{
          for(auto j=k;j--;){
            auto mask=results;
            if(!mask)break;
            do{
              auto i=unchecked_countr_zero(mask);
              auto& hash=hashes[i];
              auto& p=positions[i];
              auto b=get(p,hash);
              p=next_element(hash);
              results&=~(std::uint64_t(!b)<<i);
              mask&=mask-1;
            }while(mask);
          }
          for(std::size_t i=0;i<bulk_may_contain_size;++i){
            auto& hash=hashes[i];
            auto& p=positions[i];
            hash=h();
            hs.prepare_hash(hash);
            p=next_element(hash);
            f(results&1);
            results>>=1;
          }
          results=initial_result_mask;
          n-=bulk_may_contain_size;
        }while(n>=2*bulk_may_contain_size);
        for(auto j=k;j--;){
          auto mask=results;
          if(!mask)break;
          do{
            auto i=unchecked_countr_zero(mask);
            auto& hash=hashes[i];
            auto& p=positions[i];
            auto b=get(p,hash);
            p=next_element(hash);
            results&=~(std::uint64_t(!b)<<i);
            mask&=mask-1;
          }while(mask);
        }
        for(std::size_t i=0;i<bulk_may_contain_size;++i){
          f(results&1);
          results>>=1;
        }
        n-=bulk_may_contain_size;
      }
      while(n--)f(may_contain(h()));
    }
  }

  friend bool operator==(const filter_core& x,const filter_core& y)
  {
    if(x.range()!=y.range())return false;
    else if(!x.ar.data)return true;
    else return std::memcmp(x.ar.array,y.ar.array,x.used_array_size())==0;
  }

private:
  using allocator_base=empty_value<Allocator,0>;

  const Allocator& al()const{return allocator_base::get();}
  Allocator& al(){return allocator_base::get();}

  static std::size_t requested_range(std::size_t m)
  {
    if(m>(used_value_size-stride)*CHAR_BIT){
      /* ensures filter_core{f.capacity()}.capacity()==f.capacity() */
      m-=(used_value_size-stride)*CHAR_BIT;
    }
    return
      (std::numeric_limits<std::size_t>::max)()-m>=stride*CHAR_BIT-1?
      (m+stride*CHAR_BIT-1)/(stride*CHAR_BIT):
      m/(stride*CHAR_BIT);
  }
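
  /* E.g. (illustrative numbers) with stride=8 and used_value_size=8, so no
   * tail slack: a request of m=1000 bits yields (1000+63)/64=16
   * stride-sized slots, i.e. 16*64=1024 bits of capacity.
   */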
  static filter_array new_array(allocator_type& al,std::size_t rng)
  {
    if(rng){
      auto p=allocator_allocate(al,space_for(rng));
      return {p,array_for(p)};
    }
    else{
      /* To avoid dynamic allocation for zero capacity or moved-from filters,
       * we point array to a statically allocated dummy array with all bits
       * set to one. This is good for read operations but not so for write
       * operations, where we need to resort to a null check on
       * filter_array::data.
       */
      static struct {unsigned char x=-1;}
      dummy[space_for(hash_strategy{0}.range())];
      return {nullptr,array_for(reinterpret_cast<unsigned char*>(&dummy))};
    }
  }

  void delete_array()noexcept
  {
    if(ar.data)allocator_deallocate(al(),ar.data,space_for(range()));
  }

  void clear_bytes()noexcept
  {
    std::memset(ar.array,0,used_array_size());
  }

  void copy_bytes(const filter_core& x)
  {
    BOOST_ASSERT(range()==x.range());
    std::memcpy(ar.array,x.ar.array,used_array_size());
  }

  std::size_t range()const noexcept
  {
    return ar.data?hs.range():0;
  }

  static constexpr std::size_t space_for(std::size_t rng)noexcept
  {
    return (initial_alignment-1)+rng*stride+tail_size;
  }

  static unsigned char* array_for(unsigned char* p)noexcept
  {
    return p+
      (std::uintptr_t(initial_alignment)-
       std::uintptr_t(p))%initial_alignment;
  }
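
  /* E.g. with initial_alignment=64: if p%64==24 the adjustment above is
   * (64-24)%64=40, placing array at the next 64-byte boundary; if p is
   * already aligned, (64-0)%64=0 and array==p.
   */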
  std::size_t used_array_size()const noexcept
  {
    return used_array_size(range());
  }

  static std::size_t used_array_size(std::size_t rng)noexcept
  {
    return rng?rng*stride+(used_value_size-stride):0;
  }

  static std::size_t unadjusted_capacity_for(std::size_t n,double fpr)
  {
    using size_t_limits=std::numeric_limits<std::size_t>;
    using double_limits=std::numeric_limits<double>;

    BOOST_ASSERT(fpr>=0.0&&fpr<=1.0);
    if(n==0)return fpr==1.0?0:1;

    constexpr double eps=1.0/(double)(size_t_limits::max)();
    constexpr double max_size_t_as_double=
      size_t_limits::digits<=double_limits::digits?
      (double)(size_t_limits::max)():
      (double)(size_t_limits::max)()
      /* ensure value is portably castable back to std::size_t */
      -constexpr_ldexp_1_positive(
        size_t_limits::digits-double_limits::digits);
    const double c_max=max_size_t_as_double/n;

    /* Capacity of a classical Bloom filter as a lower bound:
     * c = k / -log(1 - fpr^(1/k)).
     */
    double d=1.0-std::pow(fpr,1.0/k_total);
    if(std::fpclassify(d)==FP_ZERO)return 0; /* fpr ~ 1 */
    double l=std::log(d);
    if(std::fpclassify(l)==FP_ZERO)return (std::size_t)(c_max*n); /* fpr ~ 0 */
    double c0=(std::min)(k_total/-l,c_max);

    /* bracket target fpr between c0 and c1 */
    double c1=c0;
    if(fpr_for_c(c1)>fpr){ /* expected case */
      do{
        double cn=c1*1.5;
        if(cn>c_max)return (std::size_t)(c_max*n);
        c0=c1;
        c1=cn;
      }while(fpr_for_c(c1)>fpr);
    }
    else{ /* c0 shouldn't ever overshoot, just in case */
      do{
        double cn=c0/1.5;
        c1=c0;
        c0=cn;
      }while(fpr_for_c(c0)<fpr);
    }

    /* bisect */
    double cm;
    while((cm=c0+(c1-c0)/2)>c0&&cm<c1&&c1-c0>=eps){
      if(fpr_for_c(cm)>fpr)c0=cm;
      else c1=cm;
    }
    return (std::size_t)(cm*n);
  }
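
  /* Illustration of the lower bound above: with k_total=10 and fpr=0.01,
   * fpr^(1/10)~=0.631, so c0=10/-log(1-0.631)~=10/0.997~=10.0 bits per
   * element, which bracketing and bisection then refine against fpr_for_c.
   * fpr_for_c (below) evaluates, as per its code,
   *   FPR(c)=(sum_{i>=0} Pois_lambda(i)*subfilter::fpr(i,w))^k,
   *   lambda=w*k/c, w=(2*used_value_size-stride)*CHAR_BIT,
   * clamped from below by the classical bound (1-e^(-k_total/c))^k_total.
   */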
  static double fpr_for_c(double c)
  {
    constexpr std::size_t w=(2*used_value_size-stride)*CHAR_BIT;
    const double lambda=w*k/c;
    const double loglambda=std::log(lambda);

    double res=0.0;
    double deltap=0.0;
    for(int i=0;i<1000;++i){
      double poisson=std::exp(i*loglambda-lambda-std::lgamma(i+1));
      double delta=poisson*subfilter::fpr(i,w);
      double resn=res+delta;
      /* The terms of this summation are unimodal, so we check we're on the
       * descending slope before stopping.
       */
      if(delta<deltap&&resn==res)break;
      deltap=delta;
      res=resn;
    }

    /* For small values of c (high values of lambda), truncation errors, loop
     * exhaustion and the use of Poisson instead of binomial may result in a
     * calculated value less than the classical Bloom filter formula, which we
     * know is always the minimum attainable.
     */
    return (std::max)(
      std::pow((double)res,(double)k),
      std::pow(1.0-std::exp(-(double)k_total/c),(double)k_total));
  }
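
  /* get/set dispatch on are_blocks_aligned: aligned blocks are accessed in
   * place through a block_type pointer, while unaligned ones go through
   * std::memcpy to/from a local block_type, avoiding undefined behavior
   * from misaligned access.
   */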
  BOOST_FORCEINLINE bool get(const unsigned char* p,std::uint64_t hash)const
  {
    return get(p,hash,std::integral_constant<bool,are_blocks_aligned>{});
  }

  BOOST_FORCEINLINE bool get(
    const unsigned char* p,std::uint64_t hash,
    std::true_type /* blocks aligned */)const
  {
    return subfilter::check(*reinterpret_cast<const block_type*>(p),hash);
  }

  BOOST_FORCEINLINE bool get(
    const unsigned char* p,std::uint64_t hash,
    std::false_type /* blocks not aligned */)const
  {
    block_type x;
    std::memcpy(&x,p,block_size);
    return subfilter::check(x,hash);
  }

  BOOST_FORCEINLINE void set(unsigned char* p,std::uint64_t hash)
  {
    return set(p,hash,std::integral_constant<bool,are_blocks_aligned>{});
  }

  BOOST_FORCEINLINE void set(
    unsigned char* p,std::uint64_t hash,
    std::true_type /* blocks aligned */)
  {
    subfilter::mark(*reinterpret_cast<block_type*>(p),hash);
  }

  BOOST_FORCEINLINE void set(
    unsigned char* p,std::uint64_t hash,
    std::false_type /* blocks not aligned */)
  {
    block_type x;
    std::memcpy(&x,p,block_size);
    subfilter::mark(x,hash);
    std::memcpy(p,&x,block_size);
  }
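
  /* next_element maps the current hash to the address of its block
   * (position*stride), prefetches the prefetched_cachelines cache lines
   * the block may touch, and advances the hash via hash_strategy.
   */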
  BOOST_FORCEINLINE
  unsigned char* next_element(std::uint64_t& h)noexcept
  {
    auto p=ar.array+hs.next_position(h)*stride;
    for(std::size_t i=0;i<prefetched_cachelines;++i){
      BOOST_BLOOM_PREFETCH_WRITE((unsigned char*)p+i*cacheline);
    }
    return p;
  }

  BOOST_FORCEINLINE
  const unsigned char* next_element(std::uint64_t& h)const noexcept
  {
    auto p=ar.array+hs.next_position(h)*stride;
    for(std::size_t i=0;i<prefetched_cachelines;++i){
      BOOST_BLOOM_PREFETCH((unsigned char*)p+i*cacheline);
    }
    return p;
  }

  template<typename F>
  void combine(const filter_core& x,F f)
  {
    if(range()!=x.range()){
      BOOST_THROW_EXCEPTION(std::invalid_argument("incompatible filters"));
    }
    auto first0=ar.array,
         last0=first0+used_array_size(),
         first1=x.ar.array;
    while(first0!=last0)f(*first0++,*first1++);
  }

  hash_strategy hs;
  filter_array  ar;
};

#if defined(BOOST_MSVC)
#pragma warning(pop) /* C4714 */
#endif

} /* namespace detail */
} /* namespace bloom */
} /* namespace boost */

#endif