adaptive_merge.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. //////////////////////////////////////////////////////////////////////////////
  2. //
  3. // (C) Copyright Ion Gaztanaga 2015-2016.
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // See http://www.boost.org/libs/move for documentation.
  9. //
  10. //////////////////////////////////////////////////////////////////////////////
  11. #ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP
  12. #define BOOST_MOVE_ADAPTIVE_MERGE_HPP
  13. #include <boost/move/detail/config_begin.hpp>
  14. #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
  15. namespace boost {
  16. namespace movelib {
  17. ///@cond
  18. namespace detail_adaptive {
  19. template<class RandIt, class Compare, class XBuf>
  20. inline void adaptive_merge_combine_blocks( RandIt first
  21. , typename iterator_traits<RandIt>::size_type len1
  22. , typename iterator_traits<RandIt>::size_type len2
  23. , typename iterator_traits<RandIt>::size_type collected
  24. , typename iterator_traits<RandIt>::size_type n_keys
  25. , typename iterator_traits<RandIt>::size_type l_block
  26. , bool use_internal_buf
  27. , bool xbuf_used
  28. , Compare comp
  29. , XBuf & xbuf
  30. )
  31. {
  32. typedef typename iterator_traits<RandIt>::size_type size_type;
  33. size_type const len = len1+len2;
  34. size_type const l_combine = len-collected;
  35. size_type const l_combine1 = len1-collected;
  36. if(n_keys){
  37. RandIt const first_data = first+collected;
  38. RandIt const keys = first;
  39. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
  40. if(xbuf_used){
  41. if(xbuf.size() < l_block){
  42. xbuf.initialize_until(l_block, *first);
  43. }
  44. BOOST_ASSERT(xbuf.size() >= l_block);
  45. size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
  46. combine_params( keys, comp, l_combine
  47. , l_combine1, l_block, xbuf
  48. , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
  49. op_merge_blocks_with_buf
  50. (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
  51. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len);
  52. }
  53. else{
  54. size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
  55. combine_params( keys, comp, l_combine
  56. , l_combine1, l_block, xbuf
  57. , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
  58. if(use_internal_buf){
  59. op_merge_blocks_with_buf
  60. (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
  61. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len);
  62. }
  63. else{
  64. merge_blocks_bufferless
  65. (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
  66. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len);
  67. }
  68. }
  69. }
  70. else{
  71. xbuf.shrink_to_fit(l_block);
  72. if(xbuf.size() < l_block){
  73. xbuf.initialize_until(l_block, *first);
  74. }
  75. size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
  76. size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
  77. combine_params( uint_keys, less(), l_combine
  78. , l_combine1, l_block, xbuf
  79. , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs
  80. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
  81. BOOST_ASSERT(xbuf.size() >= l_block);
  82. op_merge_blocks_with_buf
  83. (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
  84. xbuf.clear();
  85. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len);
  86. }
  87. }
  88. template<class RandIt, class Compare, class XBuf>
  89. inline void adaptive_merge_final_merge( RandIt first
  90. , typename iterator_traits<RandIt>::size_type len1
  91. , typename iterator_traits<RandIt>::size_type len2
  92. , typename iterator_traits<RandIt>::size_type collected
  93. , typename iterator_traits<RandIt>::size_type l_intbuf
  94. , typename iterator_traits<RandIt>::size_type l_block
  95. , bool use_internal_buf
  96. , bool xbuf_used
  97. , Compare comp
  98. , XBuf & xbuf
  99. )
  100. {
  101. typedef typename iterator_traits<RandIt>::size_type size_type;
  102. (void)l_block;
  103. size_type n_keys = collected-l_intbuf;
  104. size_type len = len1+len2;
  105. if(use_internal_buf){
  106. if(xbuf_used){
  107. xbuf.clear();
  108. //Nothing to do
  109. if(n_keys){
  110. unstable_sort(first, first+n_keys, comp, xbuf);
  111. stable_merge(first, first+n_keys, first+len, comp, xbuf);
  112. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len);
  113. }
  114. }
  115. else{
  116. xbuf.clear();
  117. unstable_sort(first, first+collected, comp, xbuf);
  118. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
  119. stable_merge(first, first+collected, first+len, comp, xbuf);
  120. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
  121. }
  122. }
  123. else{
  124. xbuf.clear();
  125. unstable_sort(first, first+collected, comp, xbuf);
  126. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
  127. stable_merge(first, first+collected, first+len1+len2, comp, xbuf);
  128. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len);
  129. }
  130. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len);
  131. }
  132. template<class SizeType>
  133. inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
  134. {
  135. typedef SizeType size_type;
  136. //This is the minimum number of keys to implement the ideal algorithm
  137. size_type n_keys = len1/l_block+len2/l_block;
  138. const size_type second_half_blocks = len2/l_block;
  139. const size_type first_half_aux = len1-l_intbuf;
  140. while(n_keys >= ((first_half_aux-n_keys)/l_block + second_half_blocks)){
  141. --n_keys;
  142. }
  143. ++n_keys;
  144. return n_keys;
  145. }
  146. template<class SizeType>
  147. inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
  148. {
  149. typedef SizeType size_type;
  150. //This is the minimum number of keys to implement the ideal algorithm
  151. size_type n_keys = (len1-l_intbuf)/l_block + len2/l_block;
  152. return n_keys;
  153. }
  154. template<class SizeType, class Xbuf>
  155. inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
  156. {
  157. typedef SizeType size_type;
  158. size_type l_block = rl_block;
  159. size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
  160. while(xbuf.capacity() >= l_block*2){
  161. l_block *= 2;
  162. }
  163. //This is the minimum number of keys to implement the ideal algorithm
  164. size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf);
  165. BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
  166. if(xbuf.template supports_aligned_trailing<size_type>
  167. ( l_block
  168. , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf)))
  169. {
  170. n_keys = 0u;
  171. }
  172. l_intbuf_inout = l_intbuf;
  173. rl_block = l_block;
  174. return n_keys;
  175. }
  176. // Main explanation of the merge algorithm.
  177. //
  178. // csqrtlen = ceil(sqrt(len));
  179. //
  180. // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
  181. // unique elements are extracted from elements to be sorted and placed in the beginning of the range.
  182. //
  183. // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
  184. // are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
  185. //
  186. // Explanation of the "combine_blocks" step:
  187. //
  188. // * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
  189. // Remaining elements that can't form a group are grouped in front of those elements.
  190. // * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
  191. // Remaining elements that can't form a group are grouped in the back of those elements.
  192. // * In parallel the following two steps are performed:
  193. // * Groups are selection-sorted by first or last element (depending whether they are going
  194. // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
  195. // * Elements of each block pair are merged using the csqrtlen buffer taking into account
  196. // if they belong to the first half or second half (marked by the key).
  197. //
  198. // * In the final merge step leading "to_collect" elements are merged with rotations
  199. // with the rest of merged elements in the "combine_blocks" step.
  200. //
  201. // Corner cases:
  202. //
  203. // * If no "to_collect" elements can be extracted:
  204. //
  205. // * If more than a minimum number of elements is extracted
  206. // then reduces the number of elements used as buffer and keys in the
  207. // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
  208. // then uses a rotation based smart merge.
  209. //
  210. // * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
  211. //
  212. // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
  213. //
  214. // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
  215. //
  216. // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
  217. // then no csqrtlen need to be extracted and "combine_blocks" will use integral
  218. // keys to combine blocks.
  219. template<class RandIt, class Compare, class XBuf>
  220. void adaptive_merge_impl
  221. ( RandIt first
  222. , typename iterator_traits<RandIt>::size_type len1
  223. , typename iterator_traits<RandIt>::size_type len2
  224. , Compare comp
  225. , XBuf & xbuf
  226. )
  227. {
  228. typedef typename iterator_traits<RandIt>::size_type size_type;
  229. if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
  230. buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
  231. }
  232. else{
  233. const size_type len = len1+len2;
  234. //Calculate ideal parameters and try to collect needed unique keys
  235. size_type l_block = size_type(ceil_sqrt(len));
  236. //One range is not big enough to extract keys and the internal buffer so a
  237. //rotation-based based merge will do just fine
  238. if(len1 <= l_block*2 || len2 <= l_block*2){
  239. merge_bufferless(first, first+len1, first+len1+len2, comp);
  240. return;
  241. }
  242. //Detail the number of keys and internal buffer. If xbuf has enough memory, no
  243. //internal buffer is needed so l_intbuf will remain 0.
  244. size_type l_intbuf = 0;
  245. size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
  246. size_type const to_collect = l_intbuf+n_keys;
  247. //Try to extract needed unique values from the first range
  248. size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
  249. BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len);
  250. //Not the minimum number of keys is not available on the first range, so fallback to rotations
  251. if(collected != to_collect && collected < 4){
  252. merge_bufferless(first, first+collected, first+len1, comp);
  253. merge_bufferless(first, first + len1, first + len1 + len2, comp);
  254. return;
  255. }
  256. //If not enough keys but more than minimum, adjust the internal buffer and key count
  257. bool use_internal_buf = collected == to_collect;
  258. if (!use_internal_buf){
  259. l_intbuf = 0u;
  260. n_keys = collected;
  261. l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
  262. //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
  263. l_intbuf = use_internal_buf ? l_block : 0u;
  264. }
  265. bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
  266. //Merge trailing elements using smart merges
  267. adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
  268. //Merge buffer and keys with the rest of the values
  269. adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
  270. }
  271. }
  272. } //namespace detail_adaptive {
  273. ///@endcond
  274. //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
  275. //! into one sorted range [first, last) according to the given comparison function comp.
  276. //! The algorithm is stable (if there are equivalent elements in the original two ranges,
  277. //! the elements from the first range (preserving their original order) precede the elements
  278. //! from the second range (preserving their original order).
  279. //!
  280. //! <b>Requires</b>:
  281. //! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
  282. //! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
  283. //!
  284. //! <b>Parameters</b>:
  285. //! - first: the beginning of the first sorted range.
  286. //! - middle: the end of the first sorted range and the beginning of the second
  287. //! - last: the end of the second sorted range
  288. //! - comp: comparison function object which returns true if the first argument is is ordered before the second.
  289. //! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
  290. //! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
  291. //! is min(std::distance(first, middle), std::distance(middle, last)).
  292. //!
  293. //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
  294. //! of dereferenced RandIt throws.
  295. //!
  296. //! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps.
  297. //! Constant factor for comparisons and data movement is minimized when uninitialized_len
  298. //! is min(std::distance(first, middle), std::distance(middle, last)).
  299. //! Pretty good enough performance is achieved when uninitialized_len is
  300. //! ceil(sqrt(std::distance(first, last)))*2.
  301. //!
  302. //! <b>Caution</b>: Experimental implementation, not production-ready.
  303. template<class RandIt, class Compare>
  304. void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp
  305. , typename iterator_traits<RandIt>::value_type* uninitialized = 0
  306. , std::size_t uninitialized_len = 0)
  307. {
  308. typedef typename iterator_traits<RandIt>::size_type size_type;
  309. typedef typename iterator_traits<RandIt>::value_type value_type;
  310. ::boost::movelib::detail_adaptive::adaptive_xbuf<value_type> xbuf(uninitialized, uninitialized_len);
  311. ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf);
  312. }
  313. } //namespace movelib {
  314. } //namespace boost {
  315. #include <boost/move/detail/config_end.hpp>
  316. #endif //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP