//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
#define BOOST_COMPUTE_ALGORITHM_COPY_HPP

#include <algorithm>
#include <iterator>

#include <boost/utility/enable_if.hpp>
#include <boost/mpl/and.hpp>
#include <boost/mpl/not.hpp>
#include <boost/mpl/or.hpp>

#include <boost/compute/buffer.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/detail/copy_on_device.hpp>
#include <boost/compute/algorithm/detail/copy_to_device.hpp>
#include <boost/compute/algorithm/detail/copy_to_host.hpp>
#include <boost/compute/async/future.hpp>
#include <boost/compute/container/mapped_view.hpp>
#include <boost/compute/detail/device_ptr.hpp>
#include <boost/compute/detail/is_contiguous_iterator.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/parameter_cache.hpp>
#include <boost/compute/iterator/buffer_iterator.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>

namespace boost {
namespace compute {
namespace detail {

namespace mpl = boost::mpl;

// meta-function returning true if copy() between InputIterator and
// OutputIterator can be implemented with clEnqueueCopyBuffer().
template<class InputIterator, class OutputIterator>
struct can_copy_with_copy_buffer :
    mpl::and_<
        mpl::or_<
            boost::is_same<
                InputIterator,
                buffer_iterator<typename InputIterator::value_type>
            >,
            boost::is_same<
                InputIterator,
                detail::device_ptr<typename InputIterator::value_type>
            >
        >,
        mpl::or_<
            boost::is_same<
                OutputIterator,
                buffer_iterator<typename OutputIterator::value_type>
            >,
            boost::is_same<
                OutputIterator,
                detail::device_ptr<typename OutputIterator::value_type>
            >
        >,
        boost::is_same<
            typename InputIterator::value_type,
            typename OutputIterator::value_type
        >
    >::type {};
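
// For example (illustrative only): copies between two buffer_iterator ranges
// with identical value_types can use clEnqueueCopyBuffer(), while copies that
// require a conversion cannot and fall back to a copy kernel.
//
//   can_copy_with_copy_buffer<
//       buffer_iterator<int>, buffer_iterator<int>
//   >::value   // true
//
//   can_copy_with_copy_buffer<
//       buffer_iterator<int>, buffer_iterator<float>
//   >::value   // false -- value_types differ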

// meta-function returning true if the value_types of HostIterator and
// DeviceIterator are the same
template<class HostIterator, class DeviceIterator>
struct is_same_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        typename boost::remove_cv<
            typename DeviceIterator::value_type
        >::type
    >::type {};

// meta-function returning true if the value_type of HostIterator is bool
template<class HostIterator>
struct is_bool_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        bool
    >::type {};

// host -> device (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            is_same_value_type<InputIterator, OutputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_device_async(first, last, result, queue);
}

// host -> device (async)
// Type mismatch between InputIterator and OutputIterator value_types
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                is_same_value_type<InputIterator, OutputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);
    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map [first; last) to device and run copy kernel
    // on device for copying & casting
    ::boost::compute::mapped_view<input_type> mapped_host(
        // make sure it's a pointer to constant data
        // to force read only mapping
        const_cast<const input_type*>(
            ::boost::addressof(*first)
        ),
        count,
        context
    );
    return copy_on_device_async(
        mapped_host.begin(), mapped_host.end(), result, queue
    );
}

// host -> device
// InputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      is_same_value_type<InputIterator, OutputIterator>,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    return copy_to_device(first, last, result, queue);
}

// host -> device
// Type mismatch between InputIterator and OutputIterator value_types
// InputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_same_value_type<InputIterator, OutputIterator>
                      >,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288;       // 0.5 MB
        direct_copy_threshold = 52428800;  // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0;      // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );
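
    // With the default GPU thresholds above this means, for example, that a
    // 100 KB range is mapped and converted with copy_to_device_map(), a
    // 10 MB range is converted on the host and copied directly, and a
    // 100 MB range is mapped to the device and converted by a copy kernel
    // (the sizes are illustrative only; cached parameters may differ).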

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue);
    }
    // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
    // on host and then perform copy_to_device()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<output_type> vector(first, last);
        return copy_to_device(vector.begin(), vector.end(), result, queue);
    }

    // [direct_copy_threshold; inf) -> map [first; last) to the device and
    // run a copy kernel on the device for copying & casting.
    // Perform the copy asynchronously, wait for it to finish and return
    // the result. At this point count > 0 (first != last), so the event
    // returned by dispatch_copy_async() must be valid.
    return dispatch_copy_async(first, last, result, queue).get();
}

// host -> device
// InputIterator is NOT a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_contiguous_iterator<InputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288;       // 0.5 MB
        direct_copy_threshold = 52428800;  // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0;      // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t input_size = iterator_range_size(first, last);
    size_t input_size_bytes = input_size * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    //
    // if direct_copy_threshold is less than map_copy_threshold
    // copy_to_device_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue);
    }
    // [map_copy_threshold; inf) -> convert [first; last)
    // on host and then perform copy_to_device()
    std::vector<output_type> vector(first, last);
    return copy_to_device(vector.begin(), vector.end(), result, queue);
}

// device -> host (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            is_same_value_type<OutputIterator, InputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_host_async(first, last, result, queue);
}

// device -> host (async)
// Type mismatch between InputIterator and OutputIterator value_types
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            mpl::not_<
                                is_same_value_type<OutputIterator, InputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;

    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);
    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map host memory to device
    buffer mapped_host(
        context,
        count * sizeof(output_type),
        buffer::write_only | buffer::use_host_ptr,
        static_cast<void*>(
            ::boost::addressof(*result)
        )
    );
    // copy async on device
    ::boost::compute::future<buffer_iterator<output_type> > future =
        copy_on_device_async(
            first,
            last,
            make_buffer_iterator<output_type>(mapped_host),
            queue
        );
    // update host memory asynchronously by mapping and unmapping the buffer
    event map_event;
    void* ptr = queue.enqueue_map_buffer_async(
        mapped_host,
        CL_MAP_READ,
        0,
        count * sizeof(output_type),
        map_event,
        future.get_event()
    );
    event unmap_event =
        queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);

    return make_future(result + count, unmap_event);
}

// device -> host
// OutputIterator is a contiguous iterator
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      is_same_value_type<OutputIterator, InputIterator>,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    return copy_to_host(first, last, result, queue);
}

// device -> host
// OutputIterator is NOT a contiguous iterator, or the value_type of
// OutputIterator is a boolean type (the value_types of the two ranges
// may also differ).
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      mpl::or_<
                          mpl::not_<
                              is_contiguous_iterator<OutputIterator>
                          >,
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    typedef typename InputIterator::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_host_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 33554432;  // 32 MB
        direct_copy_threshold = 0;      // it's never efficient for GPUs
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0;      // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_host_map()
    //
    // if direct_copy_threshold is less than map_copy_threshold
    // copy_to_host_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_host_map(first, last, result, queue);
    }
    // [map_copy_threshold; inf) -> copy [first; last) to a temporary vector,
    // then copy (and convert) to result using std::copy()
    std::vector<input_type> vector(count);
    copy_to_host(first, last, vector.begin(), queue);
    return std::copy(vector.begin(), vector.end(), result);
}

// device -> host
// Type mismatch between InputIterator and OutputIterator value_types
// OutputIterator is a contiguous iterator
// value_type of OutputIterator is NOT a boolean type
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      mpl::not_<
                          is_same_value_type<OutputIterator, InputIterator>
                      >,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    typedef typename InputIterator::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_host_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288;       // 0.5 MB
        direct_copy_threshold = 52428800;  // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0;      // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_host_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_host_map(first, last, result, queue);
    }
    // [map_copy_threshold; direct_copy_threshold) -> copy [first; last) to a
    // temporary vector, then copy (and convert) to result using std::copy()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<input_type> vector(count);
        copy_to_host(first, last, vector.begin(), queue);
        return std::copy(vector.begin(), vector.end(), result);
    }

    // [direct_copy_threshold; inf) -> map [result; result + count) to the
    // device and run a copy kernel on the device for copying & casting.
    // Perform the copy asynchronously, wait for it to finish and return
    // the result. At this point count > 0 (first != last), so the event
    // returned by dispatch_copy_async() must be valid.
    return dispatch_copy_async(first, last, result, queue).get();
}

// device -> device
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          can_copy_with_copy_buffer<
                              InputIterator, OutputIterator
                          >
                      >
                  >
              >::type* = 0)
{
    return copy_on_device(first, last, result, queue);
}
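
// For example (illustrative only; context and queue are assumed to exist):
// copying between two vectors of the same type on the same context, e.g.
//
//   boost::compute::vector<int> a(1024, context);
//   boost::compute::vector<int> b(1024, context);
//   boost::compute::copy(a.begin(), a.end(), b.begin(), queue);
//
// takes the specialization below and maps directly onto clEnqueueCopyBuffer().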

// device -> device (specialization for buffer iterators)
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      is_device_iterator<OutputIterator>,
                      can_copy_with_copy_buffer<
                          InputIterator, OutputIterator
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return result;
    }

    queue.enqueue_copy_buffer(first.get_buffer(),
                              result.get_buffer(),
                              first.get_index() * sizeof(value_type),
                              result.get_index() * sizeof(value_type),
                              static_cast<size_t>(n) * sizeof(value_type));
    return result + n;
}

// device -> device (async)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                can_copy_with_copy_buffer<
                                    InputIterator, OutputIterator
                                >
                            >
                        >
                    >::type* = 0)
{
    return copy_on_device_async(first, last, result, queue);
}

// device -> device (async, specialization for buffer iterators)
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            is_device_iterator<OutputIterator>,
                            can_copy_with_copy_buffer<
                                InputIterator, OutputIterator
                            >
                        >
                    >::type* = 0)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return make_future(result, event());
    }

    event event_ =
        queue.enqueue_copy_buffer(
            first.get_buffer(),
            result.get_buffer(),
            first.get_index() * sizeof(value_type),
            result.get_index() * sizeof(value_type),
            static_cast<size_t>(n) * sizeof(value_type)
        );

    return make_future(result + n, event_);
}

// host -> host
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if_c<
                  !is_device_iterator<InputIterator>::value &&
                  !is_device_iterator<OutputIterator>::value
              >::type* = 0)
{
    (void) queue;

    return std::copy(first, last, result);
}

} // end detail namespace

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result.
///
/// The generic copy() function can be used for a variety of data
/// transfer tasks and provides a standard interface to the following
/// OpenCL functions:
///
/// \li \c clEnqueueReadBuffer()
/// \li \c clEnqueueWriteBuffer()
/// \li \c clEnqueueCopyBuffer()
///
/// Unlike the aforementioned OpenCL functions, copy() will also work
/// with non-contiguous data-structures (e.g. \c std::list<T>) as
/// well as with "fancy" iterators (e.g. transform_iterator).
///
/// \param first first element in the range to copy
/// \param last end of the range to copy
/// \param result first element in the result range
/// \param queue command queue to perform the operation
///
/// \return \c OutputIterator to the end of the result range
///
/// For example, to copy an array of \c int values on the host to a vector on
/// the device:
/// \code
/// // array on the host
/// int data[] = { 1, 2, 3, 4 };
///
/// // vector on the device
/// boost::compute::vector<int> vec(4, context);
///
/// // copy values to the device vector
/// boost::compute::copy(data, data + 4, vec.begin(), queue);
/// \endcode
///
/// The copy algorithm can also be used with standard containers such as
/// \c std::vector<T>:
/// \code
/// std::vector<int> host_vector = ...
/// boost::compute::vector<int> device_vector = ...
///
/// // copy from the host to the device
/// boost::compute::copy(
///     host_vector.begin(), host_vector.end(), device_vector.begin(), queue
/// );
///
/// // copy from the device to the host
/// boost::compute::copy(
///     device_vector.begin(), device_vector.end(), host_vector.begin(), queue
/// );
/// \endcode
///
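/// The value types of the input and output ranges do not have to match;
/// when they differ, copy() converts the values as part of the transfer.
/// A sketch (assuming \c host_ints and \c device_floats are already set up
/// and sized to match):
/// \code
/// std::vector<int> host_ints = ...
/// boost::compute::vector<float> device_floats = ...
///
/// // copy the ints from the host, converting them to float on the way
/// boost::compute::copy(
///     host_ints.begin(), host_ints.end(), device_floats.begin(), queue
/// );
/// \endcode
///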
/// Space complexity: \Omega(1)
///
/// \see copy_n(), copy_if(), copy_async()
template<class InputIterator, class OutputIterator>
inline OutputIterator copy(InputIterator first,
                           InputIterator last,
                           OutputIterator result,
                           command_queue &queue = system::default_queue())
{
    return detail::dispatch_copy(first, last, result, queue);
}

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result. The copy is performed asynchronously.
///
/// \see copy()
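///
/// For example, a usage sketch (assuming \c device_vector and \c host_vector
/// are existing, equally sized containers of \c int):
/// \code
/// // start copying data back to the host
/// boost::compute::future<std::vector<int>::iterator> f =
///     boost::compute::copy_async(
///         device_vector.begin(), device_vector.end(), host_vector.begin(), queue
///     );
///
/// // ... do other work while the copy runs ...
///
/// // wait for the copy to finish
/// f.wait();
/// \endcode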
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
copy_async(InputIterator first,
           InputIterator last,
           OutputIterator result,
           command_queue &queue = system::default_queue())
{
    return detail::dispatch_copy_async(first, last, result, queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP