kernel.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_KERNEL_HPP
  11. #define BOOST_COMPUTE_KERNEL_HPP
  #include <cstddef> // std::nullptr_t
  #include <string>
  #include <vector>

  #include <boost/assert.hpp>
  #include <boost/utility/enable_if.hpp>
  #include <boost/optional.hpp>

  #include <boost/compute/cl_ext.hpp> // cl_khr_subgroups
  #include <boost/compute/config.hpp>
  #include <boost/compute/exception.hpp>
  #include <boost/compute/program.hpp>
  #include <boost/compute/platform.hpp>
  #include <boost/compute/type_traits/is_fundamental.hpp>
  #include <boost/compute/detail/diagnostic.hpp>
  #include <boost/compute/detail/get_object_info.hpp>
  #include <boost/compute/detail/assert_cl_success.hpp>
  25. namespace boost {
  26. namespace compute {
  namespace detail {

  // Forward declaration of the functor used by kernel::set_arg(index, value);
  // it is declared here so the kernel class can name it before it is defined.
  template<typename T>
  struct set_kernel_arg;

  } // end detail namespace
  30. /// \class kernel
  31. /// \brief A compute kernel.
  32. ///
  33. /// \see command_queue, program
  34. class kernel
  35. {
  36. public:
  37. /// Creates a null kernel object.
  38. kernel()
  39. : m_kernel(0)
  40. {
  41. }
  42. /// Creates a new kernel object for \p kernel. If \p retain is
  43. /// \c true, the reference count for \p kernel will be incremented.
  44. explicit kernel(cl_kernel kernel, bool retain = true)
  45. : m_kernel(kernel)
  46. {
  47. if(m_kernel && retain){
  48. clRetainKernel(m_kernel);
  49. }
  50. }
  51. /// Creates a new kernel object with \p name from \p program.
  52. kernel(const program &program, const std::string &name)
  53. {
  54. cl_int error = 0;
  55. m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
  56. if(!m_kernel){
  57. BOOST_THROW_EXCEPTION(opencl_error(error));
  58. }
  59. }
  60. /// Creates a new kernel object as a copy of \p other.
  61. kernel(const kernel &other)
  62. : m_kernel(other.m_kernel)
  63. {
  64. if(m_kernel){
  65. clRetainKernel(m_kernel);
  66. }
  67. }
  68. /// Copies the kernel object from \p other to \c *this.
  69. kernel& operator=(const kernel &other)
  70. {
  71. if(this != &other){
  72. if(m_kernel){
  73. clReleaseKernel(m_kernel);
  74. }
  75. m_kernel = other.m_kernel;
  76. if(m_kernel){
  77. clRetainKernel(m_kernel);
  78. }
  79. }
  80. return *this;
  81. }
  82. #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
  83. /// Move-constructs a new kernel object from \p other.
  84. kernel(kernel&& other) BOOST_NOEXCEPT
  85. : m_kernel(other.m_kernel)
  86. {
  87. other.m_kernel = 0;
  88. }
  89. /// Move-assigns the kernel from \p other to \c *this.
  90. kernel& operator=(kernel&& other) BOOST_NOEXCEPT
  91. {
  92. if(m_kernel){
  93. clReleaseKernel(m_kernel);
  94. }
  95. m_kernel = other.m_kernel;
  96. other.m_kernel = 0;
  97. return *this;
  98. }
  99. #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
  100. /// Destroys the kernel object.
  101. ~kernel()
  102. {
  103. if(m_kernel){
  104. BOOST_COMPUTE_ASSERT_CL_SUCCESS(
  105. clReleaseKernel(m_kernel)
  106. );
  107. }
  108. }
  109. #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  110. /// Creates a new kernel object based on a shallow copy of
  111. /// the undelying OpenCL kernel object.
  112. ///
  113. /// \opencl_version_warning{2,1}
  114. ///
  115. /// \see_opencl21_ref{clCloneKernel}
  116. kernel clone()
  117. {
  118. cl_int ret = 0;
  119. cl_kernel k = clCloneKernel(m_kernel, &ret);
  120. return kernel(k, false);
  121. }
  122. #endif // BOOST_COMPUTE_CL_VERSION_2_1
  123. /// Returns a reference to the underlying OpenCL kernel object.
  124. cl_kernel& get() const
  125. {
  126. return const_cast<cl_kernel &>(m_kernel);
  127. }
  128. /// Returns the function name for the kernel.
  129. std::string name() const
  130. {
  131. return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
  132. }
  133. /// Returns the number of arguments for the kernel.
  134. size_t arity() const
  135. {
  136. return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
  137. }
  138. /// Returns the program for the kernel.
  139. program get_program() const
  140. {
  141. return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
  142. }
  143. /// Returns the context for the kernel.
  144. context get_context() const
  145. {
  146. return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
  147. }
  148. /// Returns information about the kernel.
  149. ///
  150. /// \see_opencl_ref{clGetKernelInfo}
  151. template<class T>
  152. T get_info(cl_kernel_info info) const
  153. {
  154. return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
  155. }
  156. /// \overload
  157. template<int Enum>
  158. typename detail::get_object_info_type<kernel, Enum>::type
  159. get_info() const;
  160. #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  161. /// Returns information about the argument at \p index.
  162. ///
  163. /// For example, to get the name of the first argument:
  164. /// \code
  165. /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
  166. /// \endcode
  167. ///
  168. /// Note, this function requires that the program be compiled with the
  169. /// \c "-cl-kernel-arg-info" flag. For example:
  170. /// \code
  171. /// program.build("-cl-kernel-arg-info");
  172. /// \endcode
  173. ///
  174. /// \opencl_version_warning{1,2}
  175. ///
  176. /// \see_opencl_ref{clGetKernelArgInfo}
  177. template<class T>
  178. T get_arg_info(size_t index, cl_kernel_arg_info info) const
  179. {
  180. return detail::get_object_info<T>(
  181. clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
  182. );
  183. }
  184. /// \overload
  185. template<int Enum>
  186. typename detail::get_object_info_type<kernel, Enum>::type
  187. get_arg_info(size_t index) const;
  188. #endif // BOOST_COMPUTE_CL_VERSION_1_2
  189. /// Returns work-group information for the kernel with \p device.
  190. ///
  191. /// \see_opencl_ref{clGetKernelWorkGroupInfo}
  192. template<class T>
  193. T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
  194. {
  195. return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
  196. }
  197. #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  198. /// Returns sub-group information for the kernel with \p device. Returns a null
  199. /// optional if \p device is not 2.1 device, or is not 2.0 device with support
  200. /// for cl_khr_subgroups extension.
  201. ///
  202. /// \opencl_version_warning{2,1}
  203. /// \see_opencl21_ref{clGetKernelSubGroupInfo}
  204. /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
  205. template<class T>
  206. boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
  207. const size_t input_size, const void * input) const
  208. {
  209. if(device.check_version(2, 1))
  210. {
  211. return detail::get_object_info<T>(
  212. clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
  213. );
  214. }
  215. else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
  216. {
  217. return boost::optional<T>();
  218. }
  219. // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
  220. // are supported in cl_khr_subgroups extension for 2.0 devices.
  221. else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
  222. {
  223. return boost::optional<T>();
  224. }
  225. BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
  226. clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
  227. reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
  228. reinterpret_cast<size_t>(
  229. device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
  230. )
  231. );
  232. BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
  233. return detail::get_object_info<T>(
  234. clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
  235. );
  236. }
  237. /// \overload
  238. template<class T>
  239. boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
  240. {
  241. return get_sub_group_info<T>(device, info, 0, 0);
  242. }
  243. /// \overload
  244. template<class T>
  245. boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
  246. const size_t input) const
  247. {
  248. return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
  249. }
  250. #endif // BOOST_COMPUTE_CL_VERSION_2_1
  251. #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
  252. /// Returns sub-group information for the kernel with \p device. Returns a null
  253. /// optional if cl_khr_subgroups extension is not supported by \p device.
  254. ///
  255. /// \opencl_version_warning{2,0}
  256. /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
  257. template<class T>
  258. boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
  259. const size_t input_size, const void * input) const
  260. {
  261. if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
  262. {
  263. return boost::optional<T>();
  264. }
  265. BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
  266. clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
  267. reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
  268. reinterpret_cast<size_t>(
  269. device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
  270. )
  271. );
  272. BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
  273. return detail::get_object_info<T>(
  274. clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
  275. );
  276. }
  277. #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
  278. #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  279. /// \overload
  280. template<class T>
  281. boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
  282. const std::vector<size_t> input) const
  283. {
  284. BOOST_ASSERT(input.size() > 0);
  285. return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
  286. }
  287. #endif // BOOST_COMPUTE_CL_VERSION_2_0
  288. /// Sets the argument at \p index to \p value with \p size.
  289. ///
  290. /// \see_opencl_ref{clSetKernelArg}
  291. void set_arg(size_t index, size_t size, const void *value)
  292. {
  293. BOOST_ASSERT(index < arity());
  294. cl_int ret = clSetKernelArg(m_kernel,
  295. static_cast<cl_uint>(index),
  296. size,
  297. value);
  298. if(ret != CL_SUCCESS){
  299. BOOST_THROW_EXCEPTION(opencl_error(ret));
  300. }
  301. }
  302. /// Sets the argument at \p index to \p value.
  303. ///
  304. /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
  305. /// calling set_arg(index, sizeof(type), &value).
  306. ///
  307. /// Additionally, this method is specialized for device memory objects
  308. /// such as buffer and image2d. This allows for them to be passed directly
  309. /// without having to extract their underlying cl_mem object.
  310. ///
  311. /// This method is also specialized for device container types such as
  312. /// vector<T> and array<T, N>. This allows for them to be passed directly
  313. /// as kernel arguments without having to extract their underlying buffer.
  314. ///
  315. /// For setting local memory arguments (e.g. "__local float *buf"), the
  316. /// local_buffer<T> class may be used:
  317. /// \code
  318. /// // set argument to a local buffer with storage for 32 float's
  319. /// kernel.set_arg(0, local_buffer<float>(32));
  320. /// \endcode
  321. ///
  322. /// For setting NULL to global and constant memory arguments (C++11):
  323. /// \code
  324. /// kernel.set_arg(0, nullptr);
  325. /// \endcode
  326. template<class T>
  327. void set_arg(size_t index, const T &value)
  328. {
  329. // if you get a compilation error pointing here it means you
  330. // attempted to set a kernel argument from an invalid type.
  331. detail::set_kernel_arg<T>()(*this, index, value);
  332. }
  333. #ifndef BOOST_NO_CXX11_NULLPTR
  334. /// \overload
  335. void set_arg(size_t index, std::nullptr_t nul)
  336. {
  337. set_arg(index, sizeof(cl_mem), NULL);
  338. }
  339. #endif // BOOST_NO_CXX11_NULLPTR
  340. /// \internal_
  341. void set_arg(size_t index, const cl_mem mem)
  342. {
  343. set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
  344. }
  345. /// \internal_
  346. void set_arg(size_t index, const cl_sampler sampler)
  347. {
  348. set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
  349. }
  350. /// \internal_
  351. void set_arg_svm_ptr(size_t index, void* ptr)
  352. {
  353. #ifdef BOOST_COMPUTE_CL_VERSION_2_0
  354. cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
  355. if(ret != CL_SUCCESS){
  356. BOOST_THROW_EXCEPTION(opencl_error(ret));
  357. }
  358. #else
  359. (void) index;
  360. (void) ptr;
  361. BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
  362. #endif
  363. }
  364. #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
  365. /// Sets the arguments for the kernel to \p args.
  366. template<class... T>
  367. void set_args(T&&... args)
  368. {
  369. BOOST_ASSERT(sizeof...(T) <= arity());
  370. _set_args<0>(args...);
  371. }
  372. #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
  373. #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  374. /// Sets additional execution information for the kernel.
  375. ///
  376. /// \opencl_version_warning{2,0}
  377. ///
  378. /// \see_opencl2_ref{clSetKernelExecInfo}
  379. void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
  380. {
  381. cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
  382. if(ret != CL_SUCCESS){
  383. BOOST_THROW_EXCEPTION(opencl_error(ret));
  384. }
  385. }
  386. #endif // BOOST_COMPUTE_CL_VERSION_2_0
  387. /// Returns \c true if the kernel is the same at \p other.
  388. bool operator==(const kernel &other) const
  389. {
  390. return m_kernel == other.m_kernel;
  391. }
  392. /// Returns \c true if the kernel is different from \p other.
  393. bool operator!=(const kernel &other) const
  394. {
  395. return m_kernel != other.m_kernel;
  396. }
  397. /// \internal_
  398. operator cl_kernel() const
  399. {
  400. return m_kernel;
  401. }
  402. /// \internal_
  403. static kernel create_with_source(const std::string &source,
  404. const std::string &name,
  405. const context &context)
  406. {
  407. return program::build_with_source(source, context).create_kernel(name);
  408. }
  409. private:
  410. #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
  411. /// \internal_
  412. template<size_t N>
  413. void _set_args()
  414. {
  415. }
  416. /// \internal_
  417. template<size_t N, class T, class... Args>
  418. void _set_args(T&& arg, Args&&... rest)
  419. {
  420. set_arg(N, arg);
  421. _set_args<N+1>(rest...);
  422. }
  423. #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
  424. private:
  425. cl_kernel m_kernel;
  426. };
  427. inline kernel program::create_kernel(const std::string &name) const
  428. {
  429. return kernel(*this, name);
  430. }
  431. /// \internal_ define get_info() specializations for kernel
  432. BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
  433. ((std::string, CL_KERNEL_FUNCTION_NAME))
  434. ((cl_uint, CL_KERNEL_NUM_ARGS))
  435. ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
  436. ((cl_context, CL_KERNEL_CONTEXT))
  437. ((cl_program, CL_KERNEL_PROGRAM))
  438. )
  439. #ifdef BOOST_COMPUTE_CL_VERSION_1_2
  440. BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
  441. ((std::string, CL_KERNEL_ATTRIBUTES))
  442. )
  443. #endif // BOOST_COMPUTE_CL_VERSION_1_2
  /// \internal_ get_arg_info<Enum>() specializations for kernel
  #if defined(BOOST_COMPUTE_CL_VERSION_1_2)

  // For one argument-info enum (info_id) this macro:
  //   * specializes detail::get_object_info_type so that it yields return_t
  //   * specializes kernel::get_arg_info<info_id>() to forward to the
  //     get_arg_info<return_t>(index, info_id) overload
  #define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(return_t, info_id) \
      namespace detail { \
          template<> \
          struct get_object_info_type<kernel, info_id> \
          { \
              typedef return_t type; \
          }; \
      } \
      template<> \
      inline return_t kernel::get_arg_info<info_id>(size_t index) const \
      { \
          return get_arg_info<return_t>(index, info_id); \
      }

  BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
      cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER
  )
  BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
      cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER
  )
  BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
      std::string, CL_KERNEL_ARG_TYPE_NAME
  )
  BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
      cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER
  )
  BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
      std::string, CL_KERNEL_ARG_NAME
  )

  #endif // BOOST_COMPUTE_CL_VERSION_1_2
  459. namespace detail {
  460. // set_kernel_arg implementation for built-in types
  461. template<class T>
  462. struct set_kernel_arg
  463. {
  464. typename boost::enable_if<is_fundamental<T> >::type
  465. operator()(kernel &kernel_, size_t index, const T &value)
  466. {
  467. kernel_.set_arg(index, sizeof(T), &value);
  468. }
  469. };
  470. // set_kernel_arg specialization for char (different from built-in cl_char)
  471. template<>
  472. struct set_kernel_arg<char>
  473. {
  474. void operator()(kernel &kernel_, size_t index, const char c)
  475. {
  476. kernel_.set_arg(index, sizeof(char), &c);
  477. }
  478. };
  479. } // end detail namespace
  480. } // end namespace compute
  481. } // end namespace boost
  482. #endif // BOOST_COMPUTE_KERNEL_HPP