meta_kernel.hpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
  11. #define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
  12. #include <set>
  13. #include <string>
  14. #include <vector>
  15. #include <iomanip>
  16. #include <sstream>
  17. #include <utility>
  18. #include <boost/tuple/tuple.hpp>
  19. #include <boost/type_traits.hpp>
  20. #include <boost/lexical_cast.hpp>
  21. #include <boost/static_assert.hpp>
  22. #include <boost/algorithm/string/find.hpp>
  23. #include <boost/preprocessor/repetition.hpp>
  24. #include <boost/compute/kernel.hpp>
  25. #include <boost/compute/closure.hpp>
  26. #include <boost/compute/function.hpp>
  27. #include <boost/compute/functional.hpp>
  28. #include <boost/compute/type_traits.hpp>
  29. #include <boost/compute/command_queue.hpp>
  30. #include <boost/compute/image/image2d.hpp>
  31. #include <boost/compute/image/image_sampler.hpp>
  32. #include <boost/compute/memory_object.hpp>
  33. #include <boost/compute/memory/svm_ptr.hpp>
  34. #include <boost/compute/detail/device_ptr.hpp>
  35. #include <boost/compute/detail/sha1.hpp>
  36. #include <boost/compute/utility/program_cache.hpp>
  37. namespace boost {
  38. namespace compute {
  39. namespace detail {
  40. template<class T>
  41. class meta_kernel_variable
  42. {
  43. public:
  44. typedef T result_type;
  45. meta_kernel_variable(const std::string &name)
  46. : m_name(name)
  47. {
  48. }
  49. meta_kernel_variable(const meta_kernel_variable &other)
  50. : m_name(other.m_name)
  51. {
  52. }
  53. meta_kernel_variable& operator=(const meta_kernel_variable &other)
  54. {
  55. if(this != &other){
  56. m_name = other.m_name;
  57. }
  58. return *this;
  59. }
  60. ~meta_kernel_variable()
  61. {
  62. }
  63. std::string name() const
  64. {
  65. return m_name;
  66. }
  67. private:
  68. std::string m_name;
  69. };
  70. template<class T>
  71. class meta_kernel_literal
  72. {
  73. public:
  74. typedef T result_type;
  75. meta_kernel_literal(const T &value)
  76. : m_value(value)
  77. {
  78. }
  79. meta_kernel_literal(const meta_kernel_literal &other)
  80. : m_value(other.m_value)
  81. {
  82. }
  83. meta_kernel_literal& operator=(const meta_kernel_literal &other)
  84. {
  85. if(this != &other){
  86. m_value = other.m_value;
  87. }
  88. return *this;
  89. }
  90. ~meta_kernel_literal()
  91. {
  92. }
  93. const T& value() const
  94. {
  95. return m_value;
  96. }
  97. private:
  98. T m_value;
  99. };
  100. struct meta_kernel_stored_arg
  101. {
  102. meta_kernel_stored_arg()
  103. : m_size(0),
  104. m_value(0)
  105. {
  106. }
  107. meta_kernel_stored_arg(const meta_kernel_stored_arg &other)
  108. : m_size(0),
  109. m_value(0)
  110. {
  111. set_value(other.m_size, other.m_value);
  112. }
  113. meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other)
  114. {
  115. if(this != &other){
  116. set_value(other.m_size, other.m_value);
  117. }
  118. return *this;
  119. }
  120. template<class T>
  121. meta_kernel_stored_arg(const T &value)
  122. : m_size(0),
  123. m_value(0)
  124. {
  125. set_value(value);
  126. }
  127. ~meta_kernel_stored_arg()
  128. {
  129. if(m_value){
  130. std::free(m_value);
  131. }
  132. }
  133. void set_value(size_t size, const void *value)
  134. {
  135. if(m_value){
  136. std::free(m_value);
  137. }
  138. m_size = size;
  139. if(value){
  140. m_value = std::malloc(size);
  141. std::memcpy(m_value, value, size);
  142. }
  143. else {
  144. m_value = 0;
  145. }
  146. }
  147. template<class T>
  148. void set_value(const T &value)
  149. {
  150. set_value(sizeof(T), boost::addressof(value));
  151. }
  152. size_t m_size;
  153. void *m_value;
  154. };
  155. struct meta_kernel_buffer_info
  156. {
  157. meta_kernel_buffer_info(const buffer &buffer,
  158. const std::string &id,
  159. memory_object::address_space addr_space,
  160. size_t i)
  161. : m_mem(buffer.get()),
  162. identifier(id),
  163. address_space(addr_space),
  164. index(i)
  165. {
  166. }
  167. cl_mem m_mem;
  168. std::string identifier;
  169. memory_object::address_space address_space;
  170. size_t index;
  171. };
  172. struct meta_kernel_svm_info
  173. {
  174. template <class T>
  175. meta_kernel_svm_info(const svm_ptr<T> ptr,
  176. const std::string &id,
  177. memory_object::address_space addr_space,
  178. size_t i)
  179. : ptr(ptr.get()),
  180. identifier(id),
  181. address_space(addr_space),
  182. index(i)
  183. {
  184. }
  185. void* ptr;
  186. std::string identifier;
  187. memory_object::address_space address_space;
  188. size_t index;
  189. };
  190. class meta_kernel;
  191. template<class Type>
  192. struct inject_type_impl
  193. {
  194. void operator()(meta_kernel &)
  195. {
  196. // default implementation does nothing
  197. }
  198. };
  199. #define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \
  200. meta_kernel& operator<<(const type &x) \
  201. { \
  202. m_source << x; \
  203. return *this; \
  204. }
  205. #define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \
  206. meta_kernel& operator<<(const type &x) \
  207. { \
  208. m_source << "(" << type_name<type>() << ")"; \
  209. m_source << "("; \
  210. for(size_t i = 0; i < vector_size<type>::value; i++){ \
  211. *this << lit(x[i]); \
  212. \
  213. if(i != vector_size<type>::value - 1){ \
  214. m_source << ","; \
  215. } \
  216. } \
  217. m_source << ")"; \
  218. return *this; \
  219. }
  220. #define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \
  221. BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \
  222. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \
  223. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \
  224. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \
  225. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _))
  226. class meta_kernel
  227. {
  228. public:
  229. template<class T>
  230. class argument
  231. {
  232. public:
  233. argument(const std::string &name, size_t index)
  234. : m_name(name),
  235. m_index(index)
  236. {
  237. }
  238. const std::string &name() const
  239. {
  240. return m_name;
  241. }
  242. size_t index() const
  243. {
  244. return m_index;
  245. }
  246. private:
  247. std::string m_name;
  248. size_t m_index;
  249. };
  250. explicit meta_kernel(const std::string &name)
  251. : m_name(name)
  252. {
  253. }
  254. meta_kernel(const meta_kernel &other)
  255. {
  256. m_source.str(other.m_source.str());
  257. m_options = other.m_options;
  258. }
  259. meta_kernel& operator=(const meta_kernel &other)
  260. {
  261. if(this != &other){
  262. m_source.str(other.m_source.str());
  263. m_options = other.m_options;
  264. }
  265. return *this;
  266. }
  267. ~meta_kernel()
  268. {
  269. }
  270. std::string name() const
  271. {
  272. return m_name;
  273. }
  274. std::string source() const
  275. {
  276. std::stringstream stream;
  277. // add pragmas
  278. if(!m_pragmas.empty()){
  279. stream << m_pragmas << "\n";
  280. }
  281. // add macros
  282. stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n";
  283. stream << "#define boost_pair_get(x, n) (n == 0 ? x.first ## x.second)\n";
  284. stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n";
  285. stream << "#define boost_tuple_get(x, n) (x.v ## n)\n";
  286. // add type declaration source
  287. stream << m_type_declaration_source.str() << "\n";
  288. // add external function source
  289. stream << m_external_function_source.str() << "\n";
  290. // add kernel source
  291. stream << "__kernel void " << m_name
  292. << "(" << boost::join(m_args, ", ") << ")\n"
  293. << "{\n" << m_source.str() << "\n}\n";
  294. return stream.str();
  295. }
  296. kernel compile(const context &context, const std::string &options = std::string())
  297. {
  298. // generate the program source
  299. std::string source = this->source();
  300. // generate cache key
  301. std::string cache_key = "__boost_meta_kernel_" +
  302. static_cast<std::string>(detail::sha1(source));
  303. // load program cache
  304. boost::shared_ptr<program_cache> cache =
  305. program_cache::get_global_cache(context);
  306. std::string compile_options = m_options + options;
  307. // load (or build) program from cache
  308. ::boost::compute::program program =
  309. cache->get_or_build(cache_key, compile_options, source, context);
  310. // create kernel
  311. ::boost::compute::kernel kernel = program.create_kernel(name());
  312. // bind stored args
  313. for(size_t i = 0; i < m_stored_args.size(); i++){
  314. const detail::meta_kernel_stored_arg &arg = m_stored_args[i];
  315. if(arg.m_size != 0){
  316. kernel.set_arg(i, arg.m_size, arg.m_value);
  317. }
  318. }
  319. // bind buffer args
  320. for(size_t i = 0; i < m_stored_buffers.size(); i++){
  321. const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i];
  322. kernel.set_arg(bi.index, bi.m_mem);
  323. }
  324. // bind svm args
  325. for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){
  326. const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i];
  327. kernel.set_arg_svm_ptr(spi.index, spi.ptr);
  328. }
  329. return kernel;
  330. }
  331. template<class T>
  332. size_t add_arg(const std::string &name)
  333. {
  334. std::stringstream stream;
  335. stream << type<T>() << " " << name;
  336. // add argument to list
  337. m_args.push_back(stream.str());
  338. // return index
  339. return m_args.size() - 1;
  340. }
  341. template<class T>
  342. size_t add_arg(memory_object::address_space address_space,
  343. const std::string &name)
  344. {
  345. return add_arg_with_qualifiers<T>(address_space_prefix(address_space), name);
  346. }
  347. template<class T>
  348. void set_arg(size_t index, const T &value)
  349. {
  350. if(index >= m_stored_args.size()){
  351. m_stored_args.resize(index + 1);
  352. }
  353. m_stored_args[index] = detail::meta_kernel_stored_arg(value);
  354. }
  355. void set_arg(size_t index, const memory_object &mem)
  356. {
  357. set_arg<cl_mem>(index, mem.get());
  358. }
  359. void set_arg(size_t index, const image_sampler &sampler)
  360. {
  361. set_arg<cl_sampler>(index, cl_sampler(sampler));
  362. }
  363. template<class T>
  364. size_t add_set_arg(const std::string &name, const T &value)
  365. {
  366. size_t index = add_arg<T>(name);
  367. set_arg<T>(index, value);
  368. return index;
  369. }
  370. void add_extension_pragma(const std::string &extension,
  371. const std::string &value = "enable")
  372. {
  373. m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n";
  374. }
  375. void add_extension_pragma(const std::string &extension,
  376. const std::string &value) const
  377. {
  378. return const_cast<meta_kernel *>(this)->add_extension_pragma(extension, value);
  379. }
  380. template<class T>
  381. std::string type() const
  382. {
  383. std::stringstream stream;
  384. // const qualifier
  385. if(boost::is_const<T>::value){
  386. stream << "const ";
  387. }
  388. // volatile qualifier
  389. if(boost::is_volatile<T>::value){
  390. stream << "volatile ";
  391. }
  392. // type
  393. typedef
  394. typename boost::remove_cv<
  395. typename boost::remove_pointer<T>::type
  396. >::type Type;
  397. stream << type_name<Type>();
  398. // pointer
  399. if(boost::is_pointer<T>::value){
  400. stream << "*";
  401. }
  402. // inject type pragmas and/or definitions
  403. inject_type<Type>();
  404. return stream.str();
  405. }
  406. template<class T>
  407. std::string decl(const std::string &name) const
  408. {
  409. return type<T>() + " " + name;
  410. }
  411. template<class T, class Expr>
  412. std::string decl(const std::string &name, const Expr &init) const
  413. {
  414. meta_kernel tmp((std::string()));
  415. tmp << tmp.decl<T>(name) << " = " << init;
  416. return tmp.m_source.str();
  417. }
  418. template<class T>
  419. detail::meta_kernel_variable<T> var(const std::string &name) const
  420. {
  421. type<T>();
  422. return make_var<T>(name);
  423. }
  424. template<class T>
  425. detail::meta_kernel_literal<T> lit(const T &value) const
  426. {
  427. type<T>();
  428. return detail::meta_kernel_literal<T>(value);
  429. }
  430. template<class T>
  431. detail::meta_kernel_variable<T> expr(const std::string &expr) const
  432. {
  433. type<T>();
  434. return detail::meta_kernel_variable<T>(expr);
  435. }
  436. // define stream operators for scalar and vector types
  437. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char)
  438. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar)
  439. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short)
  440. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort)
  441. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int)
  442. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint)
  443. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long)
  444. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong)
  445. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double)
  446. // define stream operators for float scalar and vector types
  447. meta_kernel& operator<<(const float &x)
  448. {
  449. m_source << std::showpoint << x << 'f';
  450. return *this;
  451. }
  452. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_)
  453. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_)
  454. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_)
  455. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_)
  456. // define stream operators for variable types
  457. template<class T>
  458. meta_kernel& operator<<(const meta_kernel_variable<T> &variable)
  459. {
  460. return *this << variable.name();
  461. }
  462. // define stream operators for literal types
  463. template<class T>
  464. meta_kernel& operator<<(const meta_kernel_literal<T> &literal)
  465. {
  466. return *this << literal.value();
  467. }
  468. meta_kernel& operator<<(const meta_kernel_literal<bool> &literal)
  469. {
  470. return *this << (literal.value() ? "true" : "false");
  471. }
  472. meta_kernel& operator<<(const meta_kernel_literal<char> &literal)
  473. {
  474. const char c = literal.value();
  475. switch(c){
  476. // control characters
  477. case '\0':
  478. return *this << "'\\0'";
  479. case '\a':
  480. return *this << "'\\a'";
  481. case '\b':
  482. return *this << "'\\b'";
  483. case '\t':
  484. return *this << "'\\t'";
  485. case '\n':
  486. return *this << "'\\n'";
  487. case '\v':
  488. return *this << "'\\v'";
  489. case '\f':
  490. return *this << "'\\f'";
  491. case '\r':
  492. return *this << "'\\r'";
  493. // characters which need escaping
  494. case '\"':
  495. case '\'':
  496. case '\?':
  497. case '\\':
  498. return *this << "'\\" << c << "'";
  499. // all other characters
  500. default:
  501. return *this << "'" << c << "'";
  502. }
  503. }
  504. meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal)
  505. {
  506. return *this << lit<char>(literal.value());
  507. }
  508. meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal)
  509. {
  510. return *this << uint_(literal.value());
  511. }
  512. // define stream operators for strings
  513. meta_kernel& operator<<(char ch)
  514. {
  515. m_source << ch;
  516. return *this;
  517. }
  518. meta_kernel& operator<<(const char *string)
  519. {
  520. m_source << string;
  521. return *this;
  522. }
  523. meta_kernel& operator<<(const std::string &string)
  524. {
  525. m_source << string;
  526. return *this;
  527. }
  528. template<class T>
  529. static detail::meta_kernel_variable<T> make_var(const std::string &name)
  530. {
  531. return detail::meta_kernel_variable<T>(name);
  532. }
  533. template<class T>
  534. static detail::meta_kernel_literal<T> make_lit(const T &value)
  535. {
  536. return detail::meta_kernel_literal<T>(value);
  537. }
  538. template<class T>
  539. static detail::meta_kernel_variable<T> make_expr(const std::string &expr)
  540. {
  541. return detail::meta_kernel_variable<T>(expr);
  542. }
  543. event exec(command_queue &queue)
  544. {
  545. return exec_1d(queue, 0, 1);
  546. }
  547. event exec_1d(command_queue &queue,
  548. size_t global_work_offset,
  549. size_t global_work_size)
  550. {
  551. const context &context = queue.get_context();
  552. ::boost::compute::kernel kernel = compile(context);
  553. return queue.enqueue_1d_range_kernel(
  554. kernel,
  555. global_work_offset,
  556. global_work_size,
  557. 0
  558. );
  559. }
  560. event exec_1d(command_queue &queue,
  561. size_t global_work_offset,
  562. size_t global_work_size,
  563. size_t local_work_size)
  564. {
  565. const context &context = queue.get_context();
  566. ::boost::compute::kernel kernel = compile(context);
  567. return queue.enqueue_1d_range_kernel(
  568. kernel,
  569. global_work_offset,
  570. global_work_size,
  571. local_work_size
  572. );
  573. }
  574. template<class T>
  575. std::string get_buffer_identifier(const buffer &buffer,
  576. const memory_object::address_space address_space =
  577. memory_object::global_memory)
  578. {
  579. // check if we've already seen buffer
  580. for(size_t i = 0; i < m_stored_buffers.size(); i++){
  581. const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i];
  582. if(bi.m_mem == buffer.get() &&
  583. bi.address_space == address_space){
  584. return bi.identifier;
  585. }
  586. }
  587. // create a new binding
  588. std::string identifier =
  589. "_buf" + lexical_cast<std::string>(m_stored_buffers.size());
  590. size_t index = add_arg<T *>(address_space, identifier);
  591. // store new buffer info
  592. m_stored_buffers.push_back(
  593. detail::meta_kernel_buffer_info(buffer, identifier, address_space, index));
  594. return identifier;
  595. }
  596. template<class T>
  597. std::string get_svm_identifier(const svm_ptr<T> &svm_ptr,
  598. const memory_object::address_space address_space =
  599. memory_object::global_memory)
  600. {
  601. BOOST_ASSERT(
  602. (address_space == memory_object::global_memory)
  603. || (address_space == memory_object::constant_memory)
  604. );
  605. // check if we've already seen this pointer
  606. for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){
  607. const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i];
  608. if(spi.ptr == svm_ptr.get() &&
  609. spi.address_space == address_space){
  610. return spi.identifier;
  611. }
  612. }
  613. // create a new binding
  614. std::string identifier =
  615. "_svm_ptr" + lexical_cast<std::string>(m_stored_svm_ptrs.size());
  616. size_t index = add_arg<T *>(address_space, identifier);
  617. if(m_stored_svm_ptrs.empty()) {
  618. m_options += std::string(" -cl-std=CL2.0");
  619. }
  620. // store new svm pointer info
  621. m_stored_svm_ptrs.push_back(
  622. detail::meta_kernel_svm_info(
  623. svm_ptr, identifier, address_space, index
  624. )
  625. );
  626. return identifier;
  627. }
  628. std::string get_image_identifier(const char *qualifiers, const image2d &image)
  629. {
  630. size_t index = add_arg_with_qualifiers<image2d>(qualifiers, "image");
  631. set_arg(index, image);
  632. return "image";
  633. }
  634. std::string get_sampler_identifier(bool normalized_coords,
  635. cl_addressing_mode addressing_mode,
  636. cl_filter_mode filter_mode)
  637. {
  638. (void) normalized_coords;
  639. (void) addressing_mode;
  640. (void) filter_mode;
  641. m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
  642. " CLK_ADDRESS_NONE |\n"
  643. " CLK_FILTER_NEAREST;\n";
  644. return "sampler";
  645. }
  646. template<class Expr>
  647. static std::string expr_to_string(const Expr &expr)
  648. {
  649. meta_kernel tmp((std::string()));
  650. tmp << expr;
  651. return tmp.m_source.str();
  652. }
  653. template<class Predicate>
  654. detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const
  655. {
  656. return detail::invoked_function<bool, boost::tuple<Predicate> >(
  657. "if", std::string(), boost::make_tuple(pred)
  658. );
  659. }
  660. template<class Predicate>
  661. detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const
  662. {
  663. return detail::invoked_function<bool, boost::tuple<Predicate> >(
  664. "else if", std::string(), boost::make_tuple(pred)
  665. );
  666. }
  667. detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const
  668. {
  669. return expr<cl_uint>("get_global_id(" + lexical_cast<std::string>(dim) + ")");
  670. }
  671. void add_function(const std::string &name, const std::string &source)
  672. {
  673. if(m_external_function_names.count(name)){
  674. return;
  675. }
  676. m_external_function_names.insert(name);
  677. m_external_function_source << source << "\n";
  678. }
  679. void add_function(const std::string &name,
  680. const std::string &source,
  681. const std::map<std::string, std::string> &definitions)
  682. {
  683. typedef std::map<std::string, std::string>::const_iterator iter;
  684. std::stringstream s;
  685. // add #define's
  686. for(iter i = definitions.begin(); i != definitions.end(); i++){
  687. s << "#define " << i->first;
  688. if(!i->second.empty()){
  689. s << " " << i->second;
  690. }
  691. s << "\n";
  692. }
  693. s << source << "\n";
  694. // add #undef's
  695. for(iter i = definitions.begin(); i != definitions.end(); i++){
  696. s << "#undef " << i->first << "\n";
  697. }
  698. add_function(name, s.str());
  699. }
  700. template<class Type>
  701. void add_type_declaration(const std::string &declaration)
  702. {
  703. const char *name = type_name<Type>();
  704. // check if the type has already been declared
  705. std::string source = m_type_declaration_source.str();
  706. if(source.find(name) != std::string::npos){
  707. return;
  708. }
  709. m_type_declaration_source << declaration;
  710. }
  711. template<class Type>
  712. void inject_type() const
  713. {
  714. inject_type_impl<Type>()(const_cast<meta_kernel &>(*this));
  715. }
  716. // the insert_function_call() method inserts a call to a function with
  717. // the given name tuple of argument values.
  718. template<class ArgTuple>
  719. void insert_function_call(const std::string &name, const ArgTuple &args)
  720. {
  721. *this << name << '(';
  722. insert_function_call_args(args);
  723. *this << ')';
  724. }
  725. // the insert_function_call_args() method takes a tuple of argument values
  726. // and inserts them into the source string with a comma in-between each.
  727. // this is useful for creating function calls given a tuple of values.
  728. void insert_function_call_args(const boost::tuple<>&)
  729. {
  730. }
  731. #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \
  732. inject_type<BOOST_PP_CAT(T, n)>();
  733. #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \
  734. << boost::get<BOOST_PP_DEC(n)>(args) << ", "
  735. #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \
  736. template<BOOST_PP_ENUM_PARAMS(n, class T)> \
  737. void insert_function_call_args( \
  738. const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \
  739. ) \
  740. { \
  741. BOOST_PP_REPEAT_FROM_TO( \
  742. 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \
  743. ) \
  744. *this \
  745. BOOST_PP_REPEAT_FROM_TO( \
  746. 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \
  747. ) \
  748. << boost::get<BOOST_PP_DEC(n)>(args); \
  749. }
  750. BOOST_PP_REPEAT_FROM_TO(
  751. 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~
  752. )
  753. #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE
  754. #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG
  755. #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS
  756. static const char* address_space_prefix(const memory_object::address_space value)
  757. {
  758. switch(value){
  759. case memory_object::global_memory: return "__global";
  760. case memory_object::local_memory: return "__local";
  761. case memory_object::private_memory: return "__private";
  762. case memory_object::constant_memory: return "__constant";
  763. };
  764. return 0; // unreachable
  765. }
  766. private:
  767. template<class T>
  768. size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name)
  769. {
  770. size_t index = add_arg<T>(name);
  771. // update argument type declaration with qualifiers
  772. std::stringstream s;
  773. s << qualifiers << " " << m_args[index];
  774. m_args[index] = s.str();
  775. return index;
  776. }
  777. private:
  778. std::string m_name;
  779. std::stringstream m_source;
  780. std::stringstream m_external_function_source;
  781. std::stringstream m_type_declaration_source;
  782. std::set<std::string> m_external_function_names;
  783. std::vector<std::string> m_args;
  784. std::string m_pragmas;
  785. std::string m_options;
  786. std::vector<detail::meta_kernel_stored_arg> m_stored_args;
  787. std::vector<detail::meta_kernel_buffer_info> m_stored_buffers;
  788. std::vector<detail::meta_kernel_svm_info> m_stored_svm_ptrs;
  789. };
  790. template<class ResultType, class ArgTuple>
  791. inline meta_kernel&
  792. operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr)
  793. {
  794. if(!expr.source().empty()){
  795. kernel.add_function(expr.name(), expr.source(), expr.definitions());
  796. }
  797. kernel.insert_function_call(expr.name(), expr.args());
  798. return kernel;
  799. }
  800. template<class ResultType, class ArgTuple, class CaptureTuple>
  801. inline meta_kernel&
  802. operator<<(meta_kernel &kernel,
  803. const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr)
  804. {
  805. if(!expr.source().empty()){
  806. kernel.add_function(expr.name(), expr.source(), expr.definitions());
  807. }
  808. kernel << expr.name() << '(';
  809. kernel.insert_function_call_args(expr.args());
  810. kernel << ", ";
  811. kernel.insert_function_call_args(expr.capture());
  812. kernel << ')';
  813. return kernel;
  814. }
  815. template<class Arg1, class Arg2, class Result>
  816. inline meta_kernel& operator<<(meta_kernel &kernel,
  817. const invoked_binary_operator<Arg1,
  818. Arg2,
  819. Result> &expr)
  820. {
  821. return kernel << "((" << expr.arg1() << ")"
  822. << expr.op()
  823. << "(" << expr.arg2() << "))";
  824. }
  825. template<class T, class IndexExpr>
  826. inline meta_kernel& operator<<(meta_kernel &kernel,
  827. const detail::device_ptr_index_expr<T, IndexExpr> &expr)
  828. {
  829. if(expr.m_index == 0){
  830. return kernel <<
  831. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  832. '[' << expr.m_expr << ']';
  833. }
  834. else {
  835. return kernel <<
  836. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  837. '[' << expr.m_index << "+(" << expr.m_expr << ")]";
  838. }
  839. }
  840. template<class T1, class T2, class IndexExpr>
  841. inline meta_kernel& operator<<(meta_kernel &kernel,
  842. const detail::device_ptr_index_expr<std::pair<T1, T2>, IndexExpr> &expr)
  843. {
  844. typedef std::pair<T1, T2> T;
  845. if(expr.m_index == 0){
  846. return kernel <<
  847. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  848. '[' << expr.m_expr << ']';
  849. }
  850. else {
  851. return kernel <<
  852. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  853. '[' << expr.m_index << "+(" << expr.m_expr << ")]";
  854. }
  855. }
  856. // SVM requires OpenCL 2.0
  857. #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  858. template<class T, class IndexExpr>
  859. inline meta_kernel& operator<<(meta_kernel &kernel,
  860. const svm_ptr_index_expr<T, IndexExpr> &expr)
  861. {
  862. return kernel <<
  863. kernel.get_svm_identifier<T>(expr.m_svm_ptr) <<
  864. '[' << expr.m_expr << ']';
  865. }
  866. #endif
  867. template<class Predicate, class Arg>
  868. inline meta_kernel& operator<<(meta_kernel &kernel,
  869. const invoked_unary_negate_function<Predicate,
  870. Arg> &expr)
  871. {
  872. return kernel << "!(" << expr.pred()(expr.expr()) << ')';
  873. }
  874. template<class Predicate, class Arg1, class Arg2>
  875. inline meta_kernel& operator<<(meta_kernel &kernel,
  876. const invoked_binary_negate_function<Predicate,
  877. Arg1,
  878. Arg2> &expr)
  879. {
  880. return kernel << "!(" << expr.pred()(expr.expr1(), expr.expr2()) << ')';
  881. }
  882. // get<N>() for vector types
  883. template<size_t N, class Arg, class T>
  884. inline meta_kernel& operator<<(meta_kernel &kernel,
  885. const invoked_get<N, Arg, T> &expr)
  886. {
  887. BOOST_STATIC_ASSERT(N < 16);
  888. if(N < 10){
  889. return kernel << expr.m_arg << ".s" << int_(N);
  890. }
  891. else if(N < 16){
  892. #ifdef _MSC_VER
  893. # pragma warning(push)
  894. # pragma warning(disable: 4307)
  895. #endif
  896. return kernel << expr.m_arg << ".s" << char('a' + (N - 10));
  897. #ifdef _MSC_VER
  898. # pragma warning(pop)
  899. #endif
  900. }
  901. return kernel;
  902. }
  903. template<class T, class Arg>
  904. inline meta_kernel& operator<<(meta_kernel &kernel,
  905. const invoked_field<T, Arg> &expr)
  906. {
  907. return kernel << expr.m_arg << "." << expr.m_field;
  908. }
  909. template<class T, class Arg>
  910. inline meta_kernel& operator<<(meta_kernel &k,
  911. const invoked_as<T, Arg> &expr)
  912. {
  913. return k << "as_" << type_name<T>() << "(" << expr.m_arg << ")";
  914. }
  915. template<class T, class Arg>
  916. inline meta_kernel& operator<<(meta_kernel &k,
  917. const invoked_convert<T, Arg> &expr)
  918. {
  919. return k << "convert_" << type_name<T>() << "(" << expr.m_arg << ")";
  920. }
  921. template<class T, class Arg>
  922. inline meta_kernel& operator<<(meta_kernel &k,
  923. const invoked_identity<T, Arg> &expr)
  924. {
  925. return k << expr.m_arg;
  926. }
  927. template<>
  928. struct inject_type_impl<double_>
  929. {
  930. void operator()(meta_kernel &kernel)
  931. {
  932. kernel.add_extension_pragma("cl_khr_fp64", "enable");
  933. }
  934. };
  935. template<class Scalar, size_t N>
  936. struct inject_type_impl<vector_type<Scalar, N> >
  937. {
  938. void operator()(meta_kernel &kernel)
  939. {
  940. kernel.inject_type<Scalar>();
  941. }
  942. };
  943. } // end detail namespace
  944. } // end compute namespace
  945. } // end boost namespace
  946. #endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP