// codecvt.h (16 KB)
  1. // Locale support (codecvt) -*- C++ -*-
  2. // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
  3. // 2009 Free Software Foundation, Inc.
  4. //
  5. // This file is part of the GNU ISO C++ Library. This library is free
  6. // software; you can redistribute it and/or modify it under the
  7. // terms of the GNU General Public License as published by the
  8. // Free Software Foundation; either version 3, or (at your option)
  9. // any later version.
  10. // This library is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. // Under Section 7 of GPL version 3, you are granted additional
  15. // permissions described in the GCC Runtime Library Exception, version
  16. // 3.1, as published by the Free Software Foundation.
  17. // You should have received a copy of the GNU General Public License and
  18. // a copy of the GCC Runtime Library Exception along with this program;
  19. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  20. // <http://www.gnu.org/licenses/>.
  21. /** @file bits/codecvt.h
  22. * This is an internal header file, included by other library headers.
  23. * You should not attempt to use it directly.
  24. */
  25. //
  26. // ISO C++ 14882: 22.2.1.5 Template class codecvt
  27. //
  28. // Written by Benjamin Kosnik <bkoz@redhat.com>
  29. #ifndef _CODECVT_H
  30. #define _CODECVT_H 1
  31. #pragma GCC system_header
  32. _GLIBCXX_BEGIN_NAMESPACE(std)
  /**
   *  @brief  Base class holding the result codes shared by the codecvt
   *  facets [22.2.1.5].
   *
   *  The class has no data members and no member functions; its only
   *  content is the @c result enumeration.
   */
  class codecvt_base
  {
  public:
    /// Outcome reported by the conversion members of a codecvt facet.
    enum result
    {
      ok      = 0, ///< All of the input was converted.
      partial = 1, ///< Input ended early, or the output had too little room.
      error   = 2, ///< The conversion failed.
      noconv  = 3  ///< No conversion was necessary.
    };
  };
  /**
   *  @brief  Shared scaffolding for the codecvt facets.
   *
   *  The public members defined here do no conversion work themselves:
   *  each one hands its arguments to the protected virtual function with
   *  the matching @c do_ name and returns whatever that function returns.
   *
   *  Every one of those virtual functions is declared pure in this class,
   *  so a concrete facet has to override all of them.
   */
  template<typename _InternT, typename _ExternT, typename _StateT>
    class __codecvt_abstract_base
    : public locale::facet, public codecvt_base
    {
    public:
      // Types:
      typedef codecvt_base::result  result;
      typedef _InternT              intern_type;
      typedef _ExternT              extern_type;
      typedef _StateT               state_type;

      // 22.2.1.5.1 codecvt members

      /**
       *  @brief  Convert internal characters to the external encoding.
       *
       *  Reads intern_type characters from [from,from_end) and writes the
       *  converted extern_type characters into [to,to_end); the operation
       *  is comparable to wcsrtombs.  The work itself is done by do_out().
       *
       *  Which character sets are involved depends on the facet's locale
       *  and on its internal and external types.
       *
       *  On return, from_next and to_next each point just past the last
       *  successfully converted character of their sequence.  When no
       *  conversion was needed, neither of them is modified.
       *
       *  @a state must be initialized when conversion starts at the
       *  beginning of the input, and must be the value left by the
       *  previous call when a conversion is being continued.  Nothing is
       *  promised about how @a state is used.
       *
       *  The return value is a codecvt_base::result:
       *  - codecvt_base::ok      all of the input was converted;
       *  - codecvt_base::noconv  no conversion was necessary;
       *  - codecvt_base::partial the input ended early or the output
       *                          buffer was too small;
       *  - codecvt_base::error   otherwise, i.e. the conversion failed.
       *
       *  @param  state      Persistent conversion state data.
       *  @param  from       Start of input.
       *  @param  from_end   End of input.
       *  @param  from_next  Returns start of unconverted data.
       *  @param  to         Start of output buffer.
       *  @param  to_end     End of output buffer.
       *  @param  to_next    Returns start of unused output area.
       *  @return  codecvt_base::result.
       */
      result
      out(state_type& __state,
          const intern_type* __from, const intern_type* __from_end,
          const intern_type*& __from_next,
          extern_type* __to, extern_type* __to_end,
          extern_type*& __to_next) const
      {
        return do_out(__state, __from, __from_end, __from_next,
                      __to, __to_end, __to_next);
      }

      /**
       *  @brief  Reset conversion state.
       *
       *  Writes to the output the characters that bring @a state back to
       *  its initial condition.  The intent is that, after a partial
       *  conversion, converting the characters written by this function
       *  leaves the state in its initial condition instead of in a
       *  partial-conversion condition.  The work itself is done by
       *  do_unshift().
       *
       *  Example: suppose 4 external characters always convert to 1
       *  internal character, and in() was given 6 external characters
       *  with the state saved.  This function would then write two
       *  characters to the output and set the state to its initialized
       *  condition.
       *
       *  Which character sets are involved depends on the facet's locale
       *  and on its internal and external types.
       *
       *  The return value is a codecvt_base::result:
       *  - codecvt_base::ok      the state was reset and data written;
       *  - codecvt_base::noconv  no conversion was necessary;
       *  - codecvt_base::partial the output buffer was too small;
       *  - codecvt_base::error   otherwise, i.e. the reset failed.
       *
       *  @param  state    Persistent conversion state data.
       *  @param  to       Start of output buffer.
       *  @param  to_end   End of output buffer.
       *  @param  to_next  Returns start of unused output area.
       *  @return  codecvt_base::result.
       */
      result
      unshift(state_type& __state,
              extern_type* __to, extern_type* __to_end,
              extern_type*& __to_next) const
      {
        return do_unshift(__state, __to, __to_end, __to_next);
      }

      /**
       *  @brief  Convert external characters to the internal encoding.
       *
       *  Reads extern_type characters from [from,from_end) and writes the
       *  converted intern_type characters into [to,to_end); the operation
       *  is comparable to mbsrtowcs.  The work itself is done by do_in().
       *
       *  Which character sets are involved depends on the facet's locale
       *  and on its internal and external types.
       *
       *  On return, from_next and to_next each point just past the last
       *  successfully converted character of their sequence.  When no
       *  conversion was needed, neither of them is modified.
       *
       *  @a state must be initialized when conversion starts at the
       *  beginning of the input, and must be the value left by the
       *  previous call when a conversion is being continued.  Nothing is
       *  promised about how @a state is used.
       *
       *  The return value is a codecvt_base::result:
       *  - codecvt_base::ok      all of the input was converted;
       *  - codecvt_base::noconv  no conversion was necessary;
       *  - codecvt_base::partial the input ended early or the output
       *                          buffer was too small;
       *  - codecvt_base::error   otherwise, i.e. the conversion failed.
       *
       *  @param  state      Persistent conversion state data.
       *  @param  from       Start of input.
       *  @param  from_end   End of input.
       *  @param  from_next  Returns start of unconverted data.
       *  @param  to         Start of output buffer.
       *  @param  to_end     End of output buffer.
       *  @param  to_next    Returns start of unused output area.
       *  @return  codecvt_base::result.
       */
      result
      in(state_type& __state,
         const extern_type* __from, const extern_type* __from_end,
         const extern_type*& __from_next,
         intern_type* __to, intern_type* __to_end,
         intern_type*& __to_next) const
      {
        return do_in(__state, __from, __from_end, __from_next,
                     __to, __to_end, __to_next);
      }

      /// Returns the value produced by do_encoding().
      int
      encoding() const throw()
      {
        return do_encoding();
      }

      /// Returns the value produced by do_always_noconv().
      bool
      always_noconv() const throw()
      {
        return do_always_noconv();
      }

      /// Returns the value produced by do_length() for the same arguments.
      int
      length(state_type& __state, const extern_type* __from,
             const extern_type* __end, size_t __max) const
      {
        return do_length(__state, __from, __end, __max);
      }

      /// Returns the value produced by do_max_length().
      int
      max_length() const throw()
      {
        return do_max_length();
      }

    protected:
      /// @a __refs is handed unchanged to the locale::facet constructor.
      explicit
      __codecvt_abstract_base(size_t __refs = 0)
      : locale::facet(__refs)
      { }

      virtual
      ~__codecvt_abstract_base()
      { }

      /**
       *  @brief  Hook behind out(): internal to external conversion.
       *
       *  Converts an input string of intern_type into an output string
       *  of extern_type.  Derived classes override this function to
       *  supply the result that out() returns.  @see out for the full
       *  contract.
       */
      virtual result
      do_out(state_type& __state, const intern_type* __from,
             const intern_type* __from_end, const intern_type*& __from_next,
             extern_type* __to, extern_type* __to_end,
             extern_type*& __to_next) const = 0;

      /// Hook behind unshift().  @see unshift for the full contract.
      virtual result
      do_unshift(state_type& __state, extern_type* __to,
                 extern_type* __to_end, extern_type*& __to_next) const = 0;

      /// Hook behind in().  @see in for the full contract.
      virtual result
      do_in(state_type& __state, const extern_type* __from,
            const extern_type* __from_end, const extern_type*& __from_next,
            intern_type* __to, intern_type* __to_end,
            intern_type*& __to_next) const = 0;

      /// Hook behind encoding().
      virtual int
      do_encoding() const throw() = 0;

      /// Hook behind always_noconv().
      virtual bool
      do_always_noconv() const throw() = 0;

      /// Hook behind length().
      virtual int
      do_length(state_type&, const extern_type* __from,
                const extern_type* __end, size_t __max) const = 0;

      /// Hook behind max_length().
      virtual int
      do_max_length() const throw() = 0;
    };
  236. /// @brief class codecvt [22.2.1.5].
  237. /// NB: Generic, mostly useless implementation.
  238. template<typename _InternT, typename _ExternT, typename _StateT>
  239. class codecvt
  240. : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
  241. {
  242. public:
  243. // Types:
  244. typedef codecvt_base::result result;
  245. typedef _InternT intern_type;
  246. typedef _ExternT extern_type;
  247. typedef _StateT state_type;
  248. protected:
  249. __c_locale _M_c_locale_codecvt;
  250. public:
  251. static locale::id id;
  252. explicit
  253. codecvt(size_t __refs = 0)
  254. : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { }
  255. explicit
  256. codecvt(__c_locale __cloc, size_t __refs = 0);
  257. protected:
  258. virtual
  259. ~codecvt() { }
  260. virtual result
  261. do_out(state_type& __state, const intern_type* __from,
  262. const intern_type* __from_end, const intern_type*& __from_next,
  263. extern_type* __to, extern_type* __to_end,
  264. extern_type*& __to_next) const;
  265. virtual result
  266. do_unshift(state_type& __state, extern_type* __to,
  267. extern_type* __to_end, extern_type*& __to_next) const;
  268. virtual result
  269. do_in(state_type& __state, const extern_type* __from,
  270. const extern_type* __from_end, const extern_type*& __from_next,
  271. intern_type* __to, intern_type* __to_end,
  272. intern_type*& __to_next) const;
  273. virtual int
  274. do_encoding() const throw();
  275. virtual bool
  276. do_always_noconv() const throw();
  277. virtual int
  278. do_length(state_type&, const extern_type* __from,
  279. const extern_type* __end, size_t __max) const;
  280. virtual int
  281. do_max_length() const throw();
  282. };
  283. template<typename _InternT, typename _ExternT, typename _StateT>
  284. locale::id codecvt<_InternT, _ExternT, _StateT>::id;
  285. /// class codecvt<char, char, mbstate_t> specialization.
  286. template<>
  287. class codecvt<char, char, mbstate_t>
  288. : public __codecvt_abstract_base<char, char, mbstate_t>
  289. {
  290. public:
  291. // Types:
  292. typedef char intern_type;
  293. typedef char extern_type;
  294. typedef mbstate_t state_type;
  295. protected:
  296. __c_locale _M_c_locale_codecvt;
  297. public:
  298. static locale::id id;
  299. explicit
  300. codecvt(size_t __refs = 0);
  301. explicit
  302. codecvt(__c_locale __cloc, size_t __refs = 0);
  303. protected:
  304. virtual
  305. ~codecvt();
  306. virtual result
  307. do_out(state_type& __state, const intern_type* __from,
  308. const intern_type* __from_end, const intern_type*& __from_next,
  309. extern_type* __to, extern_type* __to_end,
  310. extern_type*& __to_next) const;
  311. virtual result
  312. do_unshift(state_type& __state, extern_type* __to,
  313. extern_type* __to_end, extern_type*& __to_next) const;
  314. virtual result
  315. do_in(state_type& __state, const extern_type* __from,
  316. const extern_type* __from_end, const extern_type*& __from_next,
  317. intern_type* __to, intern_type* __to_end,
  318. intern_type*& __to_next) const;
  319. virtual int
  320. do_encoding() const throw();
  321. virtual bool
  322. do_always_noconv() const throw();
  323. virtual int
  324. do_length(state_type&, const extern_type* __from,
  325. const extern_type* __end, size_t __max) const;
  326. virtual int
  327. do_max_length() const throw();
  328. };
  329. #ifdef _GLIBCXX_USE_WCHAR_T
  330. /// class codecvt<wchar_t, char, mbstate_t> specialization.
  331. template<>
  332. class codecvt<wchar_t, char, mbstate_t>
  333. : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
  334. {
  335. public:
  336. // Types:
  337. typedef wchar_t intern_type;
  338. typedef char extern_type;
  339. typedef mbstate_t state_type;
  340. protected:
  341. __c_locale _M_c_locale_codecvt;
  342. public:
  343. static locale::id id;
  344. explicit
  345. codecvt(size_t __refs = 0);
  346. explicit
  347. codecvt(__c_locale __cloc, size_t __refs = 0);
  348. protected:
  349. virtual
  350. ~codecvt();
  351. virtual result
  352. do_out(state_type& __state, const intern_type* __from,
  353. const intern_type* __from_end, const intern_type*& __from_next,
  354. extern_type* __to, extern_type* __to_end,
  355. extern_type*& __to_next) const;
  356. virtual result
  357. do_unshift(state_type& __state,
  358. extern_type* __to, extern_type* __to_end,
  359. extern_type*& __to_next) const;
  360. virtual result
  361. do_in(state_type& __state,
  362. const extern_type* __from, const extern_type* __from_end,
  363. const extern_type*& __from_next,
  364. intern_type* __to, intern_type* __to_end,
  365. intern_type*& __to_next) const;
  366. virtual
  367. int do_encoding() const throw();
  368. virtual
  369. bool do_always_noconv() const throw();
  370. virtual
  371. int do_length(state_type&, const extern_type* __from,
  372. const extern_type* __end, size_t __max) const;
  373. virtual int
  374. do_max_length() const throw();
  375. };
  376. #endif //_GLIBCXX_USE_WCHAR_T
  377. /// class codecvt_byname [22.2.1.6].
  378. template<typename _InternT, typename _ExternT, typename _StateT>
  379. class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
  380. {
  381. public:
  382. explicit
  383. codecvt_byname(const char* __s, size_t __refs = 0)
  384. : codecvt<_InternT, _ExternT, _StateT>(__refs)
  385. {
  386. if (__builtin_strcmp(__s, "C") != 0
  387. && __builtin_strcmp(__s, "POSIX") != 0)
  388. {
  389. this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
  390. this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
  391. }
  392. }
  393. protected:
  394. virtual
  395. ~codecvt_byname() { }
  396. };
  397. // Inhibit implicit instantiations for required instantiations,
  398. // which are defined via explicit instantiations elsewhere.
  399. // NB: This syntax is a GNU extension.
  400. #if _GLIBCXX_EXTERN_TEMPLATE
  401. extern template class codecvt_byname<char, char, mbstate_t>;
  402. extern template
  403. const codecvt<char, char, mbstate_t>&
  404. use_facet<codecvt<char, char, mbstate_t> >(const locale&);
  405. extern template
  406. bool
  407. has_facet<codecvt<char, char, mbstate_t> >(const locale&);
  408. #ifdef _GLIBCXX_USE_WCHAR_T
  409. extern template class codecvt_byname<wchar_t, char, mbstate_t>;
  410. extern template
  411. const codecvt<wchar_t, char, mbstate_t>&
  412. use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
  413. extern template
  414. bool
  415. has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
  416. #endif
  417. #endif
  418. _GLIBCXX_END_NAMESPACE
  419. #endif // _CODECVT_H