encoding.hpp 15 KB


  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. // Copyright (c) 2025 Alexander Grund
  4. //
  5. // Distributed under the Boost Software License, Version 1.0.
  6. // https://www.boost.org/LICENSE_1_0.txt
  7. #ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED
  8. #define BOOST_LOCALE_ENCODING_HPP_INCLUDED
  9. #include <boost/locale/config.hpp>
  10. #include <boost/locale/detail/encoding.hpp>
  11. #include <boost/locale/encoding_errors.hpp>
  12. #include <boost/locale/encoding_utf.hpp>
  13. #include <boost/locale/info.hpp>
  14. #include <boost/locale/util/string.hpp>
  15. #include <memory>
  16. #ifdef BOOST_MSVC
  17. # pragma warning(push)
  18. # pragma warning(disable : 4275 4251 4231 4660)
  19. #endif
  20. namespace boost { namespace locale {
  21. /// \brief Namespace that contains all functions related to character set conversion
  22. namespace conv {
  23. /// \defgroup Charset conversion functions
  24. ///
  25. /// @{
/// Convert text in range [begin,end) encoded with \a charset to UTF according to policy \a how
///
/// \param begin Pointer to the first character of the input text
/// \param end Pointer one past the last character of the input text
/// \param charset Name of the character set the input text is encoded with
/// \param how Conversion policy, defaults to \c default_method
/// \returns The converted text as a string of \a CharType
///
/// \throws invalid_charset_error: Character set is not supported
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
/// encoded or decoded)
template<typename CharType>
BOOST_LOCALE_DECL std::basic_string<CharType>
to_utf(const char* begin, const char* end, const std::string& charset, method_type how = default_method);
/// Convert UTF text in range [begin,end) to text encoded with \a charset according to policy \a how
///
/// \param begin Pointer to the first code unit of the UTF input text
/// \param end Pointer one past the last code unit of the UTF input text
/// \param charset Name of the character set the result is encoded with
/// \param how Conversion policy, defaults to \c default_method
/// \returns The converted text as a narrow string
///
/// \throws invalid_charset_error: Character set is not supported
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
/// encoded or decoded)
template<typename CharType>
BOOST_LOCALE_DECL std::string from_utf(const CharType* begin,
                                       const CharType* end,
                                       const std::string& charset,
                                       method_type how = default_method);
  44. /// convert \a text encoded with \a charset to UTF according to policy \a how
  45. ///
  46. /// \throws invalid_charset_error: Character set is not supported
  47. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  48. /// encoded or decoded)
  49. template<typename CharType>
  50. std::basic_string<CharType>
  51. to_utf(const std::string& text, const std::string& charset, method_type how = default_method)
  52. {
  53. return to_utf<CharType>(text.c_str(), text.c_str() + text.size(), charset, how);
  54. }
  55. /// Convert \a text encoded with \a charset to UTF according to policy \a how
  56. ///
  57. /// \throws invalid_charset_error: Character set is not supported
  58. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  59. /// encoded or decoded)
  60. template<typename CharType>
  61. std::basic_string<CharType>
  62. to_utf(const char* text, const std::string& charset, method_type how = default_method)
  63. {
  64. return to_utf<CharType>(text, util::str_end(text), charset, how);
  65. }
  66. /// convert text in range [begin,end) in locale encoding given by \a loc to UTF according to
  67. /// policy \a how
  68. ///
  69. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  70. /// \throws invalid_charset_error: Character set is not supported
  71. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  72. /// encoded or decoded)
  73. template<typename CharType>
  74. std::basic_string<CharType>
  75. to_utf(const char* begin, const char* end, const std::locale& loc, method_type how = default_method)
  76. {
  77. return to_utf<CharType>(begin, end, std::use_facet<info>(loc).encoding(), how);
  78. }
  79. /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
  80. ///
  81. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  82. /// \throws invalid_charset_error: Character set is not supported
  83. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  84. /// encoded or decoded)
  85. template<typename CharType>
  86. std::basic_string<CharType>
  87. to_utf(const std::string& text, const std::locale& loc, method_type how = default_method)
  88. {
  89. return to_utf<CharType>(text, std::use_facet<info>(loc).encoding(), how);
  90. }
  91. /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
  92. ///
  93. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  94. /// \throws invalid_charset_error: Character set is not supported
  95. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  96. /// encoded or decoded)
  97. template<typename CharType>
  98. std::basic_string<CharType> to_utf(const char* text, const std::locale& loc, method_type how = default_method)
  99. {
  100. return to_utf<CharType>(text, std::use_facet<info>(loc).encoding(), how);
  101. }
  102. /// convert \a text from UTF to text encoded with \a charset according to policy \a how
  103. ///
  104. /// \throws invalid_charset_error: Character set is not supported
  105. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  106. /// encoded or decoded)
  107. template<typename CharType>
  108. std::string
  109. from_utf(const std::basic_string<CharType>& text, const std::string& charset, method_type how = default_method)
  110. {
  111. return from_utf(text.c_str(), text.c_str() + text.size(), charset, how);
  112. }
  113. /// Convert \a text from UTF to \a charset according to policy \a how
  114. ///
  115. /// \throws invalid_charset_error: Character set is not supported
  116. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  117. /// encoded or decoded)
  118. template<typename CharType>
  119. std::string from_utf(const CharType* text, const std::string& charset, method_type how = default_method)
  120. {
  121. return from_utf(text, util::str_end(text), charset, how);
  122. }
  123. /// Convert UTF text in range [begin,end) to text in locale encoding given by \a loc according to policy \a how
  124. ///
  125. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  126. /// \throws invalid_charset_error: Character set is not supported
  127. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  128. /// encoded or decoded)
  129. template<typename CharType>
  130. std::string
  131. from_utf(const CharType* begin, const CharType* end, const std::locale& loc, method_type how = default_method)
  132. {
  133. return from_utf(begin, end, std::use_facet<info>(loc).encoding(), how);
  134. }
  135. /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
  136. ///
  137. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  138. /// \throws invalid_charset_error: Character set is not supported
  139. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  140. /// encoded or decoded)
  141. template<typename CharType>
  142. std::string
  143. from_utf(const std::basic_string<CharType>& text, const std::locale& loc, method_type how = default_method)
  144. {
  145. return from_utf(text, std::use_facet<info>(loc).encoding(), how);
  146. }
  147. /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
  148. ///
  149. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  150. /// \throws invalid_charset_error: Character set is not supported
  151. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  152. /// encoded or decoded)
  153. template<typename CharType>
  154. std::string from_utf(const CharType* text, const std::locale& loc, method_type how = default_method)
  155. {
  156. return from_utf(text, std::use_facet<info>(loc).encoding(), how);
  157. }
/// Convert text in range [begin,end) to \a to_encoding from \a from_encoding according to
/// policy \a how
///
/// Note the parameter order: the target encoding comes before the source encoding.
///
/// \param begin Pointer to the first character of the input text
/// \param end Pointer one past the last character of the input text
/// \param to_encoding Name of the character set of the result
/// \param from_encoding Name of the character set the input text is encoded with
/// \param how Conversion policy, defaults to \c default_method
/// \returns The converted text
///
/// \throws invalid_charset_error: Either character set is not supported
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
/// encoded or decoded)
BOOST_LOCALE_DECL
std::string between(const char* begin,
                    const char* end,
                    const std::string& to_encoding,
                    const std::string& from_encoding,
                    method_type how = default_method);
  170. /// Convert \a text to \a to_encoding from \a from_encoding according to
  171. /// policy \a how
  172. ///
  173. /// \throws invalid_charset_error: Either character set is not supported
  174. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  175. /// encoded or decoded)
  176. inline std::string between(const char* text,
  177. const std::string& to_encoding,
  178. const std::string& from_encoding,
  179. method_type how = default_method)
  180. {
  181. return between(text, util::str_end(text), to_encoding, from_encoding, how);
  182. }
  183. /// Convert \a text to \a to_encoding from \a from_encoding according to
  184. /// policy \a how
  185. ///
  186. /// \throws invalid_charset_error: Either character set is not supported
  187. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  188. /// encoded or decoded)
  189. inline std::string between(const std::string& text,
  190. const std::string& to_encoding,
  191. const std::string& from_encoding,
  192. method_type how = default_method)
  193. {
  194. return between(text.c_str(), text.c_str() + text.size(), to_encoding, from_encoding, how);
  195. }
  196. /// @}
  197. /// Converter class to decode a narrow string using a local encoding and encode it with UTF
  198. template<typename CharType>
  199. class utf_encoder {
  200. std::unique_ptr<detail::utf_encoder<CharType>> impl_;
  201. public:
  202. using char_type = CharType;
  203. using string_type = std::basic_string<CharType>;
  204. /// Create an instance to convert text encoded with \a charset to UTF according to policy \a how
  205. ///
  206. /// Note: When converting only a single text \ref to_utf is likely faster.
  207. /// \throws invalid_charset_error: Character set is not supported
  208. utf_encoder(const std::string& charset, method_type how = default_method) :
  209. impl_(detail::make_utf_encoder<CharType>(charset, how))
  210. {}
  211. /// Convert text in range [begin,end) to UTF
  212. ///
  213. /// \throws conversion_error: Conversion failed
  214. string_type convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
  215. /// Convert \a text to UTF
  216. ///
  217. /// \throws conversion_error: Conversion failed
  218. string_type convert(const core::string_view text) const { return impl_->convert(text); }
  219. /// Convert \a text to UTF
  220. ///
  221. /// \throws conversion_error: Conversion failed
  222. string_type operator()(const core::string_view text) const { return convert(text); }
  223. };
  224. /// Converter class to decode an UTF string and encode it using a local encoding
  225. template<typename CharType>
  226. class utf_decoder {
  227. std::unique_ptr<detail::utf_decoder<CharType>> impl_;
  228. public:
  229. using char_type = CharType;
  230. using stringview_type = core::basic_string_view<CharType>;
  231. /// Create an instance to convert UTF text to text encoded with \a charset according to policy \a how
  232. ///
  233. /// Note: When converting only a single text \ref from_utf is likely faster.
  234. /// \throws invalid_charset_error: Character set is not supported
  235. utf_decoder(const std::string& charset, method_type how = default_method) :
  236. impl_(detail::make_utf_decoder<CharType>(charset, how))
  237. {}
  238. /// Convert UTF text in range [begin,end) to local encoding
  239. ///
  240. /// \throws conversion_error: Conversion failed
  241. std::string convert(const CharType* begin, const CharType* end) const { return impl_->convert(begin, end); }
  242. /// Convert \a text from UTF to local encoding
  243. ///
  244. /// \throws conversion_error: Conversion failed
  245. std::string convert(const stringview_type& text) const { return impl_->convert(text); }
  246. /// Convert \a text from UTF to local encoding
  247. ///
  248. /// \throws conversion_error: Conversion failed
  249. std::string operator()(const stringview_type& text) const { return convert(text); }
  250. };
  251. class narrow_converter {
  252. std::unique_ptr<detail::narrow_converter> impl_;
  253. public:
  254. /// Create converter to convert text from \a src_encoding to \a target_encoding according to policy \a how
  255. ///
  256. /// \throws invalid_charset_error: Either character set is not supported
  257. narrow_converter(const std::string& src_encoding,
  258. const std::string& target_encoding,
  259. method_type how = default_method) :
  260. impl_(detail::make_narrow_converter(src_encoding, target_encoding, how))
  261. {}
  262. /// Convert text in range [begin,end)
  263. ///
  264. /// \throws conversion_error: Conversion failed
  265. std::string convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
  266. /// Convert \a text
  267. ///
  268. /// \throws conversion_error: Conversion failed
  269. std::string convert(const core::string_view text) const { return impl_->convert(text); }
  270. /// Convert \a text
  271. ///
  272. /// \throws conversion_error: Conversion failed
  273. std::string operator()(const core::string_view text) const { return convert(text); }
  274. };
  275. } // namespace conv
  276. }} // namespace boost::locale
  277. #ifdef BOOST_MSVC
  278. # pragma warning(pop)
  279. #endif
  280. #endif