// transcode_algorithm.hpp
  1. // Copyright (C) 2018 Robert N. Steagall
  2. // Copyright (C) 2019 T. Zachary Laine
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ALGORITHM_HPP
  8. #define BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ALGORITHM_HPP
  9. #include <boost/parser/detail/text/in_out_result.hpp>
  10. #include <boost/parser/detail/text/transcode_iterator.hpp>
  11. #include <boost/parser/detail/text/unpack.hpp>
  12. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  13. #include <algorithm>
  14. #endif
  15. #include <boost/parser/config.hpp>
  16. namespace boost::parser::detail { namespace text {
  17. /** An alias for `in_out_result` returned by algorithms that perform a
  18. transcoding copy. */
  19. template<typename Iter, typename OutIter>
  20. using transcode_result = in_out_result<Iter, OutIter>;
  21. namespace detail {
  /** Writes the UTF-8 encoding of `cp` through `out`, one code unit per
      assignment, and returns the advanced output iterator.

      Values below 0x80 produce one byte, below 0x800 two, below 0x10000
      three, and everything else four.  `cp` is not validated. */
  template<typename OutIter>
  constexpr OutIter read_into_utf8_iter(uint32_t cp, OutIter out)
  {
      // ASCII: the code point is its own encoding.
      if (cp < 0x80) {
          *out = static_cast<char>(cp);
          ++out;
          return out;
      }

      // Pick the lead-byte marker and the number of continuation bytes.
      uint32_t lead_marker = 0xf0;
      int continuation_count = 3;
      if (cp < 0x800) {
          lead_marker = 0xC0;
          continuation_count = 1;
      } else if (cp < 0x10000) {
          lead_marker = 0xe0;
          continuation_count = 2;
      }

      // The lead byte carries the bits above all the continuation bytes.
      *out = static_cast<char>(lead_marker + (cp >> (6 * continuation_count)));
      ++out;

      // Each continuation byte carries six payload bits, high bits first.
      for (int shift = 6 * (continuation_count - 1); shift >= 0; shift -= 6) {
          *out = static_cast<char>(0x80 + ((cp >> shift) & 0x3f));
          ++out;
      }
      return out;
  }
  /** Writes the UTF-16 encoding of `cp` through `out` and returns the
      advanced output iterator.

      Values below 0x10000 are written as a single code unit; anything else
      is written as a high/low surrogate pair.  `cp` is not validated. */
  template<typename OutIter>
  constexpr OutIter read_into_utf16_iter(uint32_t cp, OutIter out)
  {
      // Single code unit: nothing to split.
      if (cp < 0x10000) {
          *out = static_cast<uint16_t>(cp);
          ++out;
          return out;
      }

      // 0xd7c0 == 0xd800 - (0x10000 >> 10), so adding it to (cp >> 10)
      // subtracts the 0x10000 bias and adds the high-surrogate base at once.
      uint16_t const lead_offset = 0xd7c0;
      uint16_t const trail_offset = 0xdc00;

      *out = static_cast<uint16_t>(cp >> 10) + lead_offset;
      ++out;
      *out = static_cast<uint16_t>(cp & 0x3ff) + trail_offset;
      ++out;
      return out;
  }
  68. template<
  69. bool UseN,
  70. typename InputIter,
  71. typename Sentinel,
  72. typename OutIter>
  73. transcode_result<InputIter, OutIter> transcode_utf_8_to_16(
  74. InputIter first,
  75. Sentinel last,
  76. std::ptrdiff_t n,
  77. OutIter out,
  78. std::input_iterator_tag)
  79. {
  80. for (; first != last && (!UseN || n); --n) {
  81. unsigned char const c = *first;
  82. if (c < 0x80) {
  83. *out = *first;
  84. ++first;
  85. ++out;
  86. } else {
  87. auto const cp = detail::advance(first, last);
  88. out = detail::read_into_utf16_iter(cp, out);
  89. }
  90. }
  91. return {first, out};
  92. }
  93. template<bool UseN, typename Iter, typename OutIter>
  94. transcode_result<Iter, OutIter> transcode_utf_8_to_16(
  95. Iter first,
  96. Iter last,
  97. std::ptrdiff_t n,
  98. OutIter out,
  99. std::random_access_iterator_tag)
  100. {
  101. return transcode_utf_8_to_16<UseN>(
  102. first, last, n, out, std::input_iterator_tag{});
  103. }
  104. template<
  105. bool UseN,
  106. typename InputIter,
  107. typename Sentinel,
  108. typename OutIter>
  109. transcode_result<InputIter, OutIter> transcode_utf_8_to_32(
  110. InputIter first,
  111. Sentinel last,
  112. std::ptrdiff_t n,
  113. OutIter out,
  114. std::input_iterator_tag)
  115. {
  116. for (; first != last && (!UseN || n); --n) {
  117. unsigned char const c = *first;
  118. if (c < 0x80) {
  119. *out = *first;
  120. ++first;
  121. ++out;
  122. } else {
  123. *out = detail::advance(first, last);
  124. ++out;
  125. }
  126. }
  127. return {first, out};
  128. }
  129. template<bool UseN, typename Iter, typename OutIter>
  130. transcode_result<Iter, OutIter> transcode_utf_8_to_32(
  131. Iter first,
  132. Iter last,
  133. std::ptrdiff_t n,
  134. OutIter out,
  135. std::random_access_iterator_tag)
  136. {
  137. return transcode_utf_8_to_32<UseN>(
  138. first, last, n, out, std::input_iterator_tag{});
  139. }
  140. template<format Tag>
  141. struct tag_t
  142. {};
  143. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  144. transcode_result<Iter, OutIter> transcode_to_8(
  145. tag_t<format::utf8>,
  146. Iter first,
  147. Sentinel last,
  148. std::ptrdiff_t n,
  149. OutIter out)
  150. {
  151. for (; first != last && (!UseN || n); ++first, ++out) {
  152. *out = *first;
  153. --n;
  154. }
  155. return {first, out};
  156. }
  157. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  158. transcode_result<Iter, OutIter> transcode_to_16(
  159. tag_t<format::utf8>,
  160. Iter first,
  161. Sentinel last,
  162. std::ptrdiff_t n,
  163. OutIter out)
  164. {
  165. return detail::transcode_utf_8_to_16<UseN>(
  166. first,
  167. last,
  168. n,
  169. out,
  170. typename std::iterator_traits<Iter>::iterator_category{});
  171. }
  172. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  173. transcode_result<Iter, OutIter> transcode_to_32(
  174. tag_t<format::utf8>,
  175. Iter first,
  176. Sentinel last,
  177. std::ptrdiff_t n,
  178. OutIter out)
  179. {
  180. return detail::transcode_utf_8_to_32<UseN>(
  181. first,
  182. last,
  183. n,
  184. out,
  185. typename std::iterator_traits<Iter>::iterator_category{});
  186. }
  187. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  188. transcode_result<Iter, OutIter> transcode_to_8(
  189. tag_t<format::utf16>,
  190. Iter first,
  191. Sentinel last,
  192. std::ptrdiff_t n,
  193. OutIter out)
  194. {
  195. uint32_t const high_surrogate_max = 0xdbff;
  196. uint16_t const high_surrogate_base = 0xd7c0;
  197. uint16_t const low_surrogate_base = 0xdc00;
  198. for (; first != last && (!UseN || n); ++first, --n) {
  199. uint32_t const hi = *first;
  200. if (surrogate(hi)) {
  201. if (hi <= high_surrogate_max) {
  202. ++first;
  203. if (first == last) {
  204. uint32_t const cp = replacement_character;
  205. out = detail::read_into_utf8_iter(cp, out);
  206. ++out;
  207. return {first, out};
  208. }
  209. uint32_t const lo = *first;
  210. if (low_surrogate(lo)) {
  211. uint32_t const cp =
  212. ((hi - high_surrogate_base) << 10) +
  213. (lo - low_surrogate_base);
  214. out = detail::read_into_utf8_iter(cp, out);
  215. continue;
  216. }
  217. }
  218. out = detail::read_into_utf8_iter(
  219. replacement_character, out);
  220. } else {
  221. out = detail::read_into_utf8_iter(hi, out);
  222. }
  223. }
  224. return {first, out};
  225. }
  226. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  227. transcode_result<Iter, OutIter> transcode_to_16(
  228. tag_t<format::utf16>,
  229. Iter first,
  230. Sentinel last,
  231. std::ptrdiff_t n,
  232. OutIter out)
  233. {
  234. for (; first != last && (!UseN || n); ++first, ++out, --n) {
  235. *out = *first;
  236. }
  237. return {first, out};
  238. }
  239. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  240. transcode_result<Iter, OutIter> transcode_to_32(
  241. tag_t<format::utf16>,
  242. Iter first,
  243. Sentinel last,
  244. std::ptrdiff_t n,
  245. OutIter out)
  246. {
  247. uint32_t const high_surrogate_max = 0xdbff;
  248. uint16_t const high_surrogate_base = 0xd7c0;
  249. uint16_t const low_surrogate_base = 0xdc00;
  250. for (; first != last && (!UseN || n); ++first, --n) {
  251. uint32_t const hi = *first;
  252. if (surrogate(hi)) {
  253. if (hi <= high_surrogate_max) {
  254. ++first;
  255. if (first == last) {
  256. *out = replacement_character;
  257. ++out;
  258. return {first, out};
  259. }
  260. uint32_t const lo = *first;
  261. if (low_surrogate(lo)) {
  262. uint32_t const cp =
  263. ((hi - high_surrogate_base) << 10) +
  264. (lo - low_surrogate_base);
  265. *out = cp;
  266. ++out;
  267. continue;
  268. }
  269. }
  270. *out = replacement_character;
  271. ++out;
  272. } else {
  273. *out = hi;
  274. ++out;
  275. }
  276. }
  277. return {first, out};
  278. }
  279. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  280. transcode_result<Iter, OutIter> transcode_to_8(
  281. tag_t<format::utf32>,
  282. Iter first,
  283. Sentinel last,
  284. std::ptrdiff_t n,
  285. OutIter out)
  286. {
  287. for (; first != last && (!UseN || n); ++first, --n) {
  288. out = detail::read_into_utf8_iter(*first, out);
  289. }
  290. return {first, out};
  291. }
  292. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  293. transcode_result<Iter, OutIter> transcode_to_16(
  294. tag_t<format::utf32>,
  295. Iter first,
  296. Sentinel last,
  297. std::ptrdiff_t n,
  298. OutIter out)
  299. {
  300. for (; first != last && (!UseN || n); ++first, --n) {
  301. out = detail::read_into_utf16_iter(*first, out);
  302. }
  303. return {first, out};
  304. }
  305. template<bool UseN, typename Iter, typename Sentinel, typename OutIter>
  306. transcode_result<Iter, OutIter> transcode_to_32(
  307. tag_t<format::utf32>,
  308. Iter first,
  309. Sentinel last,
  310. std::ptrdiff_t n,
  311. OutIter out)
  312. {
  313. for (; first != last && (!UseN || n); ++first, ++out, --n) {
  314. *out = *first;
  315. }
  316. return {first, out};
  317. }
  318. }
  // NOTE: everything between this `#if 0` and the matching `#endif` is
  // disabled and never compiled.
  // NOTE(review): as written it would not compile if enabled -- the first two
  // definitions have identical signatures, and the third names `Range` and
  // `transcode_utf_8_to_32_dispatch`, neither of which is declared in this
  // file.  Confirm whether the block should be finished or removed.
  #if 0
  /** Copies up to `n` elements of the range [first, last) to out as UTF-32
      (`n` is forwarded to `transcode_to_32<true>`). */
  template<typename InputIter, typename Sentinel, typename OutIter>
  transcode_result<InputIter, OutIter> transcode_utf_8_to_32_take_n(
      InputIter first, Sentinel last, std::ptrdiff_t n, OutIter out)
  {
      auto const r = detail::unpack_iterator_and_sentinel(first, last);
      return detail::transcode_to_32<true>(
          detail::tag_t<r.format_tag>{}, r.first, r.last, n, out);
  }
  /** Copies the first `n` code points in the range [first, last) to out,
      changing the encoding from UTF-8 to UTF-32.  (Textually identical to
      the definition above.) */
  template<typename InputIter, typename Sentinel, typename OutIter>
  transcode_result<InputIter, OutIter> transcode_utf_8_to_32_take_n(
      InputIter first, Sentinel last, std::ptrdiff_t n, OutIter out)
  {
      auto const r = detail::unpack_iterator_and_sentinel(first, last);
      return detail::transcode_to_32<true>(
          detail::tag_t<r.format_tag>{}, r.first, r.last, n, out);
  }
  /** Range flavour: copies the first `n` code points in `r` to out,
      changing the encoding from UTF-8 to UTF-32.
      NOTE(review): the body returns only the `.out` member while the
      declared return type is `transcode_result` -- confirm the intended
      return type. */
  template<typename InputIter, typename Sentinel, typename OutIter>
  transcode_result<InputIter, OutIter>
  transcode_utf_8_to_32_take_n(Range && r, std::ptrdiff_t n, OutIter out)
  {
      return detail::transcode_utf_8_to_32_dispatch<true, Range, OutIter>::
          call(r, n, out)
              .out;
  }
  #endif
  351. }}
  352. namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V1 {
  // Documentation-only declarations: this section is compiled only when
  // BOOST_TEXT_DOXYGEN is defined.  It describes the public
  // transcode_to_utf8/16/32 overload sets.
  #if defined(BOOST_TEXT_DOXYGEN)

  // -> utf8

  /** Copies the code points in the range `[first, last)` to `out`, changing
      the encoding to UTF-8. */
  template<
      std::input_iterator I,
      std::sentinel_for<I> S,
      std::output_iterator<uint8_t> O>
      requires(
          utf16_code_unit<std::iter_value_t<I>> ||
          utf32_code_unit<std::iter_value_t<I>>)
  transcode_result<I, O> transcode_to_utf8(I first, S last, O out);

  /** Copies the code points in the range `[p, null_sentinel)` to `out`,
      changing the encoding to UTF-8. */
  template<typename Ptr, std::output_iterator<uint8_t> O>
      requires(utf16_pointer<Ptr> || utf32_pointer<Ptr>)
  transcode_result<Ptr, O> transcode_to_utf8(Ptr p, O out);

  /** Copies the code points in the array `arr` to `out`, changing the
      encoding to UTF-8. */
  template<std::size_t N, typename Char, std::output_iterator<uint8_t> O>
      requires (utf16_code_unit<Char> || utf32_code_unit<Char>)
  transcode_result<Char *, O> transcode_to_utf8(Char (&arr)[N], O out);

  /** Copies the code points in the range `r` to `out`, changing the
      encoding to UTF-8. */
  template<std::ranges::input_range R, std::output_iterator<uint8_t> O>
      requires (utf16_code_unit<std::ranges::range_value_t<R>> ||
                utf32_code_unit<std::ranges::range_value_t<R>>)
  transcode_result<std::ranges::borrowed_iterator_t<R>, O>
  transcode_to_utf8(R && r, O out);

  // -> utf16

  /** Copies the code points in the range `[first, last)` to `out`, changing
      the encoding to UTF-16. */
  template<
      std::input_iterator I,
      std::sentinel_for<I> S,
      std::output_iterator<char16_t> O>
      requires (utf8_code_unit<std::iter_value_t<I>> ||
                utf32_code_unit<std::iter_value_t<I>>)
  transcode_result<I, O> transcode_to_utf16(I first, S last, O out);

  /** Copies the code points in the range `[p, null_sentinel)` to `out`,
      changing the encoding to UTF-16. */
  template<typename Ptr, std::output_iterator<char16_t> O>
      requires (utf8_pointer<Ptr> || utf32_pointer<Ptr>)
  transcode_result<Ptr, O> transcode_to_utf16(Ptr p, O out);

  /** Copies the code points in the array `arr` to `out`, changing the
      encoding to UTF-16. */
  template<std::size_t N, typename Char, std::output_iterator<char16_t> O>
      requires (utf8_code_unit<Char> || utf32_code_unit<Char>)
  transcode_result<Char *, O> transcode_to_utf16(Char (&arr)[N], O out);

  /** Copies the code points in the range `r` to `out`, changing the
      encoding to UTF-16. */
  template<std::ranges::input_range R, std::output_iterator<char16_t> O>
      requires (utf8_code_unit<std::ranges::range_value_t<R>> ||
                utf32_code_unit<std::ranges::range_value_t<R>>)
  transcode_result<std::ranges::borrowed_iterator_t<R>, O>
  transcode_to_utf16(R && r, O out);

  // -> utf32

  /** Copies the code points in the range `[first, last)` to `out`, changing
      the encoding to UTF-32. */
  template<
      std::input_iterator I,
      std::sentinel_for<I> S,
      std::output_iterator<uint32_t> O>
      requires (utf8_code_unit<std::iter_value_t<I>> ||
                utf16_code_unit<std::iter_value_t<I>>)
  transcode_result<I, O> transcode_to_utf32(I first, S last, O out);

  /** Copies the code points in the range `[p, null_sentinel)` to `out`,
      changing the encoding to UTF-32. */
  template<typename Ptr, std::output_iterator<uint32_t> O>
      requires (utf8_pointer<Ptr> || utf16_pointer<Ptr>)
  transcode_result<Ptr, O> transcode_to_utf32(Ptr p, O out);

  /** Copies the code points in the array `arr` to `out`, changing the
      encoding to UTF-32. */
  template<std::size_t N, typename Char, std::output_iterator<uint32_t> O>
      requires (utf8_code_unit<Char> || utf16_code_unit<Char>)
  transcode_result<Char *, O> transcode_to_utf32(Char (&arr)[N], O out);

  /** Copies the code points in the range `r` to `out`, changing the
      encoding to UTF-32. */
  template<std::ranges::input_range R, std::output_iterator<uint32_t> O>
      requires (utf8_code_unit<std::ranges::range_value_t<R>> ||
                utf16_code_unit<std::ranges::range_value_t<R>>)
  transcode_result<std::ranges::borrowed_iterator_t<R>, O>
  transcode_to_utf32(R && r, O out);

  #endif
  437. namespace dtl {
  438. template<
  439. bool UseN,
  440. typename Range,
  441. typename OutIter,
  442. bool _16Ptr = detail::is_16_ptr_v<Range>,
  443. bool CPPtr = detail::is_cp_ptr_v<Range>>
  444. struct transcode_to_8_dispatch
  445. {
  446. static constexpr auto
  447. call(Range && r, std::ptrdiff_t n, OutIter out)
  448. -> transcode_result<decltype(detail::begin(r)), OutIter>
  449. {
  450. auto const u = text::unpack_iterator_and_sentinel(
  451. detail::begin(r), detail::end(r));
  452. auto unpacked = detail::transcode_to_8<UseN>(
  453. detail::tag_t<u.format_tag>{}, u.first, u.last, n, out);
  454. return {u.repack(unpacked.in), unpacked.out};
  455. }
  456. };
  457. template<bool UseN, typename Ptr, typename OutIter>
  458. struct transcode_to_8_dispatch<UseN, Ptr, OutIter, true, false>
  459. {
  460. static constexpr auto
  461. call(Ptr p, std::ptrdiff_t n, OutIter out)
  462. {
  463. return detail::transcode_to_8<UseN>(
  464. detail::tag_t<format::utf16>{}, p, null_sentinel, n, out);
  465. }
  466. };
  467. template<bool UseN, typename Ptr, typename OutIter>
  468. struct transcode_to_8_dispatch<UseN, Ptr, OutIter, false, true>
  469. {
  470. static constexpr auto
  471. call(Ptr p, std::ptrdiff_t n, OutIter out)
  472. {
  473. return detail::transcode_to_8<UseN>(
  474. detail::tag_t<format::utf32>{}, p, null_sentinel, n, out);
  475. }
  476. };
  477. template<
  478. bool UseN,
  479. typename Range,
  480. typename OutIter,
  481. bool CharPtr = detail::is_char_ptr_v<Range>,
  482. bool CPPtr = detail::is_cp_ptr_v<Range>>
  483. struct transcode_to_16_dispatch
  484. {
  485. static constexpr auto
  486. call(Range && r, std::ptrdiff_t n, OutIter out)
  487. -> transcode_result<decltype(detail::begin(r)), OutIter>
  488. {
  489. auto const u = text::unpack_iterator_and_sentinel(
  490. detail::begin(r), detail::end(r));
  491. auto unpacked = detail::transcode_to_16<UseN>(
  492. detail::tag_t<u.format_tag>{}, u.first, u.last, n, out);
  493. return {u.repack(unpacked.in), unpacked.out};
  494. }
  495. };
  496. template<bool UseN, typename Ptr, typename OutIter>
  497. struct transcode_to_16_dispatch<UseN, Ptr, OutIter, true, false>
  498. {
  499. static constexpr auto
  500. call(Ptr p, std::ptrdiff_t n, OutIter out)
  501. {
  502. return detail::transcode_to_16<UseN>(
  503. detail::tag_t<format::utf8>{}, p, null_sentinel, n, out);
  504. }
  505. };
  506. template<bool UseN, typename Ptr, typename OutIter>
  507. struct transcode_to_16_dispatch<UseN, Ptr, OutIter, false, true>
  508. {
  509. static constexpr auto
  510. call(Ptr p, std::ptrdiff_t n, OutIter out)
  511. {
  512. return detail::transcode_to_16<UseN>(
  513. detail::tag_t<format::utf32>{}, p, null_sentinel, n, out);
  514. }
  515. };
  516. template<
  517. bool UseN,
  518. typename Range,
  519. typename OutIter,
  520. bool CharPtr = detail::is_char_ptr_v<Range>,
  521. bool _16Ptr = detail::is_16_ptr_v<Range>>
  522. struct transcode_to_32_dispatch
  523. {
  524. static constexpr auto
  525. call(Range && r, std::ptrdiff_t n, OutIter out)
  526. -> transcode_result<decltype(detail::begin(r)), OutIter>
  527. {
  528. auto const u = text::unpack_iterator_and_sentinel(
  529. detail::begin(r), detail::end(r));
  530. auto unpacked = detail::transcode_to_32<UseN>(
  531. detail::tag_t<u.format_tag>{}, u.first, u.last, n, out);
  532. return {u.repack(unpacked.in), unpacked.out};
  533. }
  534. };
  535. template<bool UseN, typename Ptr, typename OutIter>
  536. struct transcode_to_32_dispatch<UseN, Ptr, OutIter, true, false>
  537. {
  538. static constexpr auto
  539. call(Ptr p, std::ptrdiff_t n, OutIter out)
  540. {
  541. return detail::transcode_to_32<UseN>(
  542. detail::tag_t<format::utf8>{}, p, null_sentinel, n, out);
  543. }
  544. };
  545. template<bool UseN, typename Ptr, typename OutIter>
  546. struct transcode_to_32_dispatch<UseN, Ptr, OutIter, false, true>
  547. {
  548. static constexpr auto
  549. call(Ptr p, std::ptrdiff_t n, OutIter out)
  550. {
  551. return detail::transcode_to_32<UseN>(
  552. detail::tag_t<format::utf16>{}, p, null_sentinel, n, out);
  553. }
  554. };
  555. }
  556. template<typename Iter, typename Sentinel, typename OutIter>
  557. transcode_result<Iter, OutIter> transcode_to_utf8(
  558. Iter first, Sentinel last, OutIter out)
  559. {
  560. auto const r = text::unpack_iterator_and_sentinel(first, last);
  561. auto unpacked = detail::transcode_to_8<false>(
  562. detail::tag_t<r.format_tag>{}, r.first, r.last, -1, out);
  563. return {r.repack(unpacked.in), unpacked.out};
  564. }
  565. template<typename Range, typename OutIter>
  566. transcode_result<detail::iterator_t<Range>, OutIter>
  567. transcode_to_utf8(Range && r, OutIter out)
  568. {
  569. return dtl::transcode_to_8_dispatch<false, Range, OutIter>::call(
  570. r, -1, out);
  571. }
  572. template<typename Iter, typename Sentinel, typename OutIter>
  573. transcode_result<Iter, OutIter> transcode_to_utf16(
  574. Iter first, Sentinel last, OutIter out)
  575. {
  576. auto const r = text::unpack_iterator_and_sentinel(first, last);
  577. auto unpacked = detail::transcode_to_16<false>(
  578. detail::tag_t<r.format_tag>{}, r.first, r.last, -1, out);
  579. return {r.repack(unpacked.in), unpacked.out};
  580. }
  581. template<typename Range, typename OutIter>
  582. transcode_result<detail::iterator_t<Range>, OutIter>
  583. transcode_to_utf16(Range && r, OutIter out)
  584. {
  585. return dtl::transcode_to_16_dispatch<false, Range, OutIter>::call(
  586. r, -1, out);
  587. }
  588. template<typename Iter, typename Sentinel, typename OutIter>
  589. transcode_result<Iter, OutIter> transcode_to_utf32(
  590. Iter first, Sentinel last, OutIter out)
  591. {
  592. auto const r = text::unpack_iterator_and_sentinel(first, last);
  593. auto unpacked = detail::transcode_to_32<false>(
  594. detail::tag_t<r.format_tag>{}, r.first, r.last, -1, out);
  595. return {r.repack(unpacked.in), unpacked.out};
  596. }
  597. template<typename Range, typename OutIter>
  598. transcode_result<detail::iterator_t<Range>, OutIter>
  599. transcode_to_utf32(Range && r, OutIter out)
  600. {
  601. return dtl::transcode_to_32_dispatch<false, Range, OutIter>::call(
  602. r, -1, out);
  603. }
  604. }}}
  605. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  606. namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 {
  607. // -> utf8
  608. template<
  609. std::input_iterator I,
  610. std::sentinel_for<I> S,
  611. std::output_iterator<uint8_t> O>
  612. requires(
  613. utf16_code_unit<std::iter_value_t<I>> ||
  614. utf32_code_unit<std::iter_value_t<I>>)
  615. transcode_result<I, O> transcode_to_utf8(I first, S last, O out)
  616. {
  617. auto const r = text::unpack_iterator_and_sentinel(first, last);
  618. auto unpacked = detail::transcode_to_8<false>(
  619. detail::tag_t<r.format_tag>{}, r.first, r.last, -1, out);
  620. return {r.repack(unpacked.in), unpacked.out};
  621. }
  622. template<typename R, std::output_iterator<uint32_t> O>
  623. requires(utf16_range<R> || utf32_range<R>)
  624. transcode_result<dtl::uc_result_iterator<R>, O> transcode_to_utf8(
  625. R && r, O out)
  626. {
  627. return text::transcode_to_utf8(
  628. std::ranges::begin(r), std::ranges::end(r), out);
  629. }
  630. // -> utf16
  631. template<
  632. std::input_iterator I,
  633. std::sentinel_for<I> S,
  634. std::output_iterator<char16_t> O>
  635. requires(
  636. utf8_code_unit<std::iter_value_t<I>> ||
  637. utf32_code_unit<std::iter_value_t<I>>)
  638. transcode_result<I, O> transcode_to_utf16(I first, S last, O out)
  639. {
  640. auto const r = text::unpack_iterator_and_sentinel(first, last);
  641. auto unpacked = detail::transcode_to_16<false>(
  642. detail::tag_t<r.format_tag>{}, r.first, r.last, -1, out);
  643. return {r.repack(unpacked.in), unpacked.out};
  644. }
  645. template<typename R, std::output_iterator<uint32_t> O>
  646. requires(utf8_range<R> || utf32_range<R>)
  647. transcode_result<dtl::uc_result_iterator<R>, O> transcode_to_utf16(
  648. R && r, O out)
  649. {
  650. return text::transcode_to_utf16(
  651. std::ranges::begin(r), std::ranges::end(r), out);
  652. }
  653. // -> utf32
  654. template<
  655. std::input_iterator I,
  656. std::sentinel_for<I> S,
  657. std::output_iterator<uint32_t> O>
  658. requires(
  659. utf8_code_unit<std::iter_value_t<I>> ||
  660. utf16_code_unit<std::iter_value_t<I>>)
  661. transcode_result<I, O> transcode_to_utf32(I first, S last, O out)
  662. {
  663. auto const r = text::unpack_iterator_and_sentinel(first, last);
  664. auto unpacked = detail::transcode_to_32<false>(
  665. detail::tag_t<r.format_tag>{}, r.first, r.last, -1, out);
  666. return {r.repack(unpacked.in), unpacked.out};
  667. }
  668. template<typename R, std::output_iterator<uint32_t> O>
  669. requires(utf8_range<R> || utf16_range<R>)
  670. transcode_result<dtl::uc_result_iterator<R>, O> transcode_to_utf32(
  671. R && r, O out)
  672. {
  673. return text::transcode_to_utf32(
  674. std::ranges::begin(r), std::ranges::end(r), out);
  675. }
  676. }}}
  677. #endif
  678. #endif