transcode_iterator.hpp 118 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487
  1. // Copyright (C) 2020 T. Zachary Laine
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See
  4. // accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6. #ifndef BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ITERATOR_HPP
  7. #define BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ITERATOR_HPP
  8. #include <boost/parser/detail/debug_assert.hpp>
  9. #include <boost/parser/detail/text/transcode_iterator_fwd.hpp>
  10. #include <boost/parser/detail/text/concepts.hpp>
  11. #include <boost/parser/detail/text/utf.hpp>
  12. #include <boost/parser/detail/text/detail/algorithm.hpp>
  13. #include <boost/parser/detail/stl_interfaces/iterator_interface.hpp>
  14. #include <array>
  15. #include <iterator>
  16. #include <type_traits>
  17. #include <stdexcept>
  18. #include <string_view>
  19. namespace boost::parser::detail { namespace text {
  // UTF-16 surrogate ranges, the biases used to build and decode surrogate
  // pairs, and the code point substituted for ill-formed input.  These are
  // inline variables (rather than members of an unnamed namespace) so that
  // every translation unit including this header names the same entities.

  /** Bias added to `cp >> 10` to form the high surrogate of a code point at
      or above 0x10000; equal to `0xd800 - (0x10000 >> 10)`. */
  inline constexpr char16_t high_surrogate_base = 0xd7c0;
  /** Bias added to `cp & 0x3ff` to form the low surrogate. */
  inline constexpr char16_t low_surrogate_base = 0xdc00;
  /** First code point of the high-surrogate range. */
  inline constexpr char32_t high_surrogate_min = 0xd800;
  /** Last code point of the high-surrogate range. */
  inline constexpr char32_t high_surrogate_max = 0xdbff;
  /** First code point of the low-surrogate range. */
  inline constexpr char32_t low_surrogate_min = 0xdc00;
  /** Last code point of the low-surrogate range. */
  inline constexpr char32_t low_surrogate_max = 0xdfff;
  /** U+FFFD, returned by the decoder in place of ill-formed input. */
  inline constexpr char32_t replacement_character = 0xfffd;
  29. namespace detail {
  /** Returns true iff `c` lies in the closed interval `[lo, hi]`. */
  constexpr bool in(unsigned char lo, unsigned char c, unsigned char hi)
  {
      // Equivalent to `lo <= c && c <= hi`, phrased as "not outside".
      return !(c < lo || hi < c);
  }
  /** Empty tag type.  NOTE(review): presumably names an error-handling
      policy that throws on ill-formed input; its users are not visible in
      this part of the file -- confirm. */
  struct throw_on_encoding_error {};
  /** Writes the UTF-8 encoding of `cp` through `buf`, one `char` per
      assignment, and returns the iterator past the last unit written.

      Values below 0x80 produce one unit, below 0x800 two, below 0x10000
      three, and everything else four.  `cp` is not validated. */
  template<typename OutIter>
  inline constexpr OutIter read_into_buf(char32_t cp, OutIter buf)
  {
      // Number of continuation units that follow the lead unit.
      int const trailing = cp < 0x80 ? 0 : cp < 0x800 ? 1 : cp < 0x10000 ? 2 : 3;

      // Marker bits of the lead unit, indexed by `trailing`.
      constexpr char32_t lead_marker[4] = {0x00, 0xc0, 0xe0, 0xf0};

      // Lead unit: marker plus the topmost payload bits.
      *buf = static_cast<char>(lead_marker[trailing] + (cp >> (6 * trailing)));
      ++buf;

      // Continuation units: 10xxxxxx, six payload bits each, high to low.
      for (int shift = 6 * (trailing - 1); 0 <= shift; shift -= 6) {
          *buf = static_cast<char>(0x80 + ((cp >> shift) & 0x3f));
          ++buf;
      }
      return buf;
  }
  66. template<typename OutIter>
  67. constexpr OutIter write_cp_utf8(char32_t cp, OutIter out)
  68. {
  69. return detail::read_into_buf(cp, out);
  70. }
  71. template<typename OutIter>
  72. constexpr OutIter write_cp_utf16(char32_t cp, OutIter out)
  73. {
  74. if (cp < 0x10000) {
  75. *out = static_cast<char16_t>(cp);
  76. ++out;
  77. } else {
  78. *out = static_cast<char16_t>(cp >> 10) + high_surrogate_base;
  79. ++out;
  80. *out = static_cast<char16_t>(cp & 0x3ff) + low_surrogate_base;
  81. ++out;
  82. }
  83. return out;
  84. }
  85. inline constexpr char32_t surrogates_to_cp(char16_t hi, char16_t lo)
  86. {
  87. return char32_t((hi - high_surrogate_base) << 10) +
  88. (lo - low_surrogate_base);
  89. }
  90. template<typename T, typename U>
  91. using enable_utf8_cp = std::enable_if<is_char_iter_v<T>, U>;
  92. template<typename T, typename U = T>
  93. using enable_utf8_cp_t = typename enable_utf8_cp<T, U>::type;
  94. template<typename T, typename U>
  95. using enable_utf16_cp = std::enable_if<is_16_iter_v<T>, U>;
  96. template<typename T, typename U = T>
  97. using enable_utf16_cp_t = typename enable_utf16_cp<T, U>::type;
  /** Returns a default-constructed iterator category tag for `I`, capped at
      `std::bidirectional_iterator_tag`.

      With concepts enabled the tag is chosen by the strongest of
      `std::bidirectional_iterator`, `std::forward_iterator` and
      `std::input_iterator` that `I` models; if it models none of them the
      function returns `void`.  Without concepts the tag is `I`'s
      `iterator_category`, replaced by the bidirectional tag when it derives
      from it. */
  template<typename I>
  auto bidirectional_at_most()
  {
  #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
      if constexpr (std::bidirectional_iterator<I>)
          return std::bidirectional_iterator_tag{};
      else if constexpr (std::forward_iterator<I>)
          return std::forward_iterator_tag{};
      else if constexpr (std::input_iterator<I>)
          return std::input_iterator_tag{};
  #else
      using declared = typename std::iterator_traits<I>::iterator_category;
      using capped = std::conditional_t<
          std::is_base_of_v<std::bidirectional_iterator_tag, declared>,
          std::bidirectional_iterator_tag,
          declared>;
      return capped{};
  #endif
  }
  /** The tag type returned by `bidirectional_at_most<I>()`. */
  template<typename I>
  using bidirectional_at_most_t = decltype(bidirectional_at_most<I>());
  123. }
  124. /** Returns true iff `c` is a Unicode surrogate. */
  125. inline constexpr bool surrogate(char32_t c)
  126. {
  127. return high_surrogate_min <= c && c <= low_surrogate_max;
  128. }
  129. /** Returns true iff `c` is a Unicode high surrogate. */
  130. inline constexpr bool high_surrogate(char32_t c)
  131. {
  132. return high_surrogate_min <= c && c <= high_surrogate_max;
  133. }
  134. /** Returns true iff `c` is a Unicode low surrogate. */
  135. inline constexpr bool low_surrogate(char32_t c)
  136. {
  137. return low_surrogate_min <= c && c <= low_surrogate_max;
  138. }
  /** Returns true iff `c` is a Unicode reserved noncharacter: one of
      U+FDD0..U+FDEF, or one of the last two code points (`xxFFFE`,
      `xxFFFF`) of any of the planes 0 through 0x10.  Values above 0x10FFFF
      are never reported as noncharacters.

      \see Unicode 3.4/D14 */
  inline constexpr bool reserved_noncharacter(char32_t c)
  {
      if (0xfdd0 <= c && c <= 0xfdef)
          return true;
      bool const last_two_of_plane = (c & 0xffff) >= 0xfffe;
      // Use every bit above the low 16 as the plane number, so that values
      // with bits set above bit 23 are not mistaken for planes 0..0x10.
      bool const plane_at_most_0x10 = (c >> 16) <= 0x10;
      return last_two_of_plane && plane_at_most_0x10;
  }
  148. /** Returns true iff `c` is a valid Unicode scalar value.
  149. \see Unicode 3.9/D90 */
  150. inline constexpr bool scalar_value(char32_t c)
  151. {
  152. return c <= 0x10ffff && !surrogate(c);
  153. }
  154. /** Returns true iff `c` is a Unicode scalar value not in the reserved
  155. range.
  156. \see Unicode 3.9/D90 */
  157. inline constexpr bool unreserved_scalar_value(char32_t c)
  158. {
  159. return scalar_value(c) && !reserved_noncharacter(c);
  160. }
  161. /** Returns true iff `c` is a UTF-8 lead code unit (which must be followed
  162. by 1-3 following units). */
  163. constexpr bool lead_code_unit(char8_type c)
  164. {
  165. return uint8_t((unsigned char)c - 0xc2) <= 0x32;
  166. }
  167. /** Returns true iff `c` is a UTF-8 continuation code unit. */
  168. constexpr bool continuation(char8_type c) { return (int8_t)c < -0x40; }
  169. /** Given the first (and possibly only) code unit of a UTF-8-encoded code
  170. point, returns the number of bytes occupied by that code point (in the
  171. range `[1, 4]`). Returns a value < 0 if `first_unit` is not a valid
  172. initial UTF-8 code unit. */
  173. inline constexpr int utf8_code_units(char8_type first_unit_)
  174. {
  175. auto first_unit = (unsigned int)first_unit_;
  176. return first_unit <= 0x7f ? 1
  177. : boost::parser::detail::text::lead_code_unit(first_unit)
  178. ? int(0xe0 <= first_unit) + int(0xf0 <= first_unit) + 2
  179. : -1;
  180. }
  181. /** Given the first (and possibly only) code unit of a UTF-16-encoded code
  182. point, returns the number of code units occupied by that code point
  183. (in the range `[1, 2]`). Returns a negative value if `first_unit` is
  184. not a valid initial UTF-16 code unit. */
  185. inline constexpr int utf16_code_units(char16_t first_unit)
  186. {
  187. if (boost::parser::detail::text::low_surrogate(first_unit))
  188. return -1;
  189. if (boost::parser::detail::text::high_surrogate(first_unit))
  190. return 2;
  191. return 1;
  192. }
  193. namespace detail {
  194. // optional is not constexpr friendly.
  195. template<typename Iter>
  196. struct optional_iter
  197. {
  198. constexpr optional_iter() : it_(), valid_(false) {}
  199. constexpr optional_iter(Iter it) : it_(it), valid_(true) {}
  200. constexpr operator bool() const { return valid_; }
  201. constexpr Iter operator*() const
  202. {
  203. BOOST_PARSER_DEBUG_ASSERT(valid_);
  204. return it_;
  205. }
  206. Iter & operator*()
  207. {
  208. BOOST_PARSER_DEBUG_ASSERT(valid_);
  209. return it_;
  210. }
  211. friend BOOST_PARSER_CONSTEXPR bool
  212. operator==(optional_iter lhs, optional_iter rhs)
  213. {
  214. return lhs.valid_ == rhs.valid_ &&
  215. (!lhs.valid_ || lhs.it_ == rhs.it_);
  216. }
  217. friend BOOST_PARSER_CONSTEXPR bool
  218. operator!=(optional_iter lhs, optional_iter rhs)
  219. {
  220. return !(lhs == rhs);
  221. }
  222. private:
  223. Iter it_;
  224. bool valid_;
  225. };
  226. // Follow Table 3-7 in Unicode 3.9/D92
  227. template<typename Iter>
  228. constexpr optional_iter<Iter> end_of_invalid_utf8(Iter it)
  229. {
  230. BOOST_PARSER_DEBUG_ASSERT(!boost::parser::detail::text::continuation(*it));
  231. if (detail::in(0, *it, 0x7f))
  232. return optional_iter<Iter>{};
  233. if (detail::in(0xc2, *it, 0xdf)) {
  234. auto next = it;
  235. if (!boost::parser::detail::text::continuation(*++next))
  236. return next;
  237. return optional_iter<Iter>{};
  238. }
  239. if (detail::in(0xe0, *it, 0xe0)) {
  240. auto next = it;
  241. if (!detail::in(0xa0, *++next, 0xbf))
  242. return next;
  243. if (!boost::parser::detail::text::continuation(*++next))
  244. return next;
  245. return optional_iter<Iter>{};
  246. }
  247. if (detail::in(0xe1, *it, 0xec)) {
  248. auto next = it;
  249. if (!boost::parser::detail::text::continuation(*++next))
  250. return next;
  251. if (!boost::parser::detail::text::continuation(*++next))
  252. return next;
  253. return optional_iter<Iter>{};
  254. }
  255. if (detail::in(0xed, *it, 0xed)) {
  256. auto next = it;
  257. if (!detail::in(0x80, *++next, 0x9f))
  258. return next;
  259. if (!boost::parser::detail::text::continuation(*++next))
  260. return next;
  261. return optional_iter<Iter>{};
  262. }
  263. if (detail::in(0xee, *it, 0xef)) {
  264. auto next = it;
  265. if (!boost::parser::detail::text::continuation(*++next))
  266. return next;
  267. if (!boost::parser::detail::text::continuation(*++next))
  268. return next;
  269. return optional_iter<Iter>{};
  270. }
  271. if (detail::in(0xf0, *it, 0xf0)) {
  272. auto next = it;
  273. if (!detail::in(0x90, *++next, 0xbf))
  274. return next;
  275. if (!boost::parser::detail::text::continuation(*++next))
  276. return next;
  277. if (!boost::parser::detail::text::continuation(*++next))
  278. return next;
  279. return optional_iter<Iter>{};
  280. }
  281. if (detail::in(0xf1, *it, 0xf3)) {
  282. auto next = it;
  283. if (!boost::parser::detail::text::continuation(*++next))
  284. return next;
  285. if (!boost::parser::detail::text::continuation(*++next))
  286. return next;
  287. if (!boost::parser::detail::text::continuation(*++next))
  288. return next;
  289. return optional_iter<Iter>{};
  290. }
  291. if (detail::in(0xf4, *it, 0xf4)) {
  292. auto next = it;
  293. if (!detail::in(0x80, *++next, 0x8f))
  294. return next;
  295. if (!boost::parser::detail::text::continuation(*++next))
  296. return next;
  297. if (!boost::parser::detail::text::continuation(*++next))
  298. return next;
  299. return optional_iter<Iter>{};
  300. }
  301. return it;
  302. }
  303. template<typename Iter>
  304. constexpr Iter decrement(Iter it)
  305. {
  306. Iter retval = it;
  307. int backup = 0;
  308. while (backup < 4 && boost::parser::detail::text::continuation(*--retval)) {
  309. ++backup;
  310. }
  311. backup = it - retval;
  312. if (boost::parser::detail::text::continuation(*retval))
  313. return it - 1;
  314. optional_iter<Iter> first_invalid = end_of_invalid_utf8(retval);
  315. if (first_invalid == retval)
  316. ++*first_invalid;
  317. while (first_invalid && (*first_invalid - retval) < backup) {
  318. backup -= *first_invalid - retval;
  319. retval = *first_invalid;
  320. first_invalid = end_of_invalid_utf8(retval);
  321. if (first_invalid == retval)
  322. ++*first_invalid;
  323. }
  324. if (1 < backup) {
  325. int const cp_bytes = boost::parser::detail::text::utf8_code_units(*retval);
  326. if (cp_bytes < backup)
  327. retval = it - 1;
  328. }
  329. return retval;
  330. }
  331. template<typename Iter>
  332. constexpr Iter decrement(Iter first, Iter it)
  333. {
  334. Iter retval = it;
  335. int backup = 0;
  336. while (backup < 4 && retval != first &&
  337. boost::parser::detail::text::continuation(*--retval)) {
  338. ++backup;
  339. }
  340. backup = (int)std::distance(retval, it);
  341. if (boost::parser::detail::text::continuation(*retval)) {
  342. if (it != first)
  343. --it;
  344. return it;
  345. }
  346. optional_iter<Iter> first_invalid = end_of_invalid_utf8(retval);
  347. if (first_invalid == retval)
  348. ++*first_invalid;
  349. while (first_invalid &&
  350. std::distance(retval, *first_invalid) < backup) {
  351. backup -= (int)std::distance(retval, *first_invalid);
  352. retval = *first_invalid;
  353. first_invalid = end_of_invalid_utf8(retval);
  354. if (first_invalid == retval)
  355. ++*first_invalid;
  356. }
  357. if (1 < backup) {
  358. int const cp_bytes = boost::parser::detail::text::utf8_code_units(*retval);
  359. if (cp_bytes < backup) {
  360. if (it != first)
  361. --it;
  362. retval = it;
  363. }
  364. }
  365. return retval;
  366. }
  /** Class of a single UTF-8 octet, as assigned by `octet_classes`.  The
      numeric value is the column index within a row of `transitions`. */
  enum char_class : uint8_t {
      ill = 0,  // 0xc0, 0xc1, 0xf5..0xff
      asc = 1,  // 0x00..0x7f
      cr1 = 2,  // 0x80..0x8f
      cr2 = 3,  // 0x90..0x9f
      cr3 = 4,  // 0xa0..0xbf
      l2a = 5,  // 0xc2..0xdf
      l3a = 6,  // 0xe0
      l3b = 7,  // 0xe1..0xec, 0xee, 0xef
      l3c = 8,  // 0xed
      l4a = 9,  // 0xf0
      l4b = 10, // 0xf1..0xf3
      l4c = 11, // 0xf4
  };
  /** State of the UTF-8 decoding state machine.  Each state's value is the
      offset of its 12-entry row in `transitions`, so that
      `transitions[state + char_class]` is the next state. */
  enum table_state : uint8_t {
      bgn = 0 * 12,
      e_d = bgn, // "end"
      err = 1 * 12,
      cs1 = 2 * 12,
      cs2 = 3 * 12,
      cs3 = 4 * 12,
      p3a = 5 * 12,
      p3b = 6 * 12,
      p4a = 7 * 12,
      p4b = 8 * 12,
      invalid_table_state = 200
  };
  394. struct first_cu
  395. {
  396. unsigned char initial_octet;
  397. table_state next;
  398. };
  399. namespace {
  400. constexpr first_cu first_cus[256] = {
  401. {0x00, bgn}, {0x01, bgn}, {0x02, bgn}, {0x03, bgn}, {0x04, bgn},
  402. {0x05, bgn}, {0x06, bgn}, {0x07, bgn}, {0x08, bgn}, {0x09, bgn},
  403. {0x0a, bgn}, {0x0b, bgn}, {0x0c, bgn}, {0x0d, bgn}, {0x0e, bgn},
  404. {0x0f, bgn}, {0x10, bgn}, {0x11, bgn}, {0x12, bgn}, {0x13, bgn},
  405. {0x14, bgn}, {0x15, bgn}, {0x16, bgn}, {0x17, bgn}, {0x18, bgn},
  406. {0x19, bgn}, {0x1a, bgn}, {0x1b, bgn}, {0x1c, bgn}, {0x1d, bgn},
  407. {0x1e, bgn}, {0x1f, bgn}, {0x20, bgn}, {0x21, bgn}, {0x22, bgn},
  408. {0x23, bgn}, {0x24, bgn}, {0x25, bgn}, {0x26, bgn}, {0x27, bgn},
  409. {0x28, bgn}, {0x29, bgn}, {0x2a, bgn}, {0x2b, bgn}, {0x2c, bgn},
  410. {0x2d, bgn}, {0x2e, bgn}, {0x2f, bgn}, {0x30, bgn}, {0x31, bgn},
  411. {0x32, bgn}, {0x33, bgn}, {0x34, bgn}, {0x35, bgn}, {0x36, bgn},
  412. {0x37, bgn}, {0x38, bgn}, {0x39, bgn}, {0x3a, bgn}, {0x3b, bgn},
  413. {0x3c, bgn}, {0x3d, bgn}, {0x3e, bgn}, {0x3f, bgn}, {0x40, bgn},
  414. {0x41, bgn}, {0x42, bgn}, {0x43, bgn}, {0x44, bgn}, {0x45, bgn},
  415. {0x46, bgn}, {0x47, bgn}, {0x48, bgn}, {0x49, bgn}, {0x4a, bgn},
  416. {0x4b, bgn}, {0x4c, bgn}, {0x4d, bgn}, {0x4e, bgn}, {0x4f, bgn},
  417. {0x50, bgn}, {0x51, bgn}, {0x52, bgn}, {0x53, bgn}, {0x54, bgn},
  418. {0x55, bgn}, {0x56, bgn}, {0x57, bgn}, {0x58, bgn}, {0x59, bgn},
  419. {0x5a, bgn}, {0x5b, bgn}, {0x5c, bgn}, {0x5d, bgn}, {0x5e, bgn},
  420. {0x5f, bgn}, {0x60, bgn}, {0x61, bgn}, {0x62, bgn}, {0x63, bgn},
  421. {0x64, bgn}, {0x65, bgn}, {0x66, bgn}, {0x67, bgn}, {0x68, bgn},
  422. {0x69, bgn}, {0x6a, bgn}, {0x6b, bgn}, {0x6c, bgn}, {0x6d, bgn},
  423. {0x6e, bgn}, {0x6f, bgn}, {0x70, bgn}, {0x71, bgn}, {0x72, bgn},
  424. {0x73, bgn}, {0x74, bgn}, {0x75, bgn}, {0x76, bgn}, {0x77, bgn},
  425. {0x78, bgn}, {0x79, bgn}, {0x7a, bgn}, {0x7b, bgn}, {0x7c, bgn},
  426. {0x7d, bgn}, {0x7e, bgn}, {0x7f, bgn}, {0x00, err}, {0x01, err},
  427. {0x02, err}, {0x03, err}, {0x04, err}, {0x05, err}, {0x06, err},
  428. {0x07, err}, {0x08, err}, {0x09, err}, {0x0a, err}, {0x0b, err},
  429. {0x0c, err}, {0x0d, err}, {0x0e, err}, {0x0f, err}, {0x10, err},
  430. {0x11, err}, {0x12, err}, {0x13, err}, {0x14, err}, {0x15, err},
  431. {0x16, err}, {0x17, err}, {0x18, err}, {0x19, err}, {0x1a, err},
  432. {0x1b, err}, {0x1c, err}, {0x1d, err}, {0x1e, err}, {0x1f, err},
  433. {0x20, err}, {0x21, err}, {0x22, err}, {0x23, err}, {0x24, err},
  434. {0x25, err}, {0x26, err}, {0x27, err}, {0x28, err}, {0x29, err},
  435. {0x2a, err}, {0x2b, err}, {0x2c, err}, {0x2d, err}, {0x2e, err},
  436. {0x2f, err}, {0x30, err}, {0x31, err}, {0x32, err}, {0x33, err},
  437. {0x34, err}, {0x35, err}, {0x36, err}, {0x37, err}, {0x38, err},
  438. {0x39, err}, {0x3a, err}, {0x3b, err}, {0x3c, err}, {0x3d, err},
  439. {0x3e, err}, {0x3f, err}, {0xc0, err}, {0xc1, err}, {0x02, cs1},
  440. {0x03, cs1}, {0x04, cs1}, {0x05, cs1}, {0x06, cs1}, {0x07, cs1},
  441. {0x08, cs1}, {0x09, cs1}, {0x0a, cs1}, {0x0b, cs1}, {0x0c, cs1},
  442. {0x0d, cs1}, {0x0e, cs1}, {0x0f, cs1}, {0x10, cs1}, {0x11, cs1},
  443. {0x12, cs1}, {0x13, cs1}, {0x14, cs1}, {0x15, cs1}, {0x16, cs1},
  444. {0x17, cs1}, {0x18, cs1}, {0x19, cs1}, {0x1a, cs1}, {0x1b, cs1},
  445. {0x1c, cs1}, {0x1d, cs1}, {0x1e, cs1}, {0x1f, cs1}, {0x00, p3a},
  446. {0x01, cs2}, {0x02, cs2}, {0x03, cs2}, {0x04, cs2}, {0x05, cs2},
  447. {0x06, cs2}, {0x07, cs2}, {0x08, cs2}, {0x09, cs2}, {0x0a, cs2},
  448. {0x0b, cs2}, {0x0c, cs2}, {0x0d, p3b}, {0x0e, cs2}, {0x0f, cs2},
  449. {0x00, p4a}, {0x01, cs3}, {0x02, cs3}, {0x03, cs3}, {0x04, p4b},
  450. {0xf5, err}, {0xf6, err}, {0xf7, err}, {0xf8, err}, {0xf9, err},
  451. {0xfa, err}, {0xfb, err}, {0xfc, err}, {0xfd, err}, {0xfe, err},
  452. {0xff, err},
  453. };
  454. constexpr char_class octet_classes[256] = {
  455. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  456. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  457. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  458. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  459. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  460. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  461. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  462. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  463. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc,
  464. asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, asc, cr1, cr1,
  465. cr1, cr1, cr1, cr1, cr1, cr1, cr1, cr1, cr1, cr1, cr1, cr1, cr1,
  466. cr1, cr2, cr2, cr2, cr2, cr2, cr2, cr2, cr2, cr2, cr2, cr2, cr2,
  467. cr2, cr2, cr2, cr2, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3,
  468. cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3,
  469. cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, cr3, ill, ill, l2a,
  470. l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a,
  471. l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a, l2a,
  472. l2a, l2a, l2a, l3a, l3b, l3b, l3b, l3b, l3b, l3b, l3b, l3b, l3b,
  473. l3b, l3b, l3b, l3c, l3b, l3b, l4a, l4b, l4b, l4b, l4c, ill, ill,
  474. ill, ill, ill, ill, ill, ill, ill, ill, ill,
  475. };
  476. constexpr table_state transitions[108] = {
  477. err, e_d, err, err, err, cs1, p3a, cs2, p3b, p4a, cs3, p4b,
  478. err, err, err, err, err, err, err, err, err, err, err, err,
  479. err, err, e_d, e_d, e_d, err, err, err, err, err, err, err,
  480. err, err, cs1, cs1, cs1, err, err, err, err, err, err, err,
  481. err, err, cs2, cs2, cs2, err, err, err, err, err, err, err,
  482. err, err, err, err, cs1, err, err, err, err, err, err, err,
  483. err, err, cs1, cs1, err, err, err, err, err, err, err, err,
  484. err, err, err, cs2, cs2, err, err, err, err, err, err, err,
  485. err, err, cs2, err, err, err, err, err, err, err, err, err,
  486. };
  487. }
  488. template<typename InputIter, typename Sentinel>
  489. char32_t advance(InputIter & first, Sentinel last)
  490. {
  491. char32_t retval = 0;
  492. first_cu const info = first_cus[(unsigned char)*first];
  493. ++first;
  494. retval = info.initial_octet;
  495. int state = info.next;
  496. while (state != bgn) {
  497. if (first != last) {
  498. unsigned char const cu = *first;
  499. retval = (retval << 6) | (cu & 0x3f);
  500. char_class const class_ = octet_classes[cu];
  501. state = transitions[state + class_];
  502. if (state == err)
  503. return replacement_character;
  504. ++first;
  505. } else {
  506. return replacement_character;
  507. }
  508. }
  509. return retval;
  510. }
  /** CRTP base for the transcoding output/insert iterators.  It stores the
      wrapped iterator and supplies the output-iterator boilerplate:
      dereference and both increments do nothing except hand back the
      derived object, so that `*it++ = x` ends up in `Derived::operator=`. */
  template<typename Derived, typename Iter>
  struct trans_ins_iter
  {
      using iterator_category = std::output_iterator_tag;
      using value_type = void;
      using pointer = void;
      using reference = void;
  #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
      using difference_type = std::ptrdiff_t;
  #else
      using difference_type = void;
  #endif

      constexpr trans_ins_iter() {}
      constexpr trans_ins_iter(Iter it) : wrapped_(it) {}

      constexpr Derived & operator*() { return self(); }
      constexpr Derived & operator++() { return self(); }
      constexpr Derived operator++(int) { return self(); }

      /** Returns a copy of the wrapped iterator. */
      constexpr Iter base() const { return wrapped_; }

  protected:
      /** Mutable access to the wrapped iterator, for derived classes. */
      constexpr Iter & iter() { return wrapped_; }

  private:
      constexpr Derived & self() { return static_cast<Derived &>(*this); }

      Iter wrapped_;
  };
  539. template<typename Derived, typename I, typename ValueType>
  540. using trans_iter = stl_interfaces::iterator_interface<
  541. Derived,
  542. bidirectional_at_most_t<I>,
  543. ValueType,
  544. ValueType>;
  545. }
  546. }}
  547. namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V1 {
  #if defined(BOOST_TEXT_DOXYGEN)

  // Documentation-only declarations of the range-based overloads; the
  // implementations appear further down in this file.

  /** Returns the first code unit in `[r.begin(), r.end())` that is not
      properly UTF-8 encoded, or `r.begin() + std::ranges::distance(r)` if
      no such code unit is found. */
  template<utf8_range R>
      requires std::ranges::forward_range<R>
  constexpr std::ranges::borrowed_iterator_t<R> find_invalid_encoding(R && r);

  /** Returns the first code unit in `[r.begin(), r.end())` that is not
      properly UTF-16 encoded, or `r.begin() + std::ranges::distance(r)` if
      no such code unit is found. */
  template<utf16_range R>
      requires std::ranges::forward_range<R>
  constexpr std::ranges::borrowed_iterator_t<R> find_invalid_encoding(R && r);

  /** Returns true iff `r` is properly UTF-8 encoded. */
  template<utf8_range R>
      requires std::ranges::forward_range<R>
  constexpr bool encoded(R && r);

  /** Returns true iff `r` is properly UTF-16 encoded. */
  template<utf16_range R>
      requires std::ranges::forward_range<R>
  constexpr bool encoded(R && r);

  /** Returns true iff `r` is empty or the initial UTF-8 code units in `r`
      form a valid Unicode code point. */
  template<utf8_range R>
      requires std::ranges::forward_range<R>
  constexpr bool starts_encoded(R && r);

  /** Returns true iff `r` is empty or the initial UTF-16 code units in `r`
      form a valid Unicode code point. */
  template<utf16_range R>
      requires std::ranges::forward_range<R>
  constexpr bool starts_encoded(R && r);

  /** Returns true iff `r` is empty or the final UTF-8 code units in `r`
      form a valid Unicode code point. */
  template<utf8_range R>
      requires std::ranges::bidirectional_range<R> &&
               std::ranges::common_range<R>
  constexpr bool ends_encoded(R && r);

  /** Returns true iff `r` is empty or the final UTF-16 code units in `r`
      form a valid Unicode code point. */
  template<utf16_range R>
      requires std::ranges::bidirectional_range<R> &&
               std::ranges::common_range<R>
  constexpr bool ends_encoded(R && r);

  #endif
  592. template<typename Iter>
  593. constexpr detail::enable_utf8_cp_t<Iter>
  594. find_invalid_encoding(Iter first, Iter last)
  595. {
  596. while (first != last) {
  597. int const cp_bytes = boost::parser::detail::text::utf8_code_units(*first);
  598. if (cp_bytes == -1 || last - first < cp_bytes)
  599. return first;
  600. if (detail::end_of_invalid_utf8(first))
  601. return first;
  602. first += cp_bytes;
  603. }
  604. return last;
  605. }
  606. template<typename Iter>
  607. constexpr detail::enable_utf16_cp_t<Iter>
  608. find_invalid_encoding(Iter first, Iter last)
  609. {
  610. while (first != last) {
  611. int const cp_units = boost::parser::detail::text::utf16_code_units(*first);
  612. if (cp_units == -1 || last - first < cp_units)
  613. return first;
  614. if (cp_units == 2 && !boost::parser::detail::text::low_surrogate(*(first + 1)))
  615. return first;
  616. first += cp_units;
  617. }
  618. return last;
  619. }
  620. template<typename Iter>
  621. constexpr detail::enable_utf8_cp_t<Iter, bool> encoded(
  622. Iter first, Iter last)
  623. {
  624. return v1::find_invalid_encoding(first, last) == last;
  625. }
  626. template<typename Iter>
  627. constexpr detail::enable_utf16_cp_t<Iter, bool> encoded(
  628. Iter first, Iter last)
  629. {
  630. return v1::find_invalid_encoding(first, last) == last;
  631. }
  632. template<typename Iter>
  633. constexpr detail::enable_utf8_cp_t<Iter, bool>
  634. starts_encoded(Iter first, Iter last)
  635. {
  636. if (first == last)
  637. return true;
  638. int const cp_bytes = boost::parser::detail::text::utf8_code_units(*first);
  639. if (cp_bytes == -1 || last - first < cp_bytes)
  640. return false;
  641. return !detail::end_of_invalid_utf8(first);
  642. }
  643. template<typename Iter>
  644. constexpr detail::enable_utf16_cp_t<Iter, bool>
  645. starts_encoded(Iter first, Iter last)
  646. {
  647. if (first == last)
  648. return true;
  649. int const cp_units = boost::parser::detail::text::utf16_code_units(*first);
  650. if (cp_units == -1 || last - first < cp_units)
  651. return false;
  652. return cp_units == 1 || boost::parser::detail::text::low_surrogate(*(first + 1));
  653. }
  654. template<typename Iter>
  655. constexpr detail::enable_utf8_cp_t<Iter, bool>
  656. ends_encoded(Iter first, Iter last)
  657. {
  658. if (first == last)
  659. return true;
  660. auto it = last;
  661. while (first != --it && boost::parser::detail::text::continuation(*it))
  662. ;
  663. return v1::starts_encoded(it, last);
  664. }
  665. template<typename Iter>
  666. constexpr detail::enable_utf16_cp_t<Iter, bool>
  667. ends_encoded(Iter first, Iter last)
  668. {
  669. if (first == last)
  670. return true;
  671. auto it = last;
  672. if (boost::parser::detail::text::low_surrogate(*--it))
  673. --it;
  674. return v1::starts_encoded(it, last);
  675. }
  676. }}}
  677. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  678. namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 {
  679. template<utf8_range R>
  680. requires std::ranges::forward_range<R>
  681. constexpr std::ranges::borrowed_iterator_t<R> find_invalid_encoding(R && r)
  682. {
  683. auto first = std::ranges::begin(r);
  684. auto last = std::ranges::end(r);
  685. while (first != last) {
  686. int const cp_bytes = boost::parser::detail::text::utf8_code_units(*first);
  687. if (cp_bytes == -1 || last - first < cp_bytes)
  688. return first;
  689. if (detail::end_of_invalid_utf8(first))
  690. return first;
  691. first += cp_bytes;
  692. }
  693. if constexpr (std::ranges::borrowed_range<R>) {
  694. return last;
  695. } else {
  696. return std::ranges::dangling{};
  697. }
  698. }
  699. template<utf16_range R>
  700. requires std::ranges::forward_range<R>
  701. constexpr std::ranges::borrowed_iterator_t<R> find_invalid_encoding(R && r)
  702. {
  703. auto first = std::ranges::begin(r);
  704. auto last = std::ranges::end(r);
  705. while (first != last) {
  706. int const cp_units = boost::parser::detail::text::utf16_code_units(*first);
  707. if (cp_units == -1 || last - first < cp_units)
  708. return first;
  709. if (cp_units == 2 && !boost::parser::detail::text::low_surrogate(*(first + 1)))
  710. return first;
  711. first += cp_units;
  712. }
  713. if constexpr (std::ranges::borrowed_range<R>) {
  714. return last;
  715. } else {
  716. return std::ranges::dangling{};
  717. }
  718. }
  719. template<utf8_range R>
  720. requires std::ranges::forward_range<R>
  721. constexpr bool encoded(R && r)
  722. {
  723. return boost::parser::detail::text::v1::find_invalid_encoding(r.begin(), r.end()) ==
  724. r.end();
  725. }
  726. template<utf16_range R>
  727. requires std::ranges::forward_range<R>
  728. constexpr bool encoded(R && r)
  729. {
  730. return boost::parser::detail::text::v1::find_invalid_encoding(r.begin(), r.end()) ==
  731. r.end();
  732. }
  733. template<utf8_range R>
  734. requires std::ranges::forward_range<R>
  735. constexpr bool starts_encoded(R && r)
  736. {
  737. auto first = std::ranges::begin(r);
  738. auto last = std::ranges::end(r);
  739. if (first == last)
  740. return true;
  741. int const cp_bytes = boost::parser::detail::text::utf8_code_units(*first);
  742. if (cp_bytes == -1 || last - first < cp_bytes)
  743. return false;
  744. return !detail::end_of_invalid_utf8(first);
  745. }
  746. template<utf16_range R>
  747. requires std::ranges::forward_range<R>
  748. constexpr bool starts_encoded(R && r)
  749. {
  750. auto first = std::ranges::begin(r);
  751. auto last = std::ranges::end(r);
  752. if (first == last)
  753. return true;
  754. int const cp_units = boost::parser::detail::text::utf16_code_units(*first);
  755. if (cp_units == -1 || last - first < cp_units)
  756. return false;
  757. return cp_units == 1 || boost::parser::detail::text::low_surrogate(*(first + 1));
  758. }
  759. template<utf8_range R>
  760. requires std::ranges::bidirectional_range<R> &&
  761. std::ranges::common_range<R>
  762. constexpr bool ends_encoded(R && r)
  763. {
  764. auto first = std::ranges::begin(r);
  765. auto last = std::ranges::end(r);
  766. if (first == last)
  767. return true;
  768. auto it = last;
  769. while (first != --it && boost::parser::detail::text::continuation(*it))
  770. ;
  771. return boost::parser::detail::text::starts_encoded(it, last);
  772. }
  773. template<utf16_range R>
  774. requires std::ranges::bidirectional_range<R> &&
  775. std::ranges::common_range<R>
  776. constexpr bool ends_encoded(R && r)
  777. {
  778. auto first = std::ranges::begin(r);
  779. auto last = std::ranges::end(r);
  780. if (first == last)
  781. return true;
  782. auto it = last;
  783. if (boost::parser::detail::text::low_surrogate(*--it))
  784. --it;
  785. return boost::parser::detail::text::starts_encoded(it, last);
  786. }
  787. }}}
  788. #endif
  789. namespace boost::parser::detail { namespace text {
  790. /** An error handler type that can be used with the converting iterators;
  791. provides the Unicode replacement character on errors. */
  792. struct use_replacement_character
  793. {
  794. constexpr char32_t operator()(std::string_view) const noexcept
  795. {
  796. return replacement_character;
  797. }
  798. };
  799. /** A sentinel type that compares equal to a pointer to a 1-, 2-, or
  800. 4-byte integral value, iff the pointer is null. */
  801. struct null_sentinel_t
  802. {
  803. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  804. template<std::input_iterator I>
  805. requires std::default_initializable<std::iter_value_t<I>> &&
  806. std::equality_comparable_with<std::iter_reference_t<I>, std::iter_value_t<I>>
  807. #else
  808. template<typename I>
  809. #endif
  810. friend constexpr bool operator==(I it, null_sentinel_t)
  811. {
  812. return *it == detail::iter_value_t<I>{};
  813. }
  814. #if !defined(__cpp_impl_three_way_comparison)
  815. template<typename I>
  816. friend constexpr bool operator==(null_sentinel_t, I it)
  817. {
  818. return *it == detail::iter_value_t<I>{};
  819. }
  820. template<typename I>
  821. friend constexpr bool operator!=(I it, null_sentinel_t)
  822. {
  823. return *it != detail::iter_value_t<I>{};
  824. }
  825. template<typename I>
  826. friend constexpr bool operator!=(null_sentinel_t, I it)
  827. {
  828. return *it != detail::iter_value_t<I>{};
  829. }
  830. #endif
  831. };
  832. #if defined(__cpp_inline_variables)
  833. inline constexpr null_sentinel_t null_sentinel;
  834. #else
  835. namespace {
  836. constexpr null_sentinel_t null_sentinel;
  837. }
  838. #endif
  839. /** An out iterator that converts UTF-32 to UTF-8. */
  840. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  841. template<std::output_iterator<char8_t> Iter>
  842. #else
  843. template<typename Iter>
  844. #endif
  845. struct utf_32_to_8_out_iterator
  846. : detail::trans_ins_iter<utf_32_to_8_out_iterator<Iter>, Iter>
  847. {
  848. constexpr utf_32_to_8_out_iterator() {}
  849. explicit constexpr utf_32_to_8_out_iterator(Iter it) :
  850. detail::trans_ins_iter<utf_32_to_8_out_iterator<Iter>, Iter>(it)
  851. {}
  852. constexpr utf_32_to_8_out_iterator & operator=(char32_t cp)
  853. {
  854. auto & out = this->iter();
  855. out = detail::write_cp_utf8(cp, out);
  856. return *this;
  857. }
  858. };
  859. /** An insert-iterator analogous to std::insert_iterator, that also
  860. converts UTF-32 to UTF-8. */
  861. template<typename Cont>
  862. struct utf_32_to_8_insert_iterator : detail::trans_ins_iter<
  863. utf_32_to_8_insert_iterator<Cont>,
  864. std::insert_iterator<Cont>>
  865. {
  866. constexpr utf_32_to_8_insert_iterator() {}
  867. constexpr utf_32_to_8_insert_iterator(
  868. Cont & c, typename Cont::iterator it) :
  869. detail::trans_ins_iter<
  870. utf_32_to_8_insert_iterator<Cont>,
  871. std::insert_iterator<Cont>>(std::insert_iterator<Cont>(c, it))
  872. {}
  873. constexpr utf_32_to_8_insert_iterator & operator=(char32_t cp)
  874. {
  875. auto & out = this->iter();
  876. out = detail::write_cp_utf8(cp, out);
  877. return *this;
  878. }
  879. };
  880. /** An insert-iterator analogous to std::front_insert_iterator, that also
  881. converts UTF-32 to UTF-8. */
  882. template<typename Cont>
  883. struct utf_32_to_8_front_insert_iterator
  884. : detail::trans_ins_iter<
  885. utf_32_to_8_front_insert_iterator<Cont>,
  886. std::front_insert_iterator<Cont>>
  887. {
  888. constexpr utf_32_to_8_front_insert_iterator() {}
  889. explicit constexpr utf_32_to_8_front_insert_iterator(Cont & c) :
  890. detail::trans_ins_iter<
  891. utf_32_to_8_front_insert_iterator<Cont>,
  892. std::front_insert_iterator<Cont>>(
  893. std::front_insert_iterator<Cont>(c))
  894. {}
  895. constexpr utf_32_to_8_front_insert_iterator & operator=(char32_t cp)
  896. {
  897. auto & out = this->iter();
  898. out = detail::write_cp_utf8(cp, out);
  899. return *this;
  900. }
  901. };
  902. /** An insert-iterator analogous to std::back_insert_iterator, that also
  903. converts UTF-32 to UTF-8. */
  904. template<typename Cont>
  905. struct utf_32_to_8_back_insert_iterator
  906. : detail::trans_ins_iter<
  907. utf_32_to_8_back_insert_iterator<Cont>,
  908. std::back_insert_iterator<Cont>>
  909. {
  910. constexpr utf_32_to_8_back_insert_iterator() {}
  911. explicit constexpr utf_32_to_8_back_insert_iterator(Cont & c) :
  912. detail::trans_ins_iter<
  913. utf_32_to_8_back_insert_iterator<Cont>,
  914. std::back_insert_iterator<Cont>>(
  915. std::back_insert_iterator<Cont>(c))
  916. {}
  917. constexpr utf_32_to_8_back_insert_iterator & operator=(char32_t cp)
  918. {
  919. auto & out = this->iter();
  920. out = detail::write_cp_utf8(cp, out);
  921. return *this;
  922. }
  923. };
  924. namespace detail {
  925. template<typename OutIter>
  926. OutIter assign_8_to_32_insert(
  927. unsigned char cu, char32_t & cp, int & state, OutIter out)
  928. {
  929. auto write = [&] {
  930. *out = cp;
  931. ++out;
  932. state = invalid_table_state;
  933. };
  934. auto start_cp = [&] {
  935. first_cu const info = first_cus[cu];
  936. state = info.next;
  937. cp = info.initial_octet;
  938. if (state == bgn)
  939. write();
  940. };
  941. if (state == invalid_table_state) {
  942. start_cp();
  943. } else {
  944. cp = (cp << 6) | (cu & 0x3f);
  945. char_class const class_ = octet_classes[cu];
  946. state = transitions[state + class_];
  947. if (state == bgn) {
  948. write();
  949. } else if (state == err) {
  950. *out = replacement_character;
  951. ++out;
  952. start_cp();
  953. }
  954. }
  955. return out;
  956. }
  957. }
  958. /** An out iterator that converts UTF-8 to UTF-32. */
  959. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  960. template<std::output_iterator<char32_t> Iter>
  961. #else
  962. template<typename Iter>
  963. #endif
  964. struct utf_8_to_32_out_iterator
  965. : detail::trans_ins_iter<utf_8_to_32_out_iterator<Iter>, Iter>
  966. {
  967. constexpr utf_8_to_32_out_iterator() {}
  968. explicit constexpr utf_8_to_32_out_iterator(Iter it) :
  969. detail::trans_ins_iter<utf_8_to_32_out_iterator<Iter>, Iter>(it),
  970. state_(detail::invalid_table_state)
  971. {}
  972. constexpr utf_8_to_32_out_iterator & operator=(char8_type cu)
  973. {
  974. auto & out = this->iter();
  975. out = detail::assign_8_to_32_insert(cu, cp_, state_, out);
  976. return *this;
  977. }
  978. #ifndef BOOST_TEXT_DOXYGEN
  979. private:
  980. int state_;
  981. char32_t cp_;
  982. #endif
  983. };
  984. /** An insert-iterator analogous to std::insert_iterator, that also
  985. converts UTF-8 to UTF-32. */
  986. template<typename Cont>
  987. struct utf_8_to_32_insert_iterator : detail::trans_ins_iter<
  988. utf_8_to_32_insert_iterator<Cont>,
  989. std::insert_iterator<Cont>>
  990. {
  991. constexpr utf_8_to_32_insert_iterator() {}
  992. constexpr utf_8_to_32_insert_iterator(
  993. Cont & c, typename Cont::iterator it) :
  994. detail::trans_ins_iter<
  995. utf_8_to_32_insert_iterator<Cont>,
  996. std::insert_iterator<Cont>>(std::insert_iterator<Cont>(c, it)),
  997. state_(detail::invalid_table_state)
  998. {}
  999. constexpr utf_8_to_32_insert_iterator & operator=(char16_t cu)
  1000. {
  1001. auto & out = this->iter();
  1002. out = detail::assign_8_to_32_insert(cu, cp_, state_, out);
  1003. return *this;
  1004. }
  1005. #ifndef BOOST_TEXT_DOXYGEN
  1006. private:
  1007. int state_;
  1008. char32_t cp_;
  1009. #endif
  1010. };
  1011. /** An insert-iterator analogous to std::front_insert_iterator, that also
  1012. converts UTF-8 to UTF-32. */
  1013. template<typename Cont>
  1014. struct utf_8_to_32_front_insert_iterator
  1015. : detail::trans_ins_iter<
  1016. utf_8_to_32_front_insert_iterator<Cont>,
  1017. std::front_insert_iterator<Cont>>
  1018. {
  1019. constexpr utf_8_to_32_front_insert_iterator() {}
  1020. explicit constexpr utf_8_to_32_front_insert_iterator(Cont & c) :
  1021. detail::trans_ins_iter<
  1022. utf_8_to_32_front_insert_iterator<Cont>,
  1023. std::front_insert_iterator<Cont>>(
  1024. std::front_insert_iterator<Cont>(c)),
  1025. state_(detail::invalid_table_state)
  1026. {}
  1027. constexpr utf_8_to_32_front_insert_iterator & operator=(char16_t cu)
  1028. {
  1029. auto & out = this->iter();
  1030. out = detail::assign_8_to_32_insert(cu, cp_, state_, out);
  1031. return *this;
  1032. }
  1033. #ifndef BOOST_TEXT_DOXYGEN
  1034. private:
  1035. int state_;
  1036. char32_t cp_;
  1037. #endif
  1038. };
  1039. /** An insert-iterator analogous to std::back_insert_iterator, that also
  1040. converts UTF-8 to UTF-32. */
  1041. template<typename Cont>
  1042. struct utf_8_to_32_back_insert_iterator
  1043. : detail::trans_ins_iter<
  1044. utf_8_to_32_back_insert_iterator<Cont>,
  1045. std::back_insert_iterator<Cont>>
  1046. {
  1047. constexpr utf_8_to_32_back_insert_iterator() {}
  1048. explicit constexpr utf_8_to_32_back_insert_iterator(Cont & c) :
  1049. detail::trans_ins_iter<
  1050. utf_8_to_32_back_insert_iterator<Cont>,
  1051. std::back_insert_iterator<Cont>>(
  1052. std::back_insert_iterator<Cont>(c)),
  1053. state_(detail::invalid_table_state)
  1054. {}
  1055. constexpr utf_8_to_32_back_insert_iterator & operator=(char16_t cu)
  1056. {
  1057. auto & out = this->iter();
  1058. out = detail::assign_8_to_32_insert(cu, cp_, state_, out);
  1059. return *this;
  1060. }
  1061. #ifndef BOOST_TEXT_DOXYGEN
  1062. private:
  1063. int state_;
  1064. char32_t cp_;
  1065. #endif
  1066. };
  1067. /** An out iterator that converts UTF-8 to UTF-16. */
  1068. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  1069. template<std::output_iterator<char16_t> Iter>
  1070. #else
  1071. template<typename Iter>
  1072. #endif
  1073. struct utf_32_to_16_out_iterator
  1074. : detail::trans_ins_iter<utf_32_to_16_out_iterator<Iter>, Iter>
  1075. {
  1076. constexpr utf_32_to_16_out_iterator() {}
  1077. explicit constexpr utf_32_to_16_out_iterator(Iter it) :
  1078. detail::trans_ins_iter<utf_32_to_16_out_iterator<Iter>, Iter>(it)
  1079. {}
  1080. constexpr utf_32_to_16_out_iterator & operator=(char32_t cp)
  1081. {
  1082. auto & out = this->iter();
  1083. out = detail::write_cp_utf16(cp, out);
  1084. return *this;
  1085. }
  1086. };
  1087. /** An insert-iterator analogous to std::insert_iterator, that also
  1088. converts UTF-32 to UTF-16. */
  1089. template<typename Cont>
  1090. struct utf_32_to_16_insert_iterator
  1091. : detail::trans_ins_iter<
  1092. utf_32_to_16_insert_iterator<Cont>,
  1093. std::insert_iterator<Cont>>
  1094. {
  1095. constexpr utf_32_to_16_insert_iterator() {}
  1096. constexpr utf_32_to_16_insert_iterator(
  1097. Cont & c, typename Cont::iterator it) :
  1098. detail::trans_ins_iter<
  1099. utf_32_to_16_insert_iterator<Cont>,
  1100. std::insert_iterator<Cont>>(std::insert_iterator<Cont>(c, it))
  1101. {}
  1102. constexpr utf_32_to_16_insert_iterator & operator=(char32_t cp)
  1103. {
  1104. auto & out = this->iter();
  1105. out = detail::write_cp_utf16(cp, out);
  1106. return *this;
  1107. }
  1108. };
  1109. /** An insert-iterator analogous to std::front_insert_iterator, that also
  1110. converts UTF-32 to UTF-16. */
  1111. template<typename Cont>
  1112. struct utf_32_to_16_front_insert_iterator
  1113. : detail::trans_ins_iter<
  1114. utf_32_to_16_front_insert_iterator<Cont>,
  1115. std::front_insert_iterator<Cont>>
  1116. {
  1117. constexpr utf_32_to_16_front_insert_iterator() {}
  1118. explicit constexpr utf_32_to_16_front_insert_iterator(Cont & c) :
  1119. detail::trans_ins_iter<
  1120. utf_32_to_16_front_insert_iterator<Cont>,
  1121. std::front_insert_iterator<Cont>>(
  1122. std::front_insert_iterator<Cont>(c))
  1123. {}
  1124. constexpr utf_32_to_16_front_insert_iterator & operator=(char32_t cp)
  1125. {
  1126. auto & out = this->iter();
  1127. out = detail::write_cp_utf16(cp, out);
  1128. return *this;
  1129. }
  1130. };
  1131. /** An insert-iterator analogous to std::back_insert_iterator, that also
  1132. converts UTF-32 to UTF-16. */
  1133. template<typename Cont>
  1134. struct utf_32_to_16_back_insert_iterator
  1135. : detail::trans_ins_iter<
  1136. utf_32_to_16_back_insert_iterator<Cont>,
  1137. std::back_insert_iterator<Cont>>
  1138. {
  1139. constexpr utf_32_to_16_back_insert_iterator() {}
  1140. explicit constexpr utf_32_to_16_back_insert_iterator(Cont & c) :
  1141. detail::trans_ins_iter<
  1142. utf_32_to_16_back_insert_iterator<Cont>,
  1143. std::back_insert_iterator<Cont>>(
  1144. std::back_insert_iterator<Cont>(c))
  1145. {}
  1146. constexpr utf_32_to_16_back_insert_iterator & operator=(char32_t cp)
  1147. {
  1148. auto & out = this->iter();
  1149. out = detail::write_cp_utf16(cp, out);
  1150. return *this;
  1151. }
  1152. };
  1153. namespace detail {
  1154. template<typename OutIter>
  1155. OutIter
  1156. assign_16_to_32_insert(char16_t & prev_cu, char16_t cu, OutIter out)
  1157. {
  1158. if (high_surrogate(cu)) {
  1159. if (prev_cu) {
  1160. *out = replacement_character;
  1161. ++out;
  1162. }
  1163. prev_cu = cu;
  1164. } else if (low_surrogate(cu)) {
  1165. if (prev_cu) {
  1166. *out = detail::surrogates_to_cp(prev_cu, cu);
  1167. ++out;
  1168. } else {
  1169. *out = replacement_character;
  1170. ++out;
  1171. }
  1172. prev_cu = 0;
  1173. } else {
  1174. if (prev_cu) {
  1175. *out = replacement_character;
  1176. ++out;
  1177. }
  1178. *out = cu;
  1179. ++out;
  1180. prev_cu = 0;
  1181. }
  1182. return out;
  1183. }
  1184. }
  1185. /** An out iterator that converts UTF-16 to UTF-32. */
  1186. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  1187. template<std::output_iterator<char32_t> Iter>
  1188. #else
  1189. template<typename Iter>
  1190. #endif
  1191. struct utf_16_to_32_out_iterator
  1192. : detail::trans_ins_iter<utf_16_to_32_out_iterator<Iter>, Iter>
  1193. {
  1194. constexpr utf_16_to_32_out_iterator() {}
  1195. explicit constexpr utf_16_to_32_out_iterator(Iter it) :
  1196. detail::trans_ins_iter<utf_16_to_32_out_iterator<Iter>, Iter>(it),
  1197. prev_cu_(0)
  1198. {}
  1199. constexpr utf_16_to_32_out_iterator & operator=(char16_t cu)
  1200. {
  1201. auto & out = this->iter();
  1202. out = detail::assign_16_to_32_insert(prev_cu_, cu, out);
  1203. return *this;
  1204. }
  1205. #ifndef BOOST_TEXT_DOXYGEN
  1206. private:
  1207. char16_t prev_cu_;
  1208. #endif
  1209. };
  1210. /** An insert-iterator analogous to std::insert_iterator, that also
  1211. converts UTF-16 to UTF-32. */
  1212. template<typename Cont>
  1213. struct utf_16_to_32_insert_iterator
  1214. : detail::trans_ins_iter<
  1215. utf_16_to_32_insert_iterator<Cont>,
  1216. std::insert_iterator<Cont>>
  1217. {
  1218. constexpr utf_16_to_32_insert_iterator() {}
  1219. constexpr utf_16_to_32_insert_iterator(
  1220. Cont & c, typename Cont::iterator it) :
  1221. detail::trans_ins_iter<
  1222. utf_16_to_32_insert_iterator<Cont>,
  1223. std::insert_iterator<Cont>>(std::insert_iterator<Cont>(c, it)),
  1224. prev_cu_(0)
  1225. {}
  1226. constexpr utf_16_to_32_insert_iterator & operator=(char16_t cu)
  1227. {
  1228. auto & out = this->iter();
  1229. out = detail::assign_16_to_32_insert(prev_cu_, cu, out);
  1230. return *this;
  1231. }
  1232. #ifndef BOOST_TEXT_DOXYGEN
  1233. private:
  1234. char16_t prev_cu_;
  1235. #endif
  1236. };
  1237. /** An insert-iterator analogous to std::front_insert_iterator, that also
  1238. converts UTF-16 to UTF-32. */
  1239. template<typename Cont>
  1240. struct utf_16_to_32_front_insert_iterator
  1241. : detail::trans_ins_iter<
  1242. utf_16_to_32_front_insert_iterator<Cont>,
  1243. std::front_insert_iterator<Cont>>
  1244. {
  1245. constexpr utf_16_to_32_front_insert_iterator() {}
  1246. explicit constexpr utf_16_to_32_front_insert_iterator(Cont & c) :
  1247. detail::trans_ins_iter<
  1248. utf_16_to_32_front_insert_iterator<Cont>,
  1249. std::front_insert_iterator<Cont>>(
  1250. std::front_insert_iterator<Cont>(c)),
  1251. prev_cu_(0)
  1252. {}
  1253. constexpr utf_16_to_32_front_insert_iterator & operator=(char16_t cu)
  1254. {
  1255. auto & out = this->iter();
  1256. out = detail::assign_16_to_32_insert(prev_cu_, cu, out);
  1257. return *this;
  1258. }
  1259. #ifndef BOOST_TEXT_DOXYGEN
  1260. private:
  1261. char16_t prev_cu_;
  1262. #endif
  1263. };
  1264. /** An insert-iterator analogous to std::back_insert_iterator, that also
  1265. converts UTF-16 to UTF-32. */
  1266. template<typename Cont>
  1267. struct utf_16_to_32_back_insert_iterator
  1268. : detail::trans_ins_iter<
  1269. utf_16_to_32_back_insert_iterator<Cont>,
  1270. std::back_insert_iterator<Cont>>
  1271. {
  1272. constexpr utf_16_to_32_back_insert_iterator() {}
  1273. explicit constexpr utf_16_to_32_back_insert_iterator(Cont & c) :
  1274. detail::trans_ins_iter<
  1275. utf_16_to_32_back_insert_iterator<Cont>,
  1276. std::back_insert_iterator<Cont>>(
  1277. std::back_insert_iterator<Cont>(c)),
  1278. prev_cu_(0)
  1279. {}
  1280. constexpr utf_16_to_32_back_insert_iterator & operator=(char16_t cu)
  1281. {
  1282. auto & out = this->iter();
  1283. out = detail::assign_16_to_32_insert(prev_cu_, cu, out);
  1284. return *this;
  1285. }
  1286. #ifndef BOOST_TEXT_DOXYGEN
  1287. private:
  1288. char16_t prev_cu_;
  1289. #endif
  1290. };
  1291. namespace detail {
  1292. template<typename OutIter>
  1293. OutIter
  1294. assign_16_to_8_insert(char16_t & prev_cu, char16_t cu, OutIter out)
  1295. {
  1296. if (high_surrogate(cu)) {
  1297. if (prev_cu)
  1298. out = detail::write_cp_utf8(replacement_character, out);
  1299. prev_cu = cu;
  1300. } else if (low_surrogate(cu)) {
  1301. if (prev_cu) {
  1302. auto const cp = detail::surrogates_to_cp(prev_cu, cu);
  1303. out = detail::write_cp_utf8(cp, out);
  1304. } else {
  1305. out = detail::write_cp_utf8(replacement_character, out);
  1306. }
  1307. prev_cu = 0;
  1308. } else {
  1309. if (prev_cu)
  1310. out = detail::write_cp_utf8(replacement_character, out);
  1311. out = detail::write_cp_utf8(cu, out);
  1312. prev_cu = 0;
  1313. }
  1314. return out;
  1315. }
  1316. }
  1317. /** An out iterator that converts UTF-16 to UTF-8. */
  1318. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  1319. template<std::output_iterator<char8_t> Iter>
  1320. #else
  1321. template<typename Iter>
  1322. #endif
  1323. struct utf_16_to_8_out_iterator
  1324. : detail::trans_ins_iter<utf_16_to_8_out_iterator<Iter>, Iter>
  1325. {
  1326. constexpr utf_16_to_8_out_iterator() {}
  1327. explicit constexpr utf_16_to_8_out_iterator(Iter it) :
  1328. detail::trans_ins_iter<utf_16_to_8_out_iterator<Iter>, Iter>(it),
  1329. prev_cu_(0)
  1330. {}
  1331. constexpr utf_16_to_8_out_iterator & operator=(char16_t cu)
  1332. {
  1333. auto & out = this->iter();
  1334. out = detail::assign_16_to_8_insert(prev_cu_, cu, out);
  1335. return *this;
  1336. }
  1337. #ifndef BOOST_TEXT_DOXYGEN
  1338. private:
  1339. char16_t prev_cu_;
  1340. #endif
  1341. };
  1342. /** An insert-iterator analogous to std::insert_iterator, that also
  1343. converts UTF-16 to UTF-8. */
  1344. template<typename Cont>
  1345. struct utf_16_to_8_insert_iterator : detail::trans_ins_iter<
  1346. utf_16_to_8_insert_iterator<Cont>,
  1347. std::insert_iterator<Cont>>
  1348. {
  1349. constexpr utf_16_to_8_insert_iterator() {}
  1350. constexpr utf_16_to_8_insert_iterator(
  1351. Cont & c, typename Cont::iterator it) :
  1352. detail::trans_ins_iter<
  1353. utf_16_to_8_insert_iterator<Cont>,
  1354. std::insert_iterator<Cont>>(std::insert_iterator<Cont>(c, it)),
  1355. prev_cu_(0)
  1356. {}
  1357. constexpr utf_16_to_8_insert_iterator & operator=(char16_t cu)
  1358. {
  1359. auto & out = this->iter();
  1360. out = detail::assign_16_to_8_insert(prev_cu_, cu, out);
  1361. return *this;
  1362. }
  1363. #ifndef BOOST_TEXT_DOXYGEN
  1364. private:
  1365. char16_t prev_cu_;
  1366. #endif
  1367. };
  1368. /** An insert-iterator analogous to std::front_insert_iterator, that also
  1369. converts UTF-16 to UTF-8. */
  1370. template<typename Cont>
  1371. struct utf_16_to_8_front_insert_iterator
  1372. : detail::trans_ins_iter<
  1373. utf_16_to_8_front_insert_iterator<Cont>,
  1374. std::front_insert_iterator<Cont>>
  1375. {
  1376. constexpr utf_16_to_8_front_insert_iterator() {}
  1377. explicit constexpr utf_16_to_8_front_insert_iterator(Cont & c) :
  1378. detail::trans_ins_iter<
  1379. utf_16_to_8_front_insert_iterator<Cont>,
  1380. std::front_insert_iterator<Cont>>(
  1381. std::front_insert_iterator<Cont>(c)),
  1382. prev_cu_(0)
  1383. {}
  1384. constexpr utf_16_to_8_front_insert_iterator & operator=(char16_t cu)
  1385. {
  1386. auto & out = this->iter();
  1387. out = detail::assign_16_to_8_insert(prev_cu_, cu, out);
  1388. return *this;
  1389. }
  1390. #ifndef BOOST_TEXT_DOXYGEN
  1391. private:
  1392. char16_t prev_cu_;
  1393. #endif
  1394. };
  1395. /** An insert-iterator analogous to std::back_insert_iterator, that also
  1396. converts UTF-16 to UTF-8. */
  1397. template<typename Cont>
  1398. struct utf_16_to_8_back_insert_iterator
  1399. : detail::trans_ins_iter<
  1400. utf_16_to_8_back_insert_iterator<Cont>,
  1401. std::back_insert_iterator<Cont>>
  1402. {
  1403. constexpr utf_16_to_8_back_insert_iterator() {}
  1404. explicit constexpr utf_16_to_8_back_insert_iterator(Cont & c) :
  1405. detail::trans_ins_iter<
  1406. utf_16_to_8_back_insert_iterator<Cont>,
  1407. std::back_insert_iterator<Cont>>(
  1408. std::back_insert_iterator<Cont>(c)),
  1409. prev_cu_(0)
  1410. {}
  1411. constexpr utf_16_to_8_back_insert_iterator & operator=(char16_t cu)
  1412. {
  1413. auto & out = this->iter();
  1414. out = detail::assign_16_to_8_insert(prev_cu_, cu, out);
  1415. return *this;
  1416. }
  1417. #ifndef BOOST_TEXT_DOXYGEN
  1418. private:
  1419. char16_t prev_cu_;
  1420. #endif
  1421. };
  1422. namespace detail {
/** Consumes one UTF-8 code unit `cu` and, when the table-driven state
    machine reports a finished sequence, writes the accumulated code point
    to `out` as UTF-16 via `detail::write_cp_utf16`.

    `cp` (the code point being accumulated) and `state` (the current table
    state) are owned by the caller and are updated in place so that
    decoding can continue across calls.  Returns the possibly advanced
    output iterator. */
template<typename OutIter>
OutIter assign_8_to_16_insert(
    unsigned char cu, char32_t & cp, int & state, OutIter out)
{
    // Emits the accumulated code point and marks that no sequence is in
    // progress.
    auto write = [&] {
        out = detail::write_cp_utf16(cp, out);
        state = invalid_table_state;
    };
    // Treats `cu` as the first unit of a new sequence: the first_cus table
    // supplies both the next state and the initial payload bits.  When the
    // next state is already `bgn`, the code point is written immediately.
    auto start_cp = [&] {
        first_cu const info = first_cus[cu];
        state = info.next;
        cp = info.initial_octet;
        if (state == bgn)
            write();
    };
    if (state == invalid_table_state) {
        // No sequence in progress.
        start_cp();
    } else {
        // Fold the low six bits of `cu` into the code point, then advance
        // the state using the class of `cu`.
        cp = (cp << 6) | (cu & 0x3f);
        char_class const class_ = octet_classes[cu];
        state = transitions[state + class_];
        if (state == bgn) {
            write();
        } else if (state == err) {
            // Error state: emit replacement_character, then reinterpret
            // `cu` as the start of a new sequence (start_cp overwrites
            // the `cp` value computed above).
            out = detail::write_cp_utf16(replacement_character, out);
            start_cp();
        }
    }
    return out;
}
  1453. }
  1454. /** An out iterator that converts UTF-8 to UTF-16. */
  1455. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  1456. template<std::output_iterator<char16_t> Iter>
  1457. #else
  1458. template<typename Iter>
  1459. #endif
  1460. struct utf_8_to_16_out_iterator
  1461. : detail::trans_ins_iter<utf_8_to_16_out_iterator<Iter>, Iter>
  1462. {
  1463. constexpr utf_8_to_16_out_iterator() {}
  1464. explicit constexpr utf_8_to_16_out_iterator(Iter it) :
  1465. detail::trans_ins_iter<utf_8_to_16_out_iterator<Iter>, Iter>(it),
  1466. state_(detail::invalid_table_state)
  1467. {}
  1468. constexpr utf_8_to_16_out_iterator & operator=(char8_type cu)
  1469. {
  1470. auto & out = this->iter();
  1471. out = detail::assign_8_to_16_insert(cu, cp_, state_, out);
  1472. return *this;
  1473. }
  1474. #ifndef BOOST_TEXT_DOXYGEN
  1475. private:
  1476. int state_;
  1477. char32_t cp_;
  1478. #endif
  1479. };
  1480. /** An insert-iterator analogous to std::insert_iterator, that also
  1481. converts UTF-8 to UTF-16. */
  1482. template<typename Cont>
  1483. struct utf_8_to_16_insert_iterator : detail::trans_ins_iter<
  1484. utf_8_to_16_insert_iterator<Cont>,
  1485. std::insert_iterator<Cont>>
  1486. {
  1487. constexpr utf_8_to_16_insert_iterator() {}
  1488. constexpr utf_8_to_16_insert_iterator(
  1489. Cont & c, typename Cont::iterator it) :
  1490. detail::trans_ins_iter<
  1491. utf_8_to_16_insert_iterator<Cont>,
  1492. std::insert_iterator<Cont>>(std::insert_iterator<Cont>(c, it)),
  1493. state_(detail::invalid_table_state)
  1494. {}
  1495. constexpr utf_8_to_16_insert_iterator & operator=(char16_t cu)
  1496. {
  1497. auto & out = this->iter();
  1498. out = detail::assign_8_to_16_insert(cu, cp_, state_, out);
  1499. return *this;
  1500. }
  1501. #ifndef BOOST_TEXT_DOXYGEN
  1502. private:
  1503. int state_;
  1504. char32_t cp_;
  1505. #endif
  1506. };
  1507. /** An insert-iterator analogous to std::front_insert_iterator, that also
  1508. converts UTF-8 to UTF-16. */
  1509. template<typename Cont>
  1510. struct utf_8_to_16_front_insert_iterator
  1511. : detail::trans_ins_iter<
  1512. utf_8_to_16_front_insert_iterator<Cont>,
  1513. std::front_insert_iterator<Cont>>
  1514. {
  1515. constexpr utf_8_to_16_front_insert_iterator() {}
  1516. explicit constexpr utf_8_to_16_front_insert_iterator(Cont & c) :
  1517. detail::trans_ins_iter<
  1518. utf_8_to_16_front_insert_iterator<Cont>,
  1519. std::front_insert_iterator<Cont>>(
  1520. std::front_insert_iterator<Cont>(c)),
  1521. state_(detail::invalid_table_state)
  1522. {}
  1523. constexpr utf_8_to_16_front_insert_iterator & operator=(char16_t cu)
  1524. {
  1525. auto & out = this->iter();
  1526. out = detail::assign_8_to_16_insert(cu, cp_, state_, out);
  1527. return *this;
  1528. }
  1529. #ifndef BOOST_TEXT_DOXYGEN
  1530. private:
  1531. int state_;
  1532. char32_t cp_;
  1533. #endif
  1534. };
  1535. /** An insert-iterator analogous to std::back_insert_iterator, that also
  1536. converts UTF-8 to UTF-16. */
  1537. template<typename Cont>
  1538. struct utf_8_to_16_back_insert_iterator
  1539. : detail::trans_ins_iter<
  1540. utf_8_to_16_back_insert_iterator<Cont>,
  1541. std::back_insert_iterator<Cont>>
  1542. {
  1543. constexpr utf_8_to_16_back_insert_iterator() {}
  1544. explicit constexpr utf_8_to_16_back_insert_iterator(Cont & c) :
  1545. detail::trans_ins_iter<
  1546. utf_8_to_16_back_insert_iterator<Cont>,
  1547. std::back_insert_iterator<Cont>>(
  1548. std::back_insert_iterator<Cont>(c)),
  1549. state_(detail::invalid_table_state)
  1550. {}
  1551. constexpr utf_8_to_16_back_insert_iterator & operator=(char16_t cu)
  1552. {
  1553. auto & out = this->iter();
  1554. out = detail::assign_8_to_16_insert(cu, cp_, state_, out);
  1555. return *this;
  1556. }
  1557. #ifndef BOOST_TEXT_DOXYGEN
  1558. private:
  1559. int state_;
  1560. char32_t cp_;
  1561. #endif
  1562. };
  1563. }}
  1564. #include <boost/parser/detail/text/unpack.hpp>
  1565. namespace boost::parser::detail { namespace text { namespace detail {
  1566. template<format Tag>
  1567. struct make_utf8_dispatch;
  1568. template<>
  1569. struct make_utf8_dispatch<format::utf8>
  1570. {
  1571. template<typename Iter, typename Sentinel>
  1572. static constexpr Iter call(Iter first, Iter it, Sentinel last)
  1573. {
  1574. return it;
  1575. }
  1576. };
  1577. template<>
  1578. struct make_utf8_dispatch<format::utf16>
  1579. {
  1580. template<typename Iter, typename Sentinel>
  1581. static constexpr utf_iterator<
  1582. format::utf16,
  1583. format::utf8,
  1584. Iter,
  1585. Sentinel>
  1586. call(Iter first, Iter it, Sentinel last)
  1587. {
  1588. return {first, it, last};
  1589. }
  1590. };
  1591. template<>
  1592. struct make_utf8_dispatch<format::utf32>
  1593. {
  1594. template<typename Iter, typename Sentinel>
  1595. static constexpr utf_iterator<
  1596. format::utf32,
  1597. format::utf8,
  1598. Iter,
  1599. Sentinel>
  1600. call(Iter first, Iter it, Sentinel last)
  1601. {
  1602. return {first, it, last};
  1603. }
  1604. };
  1605. template<format Tag>
  1606. struct make_utf16_dispatch;
  1607. template<>
  1608. struct make_utf16_dispatch<format::utf8>
  1609. {
  1610. template<typename Iter, typename Sentinel>
  1611. static constexpr utf_iterator<
  1612. format::utf8,
  1613. format::utf16,
  1614. Iter,
  1615. Sentinel>
  1616. call(Iter first, Iter it, Sentinel last)
  1617. {
  1618. return {first, it, last};
  1619. }
  1620. };
  1621. template<>
  1622. struct make_utf16_dispatch<format::utf16>
  1623. {
  1624. template<typename Iter, typename Sentinel>
  1625. static constexpr Iter call(Iter first, Iter it, Sentinel last)
  1626. {
  1627. return it;
  1628. }
  1629. };
  1630. template<>
  1631. struct make_utf16_dispatch<format::utf32>
  1632. {
  1633. template<typename Iter, typename Sentinel>
  1634. static constexpr utf_iterator<
  1635. format::utf32,
  1636. format::utf16,
  1637. Iter,
  1638. Sentinel>
  1639. call(Iter first, Iter it, Sentinel last)
  1640. {
  1641. return {first, it, last};
  1642. }
  1643. };
  1644. template<format Tag>
  1645. struct make_utf32_dispatch;
  1646. template<>
  1647. struct make_utf32_dispatch<format::utf8>
  1648. {
  1649. template<typename Iter, typename Sentinel>
  1650. static constexpr utf_iterator<
  1651. format::utf8,
  1652. format::utf32,
  1653. Iter,
  1654. Sentinel>
  1655. call(Iter first, Iter it, Sentinel last)
  1656. {
  1657. return {first, it, last};
  1658. }
  1659. };
  1660. template<>
  1661. struct make_utf32_dispatch<format::utf16>
  1662. {
  1663. template<typename Iter, typename Sentinel>
  1664. static constexpr utf_iterator<
  1665. format::utf16,
  1666. format::utf32,
  1667. Iter,
  1668. Sentinel>
  1669. call(Iter first, Iter it, Sentinel last)
  1670. {
  1671. return {first, it, last};
  1672. }
  1673. };
  1674. template<>
  1675. struct make_utf32_dispatch<format::utf32>
  1676. {
  1677. template<typename Iter, typename Sentinel>
  1678. static constexpr Iter call(Iter first, Iter it, Sentinel last)
  1679. {
  1680. return it;
  1681. }
  1682. };
  1683. template<
  1684. typename Cont,
  1685. typename UTF8,
  1686. typename UTF16,
  1687. typename UTF32,
  1688. int Bytes = sizeof(typename Cont::value_type)>
  1689. struct from_utf8_dispatch
  1690. {
  1691. using type = UTF8;
  1692. };
  1693. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1694. struct from_utf8_dispatch<Cont, UTF8, UTF16, UTF32, 2>
  1695. {
  1696. using type = UTF16;
  1697. };
  1698. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1699. struct from_utf8_dispatch<Cont, UTF8, UTF16, UTF32, 4>
  1700. {
  1701. using type = UTF32;
  1702. };
  1703. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1704. using from_utf8_dispatch_t =
  1705. typename from_utf8_dispatch<Cont, UTF8, UTF16, UTF32>::type;
  1706. template<
  1707. typename Cont,
  1708. typename UTF8,
  1709. typename UTF16,
  1710. typename UTF32,
  1711. int Bytes = sizeof(typename Cont::value_type)>
  1712. struct from_utf16_dispatch
  1713. {
  1714. using type = UTF16;
  1715. };
  1716. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1717. struct from_utf16_dispatch<Cont, UTF8, UTF16, UTF32, 1>
  1718. {
  1719. using type = UTF8;
  1720. };
  1721. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1722. struct from_utf16_dispatch<Cont, UTF8, UTF16, UTF32, 4>
  1723. {
  1724. using type = UTF32;
  1725. };
  1726. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1727. using from_utf16_dispatch_t =
  1728. typename from_utf16_dispatch<Cont, UTF8, UTF16, UTF32>::type;
  1729. template<
  1730. typename Cont,
  1731. typename UTF8,
  1732. typename UTF16,
  1733. typename UTF32,
  1734. int Bytes = sizeof(typename Cont::value_type)>
  1735. struct from_utf32_dispatch
  1736. {
  1737. using type = UTF32;
  1738. };
  1739. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1740. struct from_utf32_dispatch<Cont, UTF8, UTF16, UTF32, 1>
  1741. {
  1742. using type = UTF8;
  1743. };
  1744. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1745. struct from_utf32_dispatch<Cont, UTF8, UTF16, UTF32, 2>
  1746. {
  1747. using type = UTF16;
  1748. };
  1749. template<typename Cont, typename UTF8, typename UTF16, typename UTF32>
  1750. using from_utf32_dispatch_t =
  1751. typename from_utf32_dispatch<Cont, UTF8, UTF16, UTF32>::type;
  1752. }}}
  1753. namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V1 {
#if defined(BOOST_TEXT_DOXYGEN)

// Documentation-only declarations: this section is compiled only when
// BOOST_TEXT_DOXYGEN is defined.  The definitions follow the #endif.

/** Returns a `utf_32_to_8_out_iterator<O>` constructed from the given
    iterator. */
template<std::output_iterator<char8_t> O>
utf_32_to_8_out_iterator<O> utf_32_to_8_out(O it);

/** Returns a `utf_8_to_32_out_iterator<O>` constructed from the given
    iterator. */
template<std::output_iterator<char32_t> O>
utf_8_to_32_out_iterator<O> utf_8_to_32_out(O it);

/** Returns a `utf_32_to_16_out_iterator<O>` constructed from the given
    iterator. */
template<std::output_iterator<char16_t> O>
utf_32_to_16_out_iterator<O> utf_32_to_16_out(O it);

/** Returns a `utf_16_to_32_out_iterator<O>` constructed from the given
    iterator. */
template<std::output_iterator<char32_t> O>
utf_16_to_32_out_iterator<O> utf_16_to_32_out(O it);

/** Returns a `utf_16_to_8_out_iterator<O>` constructed from the given
    iterator. */
template<std::output_iterator<char8_t> O>
utf_16_to_8_out_iterator<O> utf_16_to_8_out(O it);

/** Returns a `utf_8_to_16_out_iterator<O>` constructed from the given
    iterator. */
template<std::output_iterator<char16_t> O>
utf_8_to_16_out_iterator<O> utf_8_to_16_out(O it);

/** Returns an iterator equivalent to `it` that transcodes `[first, last)`
    to UTF-8. */
template<std::input_iterator I, std::sentinel_for<I> S>
auto utf8_iterator(I first, I it, S last);

/** Returns an iterator equivalent to `it` that transcodes `[first, last)`
    to UTF-16. */
template<std::input_iterator I, std::sentinel_for<I> S>
auto utf16_iterator(I first, I it, S last);

/** Returns an iterator equivalent to `it` that transcodes `[first, last)`
    to UTF-32. */
template<std::input_iterator I, std::sentinel_for<I> S>
auto utf32_iterator(I first, I it, S last);

/** Returns an inserting iterator that transcodes from UTF-8 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf8_inserter(Cont & c, typename Cont::iterator it);

/** Returns an inserting iterator that transcodes from UTF-16 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf16_inserter(Cont & c, typename Cont::iterator it);

/** Returns an inserting iterator that transcodes from UTF-32 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf32_inserter(Cont & c, typename Cont::iterator it);

/** Returns a back-inserting iterator that transcodes from UTF-8 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf8_back_inserter(Cont & c);

/** Returns a back-inserting iterator that transcodes from UTF-16 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf16_back_inserter(Cont & c);

/** Returns a back-inserting iterator that transcodes from UTF-32 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf32_back_inserter(Cont & c);

/** Returns a front-inserting iterator that transcodes from UTF-8 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf8_front_inserter(Cont & c);

/** Returns a front-inserting iterator that transcodes from UTF-16 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf16_front_inserter(Cont & c);

/** Returns a front-inserting iterator that transcodes from UTF-32 to UTF-8,
    UTF-16, or UTF-32.  Which UTF the iterator transcodes to depends on
    `sizeof(Cont::value_type)`: `1` implies UTF-8; `2` implies UTF-16; and
    any other size implies UTF-32. */
template<typename Cont>
    requires requires { typename Cont::value_type; } &&
    std::is_integral_v<typename Cont::value_type>
auto from_utf32_front_inserter(Cont & c);

#endif
  1864. template<typename Iter>
  1865. utf_32_to_8_out_iterator<Iter> utf_32_to_8_out(Iter it)
  1866. {
  1867. return utf_32_to_8_out_iterator<Iter>(it);
  1868. }
  1869. template<typename Iter>
  1870. utf_8_to_32_out_iterator<Iter> utf_8_to_32_out(Iter it)
  1871. {
  1872. return utf_8_to_32_out_iterator<Iter>(it);
  1873. }
  1874. template<typename Iter>
  1875. utf_32_to_16_out_iterator<Iter> utf_32_to_16_out(Iter it)
  1876. {
  1877. return utf_32_to_16_out_iterator<Iter>(it);
  1878. }
  1879. template<typename Iter>
  1880. utf_16_to_32_out_iterator<Iter> utf_16_to_32_out(Iter it)
  1881. {
  1882. return utf_16_to_32_out_iterator<Iter>(it);
  1883. }
  1884. template<typename Iter>
  1885. utf_16_to_8_out_iterator<Iter> utf_16_to_8_out(Iter it)
  1886. {
  1887. return utf_16_to_8_out_iterator<Iter>(it);
  1888. }
  1889. template<typename Iter>
  1890. utf_8_to_16_out_iterator<Iter> utf_8_to_16_out(Iter it)
  1891. {
  1892. return utf_8_to_16_out_iterator<Iter>(it);
  1893. }
  1894. template<typename Iter, typename Sentinel>
  1895. auto utf8_iterator(Iter first, Iter it, Sentinel last)
  1896. {
  1897. auto const unpacked = text::unpack_iterator_and_sentinel(first, last);
  1898. auto const unpacked_it =
  1899. text::unpack_iterator_and_sentinel(it, last).first;
  1900. constexpr format tag = unpacked.format_tag;
  1901. return detail::make_utf8_dispatch<tag>::call(
  1902. unpacked.first, unpacked_it, unpacked.last);
  1903. }
  1904. template<typename Iter, typename Sentinel>
  1905. auto utf16_iterator(Iter first, Iter it, Sentinel last)
  1906. {
  1907. auto const unpacked = text::unpack_iterator_and_sentinel(first, last);
  1908. auto const unpacked_it =
  1909. text::unpack_iterator_and_sentinel(it, last).first;
  1910. constexpr format tag = unpacked.format_tag;
  1911. return detail::make_utf16_dispatch<tag>::call(
  1912. unpacked.first, unpacked_it, unpacked.last);
  1913. }
  1914. template<typename Iter, typename Sentinel>
  1915. auto utf32_iterator(Iter first, Iter it, Sentinel last)
  1916. {
  1917. auto const unpacked = text::unpack_iterator_and_sentinel(first, last);
  1918. auto const unpacked_it =
  1919. text::unpack_iterator_and_sentinel(it, last).first;
  1920. constexpr format tag = unpacked.format_tag;
  1921. return detail::make_utf32_dispatch<tag>::call(
  1922. unpacked.first, unpacked_it, unpacked.last);
  1923. }
  1924. template<typename Cont>
  1925. auto from_utf8_inserter(Cont & c, typename Cont::iterator it)
  1926. {
  1927. using result_type = detail::from_utf8_dispatch_t<
  1928. Cont,
  1929. std::insert_iterator<Cont>,
  1930. utf_8_to_16_insert_iterator<Cont>,
  1931. utf_8_to_32_insert_iterator<Cont>>;
  1932. return result_type(c, it);
  1933. }
  1934. template<typename Cont>
  1935. auto from_utf16_inserter(Cont & c, typename Cont::iterator it)
  1936. {
  1937. using result_type = detail::from_utf16_dispatch_t<
  1938. Cont,
  1939. utf_16_to_8_insert_iterator<Cont>,
  1940. std::insert_iterator<Cont>,
  1941. utf_16_to_32_insert_iterator<Cont>>;
  1942. return result_type(c, it);
  1943. }
  1944. template<typename Cont>
  1945. auto from_utf32_inserter(Cont & c, typename Cont::iterator it)
  1946. {
  1947. using result_type = detail::from_utf32_dispatch_t<
  1948. Cont,
  1949. utf_32_to_8_insert_iterator<Cont>,
  1950. utf_32_to_16_insert_iterator<Cont>,
  1951. std::insert_iterator<Cont>>;
  1952. return result_type(c, it);
  1953. }
  1954. template<typename Cont>
  1955. auto from_utf8_back_inserter(Cont & c)
  1956. {
  1957. using result_type = detail::from_utf8_dispatch_t<
  1958. Cont,
  1959. std::back_insert_iterator<Cont>,
  1960. utf_8_to_16_back_insert_iterator<Cont>,
  1961. utf_8_to_32_back_insert_iterator<Cont>>;
  1962. return result_type(c);
  1963. }
  1964. template<typename Cont>
  1965. auto from_utf16_back_inserter(Cont & c)
  1966. {
  1967. using result_type = detail::from_utf16_dispatch_t<
  1968. Cont,
  1969. utf_16_to_8_back_insert_iterator<Cont>,
  1970. std::back_insert_iterator<Cont>,
  1971. utf_16_to_32_back_insert_iterator<Cont>>;
  1972. return result_type(c);
  1973. }
  1974. template<typename Cont>
  1975. auto from_utf32_back_inserter(Cont & c)
  1976. {
  1977. using result_type = detail::from_utf32_dispatch_t<
  1978. Cont,
  1979. utf_32_to_8_back_insert_iterator<Cont>,
  1980. utf_32_to_16_back_insert_iterator<Cont>,
  1981. std::back_insert_iterator<Cont>>;
  1982. return result_type(c);
  1983. }
  1984. template<typename Cont>
  1985. auto from_utf8_front_inserter(Cont & c)
  1986. {
  1987. using result_type = detail::from_utf8_dispatch_t<
  1988. Cont,
  1989. std::front_insert_iterator<Cont>,
  1990. utf_8_to_16_front_insert_iterator<Cont>,
  1991. utf_8_to_32_front_insert_iterator<Cont>>;
  1992. return result_type(c);
  1993. }
  1994. template<typename Cont>
  1995. auto from_utf16_front_inserter(Cont & c)
  1996. {
  1997. using result_type = detail::from_utf16_dispatch_t<
  1998. Cont,
  1999. utf_16_to_8_front_insert_iterator<Cont>,
  2000. std::front_insert_iterator<Cont>,
  2001. utf_16_to_32_front_insert_iterator<Cont>>;
  2002. return result_type(c);
  2003. }
  2004. template<typename Cont>
  2005. auto from_utf32_front_inserter(Cont & c)
  2006. {
  2007. using result_type = detail::from_utf32_dispatch_t<
  2008. Cont,
  2009. utf_32_to_8_front_insert_iterator<Cont>,
  2010. utf_32_to_16_front_insert_iterator<Cont>,
  2011. std::front_insert_iterator<Cont>>;
  2012. return result_type(c);
  2013. }
  2014. }}}
  2015. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2016. namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 {
  2017. template<std::output_iterator<char8_t> O>
  2018. constexpr utf_32_to_8_out_iterator<O> utf_32_to_8_out(O it)
  2019. {
  2020. return utf_32_to_8_out_iterator<O>(it);
  2021. }
  2022. template<std::output_iterator<char32_t> O>
  2023. constexpr utf_8_to_32_out_iterator<O> utf_8_to_32_out(O it)
  2024. {
  2025. return utf_8_to_32_out_iterator<O>(it);
  2026. }
  2027. template<std::output_iterator<char16_t> O>
  2028. constexpr utf_32_to_16_out_iterator<O> utf_32_to_16_out(O it)
  2029. {
  2030. return utf_32_to_16_out_iterator<O>(it);
  2031. }
  2032. template<std::output_iterator<char32_t> O>
  2033. constexpr utf_16_to_32_out_iterator<O> utf_16_to_32_out(O it)
  2034. {
  2035. return utf_16_to_32_out_iterator<O>(it);
  2036. }
  2037. template<std::output_iterator<char8_t> O>
  2038. constexpr utf_16_to_8_out_iterator<O> utf_16_to_8_out(O it)
  2039. {
  2040. return utf_16_to_8_out_iterator<O>(it);
  2041. }
  2042. template<std::output_iterator<char16_t> O>
  2043. constexpr utf_8_to_16_out_iterator<O> utf_8_to_16_out(O it)
  2044. {
  2045. return utf_8_to_16_out_iterator<O>(it);
  2046. }
  2047. template<std::input_iterator I, std::sentinel_for<I> S>
  2048. constexpr auto utf8_iterator(I first, I it, S last)
  2049. {
  2050. return v1::utf8_iterator(first, it, last);
  2051. }
  2052. template<std::input_iterator I, std::sentinel_for<I> S>
  2053. constexpr auto utf16_iterator(I first, I it, S last)
  2054. {
  2055. return v1::utf16_iterator(first, it, last);
  2056. }
  2057. template<std::input_iterator I, std::sentinel_for<I> S>
  2058. constexpr auto utf32_iterator(I first, I it, S last)
  2059. {
  2060. return v1::utf32_iterator(first, it, last);
  2061. }
  2062. template<typename Cont>
  2063. requires requires { typename Cont::value_type; } &&
  2064. utf_code_unit<typename Cont::value_type>
  2065. constexpr auto from_utf8_inserter(Cont & c, typename Cont::iterator it)
  2066. {
  2067. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2068. return std::insert_iterator<Cont>(c, it);
  2069. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2070. return utf_8_to_16_insert_iterator<Cont>(c, it);
  2071. } else {
  2072. return utf_8_to_32_insert_iterator<Cont>(c, it);
  2073. }
  2074. }
  2075. template<typename Cont>
  2076. requires requires { typename Cont::value_type; } &&
  2077. utf_code_unit<typename Cont::value_type>
  2078. constexpr auto from_utf16_inserter(Cont & c, typename Cont::iterator it)
  2079. {
  2080. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2081. return utf_16_to_8_insert_iterator<Cont>(c, it);
  2082. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2083. return std::insert_iterator<Cont>(c, it);
  2084. } else {
  2085. return utf_16_to_32_insert_iterator<Cont>(c, it);
  2086. }
  2087. }
  2088. template<typename Cont>
  2089. requires requires { typename Cont::value_type; } &&
  2090. utf_code_unit<typename Cont::value_type>
  2091. constexpr auto from_utf32_inserter(Cont & c, typename Cont::iterator it)
  2092. {
  2093. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2094. return utf_32_to_8_insert_iterator<Cont>(c, it);
  2095. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2096. return utf_32_to_16_insert_iterator<Cont>(c, it);
  2097. } else {
  2098. return std::insert_iterator<Cont>(c, it);
  2099. }
  2100. }
  2101. template<typename Cont>
  2102. requires requires { typename Cont::value_type; } &&
  2103. utf_code_unit<typename Cont::value_type>
  2104. constexpr auto from_utf8_back_inserter(Cont & c)
  2105. {
  2106. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2107. return std::back_insert_iterator<Cont>(c);
  2108. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2109. return utf_8_to_16_back_insert_iterator<Cont>(c);
  2110. } else {
  2111. return utf_8_to_32_back_insert_iterator<Cont>(c);
  2112. }
  2113. }
  2114. template<typename Cont>
  2115. requires requires { typename Cont::value_type; } &&
  2116. utf_code_unit<typename Cont::value_type>
  2117. constexpr auto from_utf16_back_inserter(Cont & c)
  2118. {
  2119. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2120. return utf_16_to_8_back_insert_iterator<Cont>(c);
  2121. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2122. return std::back_insert_iterator<Cont>(c);
  2123. } else {
  2124. return utf_16_to_32_back_insert_iterator<Cont>(c);
  2125. }
  2126. }
  2127. template<typename Cont>
  2128. requires requires { typename Cont::value_type; } &&
  2129. utf_code_unit<typename Cont::value_type>
  2130. constexpr auto from_utf32_back_inserter(Cont & c)
  2131. {
  2132. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2133. return utf_32_to_8_back_insert_iterator<Cont>(c);
  2134. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2135. return utf_32_to_16_back_insert_iterator<Cont>(c);
  2136. } else {
  2137. return std::back_insert_iterator<Cont>(c);
  2138. }
  2139. }
  2140. template<typename Cont>
  2141. requires requires { typename Cont::value_type; } &&
  2142. utf_code_unit<typename Cont::value_type>
  2143. constexpr auto from_utf8_front_inserter(Cont & c)
  2144. {
  2145. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2146. return std::front_insert_iterator<Cont>(c);
  2147. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2148. return utf_8_to_16_front_insert_iterator<Cont>(c);
  2149. } else {
  2150. return utf_8_to_32_front_insert_iterator<Cont>(c);
  2151. }
  2152. }
  2153. template<typename Cont>
  2154. requires requires { typename Cont::value_type; } &&
  2155. utf_code_unit<typename Cont::value_type>
  2156. constexpr auto from_utf16_front_inserter(Cont & c)
  2157. {
  2158. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2159. return utf_16_to_8_front_insert_iterator<Cont>(c);
  2160. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2161. return std::front_insert_iterator<Cont>(c);
  2162. } else {
  2163. return utf_16_to_32_front_insert_iterator<Cont>(c);
  2164. }
  2165. }
  2166. template<typename Cont>
  2167. requires requires { typename Cont::value_type; } &&
  2168. utf_code_unit<typename Cont::value_type>
  2169. constexpr auto from_utf32_front_inserter(Cont & c)
  2170. {
  2171. if constexpr (sizeof(typename Cont::value_type) == 1) {
  2172. return utf_32_to_8_front_insert_iterator<Cont>(c);
  2173. } else if constexpr (sizeof(typename Cont::value_type) == 2) {
  2174. return utf_32_to_16_front_insert_iterator<Cont>(c);
  2175. } else {
  2176. return std::front_insert_iterator<Cont>(c);
  2177. }
  2178. }
  2179. }}}
  2180. #endif
  2181. namespace boost::parser::detail { namespace text {
  2182. namespace detail {
  2183. template<format Format>
  2184. constexpr auto format_to_type()
  2185. {
  2186. if constexpr (Format == format::utf8) {
  2187. return char8_type{};
  2188. } else if constexpr (Format == format::utf16) {
  2189. return char16_t{};
  2190. } else {
  2191. return char32_t{};
  2192. }
  2193. }
  2194. template<typename I>
  2195. constexpr bool is_bidi =
  2196. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2197. std::bidirectional_iterator<I>
  2198. #else
  2199. std::is_base_of_v<
  2200. std::bidirectional_iterator_tag,
  2201. typename std::iterator_traits<I>::iterator_category>
  2202. #endif
  2203. ;
/** Iterator state holder.  This primary template is used when
    `SupportReverse` is false (by default, when `I` is not bidirectional)
    and stores only the current position. */
template<typename I, bool SupportReverse = is_bidi<I>>
struct first_and_curr
{
    first_and_curr() = default;
    // Implicit conversion from a bare iterator is intentional here:
    // the constructor is not marked explicit.
    first_and_curr(I curr) : curr{curr} {}
    first_and_curr(const first_and_curr & other) = default;
    // Converting constructor from a holder of a different iterator type
    // I2, enabled only when I2 is convertible to I.  The constraint is
    // spelled with enable_if or a requires-clause depending on the build
    // configuration.
    template<
        class I2
#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
        ,
        typename Enable = std::enable_if_t<std::is_convertible_v<I2, I>>
#endif
        >
#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
    requires std::convertible_to<I2, I>
#endif
    first_and_curr(const first_and_curr<I2> & other) : curr{other.curr}
    {}
    // Current position.
    I curr;
};
  2224. template<typename I>
  2225. struct first_and_curr<I, true>
  2226. {
  2227. first_and_curr() = default;
  2228. first_and_curr(I first, I curr) : first{first}, curr{curr} {}
  2229. first_and_curr(const first_and_curr & other) = default;
  2230. template<
  2231. class I2
  2232. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2233. ,
  2234. typename Enable = std::enable_if_t<std::is_convertible_v<I2, I>>
  2235. #endif
  2236. >
  2237. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2238. requires std::convertible_to<I2, I>
  2239. #endif
  2240. first_and_curr(const first_and_curr<I2> & other) :
  2241. first{other.first}, curr{other.curr}
  2242. {}
  2243. I first;
  2244. I curr;
  2245. };
  2246. }
  2247. namespace detail {
  // Accessor shim: each static function returns a reference to the
  // like-named data member (`buf_`, `first_and_curr_`, ...) of the object
  // passed in.  utf_iterator befriends this struct so that its converting
  // constructor can read those members from a different specialization.
  struct iter_access
  {
      template<typename Iter>
      static auto & buf(Iter & self) { return self.buf_; }

      template<typename Iter>
      static auto & first_and_curr(Iter & self) { return self.first_and_curr_; }

      template<typename Iter>
      static auto & buf_index(Iter & self) { return self.buf_index_; }

      template<typename Iter>
      static auto & buf_last(Iter & self) { return self.buf_last_; }

      template<typename Iter>
      static auto & to_increment(Iter & self) { return self.to_increment_; }

      template<typename Iter>
      static auto & last(Iter & self) { return self.last_; }
  };
  2281. }
  2282. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2283. template<
  2284. format FromFormat,
  2285. format ToFormat,
  2286. std::input_iterator I,
  2287. std::sentinel_for<I> S,
  2288. transcoding_error_handler ErrorHandler>
  2289. requires std::convertible_to<std::iter_value_t<I>, detail::format_to_type_t<FromFormat>>
  2290. #else
  2291. template<
  2292. format FromFormat,
  2293. format ToFormat,
  2294. typename I,
  2295. typename S,
  2296. typename ErrorHandler>
  2297. #endif
  2298. class utf_iterator
  2299. : public stl_interfaces::iterator_interface<
  2300. utf_iterator<FromFormat, ToFormat, I, S, ErrorHandler>,
  2301. detail::bidirectional_at_most_t<I>,
  2302. detail::format_to_type_t<ToFormat>,
  2303. detail::format_to_type_t<ToFormat>>
  2304. {
  2305. static_assert(
  2306. FromFormat == format::utf8 || FromFormat == format::utf16 ||
  2307. FromFormat == format::utf32);
  2308. static_assert(
  2309. ToFormat == format::utf8 || ToFormat == format::utf16 ||
  2310. ToFormat == format::utf32);
  2311. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2312. template<typename T>
  2313. constexpr static bool is_bidirectional = std::is_base_of_v<
  2314. std::bidirectional_iterator_tag,
  2315. detail::bidirectional_at_most_t<T>>;
  2316. template<typename T>
  2317. constexpr static bool is_forward = std::is_base_of_v<
  2318. std::forward_iterator_tag,
  2319. detail::bidirectional_at_most_t<T>>;
  2320. template<typename T>
  2321. constexpr static bool is_input = !is_bidirectional<T> && !is_forward<T>;
  2322. #endif
  2323. static_assert(
  2324. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2325. std::forward_iterator<I>
  2326. #else
  2327. is_forward<I>
  2328. #endif
  2329. || noexcept(ErrorHandler{}("")));
  2330. public:
  2331. using value_type = detail::format_to_type_t<ToFormat>;
  2332. constexpr utf_iterator() = default;
  2333. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2334. template<
  2335. typename J = I,
  2336. typename Enable = std::enable_if_t<is_bidirectional<J>>>
  2337. #endif
  2338. constexpr utf_iterator(I first, I it, S last)
  2339. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2340. requires std::bidirectional_iterator<I>
  2341. #endif
  2342. : first_and_curr_{first, it}, last_(last)
  2343. {
  2344. if (curr() != last_)
  2345. read();
  2346. }
  2347. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2348. template<
  2349. typename J = I,
  2350. typename Enable = std::enable_if_t<!is_bidirectional<J>>>
  2351. #endif
  2352. constexpr utf_iterator(I it, S last)
  2353. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2354. requires(!std::bidirectional_iterator<I>)
  2355. #endif
  2356. :
  2357. first_and_curr_{it}, last_(last)
  2358. {
  2359. if (curr() != last_)
  2360. read();
  2361. }
  2362. template<
  2363. class I2,
  2364. class S2
  2365. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2366. ,
  2367. typename Enable = std::enable_if_t<
  2368. std::is_convertible_v<I2, I> && std::is_convertible_v<S2, S>>
  2369. #endif
  2370. >
  2371. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2372. requires std::convertible_to<I2, I> && std::convertible_to<S2, S>
  2373. #endif
  2374. constexpr utf_iterator(
  2375. utf_iterator<FromFormat, ToFormat, I2, S2, ErrorHandler> const &
  2376. other) :
  2377. buf_(detail::iter_access::buf(other)),
  2378. first_and_curr_(detail::iter_access::first_and_curr(other)),
  2379. buf_index_(detail::iter_access::buf_index(other)),
  2380. buf_last_(detail::iter_access::buf_last(other)),
  2381. to_increment_(detail::iter_access::to_increment(other)),
  2382. last_(detail::iter_access::last(other))
  2383. {}
  2384. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2385. template<
  2386. typename J = I,
  2387. typename Enable = std::enable_if_t<is_bidirectional<J>>>
  2388. #endif
  2389. constexpr I begin() const
  2390. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2391. requires std::bidirectional_iterator<I>
  2392. #endif
  2393. {
  2394. return first();
  2395. }
  2396. constexpr S end() const { return last_; }
  2397. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2398. template<
  2399. typename J = I,
  2400. typename Enable = std::enable_if_t<is_forward<J>>>
  2401. #endif
  2402. constexpr I base() const
  2403. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2404. requires std::forward_iterator<I>
  2405. #endif
  2406. {
  2407. return curr();
  2408. }
  2409. constexpr value_type operator*() const
  2410. {
  2411. BOOST_PARSER_DEBUG_ASSERT(buf_index_ < buf_last_);
  2412. return buf_[buf_index_];
  2413. }
  2414. constexpr utf_iterator & operator++()
  2415. {
  2416. BOOST_PARSER_DEBUG_ASSERT(buf_index_ != buf_last_ || curr() != last_);
  2417. if (buf_index_ + 1 == buf_last_ && curr() != last_) {
  2418. if constexpr (
  2419. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2420. std::forward_iterator<I>
  2421. #else
  2422. is_forward<I>
  2423. #endif
  2424. ) {
  2425. std::advance(curr(), to_increment_);
  2426. }
  2427. if (curr() == last_)
  2428. buf_index_ = 0;
  2429. else
  2430. read();
  2431. } else if (buf_index_ + 1 <= buf_last_) {
  2432. ++buf_index_;
  2433. }
  2434. return *this;
  2435. }
  2436. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2437. template<
  2438. typename J = I,
  2439. typename Enable = std::enable_if_t<is_bidirectional<J>>>
  2440. #endif
  2441. constexpr utf_iterator & operator--()
  2442. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2443. requires std::bidirectional_iterator<I>
  2444. #endif
  2445. {
  2446. BOOST_PARSER_DEBUG_ASSERT(buf_index_ || curr() != first());
  2447. if (!buf_index_ && curr() != first())
  2448. read_reverse();
  2449. else if (buf_index_)
  2450. --buf_index_;
  2451. return *this;
  2452. }
  2453. friend constexpr bool operator==(
  2454. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2455. utf_iterator
  2456. #else
  2457. std::enable_if_t<is_forward<I>, utf_iterator>
  2458. #endif
  2459. lhs, utf_iterator rhs)
  2460. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2461. requires std::forward_iterator<I> || requires(I i) { i == i; }
  2462. #endif
  2463. {
  2464. if constexpr (
  2465. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2466. std::forward_iterator<I>
  2467. #else
  2468. is_forward<I>
  2469. #endif
  2470. ) {
  2471. return lhs.curr() == rhs.curr() && lhs.buf_index_ == rhs.buf_index_;
  2472. } else {
  2473. if (lhs.curr() != rhs.curr())
  2474. return false;
  2475. if (lhs.buf_index_ == rhs.buf_index_ &&
  2476. lhs.buf_last_ == rhs.buf_last_) {
  2477. return true;
  2478. }
  2479. return lhs.buf_index_ == lhs.buf_last_ &&
  2480. rhs.buf_index_ == rhs.buf_last_;
  2481. }
  2482. }
  2483. #if !defined(__cpp_impl_three_way_comparison)
  2484. friend BOOST_PARSER_CONSTEXPR bool operator!=(
  2485. std::enable_if_t<is_forward<I>, utf_iterator> lhs, utf_iterator rhs)
  2486. { return !(lhs == rhs); }
  2487. #endif
  2488. friend constexpr bool operator==(utf_iterator lhs, S rhs)
  2489. {
  2490. if constexpr (
  2491. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2492. std::forward_iterator<I>
  2493. #else
  2494. is_forward<I>
  2495. #endif
  2496. ) {
  2497. return lhs.curr() == rhs;
  2498. } else {
  2499. return lhs.curr() == rhs && lhs.buf_index_ == lhs.buf_last_;
  2500. }
  2501. }
  2502. #if !defined(__cpp_impl_three_way_comparison)
  2503. friend BOOST_PARSER_CONSTEXPR bool operator!=(utf_iterator lhs, S rhs)
  2504. { return !(lhs == rhs); }
  2505. #endif
  2506. // exposition only
  2507. using base_type = stl_interfaces::iterator_interface<
  2508. utf_iterator<FromFormat, ToFormat, I, S, ErrorHandler>,
  2509. detail::bidirectional_at_most_t<I>,
  2510. value_type,
  2511. value_type>;
  2512. using base_type::operator++;
  2513. using base_type::operator--;
  2514. private:
  2515. constexpr char32_t decode_code_point()
  2516. {
  2517. if constexpr (FromFormat == format::utf8) {
  2518. char32_t cp = *curr();
  2519. ++curr();
  2520. to_increment_ = 1;
  2521. if (cp < 0x80)
  2522. return cp;
  2523. // clang-format off
  2524. // It turns out that this naive implementation is faster than
  2525. // the table implementation for the converting iterators.
  2526. /*
  2527. Unicode 3.9/D92
  2528. Table 3-7. Well-Formed UTF-8 Byte Sequences
  2529. Code Points First Byte Second Byte Third Byte Fourth Byte
  2530. =========== ========== =========== ========== ===========
  2531. U+0000..U+007F 00..7F
  2532. U+0080..U+07FF C2..DF 80..BF
  2533. U+0800..U+0FFF E0 A0..BF 80..BF
  2534. U+1000..U+CFFF E1..EC 80..BF 80..BF
  2535. U+D000..U+D7FF ED 80..9F 80..BF
  2536. U+E000..U+FFFF EE..EF 80..BF 80..BF
  2537. U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
  2538. U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
  2539. U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
  2540. */
  2541. // clang-format on
  2542. unsigned char curr_c = (unsigned char)cp;
  2543. auto error = [&]() {
  2544. return ErrorHandler{}("Ill-formed UTF-8.");
  2545. };
  2546. auto next = [&]() {
  2547. ++curr();
  2548. ++to_increment_;
  2549. };
  2550. // One-byte case handled above
  2551. // Two-byte
  2552. if (detail::in(0xc2, curr_c, 0xdf)) {
  2553. cp = curr_c & 0b00011111;
  2554. if (curr() == last_)
  2555. return error();
  2556. curr_c = *curr();
  2557. if (!detail::in(0x80, curr_c, 0xbf))
  2558. return error();
  2559. cp = (cp << 6) + (curr_c & 0b00111111);
  2560. next();
  2561. // Three-byte
  2562. } else if (curr_c == 0xe0) {
  2563. cp = curr_c & 0b00001111;
  2564. if (curr() == last_)
  2565. return error();
  2566. curr_c = *curr();
  2567. if (!detail::in(0xa0, curr_c, 0xbf))
  2568. return error();
  2569. cp = (cp << 6) + (curr_c & 0b00111111);
  2570. next();
  2571. if (curr() == last_)
  2572. return error();
  2573. curr_c = *curr();
  2574. if (!detail::in(0x80, curr_c, 0xbf))
  2575. return error();
  2576. cp = (cp << 6) + (curr_c & 0b00111111);
  2577. next();
  2578. } else if (detail::in(0xe1, curr_c, 0xec)) {
  2579. cp = curr_c & 0b00001111;
  2580. if (curr() == last_)
  2581. return error();
  2582. curr_c = *curr();
  2583. if (!detail::in(0x80, curr_c, 0xbf))
  2584. return error();
  2585. cp = (cp << 6) + (curr_c & 0b00111111);
  2586. next();
  2587. if (curr() == last_)
  2588. return error();
  2589. curr_c = *curr();
  2590. if (!detail::in(0x80, curr_c, 0xbf))
  2591. return error();
  2592. cp = (cp << 6) + (curr_c & 0b00111111);
  2593. next();
  2594. } else if (curr_c == 0xed) {
  2595. cp = curr_c & 0b00001111;
  2596. if (curr() == last_)
  2597. return error();
  2598. curr_c = *curr();
  2599. if (!detail::in(0x80, curr_c, 0x9f))
  2600. return error();
  2601. cp = (cp << 6) + (curr_c & 0b00111111);
  2602. next();
  2603. if (curr() == last_)
  2604. return error();
  2605. curr_c = *curr();
  2606. if (!detail::in(0x80, curr_c, 0xbf))
  2607. return error();
  2608. cp = (cp << 6) + (curr_c & 0b00111111);
  2609. next();
  2610. } else if (detail::in(0xee, curr_c, 0xef)) {
  2611. cp = curr_c & 0b00001111;
  2612. if (curr() == last_)
  2613. return error();
  2614. curr_c = *curr();
  2615. if (!detail::in(0x80, curr_c, 0xbf))
  2616. return error();
  2617. cp = (cp << 6) + (curr_c & 0b00111111);
  2618. next();
  2619. if (curr() == last_)
  2620. return error();
  2621. curr_c = *curr();
  2622. if (!detail::in(0x80, curr_c, 0xbf))
  2623. return error();
  2624. cp = (cp << 6) + (curr_c & 0b00111111);
  2625. next();
  2626. // Four-byte
  2627. } else if (curr_c == 0xf0) {
  2628. cp = curr_c & 0b00000111;
  2629. if (curr() == last_)
  2630. return error();
  2631. curr_c = *curr();
  2632. if (!detail::in(0x90, curr_c, 0xbf))
  2633. return error();
  2634. cp = (cp << 6) + (curr_c & 0b00111111);
  2635. next();
  2636. if (curr() == last_)
  2637. return error();
  2638. curr_c = *curr();
  2639. if (!detail::in(0x80, curr_c, 0xbf))
  2640. return error();
  2641. cp = (cp << 6) + (curr_c & 0b00111111);
  2642. next();
  2643. if (curr() == last_)
  2644. return error();
  2645. curr_c = *curr();
  2646. if (!detail::in(0x80, curr_c, 0xbf))
  2647. return error();
  2648. cp = (cp << 6) + (curr_c & 0b00111111);
  2649. next();
  2650. } else if (detail::in(0xf1, curr_c, 0xf3)) {
  2651. cp = curr_c & 0b00000111;
  2652. if (curr() == last_)
  2653. return error();
  2654. curr_c = *curr();
  2655. if (!detail::in(0x80, curr_c, 0xbf))
  2656. return error();
  2657. cp = (cp << 6) + (curr_c & 0b00111111);
  2658. next();
  2659. if (curr() == last_)
  2660. return error();
  2661. curr_c = *curr();
  2662. if (!detail::in(0x80, curr_c, 0xbf))
  2663. return error();
  2664. cp = (cp << 6) + (curr_c & 0b00111111);
  2665. next();
  2666. if (curr() == last_)
  2667. return error();
  2668. curr_c = *curr();
  2669. if (!detail::in(0x80, curr_c, 0xbf))
  2670. return error();
  2671. cp = (cp << 6) + (curr_c & 0b00111111);
  2672. next();
  2673. } else if (curr_c == 0xf4) {
  2674. cp = curr_c & 0b00000111;
  2675. if (curr() == last_)
  2676. return error();
  2677. curr_c = *curr();
  2678. if (!detail::in(0x80, curr_c, 0x8f))
  2679. return error();
  2680. cp = (cp << 6) + (curr_c & 0b00111111);
  2681. next();
  2682. if (curr() == last_)
  2683. return error();
  2684. curr_c = *curr();
  2685. if (!detail::in(0x80, curr_c, 0xbf))
  2686. return error();
  2687. cp = (cp << 6) + (curr_c & 0b00111111);
  2688. next();
  2689. if (curr() == last_)
  2690. return error();
  2691. curr_c = *curr();
  2692. if (!detail::in(0x80, curr_c, 0xbf))
  2693. return error();
  2694. cp = (cp << 6) + (curr_c & 0b00111111);
  2695. next();
  2696. } else {
  2697. return error();
  2698. }
  2699. return cp;
  2700. } else if constexpr (FromFormat == format::utf16) {
  2701. char16_t hi = *curr();
  2702. ++curr();
  2703. to_increment_ = 1;
  2704. if (!boost::parser::detail::text::surrogate(hi))
  2705. return hi;
  2706. if (boost::parser::detail::text::low_surrogate(hi)) {
  2707. return ErrorHandler{}(
  2708. "Invalid UTF-16 sequence; lone trailing surrogate.");
  2709. }
  2710. // high surrogate
  2711. if (curr() == last_) {
  2712. return ErrorHandler{}(
  2713. "Invalid UTF-16 sequence; lone leading surrogate.");
  2714. }
  2715. char16_t lo = *curr();
  2716. ++curr();
  2717. ++to_increment_;
  2718. if (!boost::parser::detail::text::low_surrogate(lo)) {
  2719. return ErrorHandler{}(
  2720. "Invalid UTF-16 sequence; lone leading surrogate.");
  2721. }
  2722. return char32_t((hi - high_surrogate_base) << 10) +
  2723. (lo - low_surrogate_base);
  2724. } else {
  2725. char32_t retval = *curr();
  2726. ++curr();
  2727. to_increment_ = 1;
  2728. return retval;
  2729. }
  2730. }
  2731. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2732. template<
  2733. typename J = I,
  2734. typename Enable = std::enable_if_t<is_bidirectional<J>>>
  2735. #endif
  2736. constexpr char32_t decode_code_point_reverse()
  2737. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2738. requires std::bidirectional_iterator<I>
  2739. #endif
  2740. {
  2741. if constexpr (FromFormat == format::utf8) {
  2742. curr() = detail::decrement(first(), curr());
  2743. auto initial = curr();
  2744. char32_t cp = decode_code_point();
  2745. curr() = initial;
  2746. return cp;
  2747. } else if constexpr (FromFormat == format::utf16) {
  2748. char16_t lo = *--curr();
  2749. if (!boost::parser::detail::text::surrogate(lo))
  2750. return lo;
  2751. if (boost::parser::detail::text::high_surrogate(lo)) {
  2752. return ErrorHandler{}(
  2753. "Invalid UTF-16 sequence; lone leading surrogate.");
  2754. }
  2755. // low surrogate
  2756. if (curr() == first()) {
  2757. return ErrorHandler{}(
  2758. "Invalid UTF-16 sequence; lone trailing surrogate.");
  2759. }
  2760. char16_t hi = *detail::prev(curr());
  2761. if (!boost::parser::detail::text::high_surrogate(hi)) {
  2762. return ErrorHandler{}(
  2763. "Invalid UTF-16 sequence; lone trailing surrogate.");
  2764. }
  2765. --curr();
  2766. return char32_t((hi - high_surrogate_base) << 10) +
  2767. (lo - low_surrogate_base);
  2768. } else {
  2769. return *--curr();
  2770. }
  2771. }
  2772. template<class Out>
  2773. static constexpr Out encode_code_point(char32_t cp, Out out)
  2774. {
  2775. if constexpr (ToFormat == format::utf8) {
  2776. if (cp < 0x80) {
  2777. *out++ = static_cast<char8_type>(cp);
  2778. } else if (cp < 0x800) {
  2779. *out++ = static_cast<char8_type>(0xC0 + (cp >> 6));
  2780. *out++ = static_cast<char8_type>(0x80 + (cp & 0x3f));
  2781. } else if (cp < 0x10000) {
  2782. *out++ = static_cast<char8_type>(0xe0 + (cp >> 12));
  2783. *out++ = static_cast<char8_type>(0x80 + ((cp >> 6) & 0x3f));
  2784. *out++ = static_cast<char8_type>(0x80 + (cp & 0x3f));
  2785. } else {
  2786. *out++ = static_cast<char8_type>(0xf0 + (cp >> 18));
  2787. *out++ = static_cast<char8_type>(0x80 + ((cp >> 12) & 0x3f));
  2788. *out++ = static_cast<char8_type>(0x80 + ((cp >> 6) & 0x3f));
  2789. *out++ = static_cast<char8_type>(0x80 + (cp & 0x3f));
  2790. }
  2791. } else if constexpr (ToFormat == format::utf16) {
  2792. if (cp < 0x10000) {
  2793. *out++ = static_cast<char16_t>(cp);
  2794. } else {
  2795. *out++ =
  2796. static_cast<char16_t>(cp >> 10) + high_surrogate_base;
  2797. *out++ =
  2798. static_cast<char16_t>(cp & 0x3ff) + low_surrogate_base;
  2799. }
  2800. } else {
  2801. *out++ = cp;
  2802. }
  2803. return out;
  2804. }
  2805. constexpr void read()
  2806. {
  2807. I initial;
  2808. if constexpr (
  2809. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2810. std::forward_iterator<I>
  2811. #else
  2812. is_forward<I>
  2813. #endif
  2814. ) {
  2815. initial = curr();
  2816. }
  2817. if constexpr (noexcept(ErrorHandler{}(""))) {
  2818. char32_t cp = decode_code_point();
  2819. auto it = encode_code_point(cp, buf_.begin());
  2820. buf_index_ = 0;
  2821. buf_last_ = uint8_t(it - buf_.begin());
  2822. } else {
  2823. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2824. auto buf = buf_;
  2825. try {
  2826. #endif
  2827. char32_t cp = decode_code_point();
  2828. auto it = encode_code_point(cp, buf_.begin());
  2829. buf_index_ = 0;
  2830. buf_last_ = it - buf_.begin();
  2831. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2832. } catch (...) {
  2833. buf_ = buf;
  2834. curr() = initial;
  2835. throw;
  2836. }
  2837. #endif
  2838. }
  2839. if constexpr (
  2840. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2841. std::forward_iterator<I>
  2842. #else
  2843. is_forward<I>
  2844. #endif
  2845. ) {
  2846. curr() = initial;
  2847. }
  2848. }
  2849. constexpr void read_reverse()
  2850. {
  2851. auto initial = curr();
  2852. if constexpr (noexcept(ErrorHandler{}(""))) {
  2853. char32_t cp = decode_code_point_reverse();
  2854. auto it = encode_code_point(cp, buf_.begin());
  2855. buf_last_ = uint8_t(it - buf_.begin());
  2856. buf_index_ = buf_last_ - 1;
  2857. to_increment_ = (int)std::distance(curr(), initial);
  2858. } else {
  2859. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2860. auto buf = buf_;
  2861. try {
  2862. #endif
  2863. char32_t cp = decode_code_point_reverse();
  2864. auto it = encode_code_point(cp, buf_.begin());
  2865. buf_last_ = it - buf_.begin();
  2866. buf_index_ = buf_last_ - 1;
  2867. to_increment_ = std::distance(curr(), initial);
  2868. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2869. } catch (...) {
  2870. buf_ = buf;
  2871. curr() = initial;
  2872. throw;
  2873. }
  2874. #endif
  2875. }
  2876. }
  2877. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2878. template<
  2879. typename J = I,
  2880. typename Enable = std::enable_if_t<is_bidirectional<J>>>
  2881. #endif
  2882. constexpr I first() const
  2883. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2884. requires std::bidirectional_iterator<I>
  2885. #endif
  2886. {
  2887. return first_and_curr_.first;
  2888. }
  2889. constexpr I & curr() { return first_and_curr_.curr; }
  2890. constexpr I curr() const { return first_and_curr_.curr; }
  2891. std::array<value_type, 4 / static_cast<int>(ToFormat)> buf_ = {};
  2892. detail::first_and_curr<I> first_and_curr_ = {};
  2893. uint8_t buf_index_ = 0;
  2894. uint8_t buf_last_ = 0;
  2895. uint8_t to_increment_ = 0;
  2896. [[no_unique_address]] S last_ = {};
  2897. friend struct detail::iter_access;
  2898. };
  2899. }}
  2900. namespace boost::parser::detail { namespace text { namespace detail {
  2901. template<class T>
  2902. constexpr bool is_utf_iter = false;
  2903. template<
  2904. format FromFormat,
  2905. format ToFormat,
  2906. class I,
  2907. class S,
  2908. class ErrorHandler>
  2909. constexpr bool
  2910. is_utf_iter<utf_iterator<FromFormat, ToFormat, I, S, ErrorHandler>> =
  2911. true;
  2912. // These are here because so many downstream views that use
  2913. // utf_iterator use them.
  2914. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2915. template<typename V>
  2916. constexpr bool common_range_v = std::ranges::common_range<V>;
  2917. template<typename V>
  2918. constexpr bool forward_range_v = std::ranges::forward_range<V>;
  2919. template<typename V>
  2920. constexpr bool bidirectional_range_v = std::ranges::bidirectional_range<V>;
  2921. template<typename T>
  2922. constexpr bool default_initializable_v = std::default_initializable<T>;
  2923. template<typename V>
  2924. constexpr bool utf32_range_v = utf32_range<V>;
  2925. #else
  2926. template<typename T>
  2927. using range_expr =
  2928. decltype(detail::begin(std::declval<T &>()) == detail::end(std::declval<T &>()));
  2929. template<typename T>
  2930. constexpr bool is_range_v = is_detected_v<range_expr, T>;
  2931. template<typename V>
  2932. constexpr bool common_range_v =
  2933. is_range_v<V> && std::is_same_v<iterator_t<V>, sentinel_t<V>>;
  2934. template<typename V>
  2935. constexpr bool input_range_v = is_range_v<V> && std::is_base_of_v<
  2936. std::input_iterator_tag,
  2937. typename std::iterator_traits<iterator_t<V>>::iterator_category>;
  2938. template<typename V>
  2939. constexpr bool forward_range_v = is_range_v<V> && std::is_base_of_v<
  2940. std::forward_iterator_tag,
  2941. typename std::iterator_traits<iterator_t<V>>::iterator_category>;
  2942. template<typename V>
  2943. constexpr bool bidirectional_range_v = is_range_v<V> && std::is_base_of_v<
  2944. std::bidirectional_iterator_tag,
  2945. typename std::iterator_traits<iterator_t<V>>::iterator_category>;
  2946. template<typename T>
  2947. constexpr bool default_initializable_v = std::is_default_constructible_v<T>;
  2948. template<typename V>
  2949. constexpr bool utf_range_v = is_range_v<V> && code_unit_v<range_value_t<V>>;
  2950. template<typename V>
  2951. constexpr bool
  2952. utf32_range_v = is_range_v<V> &&
  2953. (
  2954. #if !defined(_MSC_VER)
  2955. std::is_same_v<range_value_t<V>, wchar_t> ||
  2956. #endif
  2957. std::is_same_v<range_value_t<V>, char32_t>);
  2958. #endif
  // Iterator classification by iterator_category tag: each flag is true
  // when the iterator's category is, or derives from, the named tag.
  template<typename Iter>
  constexpr bool random_access_iterator_v = std::is_base_of<
      std::random_access_iterator_tag,
      typename std::iterator_traits<Iter>::iterator_category>::value;

  template<typename Iter>
  constexpr bool bidirectional_iterator_v = std::is_base_of<
      std::bidirectional_iterator_tag,
      typename std::iterator_traits<Iter>::iterator_category>::value;

  template<typename Iter>
  constexpr bool forward_iterator_v = std::is_base_of<
      std::forward_iterator_tag,
      typename std::iterator_traits<Iter>::iterator_category>::value;
  2971. template<
  2972. class V,
  2973. bool StoreFirst = !is_utf_iter<iterator_t<V>> && common_range_v<V> &&
  2974. bidirectional_range_v<V>,
  2975. bool StoreLast = !is_utf_iter<iterator_t<V>>>
  2976. struct first_last_storage
  2977. {
  2978. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2979. template<
  2980. typename Enable = std::enable_if_t<
  2981. default_initializable_v<iterator_t<V>> &&
  2982. default_initializable_v<sentinel_t<V>>>>
  2983. #endif
  2984. constexpr first_last_storage()
  2985. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  2986. requires default_initializable_v<iterator_t<V>> &&
  2987. default_initializable_v<sentinel_t<V>>
  2988. #endif
  2989. {}
  2990. constexpr first_last_storage(V & base) :
  2991. first_{detail::begin(base)}, last_{detail::end(base)}
  2992. {}
  2993. constexpr auto begin(iterator_t<V> & it) const { return first_; }
  2994. constexpr auto end(iterator_t<V> & it) const { return last_; }
  2995. iterator_t<V> first_;
  2996. sentinel_t<V> last_;
  2997. };
  // Detection expression: well formed when Iter can be constructed from
  // (x.begin(), x.end(), x.end()) for an Iter object x.
  template<typename Iter>
  using trinary_iter_ctor = decltype(Iter(
      std::declval<Iter>().begin(),
      std::declval<Iter>().end(),
      std::declval<Iter>().end()));
  3003. template<class V>
  3004. struct first_last_storage<V, true, false>
  3005. {
  3006. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  3007. template<
  3008. typename Enable =
  3009. std::enable_if_t<default_initializable_v<iterator_t<V>>>>
  3010. #endif
  3011. constexpr first_last_storage()
  3012. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  3013. requires default_initializable_v<iterator_t<V>>
  3014. #endif
  3015. {}
  3016. constexpr first_last_storage(V & base) : first_{detail::begin(base)} {}
  3017. constexpr auto begin(iterator_t<V> & it) const { return first_; }
  3018. constexpr auto end(iterator_t<V> & it) const {
  3019. if constexpr (
  3020. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  3021. requires { iterator_t<V>(it.begin(), it.end(), it.end()); }
  3022. #else
  3023. is_detected_v<trinary_iter_ctor, iterator_t<V>>
  3024. #endif
  3025. ) {
  3026. return iterator_t<V>(it.begin(), it.end(), it.end());
  3027. } else {
  3028. return it.end();
  3029. }
  3030. }
  3031. iterator_t<V> first_;
  3032. };
  3033. template<class V>
  3034. struct first_last_storage<V, false, true>
  3035. {
  3036. #if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  3037. template<
  3038. typename Enable =
  3039. std::enable_if_t<default_initializable_v<sentinel_t<V>>>>
  3040. #endif
  3041. constexpr first_last_storage()
  3042. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  3043. requires default_initializable_v<sentinel_t<V>>
  3044. #endif
  3045. {}
  3046. constexpr first_last_storage(V & base) : last_{detail::end(base)} {}
  3047. constexpr auto begin(iterator_t<V> & it) const {
  3048. if constexpr (is_utf_iter<iterator_t<V>>) {
  3049. return iterator_t<V>(it.begin(), it.begin(), it.end());
  3050. } else {
  3051. return;
  3052. }
  3053. }
  3054. constexpr auto end(iterator_t<V> & it) const { return last_; }
  3055. sentinel_t<V> last_;
  3056. };
  3057. template<class V>
  3058. struct first_last_storage<V, false, false>
  3059. {
  3060. constexpr first_last_storage() = default;
  3061. constexpr first_last_storage(V & base) {}
  3062. constexpr auto begin(iterator_t<V> & it) const {
  3063. if constexpr (is_utf_iter<iterator_t<V>>) {
  3064. return iterator_t<V>(it.begin(), it.begin(), it.end());
  3065. } else {
  3066. return;
  3067. }
  3068. }
  3069. constexpr auto end(iterator_t<V> & it) const {
  3070. if constexpr (
  3071. #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  3072. requires { iterator_t<V>(it.begin(), it.end(), it.end()); }
  3073. #else
  3074. is_detected_v<trinary_iter_ctor, iterator_t<V>>
  3075. #endif
  3076. ) {
  3077. return iterator_t<V>(it.begin(), it.end(), it.end());
  3078. } else {
  3079. return it.end();
  3080. }
  3081. }
  3082. };
  3083. template<class V>
  3084. constexpr auto uc_view_category() {
  3085. if constexpr (common_range_v<V> && bidirectional_range_v<V>) {
  3086. return std::bidirectional_iterator_tag{};
  3087. } else {
  3088. return std::forward_iterator_tag{};
  3089. }
  3090. }
  3091. template<class V>
  3092. using uc_view_category_t = decltype(uc_view_category<V>());
  // `const T` when Const is true, plain `T` otherwise.
  template<bool Const, class T>
  using maybe_const = typename std::conditional<Const, T const, T>::type;
  // Trait: true only for std::ranges::empty_view specializations, which
  // are recognized only in the concepts build; false for everything else.
  template<class View>
  constexpr bool is_empty_view = false;
  #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS
  template<class Elem>
  constexpr bool is_empty_view<std::ranges::empty_view<Elem>> = true;
  #endif
  3101. }}}
  3102. #endif