case_fold.hpp 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #ifndef BOOST_PARSER_DETAIL_CASE_FOLD_HPP
  2. #define BOOST_PARSER_DETAIL_CASE_FOLD_HPP
  #include <boost/parser/config.hpp>
  #include <boost/parser/detail/text/transcode_iterator.hpp>
  #include <boost/parser/detail/case_fold_data_generated.hpp>
  #include <algorithm>
  #include <optional>
  7. namespace boost::parser::detail {
  8. template<typename I>
  9. std::optional<I> do_short_mapping(
  10. short_mapping_range const * first,
  11. short_mapping_range const * last,
  12. char32_t cp,
  13. I out)
  14. {
  15. auto it = std::lower_bound(
  16. first,
  17. last,
  18. cp,
  19. [](short_mapping_range const & range, char32_t cp) {
  20. return range.cp_first_ < cp;
  21. });
  22. if (it != first) {
  23. auto const prev = it - 1;
  24. if (prev->cp_first_ <= cp && cp < prev->cp_last_)
  25. it = prev;
  26. }
  27. if (it != last && it->cp_first_ <= cp && cp < it->cp_last_) {
  28. auto const offset = cp - it->cp_first_;
  29. if (offset % it->stride_ == 0) {
  30. *out++ =
  31. single_mapping_cps[it->first_idx_ + offset / it->stride_];
  32. return out;
  33. }
  34. }
  35. return std::nullopt;
  36. }
  37. template<typename I>
  38. I case_fold(char32_t cp, I out)
  39. {
  40. // One-byte fast path.
  41. if (cp < 0x100) {
  42. // ASCII letter fast path.
  43. if (0x61 <= cp && cp <= 0x7a) {
  44. *out++ = cp;
  45. return out;
  46. } else if (0x41 <= cp && cp <= 0x5a) {
  47. *out++ = cp + 0x20;
  48. return out;
  49. } else if (cp == 0x00DF) {
  50. // The lone multi-mapping below 0x100.
  51. *out++ = 0x0073;
  52. *out++ = 0x0073;
  53. return out;
  54. } else {
  55. // Skip [0x41, 0x5a), handled above.
  56. auto const first = text::detail::begin(mapping_ranges) + 1;
  57. // 7th entry starts with 0x100.
  58. auto const last = text::detail::begin(mapping_ranges) + 7;
  59. if (auto out_opt = do_short_mapping(first, last, cp, out))
  60. return *out_opt;
  61. }
  62. *out++ = cp;
  63. return out;
  64. }
  65. // Single-cp-mapping path (next most common case).
  66. {
  67. auto const first = text::detail::begin(mapping_ranges);
  68. auto const last = text::detail::end(mapping_ranges);
  69. if (auto out_opt = do_short_mapping(first, last, cp, out))
  70. return *out_opt;
  71. }
  72. // Multi-cp mapping path.
  73. {
  74. auto const last = detail::text::detail::end(long_mappings);
  75. auto const it = std::lower_bound(
  76. detail::text::detail::begin(long_mappings),
  77. last,
  78. cp,
  79. [](long_mapping const & mapping, char32_t cp) {
  80. return mapping.cp_ < cp;
  81. });
  82. if (it != last && it->cp_ == cp) {
  83. #if BOOST_PARSER_USE_CONCEPTS
  84. return std::ranges::copy(it->mapping_, text::null_sentinel, out)
  85. .out;
  86. #else
  87. return std::copy(
  88. it->mapping_,
  89. std::find(
  90. text::detail::begin(it->mapping_),
  91. text::detail::end(it->mapping_),
  92. 0),
  93. out);
  94. #endif
  95. }
  96. }
  97. *out++ = cp;
  98. return out;
  99. }
  100. }
  101. #endif