utf8_mqtt.hpp 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. //
  2. // Copyright (c) 2023-2025 Ivica Siladic, Bruno Iljazovic, Korina Simicevic
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. #ifndef BOOST_MQTT5_UTF8_MQTT_HPP
  8. #define BOOST_MQTT5_UTF8_MQTT_HPP
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <utility>
  13. namespace boost::mqtt5::detail {
  14. enum class validation_result : uint8_t {
  15. valid = 0,
  16. has_wildcard_character,
  17. invalid
  18. };
  19. inline int pop_front_unichar(std::string_view& s) {
  20. // assuming that s.length() is > 0
  21. int n = s[0] & 0xF0;
  22. int ch = -1;
  23. if ((n & 0x80) == 0) {
  24. ch = s[0];
  25. s.remove_prefix(1);
  26. }
  27. else if ((n == 0xC0 || n == 0xD0) && s.size() > 1) {
  28. ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
  29. s.remove_prefix(2);
  30. }
  31. else if ((n == 0xE0) && s.size() > 2) {
  32. ch = ((s[0] & 0x1F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
  33. s.remove_prefix(3);
  34. }
  35. else if ((n == 0xF0) && s.size() > 3) {
  36. ch = ((s[0] & 0x1F) << 18) | ((s[1] & 0x3F) << 12) |
  37. ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
  38. s.remove_prefix(4);
  39. }
  40. return ch;
  41. }
  42. inline validation_result validate_mqtt_utf8_char(int c) {
  43. constexpr int fe_flag = 0xFE;
  44. constexpr int ff_flag = 0xFF;
  45. constexpr int multi_lvl_wildcard = '#';
  46. constexpr int single_lvl_wildcard = '+';
  47. if (c == multi_lvl_wildcard || c == single_lvl_wildcard)
  48. return validation_result::has_wildcard_character;
  49. if (c > 0x001F && // U+0000...U+001F control characters
  50. (c < 0x007F || c > 0x009F) && // U+007F...0+009F control characters
  51. (c < 0xD800 || c > 0xDFFF) && // U+D800...U+DFFF surrogates
  52. (c < 0xFDD0 || c > 0xFDEF) && // U+FDD0...U+FDEF non-characters
  53. (c & fe_flag) != fe_flag && // non-characters
  54. (c & ff_flag) != ff_flag
  55. )
  56. return validation_result::valid;
  57. return validation_result::invalid;
  58. }
  59. inline bool is_valid_string_size(size_t sz) {
  60. constexpr size_t max_sz = 65535;
  61. return sz <= max_sz;
  62. }
  63. inline bool is_utf8(validation_result result) {
  64. return result == validation_result::valid ||
  65. result == validation_result::has_wildcard_character;
  66. }
  67. template <typename ValidSizeCondition, typename ValidCondition>
  68. validation_result validate_impl(
  69. std::string_view str,
  70. ValidSizeCondition&& size_condition, ValidCondition&& condition
  71. ) {
  72. if (!size_condition(str.size()))
  73. return validation_result::invalid;
  74. validation_result result;
  75. while (!str.empty()) {
  76. int c = pop_front_unichar(str);
  77. result = validate_mqtt_utf8_char(c);
  78. if (!condition(result))
  79. return result;
  80. }
  81. return validation_result::valid;
  82. }
  83. inline validation_result validate_mqtt_utf8(std::string_view str) {
  84. return validate_impl(str, is_valid_string_size, is_utf8);
  85. }
  86. inline bool is_valid_string_pair(
  87. const std::pair<std::string, std::string>& str_pair
  88. ) {
  89. return validate_mqtt_utf8(str_pair.first) == validation_result::valid &&
  90. validate_mqtt_utf8(str_pair.second) == validation_result::valid;
  91. }
  92. } // namespace boost::mqtt5::detail
  93. #endif //BOOST_MQTT5_UTF8_MQTT_HPP