parser.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. // Copyright 2023 Matt Borland
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // https://www.boost.org/LICENSE_1_0.txt
  4. #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
  5. #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
  6. #include <boost/json/detail/charconv/detail/config.hpp>
  7. #include <boost/json/detail/charconv/detail/from_chars_result.hpp>
  8. #include <boost/json/detail/charconv/detail/from_chars_integer_impl.hpp>
  9. #include <boost/json/detail/charconv/detail/integer_search_trees.hpp>
  10. #include <boost/json/detail/charconv/limits.hpp>
  11. #include <boost/json/detail/charconv/chars_format.hpp>
  12. #include <system_error>
  13. #include <type_traits>
  14. #include <limits>
  15. #include <cerrno>
  16. #include <cstdint>
  17. #include <cstring>
  18. #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
  19. # pragma GCC diagnostic push
  20. # pragma GCC diagnostic ignored "-Wmissing-field-initializers"
  21. #endif
  22. namespace boost { namespace json { namespace detail { namespace charconv { namespace detail {
  23. inline bool is_integer_char(char c) noexcept
  24. {
  25. return (c >= '0') && (c <= '9');
  26. }
  27. inline bool is_hex_char(char c) noexcept
  28. {
  29. return is_integer_char(c) || (((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')));
  30. }
  31. inline bool is_delimiter(char c, chars_format fmt) noexcept
  32. {
  33. if (fmt != chars_format::hex)
  34. {
  35. return !is_integer_char(c) && c != 'e' && c != 'E';
  36. }
  37. return !is_hex_char(c) && c != 'p' && c != 'P';
  38. }
  39. template <typename Unsigned_Integer, typename Integer>
  40. inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept
  41. {
  42. if (first > last)
  43. {
  44. return {first, std::errc::invalid_argument};
  45. }
  46. auto next = first;
  47. bool all_zeros = true;
  48. // First extract the sign
  49. if (*next == '-')
  50. {
  51. sign = true;
  52. ++next;
  53. }
  54. else if (*next == '+')
  55. {
  56. return {next, std::errc::invalid_argument};
  57. }
  58. else
  59. {
  60. sign = false;
  61. }
  62. // Ignore leading zeros (e.g. 00005 or -002.3e+5)
  63. while (*next == '0' && next != last)
  64. {
  65. ++next;
  66. }
  67. // If the number is 0 we can abort now
  68. char exp_char;
  69. char capital_exp_char;
  70. if (fmt != chars_format::hex)
  71. {
  72. exp_char = 'e';
  73. capital_exp_char = 'E';
  74. }
  75. else
  76. {
  77. exp_char = 'p';
  78. capital_exp_char = 'P';
  79. }
  80. if (next == last || *next == exp_char || *next == -capital_exp_char)
  81. {
  82. significand = 0;
  83. exponent = 0;
  84. return {next, std::errc()};
  85. }
  86. // Next we get the significand
  87. constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10 - 1; // Base 10 or 16
  88. char significand_buffer[significand_buffer_size] {};
  89. std::size_t i = 0;
  90. std::size_t dot_position = 0;
  91. Integer extra_zeros = 0;
  92. Integer leading_zero_powers = 0;
  93. const auto char_validation_func = (fmt != charconv::chars_format::hex) ? is_integer_char : is_hex_char;
  94. const int base = (fmt != charconv::chars_format::hex) ? 10 : 16;
  95. while (char_validation_func(*next) && next != last && i < significand_buffer_size)
  96. {
  97. all_zeros = false;
  98. significand_buffer[i] = *next;
  99. ++next;
  100. ++i;
  101. }
  102. bool fractional = false;
  103. if (next == last)
  104. {
  105. // if fmt is chars_format::scientific the e is required
  106. if (fmt == chars_format::scientific)
  107. {
  108. return {first, std::errc::invalid_argument};
  109. }
  110. exponent = 0;
  111. std::size_t offset = i;
  112. from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
  113. switch (r.ec)
  114. {
  115. case std::errc::invalid_argument:
  116. return {first, std::errc::invalid_argument};
  117. case std::errc::result_out_of_range:
  118. return {next, std::errc::result_out_of_range};
  119. default:
  120. return {next, std::errc()};
  121. }
  122. }
  123. else if (*next == '.')
  124. {
  125. ++next;
  126. fractional = true;
  127. dot_position = i;
  128. // Process the fractional part if we have it
  129. //
  130. // if fmt is chars_format::scientific the e is required
  131. // if fmt is chars_format::fixed and not scientific the e is disallowed
  132. // if fmt is chars_format::general (which is scientific and fixed) the e is optional
  133. // If we have the value 0.00001 we can continue to chop zeros and adjust the exponent
  134. // so that we get the useful parts of the fraction
  135. if (all_zeros)
  136. {
  137. while (*next == '0' && next != last)
  138. {
  139. ++next;
  140. --leading_zero_powers;
  141. }
  142. if (next == last)
  143. {
  144. return {last, std::errc()};
  145. }
  146. }
  147. while (char_validation_func(*next) && next != last && i < significand_buffer_size)
  148. {
  149. significand_buffer[i] = *next;
  150. ++next;
  151. ++i;
  152. }
  153. }
  154. if (i == significand_buffer_size)
  155. {
  156. // We can not process any more significant figures into the significand so skip to the end
  157. // or the exponent part and capture the additional orders of magnitude for the exponent
  158. bool found_dot = false;
  159. while ((char_validation_func(*next) || *next == '.') && next != last)
  160. {
  161. ++next;
  162. if (!fractional && !found_dot)
  163. {
  164. ++extra_zeros;
  165. }
  166. if (*next == '.')
  167. {
  168. found_dot = true;
  169. }
  170. }
  171. }
  172. if (next == last || is_delimiter(*next, fmt))
  173. {
  174. if (fmt == chars_format::scientific)
  175. {
  176. return {first, std::errc::invalid_argument};
  177. }
  178. if (dot_position != 0 || fractional)
  179. {
  180. exponent = static_cast<Integer>(dot_position) - i + extra_zeros + leading_zero_powers;
  181. }
  182. else
  183. {
  184. exponent = extra_zeros + leading_zero_powers;
  185. }
  186. std::size_t offset = i;
  187. from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
  188. switch (r.ec)
  189. {
  190. case std::errc::invalid_argument:
  191. return {first, std::errc::invalid_argument};
  192. case std::errc::result_out_of_range:
  193. return {next, std::errc::result_out_of_range};
  194. default:
  195. return {next, std::errc()};
  196. }
  197. }
  198. else if (*next == exp_char || *next == capital_exp_char)
  199. {
  200. // Would be a number without a significand e.g. e+03
  201. if (next == first)
  202. {
  203. return {next, std::errc::invalid_argument};
  204. }
  205. ++next;
  206. if (fmt == chars_format::fixed)
  207. {
  208. return {first, std::errc::invalid_argument};
  209. }
  210. exponent = i - 1;
  211. std::size_t offset = i;
  212. bool round = false;
  213. // If more digits are present than representable in the significand of the target type
  214. // we set the maximum
  215. if (offset > significand_buffer_size)
  216. {
  217. offset = significand_buffer_size - 1;
  218. i = significand_buffer_size;
  219. if (significand_buffer[offset] == '5' ||
  220. significand_buffer[offset] == '6' ||
  221. significand_buffer[offset] == '7' ||
  222. significand_buffer[offset] == '8' ||
  223. significand_buffer[offset] == '9')
  224. {
  225. round = true;
  226. }
  227. }
  228. // If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,
  229. // but it is a valid value. We need to continue parsing to get the correct value of ptr even
  230. // though we know we could bail now.
  231. //
  232. // See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29
  233. if (offset != 0)
  234. {
  235. from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
  236. switch (r.ec)
  237. {
  238. case std::errc::invalid_argument:
  239. return {first, std::errc::invalid_argument};
  240. case std::errc::result_out_of_range:
  241. return {next, std::errc::result_out_of_range};
  242. default:
  243. break;
  244. }
  245. if (round)
  246. {
  247. significand += 1;
  248. }
  249. }
  250. }
  251. else
  252. {
  253. return {first, std::errc::invalid_argument};
  254. }
  255. // Finally we get the exponent
  256. constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382
  257. char exponent_buffer[exponent_buffer_size] {};
  258. Integer significand_digits = i;
  259. i = 0;
  260. // Get the sign first
  261. if (*next == '-')
  262. {
  263. exponent_buffer[i] = *next;
  264. ++next;
  265. ++i;
  266. }
  267. else if (*next == '+')
  268. {
  269. ++next;
  270. }
  271. // Next strip any leading zeros
  272. while (*next == '0')
  273. {
  274. ++next;
  275. }
  276. // Process the significant values
  277. while (is_integer_char(*next) && next != last && i < exponent_buffer_size)
  278. {
  279. exponent_buffer[i] = *next;
  280. ++next;
  281. ++i;
  282. }
  283. // If the exponent can't fit in the buffer the number is not representable
  284. if (next != last && i == exponent_buffer_size)
  285. {
  286. return {next, std::errc::result_out_of_range};
  287. }
  288. // If the exponent was e+00 or e-00
  289. if (i == 0 || (i == 1 && exponent_buffer[0] == '-'))
  290. {
  291. if (fractional)
  292. {
  293. exponent = static_cast<Integer>(dot_position) - significand_digits;
  294. }
  295. else
  296. {
  297. exponent = extra_zeros;
  298. }
  299. return {next, std::errc()};
  300. }
  301. const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);
  302. exponent += leading_zero_powers;
  303. switch (r.ec)
  304. {
  305. case std::errc::invalid_argument:
  306. return {first, std::errc::invalid_argument};
  307. case std::errc::result_out_of_range:
  308. return {next, std::errc::result_out_of_range};
  309. default:
  310. if (fractional)
  311. {
  312. // Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer
  313. // so the exponent is off by the number of digits in the significand - 1
  314. if (fmt == chars_format::hex)
  315. {
  316. // In hex the number of digits parsed is possibly less than the number of digits in base10
  317. exponent -= num_digits(significand) - dot_position;
  318. }
  319. else
  320. {
  321. exponent -= significand_digits - dot_position;
  322. }
  323. }
  324. else
  325. {
  326. exponent += extra_zeros;
  327. }
  328. return {next, std::errc()};
  329. }
  330. }
  331. }}}}} // Namespaces
  332. #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
  333. # pragma GCC diagnostic pop
  334. #endif
  335. #endif // BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP