parser.hpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. // Copyright 2023 Matt Borland
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // https://www.boost.org/LICENSE_1_0.txt
  4. #ifndef BOOST_CHARCONV_DETAIL_PARSER_HPP
  5. #define BOOST_CHARCONV_DETAIL_PARSER_HPP
  6. #include <boost/charconv/detail/config.hpp>
  7. #include <boost/charconv/detail/from_chars_result.hpp>
  8. #include <boost/charconv/detail/from_chars_integer_impl.hpp>
  9. #include <boost/charconv/detail/integer_search_trees.hpp>
  10. #include <boost/charconv/limits.hpp>
  11. #include <boost/charconv/chars_format.hpp>
  12. #include <system_error>
  13. #include <type_traits>
  14. #include <limits>
  15. #include <cerrno>
  16. #include <cstdint>
  17. #include <cstring>
  18. #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
  19. # pragma GCC diagnostic push
  20. # pragma GCC diagnostic ignored "-Wmissing-field-initializers"
  21. #endif
  22. namespace boost { namespace charconv { namespace detail {
  23. inline bool is_integer_char(char c) noexcept
  24. {
  25. return (c >= '0') && (c <= '9');
  26. }
  27. inline bool is_hex_char(char c) noexcept
  28. {
  29. return is_integer_char(c) || (((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')));
  30. }
  31. inline bool is_delimiter(char c, chars_format fmt) noexcept
  32. {
  33. if (fmt != chars_format::hex)
  34. {
  35. return !is_integer_char(c) && c != 'e' && c != 'E';
  36. }
  37. return !is_hex_char(c) && c != 'p' && c != 'P';
  38. }
  39. inline from_chars_result from_chars_dispatch(const char* first, const char* last, std::uint64_t& value, int base) noexcept
  40. {
  41. return boost::charconv::detail::from_chars(first, last, value, base);
  42. }
  43. inline from_chars_result from_chars_dispatch(const char* first, const char* last, uint128& value, int base) noexcept
  44. {
  45. return boost::charconv::detail::from_chars128(first, last, value, base);
  46. }
  47. #ifdef BOOST_CHARCONV_HAS_INT128
  48. inline from_chars_result from_chars_dispatch(const char* first, const char* last, boost::uint128_type& value, int base) noexcept
  49. {
  50. return boost::charconv::detail::from_chars128(first, last, value, base);
  51. }
  52. #endif
  53. template<typename Unsigned_Integer>
  54. typename std::enable_if<std::is_unsigned<Unsigned_Integer>::value &&
  55. std::numeric_limits<Unsigned_Integer>::is_integer &&
  56. sizeof(Unsigned_Integer) < sizeof(std::uint64_t),
  57. from_chars_result>::type
  58. from_chars_dispatch(const char* first, const char* last, Unsigned_Integer& value, int base) noexcept
  59. {
  60. std::uint64_t tmp_value;
  61. auto result = boost::charconv::detail::from_chars(first, last, tmp_value, base);
  62. if (result) {
  63. if (tmp_value > (std::numeric_limits<Unsigned_Integer>::max)())
  64. result.ec = std::errc::result_out_of_range;
  65. else
  66. value = static_cast<Unsigned_Integer>(tmp_value);
  67. }
  68. return result;
  69. }
  70. template <typename Unsigned_Integer, typename Integer>
  71. inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept
  72. {
  73. if (first > last)
  74. {
  75. return {first, std::errc::invalid_argument};
  76. }
  77. auto next = first;
  78. bool all_zeros = true;
  79. // First extract the sign
  80. if (*next == '-')
  81. {
  82. sign = true;
  83. ++next;
  84. }
  85. else if (*next == '+')
  86. {
  87. return {next, std::errc::invalid_argument};
  88. }
  89. else
  90. {
  91. sign = false;
  92. }
  93. // Handle non-finite values
  94. // Stl allows for string like "iNf" to return inf
  95. //
  96. // This is nested ifs rather than a big one-liner to ensure that once we hit an invalid character
  97. // or an end of buffer we return the correct value of next
  98. if (next != last && (*next == 'i' || *next == 'I'))
  99. {
  100. ++next;
  101. if (next != last && (*next == 'n' || *next == 'N'))
  102. {
  103. ++next;
  104. if (next != last && (*next == 'f' || *next == 'F'))
  105. {
  106. significand = 0;
  107. return {next, std::errc::value_too_large};
  108. }
  109. }
  110. return {next, std::errc::invalid_argument};
  111. }
  112. else if (next != last && (*next == 'n' || *next == 'N'))
  113. {
  114. ++next;
  115. if (next != last && (*next == 'a' || *next == 'A'))
  116. {
  117. ++next;
  118. if (next != last && (*next == 'n' || *next == 'N'))
  119. {
  120. ++next;
  121. if (next != last && (*next == '('))
  122. {
  123. ++next;
  124. if (next != last && (*next == 's' || *next == 'S'))
  125. {
  126. significand = 1;
  127. return {next, std::errc::not_supported};
  128. }
  129. else if (next != last && (*next == 'i' || *next == 'I'))
  130. {
  131. significand = 0;
  132. return {next, std::errc::not_supported};
  133. }
  134. }
  135. else
  136. {
  137. significand = 0;
  138. return {next, std::errc::not_supported};
  139. }
  140. }
  141. }
  142. return {next, std::errc::invalid_argument};
  143. }
  144. // Ignore leading zeros (e.g. 00005 or -002.3e+5)
  145. while (next != last && *next == '0')
  146. {
  147. ++next;
  148. }
  149. // If the number is 0 we can abort now
  150. char exp_char;
  151. char capital_exp_char;
  152. if (fmt != chars_format::hex)
  153. {
  154. exp_char = 'e';
  155. capital_exp_char = 'E';
  156. }
  157. else
  158. {
  159. exp_char = 'p';
  160. capital_exp_char = 'P';
  161. }
  162. if (next == last || *next == exp_char || *next == -capital_exp_char)
  163. {
  164. significand = 0;
  165. exponent = 0;
  166. return {next, std::errc()};
  167. }
  168. // Next we get the significand
  169. constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10; // Base 10 or 16
  170. char significand_buffer[significand_buffer_size] {};
  171. std::size_t i = 0;
  172. std::size_t dot_position = 0;
  173. Integer extra_zeros = 0;
  174. Integer leading_zero_powers = 0;
  175. const auto char_validation_func = (fmt != boost::charconv::chars_format::hex) ? is_integer_char : is_hex_char;
  176. const int base = (fmt != boost::charconv::chars_format::hex) ? 10 : 16;
  177. while (next != last && char_validation_func(*next) && i < significand_buffer_size)
  178. {
  179. all_zeros = false;
  180. significand_buffer[i] = *next;
  181. ++next;
  182. ++i;
  183. }
  184. bool fractional = false;
  185. if (next == last)
  186. {
  187. // if fmt is chars_format::scientific the e is required
  188. if (fmt == chars_format::scientific)
  189. {
  190. return {first, std::errc::invalid_argument};
  191. }
  192. exponent = 0;
  193. std::size_t offset = i;
  194. from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
  195. switch (r.ec)
  196. {
  197. case std::errc::invalid_argument:
  198. return {first, std::errc::invalid_argument};
  199. case std::errc::result_out_of_range:
  200. return {next, std::errc::result_out_of_range};
  201. default:
  202. return {next, std::errc()};
  203. }
  204. }
  205. else if (*next == '.')
  206. {
  207. ++next;
  208. fractional = true;
  209. dot_position = i;
  210. // Process the fractional part if we have it
  211. //
  212. // if fmt is chars_format::scientific the e is required
  213. // if fmt is chars_format::fixed and not scientific the e is disallowed
  214. // if fmt is chars_format::general (which is scientific and fixed) the e is optional
  215. // If we have the value 0.00001 we can continue to chop zeros and adjust the exponent
  216. // so that we get the useful parts of the fraction
  217. if (all_zeros)
  218. {
  219. while (next != last && *next == '0')
  220. {
  221. ++next;
  222. --leading_zero_powers;
  223. }
  224. if (next == last)
  225. {
  226. significand = 0;
  227. exponent = 0;
  228. return {last, std::errc()};
  229. }
  230. }
  231. while (next != last && char_validation_func(*next) && i < significand_buffer_size)
  232. {
  233. significand_buffer[i] = *next;
  234. ++next;
  235. ++i;
  236. }
  237. }
  238. if (i == significand_buffer_size)
  239. {
  240. // We can not process any more significant figures into the significand so skip to the end
  241. // or the exponent part and capture the additional orders of magnitude for the exponent
  242. bool found_dot = false;
  243. while (next != last && (char_validation_func(*next) || *next == '.'))
  244. {
  245. ++next;
  246. if (!fractional && !found_dot)
  247. {
  248. ++extra_zeros;
  249. }
  250. if (next != last && *next == '.')
  251. {
  252. found_dot = true;
  253. }
  254. }
  255. }
  256. if (next == last || is_delimiter(*next, fmt))
  257. {
  258. if (fmt == chars_format::scientific)
  259. {
  260. return {first, std::errc::invalid_argument};
  261. }
  262. if (dot_position != 0 || fractional)
  263. {
  264. exponent = static_cast<Integer>(dot_position) - static_cast<Integer>(i) + extra_zeros + leading_zero_powers;
  265. }
  266. else
  267. {
  268. exponent = extra_zeros + leading_zero_powers;
  269. }
  270. std::size_t offset = i;
  271. from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
  272. switch (r.ec)
  273. {
  274. case std::errc::invalid_argument:
  275. return {first, std::errc::invalid_argument};
  276. case std::errc::result_out_of_range:
  277. return {next, std::errc::result_out_of_range};
  278. default:
  279. return {next, std::errc()};
  280. }
  281. }
  282. else if (*next == exp_char || *next == capital_exp_char)
  283. {
  284. // Would be a number without a significand e.g. e+03
  285. if (next == first)
  286. {
  287. return {next, std::errc::invalid_argument};
  288. }
  289. ++next;
  290. if (fmt == chars_format::fixed)
  291. {
  292. return {first, std::errc::invalid_argument};
  293. }
  294. std::size_t offset = i;
  295. bool round = false;
  296. // If more digits are present than representable in the significand of the target type
  297. // we set the maximum
  298. if (offset > significand_buffer_size)
  299. {
  300. offset = significand_buffer_size - 1;
  301. i = significand_buffer_size;
  302. if (significand_buffer[offset] == '5' ||
  303. significand_buffer[offset] == '6' ||
  304. significand_buffer[offset] == '7' ||
  305. significand_buffer[offset] == '8' ||
  306. significand_buffer[offset] == '9')
  307. {
  308. round = true;
  309. }
  310. }
  311. // If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,
  312. // but it is a valid value. We need to continue parsing to get the correct value of ptr even
  313. // though we know we could bail now.
  314. //
  315. // See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29
  316. if (offset != 0)
  317. {
  318. from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
  319. switch (r.ec)
  320. {
  321. case std::errc::invalid_argument:
  322. return {first, std::errc::invalid_argument};
  323. case std::errc::result_out_of_range:
  324. return {next, std::errc::result_out_of_range};
  325. default:
  326. break;
  327. }
  328. if (round)
  329. {
  330. significand = static_cast<Unsigned_Integer>(significand + 1u);
  331. }
  332. }
  333. else
  334. significand = 0;
  335. }
  336. else
  337. {
  338. return {first, std::errc::invalid_argument};
  339. }
  340. // Finally we get the exponent
  341. constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382
  342. char exponent_buffer[exponent_buffer_size] {};
  343. const auto significand_digits = i;
  344. i = 0;
  345. // Get the sign first
  346. if (next != last && *next == '-')
  347. {
  348. exponent_buffer[i] = *next;
  349. ++next;
  350. ++i;
  351. }
  352. else if (next != last && *next == '+')
  353. {
  354. ++next;
  355. }
  356. // Next strip any leading zeros
  357. while (next != last && *next == '0')
  358. {
  359. ++next;
  360. }
  361. // Process the significant values
  362. while (next != last && is_integer_char(*next) && i < exponent_buffer_size)
  363. {
  364. exponent_buffer[i] = *next;
  365. ++next;
  366. ++i;
  367. }
  368. // If the exponent can't fit in the buffer the number is not representable
  369. if (next != last && i == exponent_buffer_size)
  370. {
  371. return {next, std::errc::result_out_of_range};
  372. }
  373. // If the exponent was e+00 or e-00
  374. if (i == 0 || (i == 1 && exponent_buffer[0] == '-'))
  375. {
  376. if (fractional)
  377. {
  378. exponent = static_cast<Integer>(dot_position - significand_digits);
  379. }
  380. else
  381. {
  382. exponent = extra_zeros;
  383. }
  384. return {next, std::errc()};
  385. }
  386. const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);
  387. exponent += leading_zero_powers;
  388. switch (r.ec)
  389. {
  390. case std::errc::invalid_argument:
  391. return {first, std::errc::invalid_argument};
  392. case std::errc::result_out_of_range:
  393. return {next, std::errc::result_out_of_range};
  394. default:
  395. if (fractional)
  396. {
  397. // Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer
  398. // so the exponent is off by the number of digits in the significand - 1
  399. if (fmt == chars_format::hex)
  400. {
  401. // In hex the number of digits parsed is possibly less than the number of digits in base10
  402. exponent -= num_digits(significand) - static_cast<Integer>(dot_position);
  403. }
  404. else
  405. {
  406. exponent -= static_cast<Integer>(significand_digits - dot_position);
  407. }
  408. }
  409. else
  410. {
  411. exponent += extra_zeros;
  412. }
  413. return {next, std::errc()};
  414. }
  415. }
  416. }}} // Namespaces
  417. #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
  418. # pragma GCC diagnostic pop
  419. #endif
  420. #endif // BOOST_CHARCONV_DETAIL_PARSER_HPP