| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379 |
- // Copyright 2023 Matt Borland
- // Distributed under the Boost Software License, Version 1.0.
- // https://www.boost.org/LICENSE_1_0.txt
- #ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
- #define BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
- #include <boost/json/detail/charconv/detail/config.hpp>
- #include <boost/json/detail/charconv/detail/from_chars_result.hpp>
- #include <boost/json/detail/charconv/detail/from_chars_integer_impl.hpp>
- #include <boost/json/detail/charconv/detail/integer_search_trees.hpp>
- #include <boost/json/detail/charconv/limits.hpp>
- #include <boost/json/detail/charconv/chars_format.hpp>
- #include <system_error>
- #include <type_traits>
- #include <limits>
- #include <cerrno>
- #include <cstdint>
- #include <cstring>
- #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
- # pragma GCC diagnostic push
- # pragma GCC diagnostic ignored "-Wmissing-field-initializers"
- #endif
- namespace boost { namespace json { namespace detail { namespace charconv { namespace detail {
// Reports whether c is one of the decimal digit characters '0' through '9'.
inline bool is_integer_char(char c) noexcept
{
    if (c < '0')
    {
        return false;
    }

    return c <= '9';
}
- inline bool is_hex_char(char c) noexcept
- {
- return is_integer_char(c) || (((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')));
- }
- inline bool is_delimiter(char c, chars_format fmt) noexcept
- {
- if (fmt != chars_format::hex)
- {
- return !is_integer_char(c) && c != 'e' && c != 'E';
- }
- return !is_hex_char(c) && c != 'p' && c != 'P';
- }
- template <typename Unsigned_Integer, typename Integer>
- inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept
- {
- if (first > last)
- {
- return {first, std::errc::invalid_argument};
- }
- auto next = first;
- bool all_zeros = true;
- // First extract the sign
- if (*next == '-')
- {
- sign = true;
- ++next;
- }
- else if (*next == '+')
- {
- return {next, std::errc::invalid_argument};
- }
- else
- {
- sign = false;
- }
- // Ignore leading zeros (e.g. 00005 or -002.3e+5)
- while (*next == '0' && next != last)
- {
- ++next;
- }
- // If the number is 0 we can abort now
- char exp_char;
- char capital_exp_char;
- if (fmt != chars_format::hex)
- {
- exp_char = 'e';
- capital_exp_char = 'E';
- }
- else
- {
- exp_char = 'p';
- capital_exp_char = 'P';
- }
- if (next == last || *next == exp_char || *next == -capital_exp_char)
- {
- significand = 0;
- exponent = 0;
- return {next, std::errc()};
- }
- // Next we get the significand
- constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10 - 1; // Base 10 or 16
- char significand_buffer[significand_buffer_size] {};
- std::size_t i = 0;
- std::size_t dot_position = 0;
- Integer extra_zeros = 0;
- Integer leading_zero_powers = 0;
- const auto char_validation_func = (fmt != charconv::chars_format::hex) ? is_integer_char : is_hex_char;
- const int base = (fmt != charconv::chars_format::hex) ? 10 : 16;
- while (char_validation_func(*next) && next != last && i < significand_buffer_size)
- {
- all_zeros = false;
- significand_buffer[i] = *next;
- ++next;
- ++i;
- }
- bool fractional = false;
- if (next == last)
- {
- // if fmt is chars_format::scientific the e is required
- if (fmt == chars_format::scientific)
- {
- return {first, std::errc::invalid_argument};
- }
- exponent = 0;
- std::size_t offset = i;
- from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
- switch (r.ec)
- {
- case std::errc::invalid_argument:
- return {first, std::errc::invalid_argument};
- case std::errc::result_out_of_range:
- return {next, std::errc::result_out_of_range};
- default:
- return {next, std::errc()};
- }
- }
- else if (*next == '.')
- {
- ++next;
- fractional = true;
- dot_position = i;
- // Process the fractional part if we have it
- //
- // if fmt is chars_format::scientific the e is required
- // if fmt is chars_format::fixed and not scientific the e is disallowed
- // if fmt is chars_format::general (which is scientific and fixed) the e is optional
- // If we have the value 0.00001 we can continue to chop zeros and adjust the exponent
- // so that we get the useful parts of the fraction
- if (all_zeros)
- {
- while (*next == '0' && next != last)
- {
- ++next;
- --leading_zero_powers;
- }
- if (next == last)
- {
- return {last, std::errc()};
- }
- }
- while (char_validation_func(*next) && next != last && i < significand_buffer_size)
- {
- significand_buffer[i] = *next;
- ++next;
- ++i;
- }
- }
- if (i == significand_buffer_size)
- {
- // We can not process any more significant figures into the significand so skip to the end
- // or the exponent part and capture the additional orders of magnitude for the exponent
- bool found_dot = false;
- while ((char_validation_func(*next) || *next == '.') && next != last)
- {
- ++next;
- if (!fractional && !found_dot)
- {
- ++extra_zeros;
- }
- if (*next == '.')
- {
- found_dot = true;
- }
- }
- }
- if (next == last || is_delimiter(*next, fmt))
- {
- if (fmt == chars_format::scientific)
- {
- return {first, std::errc::invalid_argument};
- }
- if (dot_position != 0 || fractional)
- {
- exponent = static_cast<Integer>(dot_position) - i + extra_zeros + leading_zero_powers;
- }
- else
- {
- exponent = extra_zeros + leading_zero_powers;
- }
- std::size_t offset = i;
- from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
- switch (r.ec)
- {
- case std::errc::invalid_argument:
- return {first, std::errc::invalid_argument};
- case std::errc::result_out_of_range:
- return {next, std::errc::result_out_of_range};
- default:
- return {next, std::errc()};
- }
- }
- else if (*next == exp_char || *next == capital_exp_char)
- {
- // Would be a number without a significand e.g. e+03
- if (next == first)
- {
- return {next, std::errc::invalid_argument};
- }
- ++next;
- if (fmt == chars_format::fixed)
- {
- return {first, std::errc::invalid_argument};
- }
- exponent = i - 1;
- std::size_t offset = i;
- bool round = false;
- // If more digits are present than representable in the significand of the target type
- // we set the maximum
- if (offset > significand_buffer_size)
- {
- offset = significand_buffer_size - 1;
- i = significand_buffer_size;
- if (significand_buffer[offset] == '5' ||
- significand_buffer[offset] == '6' ||
- significand_buffer[offset] == '7' ||
- significand_buffer[offset] == '8' ||
- significand_buffer[offset] == '9')
- {
- round = true;
- }
- }
- // If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,
- // but it is a valid value. We need to continue parsing to get the correct value of ptr even
- // though we know we could bail now.
- //
- // See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29
- if (offset != 0)
- {
- from_chars_result r = from_chars(significand_buffer, significand_buffer + offset, significand, base);
- switch (r.ec)
- {
- case std::errc::invalid_argument:
- return {first, std::errc::invalid_argument};
- case std::errc::result_out_of_range:
- return {next, std::errc::result_out_of_range};
- default:
- break;
- }
- if (round)
- {
- significand += 1;
- }
- }
- }
- else
- {
- return {first, std::errc::invalid_argument};
- }
- // Finally we get the exponent
- constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382
- char exponent_buffer[exponent_buffer_size] {};
- Integer significand_digits = i;
- i = 0;
- // Get the sign first
- if (*next == '-')
- {
- exponent_buffer[i] = *next;
- ++next;
- ++i;
- }
- else if (*next == '+')
- {
- ++next;
- }
- // Next strip any leading zeros
- while (*next == '0')
- {
- ++next;
- }
- // Process the significant values
- while (is_integer_char(*next) && next != last && i < exponent_buffer_size)
- {
- exponent_buffer[i] = *next;
- ++next;
- ++i;
- }
- // If the exponent can't fit in the buffer the number is not representable
- if (next != last && i == exponent_buffer_size)
- {
- return {next, std::errc::result_out_of_range};
- }
- // If the exponent was e+00 or e-00
- if (i == 0 || (i == 1 && exponent_buffer[0] == '-'))
- {
- if (fractional)
- {
- exponent = static_cast<Integer>(dot_position) - significand_digits;
- }
- else
- {
- exponent = extra_zeros;
- }
- return {next, std::errc()};
- }
- const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);
- exponent += leading_zero_powers;
- switch (r.ec)
- {
- case std::errc::invalid_argument:
- return {first, std::errc::invalid_argument};
- case std::errc::result_out_of_range:
- return {next, std::errc::result_out_of_range};
- default:
- if (fractional)
- {
- // Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer
- // so the exponent is off by the number of digits in the significand - 1
- if (fmt == chars_format::hex)
- {
- // In hex the number of digits parsed is possibly less than the number of digits in base10
- exponent -= num_digits(significand) - dot_position;
- }
- else
- {
- exponent -= significand_digits - dot_position;
- }
- }
- else
- {
- exponent += extra_zeros;
- }
- return {next, std::errc()};
- }
- }
- }}}}} // Namespaces
- #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
- # pragma GCC diagnostic pop
- #endif
- #endif // BOOST_JSON_DETAIL_CHARCONV_DETAIL_PARSER_HPP
|