| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672 |
- //
- // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
- // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- //
- // Official repository: https://github.com/boostorg/json
- //
- #ifndef BOOST_JSON_BASIC_PARSER_HPP
- #define BOOST_JSON_BASIC_PARSER_HPP
- #include <boost/json/detail/config.hpp>
- #include <boost/json/detail/except.hpp>
- #include <boost/json/error.hpp>
- #include <boost/json/kind.hpp>
- #include <boost/json/parse_options.hpp>
- #include <boost/json/detail/stack.hpp>
- #include <boost/json/detail/stream.hpp>
- #include <boost/json/detail/utf8.hpp>
- #include <boost/json/detail/sbo_buffer.hpp>
- namespace boost {
- namespace json {
- /** An incremental SAX parser for serialized JSON.
- This implements a SAX-style parser, invoking a caller-supplied handler with
- each parsing event. To use, first declare a variable of type
- `basic_parser<T>` where `T` meets the handler requirements specified below.
- Then call @ref write_some one or more times with the input, setting
- `more = false` on the final buffer. The parsing events are realized through
- member function calls on the handler, which exists as a data member of the
- parser.
- The parser may dynamically allocate intermediate storage as needed to
- accommodate the nesting level of the input JSON. On subsequent invocations,
- the parser can cheaply re-use this memory, improving performance. This
- storage is freed when the parser is destroyed.
- @par Usage
- To get the declaration and function definitions for this class it is
- necessary to include this file instead:
- @code
- #include <boost/json/basic_parser_impl.hpp>
- @endcode
- Users who wish to parse JSON into the DOM container @ref value will not use
- this class directly; instead they will create an instance of @ref parser or
- @ref stream_parser and use that instead. Alternatively, they may call the
- function @ref parse. This class is designed for users who wish to perform
- custom actions instead of building a @ref value. For example, to produce a
- DOM from an external library.
- @note
- By default, only conforming JSON using UTF-8 encoding is accepted. However,
- select non-compliant syntax can be allowed by construction using a
- @ref parse_options set to desired values.
- @par Handler
- The handler provided must be implemented as an object of class type which
- defines each of the required event member functions below. The event
- functions return a `bool` where `true` indicates success, and `false`
- indicates failure. If the member function returns `false`, it must set the
- error code to a suitable value. This error code will be returned by the
- write function to the caller.
- Handlers are required to declare the maximum limits on various elements. If
- these limits are exceeded during parsing, then parsing fails with an error.
- The following declaration meets the parser's handler requirements:
- @code
- struct handler
- {
- /// The maximum number of elements allowed in an array
- static constexpr std::size_t max_array_size = -1;
- /// The maximum number of elements allowed in an object
- static constexpr std::size_t max_object_size = -1;
- /// The maximum number of characters allowed in a string
- static constexpr std::size_t max_string_size = -1;
- /// The maximum number of characters allowed in a key
- static constexpr std::size_t max_key_size = -1;
- /// Called once when the JSON parsing begins.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_document_begin( error_code& ec );
- /// Called when the JSON parsing is done.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_document_end( error_code& ec );
- /// Called when the beginning of an array is encountered.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_array_begin( error_code& ec );
- /// Called when the end of the current array is encountered.
- ///
- /// @return `true` on success.
- /// @param n The number of elements in the array.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_array_end( std::size_t n, error_code& ec );
- /// Called when the beginning of an object is encountered.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_object_begin( error_code& ec );
- /// Called when the end of the current object is encountered.
- ///
- /// @return `true` on success.
- /// @param n The number of elements in the object.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_object_end( std::size_t n, error_code& ec );
- /// Called with characters corresponding to part of the current string.
- ///
- /// @return `true` on success.
- /// @param s The partial characters
- /// @param n The total size of the string thus far
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_string_part( string_view s, std::size_t n, error_code& ec );
- /// Called with the last characters corresponding to the current string.
- ///
- /// @return `true` on success.
- /// @param s The remaining characters
- /// @param n The total size of the string
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_string( string_view s, std::size_t n, error_code& ec );
- /// Called with characters corresponding to part of the current key.
- ///
- /// @return `true` on success.
- /// @param s The partial characters
- /// @param n The total size of the key thus far
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_key_part( string_view s, std::size_t n, error_code& ec );
- /// Called with the last characters corresponding to the current key.
- ///
- /// @return `true` on success.
- /// @param s The remaining characters
- /// @param n The total size of the key
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_key( string_view s, std::size_t n, error_code& ec );
- /// Called with the characters corresponding to part of the current number.
- ///
- /// @return `true` on success.
- /// @param s The partial characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_number_part( string_view s, error_code& ec );
- /// Called when a signed integer is parsed.
- ///
- /// @return `true` on success.
- /// @param i The value
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_int64( int64_t i, string_view s, error_code& ec );
- /// Called when an unsigned integer is parsed.
- ///
- /// @return `true` on success.
- /// @param u The value
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_uint64( uint64_t u, string_view s, error_code& ec );
- /// Called when a double is parsed.
- ///
- /// @return `true` on success.
- /// @param d The value
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_double( double d, string_view s, error_code& ec );
- /// Called when a boolean is parsed.
- ///
- /// @return `true` on success.
- /// @param b The value
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_bool( bool b, error_code& ec );
- /// Called when a null is parsed.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_null( error_code& ec );
- /// Called with characters corresponding to part of the current comment.
- ///
- /// @return `true` on success.
- /// @param s The partial characters.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_comment_part( string_view s, error_code& ec );
- /// Called with the last characters corresponding to the current comment.
- ///
- /// @return `true` on success.
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_comment( string_view s, error_code& ec );
- };
- @endcode
- @see
- @ref parse,
- @ref stream_parser,
- \<\<examples_validate, validating parser example\>\>.
- */
- template<class Handler>
- class basic_parser
- {
- enum class state : char // one-byte tag type; accepted by the suspend*/maybe_suspend helpers declared below
- {
- doc1, doc3, // NOTE(review): prefixes appear to name the construct being parsed (doc, com, lit, str, sur, obj, arr, num, exp, val) - confirm in the implementation
- com1, com2, com3, com4,
- lit1,
- str1, str2, str3, str4,
- str5, str6, str7, str8,
- sur1, sur2, sur3,
- sur4, sur5, sur6,
- obj1, obj2, obj3, obj4,
- obj5, obj6, obj7, obj8,
- obj9, obj10, obj11,
- arr1, arr2, arr3,
- arr4, arr5, arr6,
- num1, num2, num3, num4,
- num5, num6, num7, num8,
- exp1, exp2, exp3,
- val1, val2, val3
- };
- struct number // NOTE(review): field names suggest mantissa, bias, exponent, fraction flag and sign flag - confirm in the implementation
- {
- uint64_t mant;
- int bias;
- int exp;
- bool frac;
- bool neg;
- };
- template< bool StackEmpty_, char First_ >
- struct parse_number_helper; // declared only; no definition is visible in this header
- // optimization: must come first (keep h_ as the first data member)
- Handler h_; // the handler instance exposed through handler()
- number num_;
- system::error_code ec_; // the value returned by last_error()
- detail::stack st_;
- detail::utf8_sequence seq_;
- unsigned u1_;
- unsigned u2_;
- bool more_; // false for final buffer
- bool done_ = false; // true on complete parse
- bool clean_ = true; // write_some exited cleanly
- const char* end_;
- detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_;
- parse_options opt_; // must stay declared before depth_, whose default initializer reads opt_.max_depth
- // how many levels deeper the parser can go
- std::size_t depth_ = opt_.max_depth;
- unsigned char cur_lit_ = 0;
- unsigned char lit_offset_ = 0;
- inline void reserve();
- inline const char* sentinel();
- inline bool incomplete(
- const detail::const_stream_wrapper& cs);
- #ifdef __INTEL_COMPILER
- #pragma warning push
- #pragma warning disable 2196
- #endif
- BOOST_NOINLINE
- inline
- const char*
- suspend_or_fail(state st);
- BOOST_NOINLINE
- inline
- const char*
- suspend_or_fail(
- state st,
- std::size_t n);
- BOOST_NOINLINE
- inline
- const char*
- fail(const char* p) noexcept;
- BOOST_NOINLINE
- inline
- const char*
- fail(
- const char* p,
- error ev,
- source_location const* loc) noexcept;
- BOOST_NOINLINE
- inline
- const char*
- maybe_suspend(
- const char* p,
- state st);
- BOOST_NOINLINE
- inline
- const char*
- maybe_suspend(
- const char* p,
- state st,
- std::size_t n);
- BOOST_NOINLINE
- inline
- const char*
- maybe_suspend(
- const char* p,
- state st,
- const number& num);
- BOOST_NOINLINE
- inline
- const char*
- suspend(
- const char* p,
- state st);
- BOOST_NOINLINE
- inline
- const char*
- suspend(
- const char* p,
- state st,
- const number& num);
- #ifdef __INTEL_COMPILER
- #pragma warning pop
- #endif
- template<bool StackEmpty_/*, bool Terminal_*/>
- const char* parse_comment(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- /*std::integral_constant<bool, Terminal_>*/ bool terminal); // `terminal` is a run-time bool; the commented-out text shows a compile-time form that is not in use
- template<bool StackEmpty_>
- const char* parse_document(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty);
- template<bool StackEmpty_, bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* parse_value(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
- bool allow_bad_utf16);
- template<bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* resume_value(const char* p,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
- bool allow_bad_utf16);
- template<bool StackEmpty_, bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* parse_object(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
- bool allow_bad_utf16);
- template<bool StackEmpty_, bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* parse_array(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
- bool allow_bad_utf16);
- template<class Literal>
- const char* parse_literal(const char* p, Literal literal);
- template<bool StackEmpty_, bool IsKey_>
- const char* parse_string(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, IsKey_> is_key,
- bool allow_bad_utf8,
- bool allow_bad_utf16);
- template<bool StackEmpty_>
- const char* parse_escaped(
- const char* p,
- std::size_t& total,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- bool is_key,
- bool allow_bad_utf16);
- template<bool StackEmpty_, char First_, number_precision Numbers_>
- const char* parse_number(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<char, First_> first,
- std::integral_constant<number_precision, Numbers_> numbers);
- // intentionally private: returns opt_.max_depth minus depth_, i.e. the number of levels consumed so far
- std::size_t
- depth() const noexcept
- {
- return opt_.max_depth - depth_;
- }
- public:
- /** Destructor.
- All dynamically allocated internal memory is freed.
- @par Effects
- @code
- handler().~Handler()
- @endcode
- @par Complexity
- Same as `~Handler()`.
- @par Exception Safety
- Same as `~Handler()`.
- */
- ~basic_parser() = default;
- /** Constructors.
- Overload **(1)** constructs the parser with the specified options, with
- any additional arguments forwarded to the handler's constructor.
- `basic_parser` is not copyable or movable, so the copy constructor is
- deleted.
- @par Complexity
- Same as `Handler( std::forward< Args >( args )... )`.
- @par Exception Safety
- Same as `Handler( std::forward< Args >( args )... )`.
- @param opt Configuration settings for the parser. If this structure is
- default constructed, the parser will accept only standard JSON.
- @param args Optional additional arguments forwarded to the handler's
- constructor.
- @{
- */
- template<class... Args>
- explicit
- basic_parser(
- parse_options const& opt,
- Args&&... args);
- /// Overload **(2)**: the deleted copy constructor.
- basic_parser(
- basic_parser const&) = delete;
- /// @}
- /** Assignment.
- This type cannot be copied or moved. The copy assignment is deleted.
- */
- basic_parser& operator=(
- basic_parser const&) = delete;
- /** Return a reference to the handler.
- This function provides access to the constructed
- instance of the handler owned by the parser.
- A mutable and a const overload are provided.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- @{
- */
- Handler&
- handler() noexcept
- {
- return h_;
- }
- Handler const&
- handler() const noexcept
- {
- return h_;
- }
- /// @}
- /** Return the last error.
- This returns the last error code which
- was generated in the most recent call
- to @ref write_some.
- The code is returned by value.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- */
- system::error_code
- last_error() const noexcept
- {
- return ec_;
- }
- /** Check if a complete JSON text has been parsed.
- This function returns `true` when all of these conditions are met:
- @li A complete serialized JSON text has been presented to the parser,
- and
- @li No error or exception has occurred since the parser was
- constructed, or since the last call to @ref reset.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- */
- bool
- done() const noexcept
- {
- return done_;
- }
- /** Reset the state, to parse a new document.
- This function discards the current parsing
- state, to prepare for parsing a new document.
- Dynamically allocated temporary memory used
- by the implementation is not deallocated.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- */
- void
- reset() noexcept;
- /** Indicate a parsing failure.
- This changes the state of the parser to indicate that the parse has
- failed. A parser implementation can use this to fail the parser if
- needed due to external inputs.
- @attention
- If `! ec.failed()`, an implementation-defined error code that indicates
- failure will be stored instead.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- @param ec The error code to set.
- */
- void
- fail(system::error_code ec) noexcept;
- /** Parse some input characters as JSON, incrementally.
- This function parses the JSON text in the specified buffer, calling the
- handler to emit each SAX parsing event. The parse proceeds from the
- current state, which is at the beginning of a new JSON or in the middle
- of the current JSON if any characters were already parsed.
- The characters in the buffer are processed starting from the beginning,
- until one of the following conditions is met:
- @li All of the characters in the buffer have been parsed, or
- @li Some of the characters in the buffer have been parsed and the JSON
- is complete, or
- @li A parsing error occurs.
- The supplied buffer does not need to contain the entire JSON.
- Subsequent calls can provide more serialized data, allowing JSON to be
- processed incrementally. The end of the serialized JSON can be
- indicated by passing `more = false`.
- Two overloads are declared; one takes `ec` as a `system::error_code`
- and the other as a `std::error_code`.
- @par Complexity
- Linear in `size`.
- @par Exception Safety
- Basic guarantee. Calls to the handler may throw.
- Upon error or exception, subsequent calls will fail until @ref reset
- is called to parse a new JSON.
- @return The number of characters successfully
- parsed, which may be smaller than `size`.
- @param more `true` if there are possibly more buffers in the current
- JSON, otherwise `false`.
- @param data A pointer to a buffer of `size` characters to parse.
- @param size The number of characters pointed to by `data`.
- @param ec Set to the error, if any occurred.
- @{
- */
- std::size_t
- write_some(
- bool more,
- char const* data,
- std::size_t size,
- system::error_code& ec);
- std::size_t
- write_some(
- bool more,
- char const* data,
- std::size_t size,
- std::error_code& ec);
- /// @}
- };
- } // namespace json
- } // namespace boost
- #endif
|