basic_parser.hpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_HPP
  11. #define BOOST_JSON_BASIC_PARSER_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/detail/except.hpp>
  14. #include <boost/json/error.hpp>
  15. #include <boost/json/kind.hpp>
  16. #include <boost/json/parse_options.hpp>
  17. #include <boost/json/detail/stack.hpp>
  18. #include <boost/json/detail/stream.hpp>
  19. #include <boost/json/detail/utf8.hpp>
  20. #include <boost/json/detail/sbo_buffer.hpp>
  21. namespace boost {
  22. namespace json {
  23. /** An incremental SAX parser for serialized JSON.
  24. This implements a SAX-style parser, invoking a caller-supplied handler with
  25. each parsing event. To use, first declare a variable of type
  26. `basic_parser<T>` where `T` meets the handler requirements specified below.
  27. Then call @ref write_some one or more times with the input, setting
  28. `more = false` on the final buffer. The parsing events are realized through
  29. member function calls on the handler, which exists as a data member of the
  30. parser.
  31. The parser may dynamically allocate intermediate storage as needed to
  32. accommodate the nesting level of the input JSON. On subsequent invocations,
  33. the parser can cheaply re-use this memory, improving performance. This
  34. storage is freed when the parser is destroyed.
  35. @par Usage
  36. To get the declaration and function definitions for this class it is
  37. necessary to include this file instead:
  38. @code
  39. #include <boost/json/basic_parser_impl.hpp>
  40. @endcode
  41. Users who wish to parse JSON into the DOM container @ref value will not use
  42. this class directly; instead they will create an instance of @ref parser or
  43. @ref stream_parser and use that instead. Alternatively, they may call the
  44. function @ref parse. This class is designed for users who wish to perform
  45. custom actions instead of building a @ref value. For example, to produce a
  46. DOM from an external library.
  47. @note
  48. By default, only conforming JSON using UTF-8 encoding is accepted. However,
  49. select non-compliant syntax can be allowed by construction using a
  50. @ref parse_options set to desired values.
  51. @par Handler
  52. The handler provided must be implemented as an object of class type which
  53. defines each of the required event member functions below. The event
  54. functions return a `bool` where `true` indicates success, and `false`
  55. indicates failure. If the member function returns `false`, it must set the
  56. error code to a suitable value. This error code will be returned by the
  57. write function to the caller.
  58. Handlers are required to declare the maximum limits on various elements. If
  59. these limits are exceeded during parsing, then parsing fails with an error.
  60. The following declaration meets the parser's handler requirements:
  61. @code
  62. struct handler
  63. {
  64. /// The maximum number of elements allowed in an array
  65. static constexpr std::size_t max_array_size = -1;
  66. /// The maximum number of elements allowed in an object
  67. static constexpr std::size_t max_object_size = -1;
  68. /// The maximum number of characters allowed in a string
  69. static constexpr std::size_t max_string_size = -1;
  70. /// The maximum number of characters allowed in a key
  71. static constexpr std::size_t max_key_size = -1;
  72. /// Called once when the JSON parsing begins.
  73. ///
  74. /// @return `true` on success.
  75. /// @param ec Set to the error, if any occurred.
  76. ///
  77. bool on_document_begin( error_code& ec );
  78. /// Called when the JSON parsing is done.
  79. ///
  80. /// @return `true` on success.
  81. /// @param ec Set to the error, if any occurred.
  82. ///
  83. bool on_document_end( error_code& ec );
  84. /// Called when the beginning of an array is encountered.
  85. ///
  86. /// @return `true` on success.
  87. /// @param ec Set to the error, if any occurred.
  88. ///
  89. bool on_array_begin( error_code& ec );
  90. /// Called when the end of the current array is encountered.
  91. ///
  92. /// @return `true` on success.
  93. /// @param n The number of elements in the array.
  94. /// @param ec Set to the error, if any occurred.
  95. ///
  96. bool on_array_end( std::size_t n, error_code& ec );
  97. /// Called when the beginning of an object is encountered.
  98. ///
  99. /// @return `true` on success.
  100. /// @param ec Set to the error, if any occurred.
  101. ///
  102. bool on_object_begin( error_code& ec );
  103. /// Called when the end of the current object is encountered.
  104. ///
  105. /// @return `true` on success.
  106. /// @param n The number of elements in the object.
  107. /// @param ec Set to the error, if any occurred.
  108. ///
  109. bool on_object_end( std::size_t n, error_code& ec );
  110. /// Called with characters corresponding to part of the current string.
  111. ///
  112. /// @return `true` on success.
  113. /// @param s The partial characters
  114. /// @param n The total size of the string thus far
  115. /// @param ec Set to the error, if any occurred.
  116. ///
  117. bool on_string_part( string_view s, std::size_t n, error_code& ec );
  118. /// Called with the last characters corresponding to the current string.
  119. ///
  120. /// @return `true` on success.
  121. /// @param s The remaining characters
  122. /// @param n The total size of the string
  123. /// @param ec Set to the error, if any occurred.
  124. ///
  125. bool on_string( string_view s, std::size_t n, error_code& ec );
  126. /// Called with characters corresponding to part of the current key.
  127. ///
  128. /// @return `true` on success.
  129. /// @param s The partial characters
  130. /// @param n The total size of the key thus far
  131. /// @param ec Set to the error, if any occurred.
  132. ///
  133. bool on_key_part( string_view s, std::size_t n, error_code& ec );
  134. /// Called with the last characters corresponding to the current key.
  135. ///
  136. /// @return `true` on success.
  137. /// @param s The remaining characters
  138. /// @param n The total size of the key
  139. /// @param ec Set to the error, if any occurred.
  140. ///
  141. bool on_key( string_view s, std::size_t n, error_code& ec );
  142. /// Called with the characters corresponding to part of the current number.
  143. ///
  144. /// @return `true` on success.
  145. /// @param s The partial characters
  146. /// @param ec Set to the error, if any occurred.
  147. ///
  148. bool on_number_part( string_view s, error_code& ec );
  149. /// Called when a signed integer is parsed.
  150. ///
  151. /// @return `true` on success.
  152. /// @param i The value
  153. /// @param s The remaining characters
  154. /// @param ec Set to the error, if any occurred.
  155. ///
  156. bool on_int64( int64_t i, string_view s, error_code& ec );
  157. /// Called when an unsigned integer is parsed.
  158. ///
  159. /// @return `true` on success.
  160. /// @param u The value
  161. /// @param s The remaining characters
  162. /// @param ec Set to the error, if any occurred.
  163. ///
  164. bool on_uint64( uint64_t u, string_view s, error_code& ec );
  165. /// Called when a double is parsed.
  166. ///
  167. /// @return `true` on success.
  168. /// @param d The value
  169. /// @param s The remaining characters
  170. /// @param ec Set to the error, if any occurred.
  171. ///
  172. bool on_double( double d, string_view s, error_code& ec );
  173. /// Called when a boolean is parsed.
  174. ///
  175. /// @return `true` on success.
  176. /// @param b The value
  178. /// @param ec Set to the error, if any occurred.
  179. ///
  180. bool on_bool( bool b, error_code& ec );
  181. /// Called when a null is parsed.
  182. ///
  183. /// @return `true` on success.
  184. /// @param ec Set to the error, if any occurred.
  185. ///
  186. bool on_null( error_code& ec );
  187. /// Called with characters corresponding to part of the current comment.
  188. ///
  189. /// @return `true` on success.
  190. /// @param s The partial characters.
  191. /// @param ec Set to the error, if any occurred.
  192. ///
  193. bool on_comment_part( string_view s, error_code& ec );
  194. /// Called with the last characters corresponding to the current comment.
  195. ///
  196. /// @return `true` on success.
  197. /// @param s The remaining characters
  198. /// @param ec Set to the error, if any occurred.
  199. ///
  200. bool on_comment( string_view s, error_code& ec );
  201. };
  202. @endcode
  203. @see
  204. @ref parse,
  205. @ref stream_parser,
  206. \<\<examples_validate, validating parser example\>\>.
  207. */
  208. template<class Handler>
  209. class basic_parser
  210. {
       // Everything up to the `public:` label below is private
       // implementation detail; only declarations appear in this header.

       // Internal parser states, stored in a `char`. Values of this type are
       // what the suspend(), maybe_suspend() and suspend_or_fail() helpers
       // declared below accept.
       // NOTE(review): the enumerator prefixes presumably stand for document,
       // comment, literal, string, surrogate, object, array, number, exponent
       // and value — confirm against the implementation header.
  211. enum class state : char
  212. {
  213. doc1, doc3,
  214. com1, com2, com3, com4,
  215. lit1,
  216. str1, str2, str3, str4,
  217. str5, str6, str7, str8,
  218. sur1, sur2, sur3,
  219. sur4, sur5, sur6,
  220. obj1, obj2, obj3, obj4,
  221. obj5, obj6, obj7, obj8,
  222. obj9, obj10, obj11,
  223. arr1, arr2, arr3,
  224. arr4, arr5, arr6,
  225. num1, num2, num3, num4,
  226. num5, num6, num7, num8,
  227. exp1, exp2, exp3,
  228. val1, val2, val3
  229. };
       // Aggregate used for the `num_` member and taken by the
       // `const number&` overloads of suspend() / maybe_suspend().
       // NOTE(review): field meanings are inferred from their names only
       // (mantissa, bias, exponent, has-fraction flag, is-negative flag) —
       // confirm against parse_number in the implementation header.
  230. struct number
  231. {
  232. uint64_t mant;
  233. int bias;
  234. int exp;
  235. bool frac;
  236. bool neg;
  237. };
       // Declared here only; the definition is not part of this header.
  238. template< bool StackEmpty_, char First_ >
  239. struct parse_number_helper;
       // The handler instance; returned by handler().
  240. // optimization: must come first
  241. Handler h_;
  242. number num_;
       // Returned by last_error().
  243. system::error_code ec_;
  244. detail::stack st_;
  245. detail::utf8_sequence seq_;
       // NOTE(review): u1_ / u2_ presumably hold the UTF-16 code units of a
       // \u escape / surrogate pair (cf. the sur* states) — confirm.
  246. unsigned u1_;
  247. unsigned u2_;
  248. bool more_; // false for final buffer
       // Returned by done().
  249. bool done_ = false; // true on complete parse
  250. bool clean_ = true; // write_some exited cleanly
  251. const char* end_;
       // The template argument evaluates to 34.
       // NOTE(review): presumably the inline capacity in bytes — confirm
       // against detail::sbo_buffer.
  252. detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_;
       // opt_ must stay declared before depth_: depth_'s default member
       // initializer reads opt_.max_depth, and members are initialized in
       // declaration order.
  253. parse_options opt_;
  254. // how many levels deeper the parser can go
  255. std::size_t depth_ = opt_.max_depth;
  256. unsigned char cur_lit_ = 0;
  257. unsigned char lit_offset_ = 0;
  258. inline void reserve();
  259. inline const char* sentinel();
  260. inline bool incomplete(
  261. const detail::const_stream_wrapper& cs);
       // The helpers in the region below are declared with both
       // BOOST_NOINLINE and `inline`; Intel compiler warning 2196 is
       // disabled around them and restored afterwards.
       // NOTE(review): 2196 is presumably the "routine is both inline and
       // noinline" diagnostic — confirm.
       // These private fail() overloads return `const char*`, unlike the
       // public fail(system::error_code), which returns void.
  262. #ifdef __INTEL_COMPILER
  263. #pragma warning push
  264. #pragma warning disable 2196
  265. #endif
  266. BOOST_NOINLINE
  267. inline
  268. const char*
  269. suspend_or_fail(state st);
  270. BOOST_NOINLINE
  271. inline
  272. const char*
  273. suspend_or_fail(
  274. state st,
  275. std::size_t n);
  276. BOOST_NOINLINE
  277. inline
  278. const char*
  279. fail(const char* p) noexcept;
  280. BOOST_NOINLINE
  281. inline
  282. const char*
  283. fail(
  284. const char* p,
  285. error ev,
  286. source_location const* loc) noexcept;
  287. BOOST_NOINLINE
  288. inline
  289. const char*
  290. maybe_suspend(
  291. const char* p,
  292. state st);
  293. BOOST_NOINLINE
  294. inline
  295. const char*
  296. maybe_suspend(
  297. const char* p,
  298. state st,
  299. std::size_t n);
  300. BOOST_NOINLINE
  301. inline
  302. const char*
  303. maybe_suspend(
  304. const char* p,
  305. state st,
  306. const number& num);
  307. BOOST_NOINLINE
  308. inline
  309. const char*
  310. suspend(
  311. const char* p,
  312. state st);
  313. BOOST_NOINLINE
  314. inline
  315. const char*
  316. suspend(
  317. const char* p,
  318. state st,
  319. const number& num);
  320. #ifdef __INTEL_COMPILER
  321. #pragma warning pop
  322. #endif
       // Parsing routines. Each takes a `const char*` position and returns a
       // `const char*`. Options written as std::integral_constant parameters
       // are selected at compile time through the matching template
       // parameter; the commented-out template parameters (Terminal_,
       // AllowTrailing_, AllowBadUTF8_) mark options that are passed as
       // ordinary runtime `bool` arguments instead.
  323. template<bool StackEmpty_/*, bool Terminal_*/>
  324. const char* parse_comment(const char* p,
  325. std::integral_constant<bool, StackEmpty_> stack_empty,
  326. /*std::integral_constant<bool, Terminal_>*/ bool terminal);
  327. template<bool StackEmpty_>
  328. const char* parse_document(const char* p,
  329. std::integral_constant<bool, StackEmpty_> stack_empty);
  330. template<bool StackEmpty_, bool AllowComments_/*,
  331. bool AllowTrailing_, bool AllowBadUTF8_*/>
  332. const char* parse_value(const char* p,
  333. std::integral_constant<bool, StackEmpty_> stack_empty,
  334. std::integral_constant<bool, AllowComments_> allow_comments,
  335. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  336. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  337. bool allow_bad_utf16);
       // Unlike parse_value, resume_value has no StackEmpty_ parameter.
  338. template<bool AllowComments_/*,
  339. bool AllowTrailing_, bool AllowBadUTF8_*/>
  340. const char* resume_value(const char* p,
  341. std::integral_constant<bool, AllowComments_> allow_comments,
  342. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  343. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  344. bool allow_bad_utf16);
  345. template<bool StackEmpty_, bool AllowComments_/*,
  346. bool AllowTrailing_, bool AllowBadUTF8_*/>
  347. const char* parse_object(const char* p,
  348. std::integral_constant<bool, StackEmpty_> stack_empty,
  349. std::integral_constant<bool, AllowComments_> allow_comments,
  350. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  351. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  352. bool allow_bad_utf16);
  353. template<bool StackEmpty_, bool AllowComments_/*,
  354. bool AllowTrailing_, bool AllowBadUTF8_*/>
  355. const char* parse_array(const char* p,
  356. std::integral_constant<bool, StackEmpty_> stack_empty,
  357. std::integral_constant<bool, AllowComments_> allow_comments,
  358. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  359. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  360. bool allow_bad_utf16);
  361. template<class Literal>
  362. const char* parse_literal(const char* p, Literal literal);
       // IsKey_ is a compile-time flag here, whereas parse_escaped below
       // receives `is_key` as a runtime bool.
  363. template<bool StackEmpty_, bool IsKey_>
  364. const char* parse_string(const char* p,
  365. std::integral_constant<bool, StackEmpty_> stack_empty,
  366. std::integral_constant<bool, IsKey_> is_key,
  367. bool allow_bad_utf8,
  368. bool allow_bad_utf16);
       // `total` is taken by non-const reference, so the callee can update
       // the caller's value.
  369. template<bool StackEmpty_>
  370. const char* parse_escaped(
  371. const char* p,
  372. std::size_t& total,
  373. std::integral_constant<bool, StackEmpty_> stack_empty,
  374. bool is_key,
  375. bool allow_bad_utf16);
  376. template<bool StackEmpty_, char First_, number_precision Numbers_>
  377. const char* parse_number(const char* p,
  378. std::integral_constant<bool, StackEmpty_> stack_empty,
  379. std::integral_constant<char, First_> first,
  380. std::integral_constant<number_precision, Numbers_> numbers);
       // Returns the number of nesting levels currently in use: the
       // configured maximum (opt_.max_depth) minus the remaining allowance
       // (depth_).
  381. // intentionally private
  382. std::size_t
  383. depth() const noexcept
  384. {
  385. return opt_.max_depth - depth_;
  386. }
  387. public:
  388. /** Destructor.
  389. All dynamically allocated internal memory is freed.
  390. @par Effects
  391. @code
  392. handler().~Handler()
  393. @endcode
  394. @par Complexity
  395. Same as `~Handler()`.
  396. @par Exception Safety
  397. Same as `~Handler()`.
  398. */
  399. ~basic_parser() = default;
  400. /** Constructors.
  401. Overload **(1)** constructs the parser with the specified options, with
  402. any additional arguments forwarded to the handler's constructor.
  403. `basic_parser` is not copyable or movable, so the copy constructor is
  404. deleted.
  405. @par Complexity
  406. Same as `Handler( std::forward< Args >( args )... )`.
  407. @par Exception Safety
  408. Same as `Handler( std::forward< Args >( args )... )`.
  409. @param opt Configuration settings for the parser. If this structure is
  410. default constructed, the parser will accept only standard JSON.
  411. @param args Optional additional arguments forwarded to the handler's
  412. constructor.
  413. @{
  414. */
  415. template<class... Args>
  416. explicit
  417. basic_parser(
  418. parse_options const& opt,
  419. Args&&... args);
  420. /// Overload
  421. basic_parser(
  422. basic_parser const&) = delete;
  423. /// @}
  424. /** Assignment.
  425. This type cannot be copied or moved. The copy assignment is deleted.
  426. */
  427. basic_parser& operator=(
  428. basic_parser const&) = delete;
  429. /** Return a reference to the handler.
  430. This function provides access to the constructed
  431. instance of the handler owned by the parser.
  432. @par Complexity
  433. Constant.
  434. @par Exception Safety
  435. No-throw guarantee.
  436. @{
  437. */
  438. Handler&
  439. handler() noexcept
  440. {
  441. return h_;
  442. }
  443. Handler const&
  444. handler() const noexcept
  445. {
  446. return h_;
  447. }
  448. /// @}
  449. /** Return the last error.
  450. This returns the last error code which
  451. was generated in the most recent call
  452. to @ref write_some.
  453. @par Complexity
  454. Constant.
  455. @par Exception Safety
  456. No-throw guarantee.
  457. */
  458. system::error_code
  459. last_error() const noexcept
  460. {
  461. return ec_;
  462. }
  463. /** Check if a complete JSON text has been parsed.
  464. This function returns `true` when all of these conditions are met:
  465. @li A complete serialized JSON text has been presented to the parser,
  466. and
  467. @li No error or exception has occurred since the parser was
  468. constructed, or since the last call to @ref reset.
  469. @par Complexity
  470. Constant.
  471. @par Exception Safety
  472. No-throw guarantee.
  473. */
  474. bool
  475. done() const noexcept
  476. {
  477. return done_;
  478. }
  479. /** Reset the state, to parse a new document.
  480. This function discards the current parsing
  481. state, to prepare for parsing a new document.
  482. Dynamically allocated temporary memory used
  483. by the implementation is not deallocated.
  484. @par Complexity
  485. Constant.
  486. @par Exception Safety
  487. No-throw guarantee.
  488. */
  489. void
  490. reset() noexcept;
  491. /** Indicate a parsing failure.
  492. This changes the state of the parser to indicate that the parse has
  493. failed. A parser implementation can use this to fail the parser if
  494. needed due to external inputs.
  495. @attention
  496. If `! ec.failed()`, an implementation-defined error code that indicates
  497. failure will be stored instead.
  498. @par Complexity
  499. Constant.
  500. @par Exception Safety
  501. No-throw guarantee.
  502. @param ec The error code to set.
  503. */
  504. void
  505. fail(system::error_code ec) noexcept;
  506. /** Parse some of the input characters as JSON, incrementally.
  507. This function parses the JSON text in the specified buffer, calling the
  508. handler to emit each SAX parsing event. The parse proceeds from the
  509. current state, which is at the beginning of a new JSON or in the middle
  510. of the current JSON if any characters were already parsed.
  511. The characters in the buffer are processed starting from the beginning,
  512. until one of the following conditions is met:
  513. @li All of the characters in the buffer have been parsed, or
  514. @li Some of the characters in the buffer have been parsed and the JSON
  515. is complete, or
  516. @li A parsing error occurs.
  517. The supplied buffer does not need to contain the entire JSON.
  518. Subsequent calls can provide more serialized data, allowing JSON to be
  519. processed incrementally. The end of the serialized JSON can be
  520. indicated by passing `more = false`.
  521. @par Complexity
  522. Linear in `size`.
  523. @par Exception Safety
  524. Basic guarantee. Calls to the handler may throw.
  525. Upon error or exception, subsequent calls will fail until @ref reset
  526. is called to parse a new JSON.
  527. @return The number of characters successfully
  528. parsed, which may be smaller than `size`.
  529. @param more `true` if there are possibly more buffers in the current
  530. JSON, otherwise `false`.
  531. @param data A pointer to a buffer of `size` characters to parse.
  532. @param size The number of characters pointed to by `data`.
  533. @param ec Set to the error, if any occurred.
  534. @{
  535. */
  536. std::size_t
  537. write_some(
  538. bool more,
  539. char const* data,
  540. std::size_t size,
  541. system::error_code& ec);
       // Same parameters, but the error is reported through a
       // std::error_code instead of a system::error_code.
  542. std::size_t
  543. write_some(
  544. bool more,
  545. char const* data,
  546. std::size_t size,
  547. std::error_code& ec);
  548. /// @}
  549. };
  550. } // namespace json
  551. } // namespace boost
  552. #endif