stream_parser.hpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/json
  8. //
  9. #ifndef BOOST_JSON_STREAM_PARSER_HPP
  10. #define BOOST_JSON_STREAM_PARSER_HPP
  11. #include <boost/json/detail/config.hpp>
  12. #include <boost/json/basic_parser.hpp>
  13. #include <boost/json/parse_options.hpp>
  14. #include <boost/json/storage_ptr.hpp>
  15. #include <boost/json/value.hpp>
  16. #include <boost/json/detail/handler.hpp>
  17. #include <type_traits>
  18. #include <cstddef>
  19. namespace boost {
  20. namespace json {
  21. //----------------------------------------------------------
  22. /** A DOM parser for JSON text contained in multiple buffers.
  23. This class is used to parse a JSON text contained in a series of one or
  24. more character buffers, into a @ref value container. It implements a
  25. [_streaming algorithm_](https://en.wikipedia.org/wiki/Streaming_algorithm),
  26. allowing these parsing strategies:
  27. @li parse a JSON file a piece at a time;
  28. @li parse incoming JSON text as it arrives, one buffer at a time;
  29. @li parse with bounded resource consumption per cycle.
  30. @par Usage
  31. To use the parser first construct it, then optionally call @ref reset to
  32. specify a @ref storage_ptr to use for the resulting @ref value. Then call
  33. @ref write one or more times to parse a single, complete JSON text. Call
  34. @ref done to determine if the parse has completed. To indicate there are no
  35. more buffers, call @ref finish. If the parse is successful, call @ref
  36. release to take ownership of the value:
  37. @code
  38. stream_parser p; // construct a parser
  39. p.write( "[1,2" ); // parse some of a JSON text
  40. p.write( ",3,4]" ); // parse the rest of the JSON text
  41. assert( p.done() ); // we have a complete JSON text
  42. value jv = p.release(); // take ownership of the value
  43. @endcode
  44. @par Extra Data
  45. When the character buffer provided as input contains additional data that
  46. is not part of the complete JSON text, an error is returned. The @ref
  47. write_some function is an alternative which allows the parse to finish
  48. early, without consuming all the characters in the buffer. This allows
  49. parsing of a buffer containing multiple individual JSON texts or containing
  50. different protocol data:
  51. @code
  52. stream_parser p; // construct a parser
  53. std::size_t n; // number of characters used
  54. n = p.write_some( "[1,2" ); // parse some of a JSON text
  55. assert( n == 4 ); // all characters consumed
  56. n = p.write_some( ",3,4] null" ); // parse the remainder of the JSON text
  57. assert( n == 6 ); // only some characters consumed
  58. assert( p.done() ); // we have a complete JSON text
  59. value jv = p.release(); // take ownership of the value
  60. @endcode
  61. @par Temporary Storage
  62. The parser may dynamically allocate temporary storage as needed to
  63. accommodate the nesting level of the JSON text being parsed. Temporary
  64. storage is first obtained from an optional, caller-owned buffer specified
  65. upon construction. When that is exhausted, the next allocation uses the
  66. @ref boost::container::pmr::memory_resource passed to the constructor; if
  67. no such argument is specified, the default memory resource is used.
  68. Temporary storage is freed only when the parser is destroyed. The
  69. performance of parsing multiple JSON texts may be improved by reusing the
  70. same parser instance.
  71. It is important to note that the @ref
  72. boost::container::pmr::memory_resource supplied upon construction is used
  73. for temporary storage only, and not for allocating the elements which make
  74. up the parsed value. That other memory resource is optionally supplied in
  75. each call to @ref reset.
  76. @par Duplicate Keys
  77. If there are object elements with duplicate keys; that is, if multiple
  78. elements in an object have keys that compare equal, only the last
  79. equivalent element will be inserted.
  80. @par Non-Standard JSON
  81. The @ref parse_options structure optionally provided upon construction is
  82. used to customize some parameters of the parser, including which
  83. non-standard JSON extensions should be allowed. A default-constructed parse
  84. options allows only standard JSON.
  85. @par Thread Safety
  86. Distinct instances may be accessed concurrently. Non-const member functions
  87. of a shared instance may not be called concurrently with any other member
  88. functions of that instance.
  89. @see @ref parse, @ref parser, @ref parse_options.
  90. */
  91. class stream_parser
  92. {
  93. basic_parser<detail::handler> p_;
  94. public:
  95. /** Destructor.
  96. All dynamically allocated memory, including
  97. any incomplete parsing results, is freed.
  98. @par Complexity
  99. Linear in the size of partial results
  100. @par Exception Safety
  101. No-throw guarantee.
  102. */
  103. ~stream_parser() = default;
  104. /** Constructors.
  105. Construct a new parser.
  106. The parser will only support standard JSON if overloads **(1)**
  107. or **(2)** are used. Otherwise the parser will support extensions
  108. specified by the parameter `opt`.
  109. The parsed value will use the \<\<default_memory_resource,default
  110. memory resource\>\> for storage. To use a different resource, call @ref
  111. reset after construction.
  112. The main difference between the overloads is in what the constructed
  113. parser will use for temporary storage:
  114. @li **(1)** the constructed parser uses the default memory resource for
  115. temporary storage.
  116. @li **(2)**, **(3)** the constructed parser uses the memory resource of
  117. `sp` for temporary storage.
  118. @li **(4)**, **(6)** the constructed parser first uses the caller-owned
  119. storage `[buffer, buffer + size)` for temporary storage, falling back
  120. to the memory resource of `sp` if needed.
  121. @li **(5)**, **(7)** the constructed parser first uses the caller-owned
  122. storage `[buffer, buffer + N)` for temporary storage, falling back to
  123. the memory resource of `sp` if needed.
  124. @note Ownership of `buffer` is not transferred. The caller is
  125. responsible for ensuring the lifetime of the storage pointed to by
  126. `buffer` extends until the parser is destroyed.
  127. Overload **(8)** is the copy constructor. The type is neither copyable
  128. nor movable, so the overload is deleted.
  129. @par Complexity
  130. Constant.
  131. @par Exception Safety
  132. No-throw guarantee.
  133. @{
  134. */
  135. stream_parser() noexcept
  136. : stream_parser({}, {})
  137. {
  138. }
  139. /** Overload
  140. @param sp The memory resource to use for temporary storage.
  141. */
  142. explicit
  143. stream_parser(storage_ptr sp) noexcept
  144. : stream_parser(std::move(sp), {})
  145. {
  146. }
  147. /** Overload
  148. @param opt The parsing options to use.
  149. @param sp
  150. */
  151. BOOST_JSON_DECL
  152. stream_parser(
  153. storage_ptr sp,
  154. parse_options const& opt) noexcept;
  155. /** Overload
  156. @param buffer A pointer to valid storage.
  157. @param size The number of valid bytes in `buffer`.
  158. @param sp
  159. @param opt
  160. */
  161. BOOST_JSON_DECL
  162. stream_parser(
  163. storage_ptr sp,
  164. parse_options const& opt,
  165. unsigned char* buffer,
  166. std::size_t size) noexcept;
  167. /** Overload
  168. @tparam N The number of valid bytes in `buffer`.
  169. @param sp
  170. @param opt
  171. @param buffer
  172. */
  173. template<std::size_t N>
  174. stream_parser(
  175. storage_ptr sp,
  176. parse_options const& opt,
  177. unsigned char(&buffer)[N]) noexcept
  178. : stream_parser(std::move(sp),
  179. opt, &buffer[0], N)
  180. {
  181. }
  182. #if defined(__cpp_lib_byte) || defined(BOOST_JSON_DOCS)
  183. /** Overload
  184. @param sp
  185. @param opt
  186. @param buffer
  187. @param size
  188. */
  189. stream_parser(
  190. storage_ptr sp,
  191. parse_options const& opt,
  192. std::byte* buffer,
  193. std::size_t size) noexcept
  194. : stream_parser(sp, opt, reinterpret_cast<
  195. unsigned char*>(buffer), size)
  196. {
  197. }
  198. /** Overload
  199. @tparam N
  200. @param sp
  201. @param opt
  202. @param buffer
  203. */
  204. template<std::size_t N>
  205. stream_parser(
  206. storage_ptr sp,
  207. parse_options const& opt,
  208. std::byte(&buffer)[N]) noexcept
  209. : stream_parser(std::move(sp),
  210. opt, &buffer[0], N)
  211. {
  212. }
  213. #endif
  214. #ifndef BOOST_JSON_DOCS
  215. // Safety net for accidental buffer overflows
  216. template<std::size_t N>
  217. stream_parser(
  218. storage_ptr sp,
  219. parse_options const& opt,
  220. unsigned char(&buffer)[N],
  221. std::size_t n) noexcept
  222. : stream_parser(std::move(sp),
  223. opt, &buffer[0], n)
  224. {
  225. // If this goes off, check your parameters
  226. // closely, chances are you passed an array
  227. // thinking it was a pointer.
  228. BOOST_ASSERT(n <= N);
  229. }
  230. #ifdef __cpp_lib_byte
  231. // Safety net for accidental buffer overflows
  232. template<std::size_t N>
  233. stream_parser(
  234. storage_ptr sp,
  235. parse_options const& opt,
  236. std::byte(&buffer)[N], std::size_t n) noexcept
  237. : stream_parser(std::move(sp),
  238. opt, &buffer[0], n)
  239. {
  240. // If this goes off, check your parameters
  241. // closely, chances are you passed an array
  242. // thinking it was a pointer.
  243. BOOST_ASSERT(n <= N);
  244. }
  245. #endif
  246. #endif
  247. /// Overload
  248. stream_parser(
  249. stream_parser const&) = delete;
  250. /// @}
  251. /** Assignment operator.
  252. This type is neither copyable nor movable, so copy assignment operator
  253. is deleted.
  254. */
  255. stream_parser& operator=(
  256. stream_parser const&) = delete;
  257. /** Reset the parser for a new JSON text.
  258. This function is used to reset the parser to prepare it for parsing
  259. a new complete JSON text. Any previous partial results are destroyed.
  260. The new value will use the memory resource of `sp`.
  261. @par Complexity
  262. Constant or linear in the size of any previous partial parsing results.
  263. @par Exception Safety
  264. No-throw guarantee.
  265. @param sp A pointer to the @ref boost::container::pmr::memory_resource.
  266. */
  267. BOOST_JSON_DECL
  268. void
  269. reset(storage_ptr sp = {}) noexcept;
  270. /** Check if a complete JSON text has been parsed.
  271. This function returns `true` when all of these conditions are met:
  272. @li A complete serialized JSON text has been presented to the parser,
  273. and
  274. @li No error has occurred since the parser was constructed, or since
  275. the last call to @ref reset,
  276. @par Complexity
  277. Constant.
  278. @par Exception Safety
  279. No-throw guarantee.
  280. */
  281. bool
  282. done() const noexcept
  283. {
  284. return p_.done();
  285. }
  286. /** Parse a buffer containing all or part of a complete JSON text.
  287. This function parses JSON text contained in the specified character
  288. buffer. If parsing completes, any additional characters past the end of
  289. the complete JSON text are ignored. The function returns the actual
  290. number of characters parsed, which may be less than the size of the
  291. input. This allows parsing of a buffer containing multiple individual
  292. JSON texts or containing different protocol data.
  293. Overloads **(1)**, **(2)**, **(4)**, and **(5)** report errors by
  294. setting `ec`. Overloads **(3)** and **(6)** report errors by throwing
  295. exceptions. Upon error or exception, subsequent calls will fail until
  296. @ref reset is called to parse a new JSON text.
  297. @note To indicate there are no more character buffers, such as when
  298. @ref done returns `false` after writing, call @ref finish.
  299. @par Example
  300. @code
  301. stream_parser p; // construct a parser
  302. std::size_t n; // number of characters used
  303. n = p.write_some( "[1,2" ); // parse the first part of the JSON text
  304. assert( n == 4 ); // all characters consumed
  305. n = p.write_some( "3,4] null" ); // parse the rest of the JSON text
  306. assert( n == 5 ); // only some characters consumed
  307. value jv = p.release(); // take ownership of the value
  308. @endcode
  309. @par Complexity
  310. @li **(1)**--**(3)** linear in `size`.
  311. @li **(4)**--**(6)** linear in `s.size()`.
  312. @par Exception Safety
  313. Basic guarantee. Calls to `memory_resource::allocate` may throw.
  314. @return The number of characters consumed from the buffer.
  315. @param data A pointer to a buffer of `size` characters to parse.
  316. @param size The number of characters pointed to by `data`.
  317. @param ec Set to the error, if any occurred.
  318. @{
  319. */
  320. BOOST_JSON_DECL
  321. std::size_t
  322. write_some(
  323. char const* data,
  324. std::size_t size,
  325. system::error_code& ec);
  326. BOOST_JSON_DECL
  327. std::size_t
  328. write_some(
  329. char const* data,
  330. std::size_t size,
  331. std::error_code& ec);
  332. /** Overload
  333. @param data
  334. @param size
  335. @throw boost::system::system_error Thrown on error.
  336. */
  337. BOOST_JSON_DECL
  338. std::size_t
  339. write_some(
  340. char const* data,
  341. std::size_t size);
  342. /** Overload
  343. @param s The character string to parse.
  344. @param ec
  345. */
  346. std::size_t
  347. write_some(
  348. string_view s,
  349. system::error_code& ec)
  350. {
  351. return write_some(
  352. s.data(), s.size(), ec);
  353. }
  354. /** Overload
  355. @param s
  356. @param ec
  357. */
  358. std::size_t
  359. write_some(
  360. string_view s,
  361. std::error_code& ec)
  362. {
  363. return write_some(
  364. s.data(), s.size(), ec);
  365. }
  366. /** Overload
  367. @param s
  368. */
  369. std::size_t
  370. write_some(
  371. string_view s)
  372. {
  373. return write_some(
  374. s.data(), s.size());
  375. }
  376. /// @}
  377. /** Parse a buffer containing all or part of a complete JSON text.
  378. This function parses all or part of a JSON text contained in the
  379. specified character buffer. The entire buffer must be consumed; if
  380. there are additional characters past the end of the complete JSON text,
  381. the parse fails and an error is returned.
  382. Overloads **(1)**, **(2)**, **(4)**, and **(5)** report errors by
  383. setting `ec`. Overloads **(3)** and **(6)** report errors by throwing
  384. exceptions. Upon error or exception, subsequent calls will fail until
  385. @ref reset is called to parse a new JSON text.
  386. @note To indicate there are no more character buffers, such as when
  387. @ref done returns `false` after writing, call @ref finish.
  388. @par Example
  389. @code
  390. stream_parser p; // construct a parser
  391. std::size_t n; // number of characters used
  392. n = p.write( "[1,2" ); // parse some of the JSON text
  393. assert( n == 4 ); // all characters consumed
  394. n = p.write( "3,4]" ); // parse the rest of the JSON text
  395. assert( n == 4 ); // all characters consumed
  396. value jv = p.release(); // take ownership of the value
  397. @endcode
  398. @par Complexity
  399. @li **(1)**--**(3)** linear in `size`.
  400. @li **(4)**--**(6)** linear in `s.size()`.
  401. @par Exception Safety
  402. Basic guarantee. Calls to `memory_resource::allocate` may throw.
  403. @return The number of characters consumed from the buffer.
  404. @param data A pointer to a buffer of `size` characters to parse.
  405. @param size The number of characters pointed to by `data`.
  406. @param ec Set to the error, if any occurred.
  407. @{
  408. */
  409. BOOST_JSON_DECL
  410. std::size_t
  411. write(
  412. char const* data,
  413. std::size_t size,
  414. system::error_code& ec);
  415. BOOST_JSON_DECL
  416. std::size_t
  417. write(
  418. char const* data,
  419. std::size_t size,
  420. std::error_code& ec);
  421. /** Overload
  422. @param data
  423. @param size
  424. @throw boost::system::system_error Thrown on error.
  425. */
  426. BOOST_JSON_DECL
  427. std::size_t
  428. write(
  429. char const* data,
  430. std::size_t size);
  431. /** Overload
  432. @param s The character string to parse.
  433. @param ec
  434. */
  435. std::size_t
  436. write(
  437. string_view s,
  438. system::error_code& ec)
  439. {
  440. return write(
  441. s.data(), s.size(), ec);
  442. }
  443. /** Overload
  444. @param s
  445. @param ec
  446. */
  447. std::size_t
  448. write(
  449. string_view s,
  450. std::error_code& ec)
  451. {
  452. return write(
  453. s.data(), s.size(), ec);
  454. }
  455. /** Overload
  456. @param s
  457. */
  458. std::size_t
  459. write(
  460. string_view s)
  461. {
  462. return write(
  463. s.data(), s.size());
  464. }
  465. /// @}
  466. /** Indicate the end of JSON input.
  467. This function is used to indicate that there are no more character
  468. buffers in the current JSON text being parsed. If the resulting JSON
  469. text is incomplete, **(1)** and **(2)** assign the relevant
  470. `error_code` to `ec`, while **(3)** throws an exception.
  471. Upon error or exception, subsequent calls will fail until @ref reset is
  472. called to parse a new JSON text.
  473. @par Example
  474. In the code below, @ref finish is called to
  475. indicate there are no more digits in the
  476. resulting number:
  477. @code
  478. stream_parser p; // construct a parser
  479. p.write( "3." ); // write the first part of the number
  480. p.write( "14" ); // write the second part of the number
  481. assert( ! p.done() ); // there could be more digits
  482. p.finish(); // indicate the end of the JSON input
  483. assert( p.done() ); // now we are finished
  484. value jv = p.release(); // take ownership of the value
  485. @endcode
  486. @par Complexity
  487. Constant.
  488. @par Exception Safety
  489. Basic guarantee. Calls to `memory_resource::allocate` may throw.
  490. @param ec Set to the error, if any occurred.
  491. @{
  492. */
  493. BOOST_JSON_DECL
  494. void
  495. finish(system::error_code& ec);
  496. BOOST_JSON_DECL
  497. void
  498. finish(std::error_code& ec);
  499. /** Overload
  500. @throw boost::system::system_error Parsing error.
  501. */
  502. BOOST_JSON_DECL
  503. void
  504. finish();
  505. /// @}
  506. /** Return the parsed JSON as a @ref value.
  507. This returns the parsed value, or throws an exception if the parsing is
  508. incomplete or failed. If `! this->done()`, calls @ref finish() first.
  509. It is necessary to call @ref reset after calling this function in order
  510. to parse another JSON text.
  511. @par Complexity
  512. Constant.
  513. @return The parsed value. Ownership of this value is transferred to the
  514. caller.
  515. @throw boost::system::system_error A complete JSON text hasn't been
  516. parsed, or parsing failed.
  517. */
  518. BOOST_JSON_DECL
  519. value
  520. release();
  521. };
  522. } // namespace json
  523. } // namespace boost
  524. #endif