basic_parser.hpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730
  1. //
  2. // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/beast
  8. //
  9. #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
  10. #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
  11. #include <boost/beast/core/detail/config.hpp>
  12. #include <boost/beast/core/error.hpp>
  13. #include <boost/beast/core/string.hpp>
  14. #include <boost/beast/http/field.hpp>
  15. #include <boost/beast/http/verb.hpp>
  16. #include <boost/beast/http/detail/basic_parser.hpp>
  17. #include <boost/asio/buffer.hpp>
  18. #include <boost/optional.hpp>
  19. #include <boost/assert.hpp>
  20. #include <cstdint>
  21. #include <limits>
  22. #include <memory>
  23. #include <type_traits>
  24. #include <utility>
  25. namespace boost {
  26. namespace beast {
  27. namespace http {
  28. /** A parser for decoding HTTP/1 wire format messages.
  29. This parser is designed to efficiently parse messages in the
  30. HTTP/1 wire format. It allocates no memory when input is
  31. presented as a single contiguous buffer, and uses minimal
  32. state. It will handle chunked encoding and it understands
  33. the semantics of the Connection, Content-Length, and Upgrade
  34. fields.
  35. The parser is optimized for the case where the input buffer
  36. sequence consists of a single contiguous buffer. The
  37. @ref beast::basic_flat_buffer class is provided, which guarantees
  38. that the input sequence of the stream buffer will be represented
  39. by exactly one contiguous buffer. To ensure the optimum performance
  40. of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
  41. such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
  42. Alternatively, the caller may use custom techniques to ensure that
  43. the structured portion of the HTTP message (header or chunk header)
  44. is contained in a linear buffer.
  45. The interface to the parser uses virtual member functions.
  46. To use this class, derive your type from @ref basic_parser. When
  47. bytes are presented, the implementation will make a series of zero
  48. or more calls to virtual functions, which the derived class must
  49. implement.
  50. Every virtual function must be provided by the derived class,
  51. or else a compilation error will be generated. The implementation
  52. will make sure that `ec` is clear before each virtual function
  53. is invoked. If a virtual function sets an error, it is propagated
  54. out of the parser to the caller.
  55. @tparam isRequest A `bool` indicating whether the parser will be
  56. presented with request or response message.
  57. @note If the parser encounters a field value with obs-fold
  58. longer than 4 kilobytes in length, an error is generated.
  59. */
  60. template<bool isRequest>
  61. class basic_parser
  62. : private detail::basic_parser_base
  63. {
  64. boost::optional<std::uint64_t>
  65. body_limit_ =
  66. boost::optional<std::uint64_t>(
  67. default_body_limit(is_request{})); // max payload body
  68. std::uint64_t len_ = 0; // size of chunk or body
  69. std::uint64_t len0_ = 0; // content length if known
  70. std::unique_ptr<char[]> buf_; // temp storage
  71. std::size_t buf_len_ = 0; // size of buf_
  72. std::uint32_t header_limit_ = 8192; // max header size
  73. unsigned short status_ = 0; // response status
  74. state state_ = state::nothing_yet; // initial state
  75. unsigned f_ = 0; // flags
  76. // limit on the size of the stack flat buffer
  77. static std::size_t constexpr max_stack_buffer = 8192;
  78. // Message will be complete after reading header
  79. static unsigned constexpr flagSkipBody = 1<< 0;
  80. // Consume input buffers across semantic boundaries
  81. static unsigned constexpr flagEager = 1<< 1;
  82. // The parser has read at least one byte
  83. static unsigned constexpr flagGotSome = 1<< 2;
  84. // Message semantics indicate a body is expected.
  85. // cleared if flagSkipBody set
  86. //
  87. static unsigned constexpr flagHasBody = 1<< 3;
  88. static unsigned constexpr flagHTTP11 = 1<< 4;
  89. static unsigned constexpr flagNeedEOF = 1<< 5;
  90. static unsigned constexpr flagExpectCRLF = 1<< 6;
  91. static unsigned constexpr flagConnectionClose = 1<< 7;
  92. static unsigned constexpr flagConnectionUpgrade = 1<< 8;
  93. static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
  94. static unsigned constexpr flagContentLength = 1<< 10;
  95. static unsigned constexpr flagChunked = 1<< 11;
  96. static unsigned constexpr flagUpgrade = 1<< 12;
  97. static constexpr
  98. std::uint64_t
  99. default_body_limit(std::true_type)
  100. {
  101. // limit for requests
  102. return 1 * 1024 * 1024; // 1MB
  103. }
  104. static constexpr
  105. std::uint64_t
  106. default_body_limit(std::false_type)
  107. {
  108. // limit for responses
  109. return 8 * 1024 * 1024; // 8MB
  110. }
  111. template<bool OtherIsRequest>
  112. friend class basic_parser;
  113. #ifndef BOOST_BEAST_DOXYGEN
  114. friend class basic_parser_test;
  115. #endif
  116. protected:
  117. /// Default constructor
  118. basic_parser() = default;
  119. /** Move constructor
  120. @note
  121. After the move, the only valid operation on the
  122. moved-from object is destruction.
  123. */
  124. basic_parser(basic_parser &&) = default;
  125. /// Move assignment
  126. basic_parser& operator=(basic_parser &&) = default;
  127. public:
  128. /// `true` if this parser parses requests, `false` for responses.
  129. using is_request =
  130. std::integral_constant<bool, isRequest>;
  131. /// Destructor
  132. virtual ~basic_parser() = default;
  133. /// Copy constructor
  134. basic_parser(basic_parser const&) = delete;
  135. /// Copy assignment
  136. basic_parser& operator=(basic_parser const&) = delete;
  137. /// Returns `true` if the parser has received at least one byte of input.
  138. bool
  139. got_some() const
  140. {
  141. return state_ != state::nothing_yet;
  142. }
  143. /** Returns `true` if the message is complete.
  144. The message is complete after the full header is prduced
  145. and one of the following is true:
  146. @li The skip body option was set.
  147. @li The semantics of the message indicate there is no body.
  148. @li The semantics of the message indicate a body is expected,
  149. and the entire body was parsed.
  150. */
  151. bool
  152. is_done() const
  153. {
  154. return state_ == state::complete;
  155. }
  156. /** Returns `true` if a the parser has produced the full header.
  157. */
  158. bool
  159. is_header_done() const
  160. {
  161. return state_ > state::fields;
  162. }
  163. /** Returns `true` if the message is an upgrade message.
  164. @note The return value is undefined unless
  165. @ref is_header_done would return `true`.
  166. */
  167. bool
  168. upgrade() const
  169. {
  170. return (f_ & flagConnectionUpgrade) != 0;
  171. }
  172. /** Returns `true` if the last value for Transfer-Encoding is "chunked".
  173. @note The return value is undefined unless
  174. @ref is_header_done would return `true`.
  175. */
  176. bool
  177. chunked() const
  178. {
  179. return (f_ & flagChunked) != 0;
  180. }
  181. /** Returns `true` if the message has keep-alive connection semantics.
  182. This function always returns `false` if @ref need_eof would return
  183. `false`.
  184. @note The return value is undefined unless
  185. @ref is_header_done would return `true`.
  186. */
  187. bool
  188. keep_alive() const;
  189. /** Returns the optional value of Content-Length if known.
  190. @note The return value is undefined unless
  191. @ref is_header_done would return `true`.
  192. */
  193. boost::optional<std::uint64_t>
  194. content_length() const;
  195. /** Returns the remaining content length if known
  196. If the message header specifies a Content-Length,
  197. the return value will be the number of bytes remaining
  198. in the payload body have not yet been parsed.
  199. @note The return value is undefined unless
  200. @ref is_header_done would return `true`.
  201. */
  202. boost::optional<std::uint64_t>
  203. content_length_remaining() const;
  204. /** Returns `true` if the message semantics require an end of file.
  205. Depending on the contents of the header, the parser may
  206. require and end of file notification to know where the end
  207. of the body lies. If this function returns `true` it will be
  208. necessary to call @ref put_eof when there will never be additional
  209. data from the input.
  210. */
  211. bool
  212. need_eof() const
  213. {
  214. return (f_ & flagNeedEOF) != 0;
  215. }
  216. /** Set the limit on the payload body.
  217. This function sets the maximum allowed size of the payload body,
  218. before any encodings except chunked have been removed. Depending
  219. on the message semantics, one of these cases will apply:
  220. @li The Content-Length is specified and exceeds the limit. In
  221. this case the result @ref error::body_limit is returned
  222. immediately after the header is parsed.
  223. @li The Content-Length is unspecified and the chunked encoding
  224. is not specified as the last encoding. In this case the end of
  225. message is determined by the end of file indicator on the
  226. associated stream or input source. If a sufficient number of
  227. body payload octets are presented to the parser to exceed the
  228. configured limit, the parse fails with the result
  229. @ref error::body_limit
  230. @li The Transfer-Encoding specifies the chunked encoding as the
  231. last encoding. In this case, when the number of payload body
  232. octets produced by removing the chunked encoding exceeds
  233. the configured limit, the parse fails with the result
  234. @ref error::body_limit.
  235. Setting the limit after any body octets have been parsed
  236. results in undefined behavior.
  237. The default limit is 1MB for requests and 8MB for responses.
  238. @param v An optional integral value representing the body limit.
  239. If this is equal to `boost::none`, then the body limit is disabled.
  240. */
  241. void
  242. body_limit(boost::optional<std::uint64_t> v)
  243. {
  244. body_limit_ = v;
  245. }
  246. /** Set a limit on the total size of the header.
  247. This function sets the maximum allowed size of the header
  248. including all field name, value, and delimiter characters
  249. and also including the CRLF sequences in the serialized
  250. input. If the end of the header is not found within the
  251. limit of the header size, the error @ref error::header_limit
  252. is returned by @ref put.
  253. Setting the limit after any header octets have been parsed
  254. results in undefined behavior.
  255. */
  256. void
  257. header_limit(std::uint32_t v)
  258. {
  259. header_limit_ = v;
  260. }
  261. /// Returns `true` if the eager parse option is set.
  262. bool
  263. eager() const
  264. {
  265. return (f_ & flagEager) != 0;
  266. }
  267. /** Set the eager parse option.
  268. Normally the parser returns after successfully parsing a structured
  269. element (header, chunk header, or chunk body) even if there are octets
  270. remaining in the input. This is necessary when attempting to parse the
  271. header first, or when the caller wants to inspect information which may
  272. be invalidated by subsequent parsing, such as a chunk extension. The
  273. `eager` option controls whether the parser keeps going after parsing
  274. structured element if there are octets remaining in the buffer and no
  275. error occurs. This option is automatically set or cleared during certain
  276. stream operations to improve performance with no change in functionality.
  277. The default setting is `false`.
  278. @param v `true` to set the eager parse option or `false` to disable it.
  279. */
  280. void
  281. eager(bool v)
  282. {
  283. if(v)
  284. f_ |= flagEager;
  285. else
  286. f_ &= ~flagEager;
  287. }
  288. /// Returns `true` if the skip parse option is set.
  289. bool
  290. skip() const
  291. {
  292. return (f_ & flagSkipBody) != 0;
  293. }
  294. /** Set the skip parse option.
  295. This option controls whether or not the parser expects to see an HTTP
  296. body, regardless of the presence or absence of certain fields such as
  297. Content-Length or a chunked Transfer-Encoding. Depending on the request,
  298. some responses do not carry a body. For example, a 200 response to a
  299. CONNECT request from a tunneling proxy, or a response to a HEAD request.
  300. In these cases, callers may use this function inform the parser that
  301. no body is expected. The parser will consider the message complete
  302. after the header has been received.
  303. @param v `true` to set the skip body option or `false` to disable it.
  304. @note This function must called before any bytes are processed.
  305. */
  306. void
  307. skip(bool v);
  308. /** Write a buffer sequence to the parser.
  309. This function attempts to incrementally parse the HTTP
  310. message data stored in the caller provided buffers. Upon
  311. success, a positive return value indicates that the parser
  312. made forward progress, consuming that number of
  313. bytes.
  314. In some cases there may be an insufficient number of octets
  315. in the input buffer in order to make forward progress. This
  316. is indicated by the code @ref error::need_more. When
  317. this happens, the caller should place additional bytes into
  318. the buffer sequence and call @ref put again.
  319. The error code @ref error::need_more is special. When this
  320. error is returned, a subsequent call to @ref put may succeed
  321. if the buffers have been updated. Otherwise, upon error
  322. the parser may not be restarted.
  323. @param buffers An object meeting the requirements of
  324. <em>ConstBufferSequence</em> that represents the next chunk of
  325. message data. If the length of this buffer sequence is
  326. one, the implementation will not allocate additional memory.
  327. The class @ref beast::basic_flat_buffer is provided as one way to
  328. meet this requirement
  329. @param ec Set to the error, if any occurred.
  330. @return The number of octets consumed in the buffer
  331. sequence. The caller should remove these octets even if the
  332. error is set.
  333. */
  334. template<class ConstBufferSequence>
  335. std::size_t
  336. put(ConstBufferSequence const& buffers, error_code& ec);
  337. #if ! BOOST_BEAST_DOXYGEN
  338. std::size_t
  339. put(net::const_buffer buffer,
  340. error_code& ec);
  341. #endif
  342. /** Inform the parser that the end of stream was reached.
  343. In certain cases, HTTP needs to know where the end of
  344. the stream is. For example, sometimes servers send
  345. responses without Content-Length and expect the client
  346. to consume input (for the body) until EOF. Callbacks
  347. and errors will still be processed as usual.
  348. This is typically called when a read from the
  349. underlying stream object sets the error code to
  350. `net::error::eof`.
  351. @note Only valid after parsing a complete header.
  352. @param ec Set to the error, if any occurred.
  353. */
  354. void
  355. put_eof(error_code& ec);
  356. protected:
  357. /** Called after receiving the request-line.
  358. This virtual function is invoked after receiving a request-line
  359. when parsing HTTP requests.
  360. It can only be called when `isRequest == true`.
  361. @param method The verb enumeration. If the method string is not
  362. one of the predefined strings, this value will be @ref verb::unknown.
  363. @param method_str The unmodified string representing the verb.
  364. @param target The request-target.
  365. @param version The HTTP-version. This will be 10 for HTTP/1.0,
  366. and 11 for HTTP/1.1.
  367. @param ec An output parameter which the function may set to indicate
  368. an error. The error will be clear before this function is invoked.
  369. */
  370. virtual
  371. void
  372. on_request_impl(
  373. verb method,
  374. string_view method_str,
  375. string_view target,
  376. int version,
  377. error_code& ec) = 0;
  378. /** Called after receiving the status-line.
  379. This virtual function is invoked after receiving a status-line
  380. when parsing HTTP responses.
  381. It can only be called when `isRequest == false`.
  382. @param code The numeric status code.
  383. @param reason The reason-phrase. Note that this value is
  384. now obsolete, and only provided for historical or diagnostic
  385. purposes.
  386. @param version The HTTP-version. This will be 10 for HTTP/1.0,
  387. and 11 for HTTP/1.1.
  388. @param ec An output parameter which the function may set to indicate
  389. an error. The error will be clear before this function is invoked.
  390. */
  391. virtual
  392. void
  393. on_response_impl(
  394. int code,
  395. string_view reason,
  396. int version,
  397. error_code& ec) = 0;
  398. /** Called once for each complete field in the HTTP header.
  399. This virtual function is invoked for each field that is received
  400. while parsing an HTTP message.
  401. @param name The known field enum value. If the name of the field
  402. is not recognized, this value will be @ref field::unknown.
  403. @param name_string The exact name of the field as received from
  404. the input, represented as a string.
  405. @param value A string holding the value of the field.
  406. @param ec An output parameter which the function may set to indicate
  407. an error. The error will be clear before this function is invoked.
  408. */
  409. virtual
  410. void
  411. on_field_impl(
  412. field name,
  413. string_view name_string,
  414. string_view value,
  415. error_code& ec) = 0;
  416. /** Called once for each complete field in the HTTP trailer header.
  417. This virtual function is invoked for each field that is received
  418. while parsing the trailer part of a chunked HTTP message.
  419. @param name The known field enum value. If the name of the field
  420. is not recognized, this value will be @ref field::unknown.
  421. @param name_string The exact name of the field as received from
  422. the input, represented as a string.
  423. @param value A string holding the value of the field.
  424. @param ec An output parameter which the function may set to indicate
  425. an error. The error will be clear before this function is invoked.
  426. */
  427. virtual
  428. void
  429. on_trailer_field_impl(
  430. field name,
  431. string_view name_string,
  432. string_view value,
  433. error_code& ec) = 0;
  434. /** Called once after the complete HTTP header is received.
  435. This virtual function is invoked once, after the complete HTTP
  436. header is received while parsing a message.
  437. @param ec An output parameter which the function may set to indicate
  438. an error. The error will be clear before this function is invoked.
  439. */
  440. virtual
  441. void
  442. on_header_impl(error_code& ec) = 0;
  443. /** Called once before the body is processed.
  444. This virtual function is invoked once, before the content body is
  445. processed (but after the complete header is received).
  446. @param content_length A value representing the content length in
  447. bytes if the length is known (this can include a zero length).
  448. Otherwise, the value will be `boost::none`.
  449. @param ec An output parameter which the function may set to indicate
  450. an error. The error will be clear before this function is invoked.
  451. */
  452. virtual
  453. void
  454. on_body_init_impl(
  455. boost::optional<std::uint64_t> const& content_length,
  456. error_code& ec) = 0;
  457. /** Called each time additional data is received representing the content body.
  458. This virtual function is invoked for each piece of the body which is
  459. received while parsing of a message. This function is only used when
  460. no chunked transfer encoding is present.
  461. @param body A string holding the additional body contents. This may
  462. contain nulls or unprintable characters.
  463. @param ec An output parameter which the function may set to indicate
  464. an error. The error will be clear before this function is invoked.
  465. @see on_chunk_body_impl
  466. */
  467. virtual
  468. std::size_t
  469. on_body_impl(
  470. string_view body,
  471. error_code& ec) = 0;
  472. /** Called each time a new chunk header of a chunk encoded body is received.
  473. This function is invoked each time a new chunk header is received.
  474. The function is only used when the chunked transfer encoding is present.
  475. @param size The size of this chunk, in bytes.
  476. @param extensions A string containing the entire chunk extensions.
  477. This may be empty, indicating no extensions are present.
  478. @param ec An output parameter which the function may set to indicate
  479. an error. The error will be clear before this function is invoked.
  480. */
  481. virtual
  482. void
  483. on_chunk_header_impl(
  484. std::uint64_t size,
  485. string_view extensions,
  486. error_code& ec) = 0;
  487. /** Called each time additional data is received representing part of a body chunk.
  488. This virtual function is invoked for each piece of the body which is
  489. received while parsing of a message. This function is only used when
  490. no chunked transfer encoding is present.
  491. @param remain The number of bytes remaining in this chunk. This includes
  492. the contents of passed `body`. If this value is zero, then this represents
  493. the final chunk.
  494. @param body A string holding the additional body contents. This may
  495. contain nulls or unprintable characters.
  496. @param ec An output parameter which the function may set to indicate
  497. an error. The error will be clear before this function is invoked.
  498. @return This function should return the number of bytes actually consumed
  499. from the `body` value. Any bytes that are not consumed on this call
  500. will be presented in a subsequent call.
  501. @see on_body_impl
  502. */
  503. virtual
  504. std::size_t
  505. on_chunk_body_impl(
  506. std::uint64_t remain,
  507. string_view body,
  508. error_code& ec) = 0;
  509. /** Called once when the complete message is received.
  510. This virtual function is invoked once, after successfully parsing
  511. a complete HTTP message.
  512. @param ec An output parameter which the function may set to indicate
  513. an error. The error will be clear before this function is invoked.
  514. */
  515. virtual
  516. void
  517. on_finish_impl(error_code& ec) = 0;
  518. private:
  519. boost::optional<std::uint64_t>
  520. content_length_unchecked() const;
  521. template<class ConstBufferSequence>
  522. std::size_t
  523. put_from_stack(
  524. std::size_t size,
  525. ConstBufferSequence const& buffers,
  526. error_code& ec);
  527. void
  528. inner_parse_start_line(
  529. char const*& p, char const* last,
  530. error_code& ec, std::true_type);
  531. void
  532. inner_parse_start_line(
  533. char const*& p, char const* last,
  534. error_code& ec, std::false_type);
  535. void
  536. parse_start_line(
  537. char const*& p, std::size_t n,
  538. error_code& ec);
  539. void
  540. inner_parse_fields(
  541. char const*& p, char const* last,
  542. error_code& ec);
  543. void
  544. parse_fields(
  545. char const*& p, std::size_t n,
  546. error_code& ec);
  547. void
  548. finish_header(
  549. error_code& ec, std::true_type);
  550. void
  551. finish_header(
  552. error_code& ec, std::false_type);
  553. void
  554. parse_body(char const*& p,
  555. std::size_t n, error_code& ec);
  556. void
  557. parse_body_to_eof(char const*& p,
  558. std::size_t n, error_code& ec);
  559. void
  560. parse_chunk_header(char const*& p,
  561. std::size_t n, error_code& ec);
  562. void
  563. parse_chunk_body(char const*& p,
  564. std::size_t n, error_code& ec);
  565. void
  566. do_field(field f,
  567. string_view value, error_code& ec);
  568. };
  569. } // http
  570. } // beast
  571. } // boost
  572. #include <boost/beast/http/impl/basic_parser.hpp>
  573. #ifdef BOOST_BEAST_HEADER_ONLY
  574. #include <boost/beast/http/impl/basic_parser.ipp>
  575. #endif
  576. #endif