basic_parser.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. //
  2. // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/beast
  8. //
  9. #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
  10. #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
  11. #include <boost/beast/core/detail/config.hpp>
  12. #include <boost/beast/core/error.hpp>
  13. #include <boost/beast/core/string.hpp>
  14. #include <boost/beast/http/field.hpp>
  15. #include <boost/beast/http/verb.hpp>
  16. #include <boost/beast/http/detail/basic_parser.hpp>
  17. #include <boost/asio/buffer.hpp>
  18. #include <boost/optional.hpp>
  19. #include <boost/assert.hpp>
  20. #include <limits>
  21. #include <memory>
  22. #include <type_traits>
  23. #include <utility>
  24. namespace boost {
  25. namespace beast {
  26. namespace http {
  27. /** A parser for decoding HTTP/1 wire format messages.
  28. This parser is designed to efficiently parse messages in the
  29. HTTP/1 wire format. It allocates no memory when input is
  30. presented as a single contiguous buffer, and uses minimal
  31. state. It will handle chunked encoding and it understands
  32. the semantics of the Connection, Content-Length, and Upgrade
  33. fields.
  34. The parser is optimized for the case where the input buffer
  35. sequence consists of a single contiguous buffer. The
  36. @ref flat_buffer class is provided, which guarantees
  37. that the input sequence of the stream buffer will be represented
  38. by exactly one contiguous buffer. To ensure the optimum performance
  39. of the parser, use @ref flat_buffer with HTTP algorithms
  40. such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
  41. Alternatively, the caller may use custom techniques to ensure that
  42. the structured portion of the HTTP message (header or chunk header)
  43. is contained in a linear buffer.
  44. The interface uses CRTP (Curiously Recurring Template Pattern).
  45. To use this class directly, derive from @ref basic_parser. When
  46. bytes are presented, the implementation will make a series of zero
  47. or more calls to derived class member functions (termed "callbacks"
  48. in this context) matching a specific signature.
  49. Every callback must be provided by the derived class, or else
  50. a compilation error will be generated. This exemplar shows
  51. the signature and description of the callbacks required in
  52. the derived class.
  53. For each callback, the function will ensure that `!ec` is `true`
  54. if there was no error or set to the appropriate error code if
  55. there was one. If an error is set, the value is propagated to
  56. the caller of the parser.
  57. @par Derived Class Requirements
  58. @code
  59. template<bool isRequest>
  60. class derived
  61. : public basic_parser<isRequest, derived<isRequest>>
  62. {
  63. private:
  64. // The friend declaration is needed,
  65. // otherwise the callbacks must be made public.
  66. friend class basic_parser<isRequest, derived>;
  67. /// Called after receiving the request-line (isRequest == true).
  68. void
  69. on_request_impl(
  70. verb method, // The method verb, verb::unknown if no match
  71. string_view method_str, // The method as a string
  72. string_view target, // The request-target
  73. int version, // The HTTP-version
  74. error_code& ec); // The error returned to the caller, if any
  75. /// Called after receiving the start-line (isRequest == false).
  76. void
  77. on_response_impl(
  78. int code, // The status-code
  79. string_view reason, // The obsolete reason-phrase
  80. int version, // The HTTP-version
  81. error_code& ec); // The error returned to the caller, if any
  82. /// Called after receiving a header field.
  83. void
  84. on_field_impl(
  85. field f, // The known-field enumeration constant
  86. string_view name, // The field name string.
  87. string_view value, // The field value
  88. error_code& ec); // The error returned to the caller, if any
  89. /// Called after the complete header is received.
  90. void
  91. on_header_impl(
  92. error_code& ec); // The error returned to the caller, if any
  93. /// Called just before processing the body, if a body exists.
  94. void
  95. on_body_init_impl(
  96. boost::optional<
  97. std::uint64_t> const&
  98. content_length, // Content length if known, else `boost::none`
  99. error_code& ec); // The error returned to the caller, if any
  100. /// Called for each piece of the body, if a body exists.
  101. //!
  102. //! This is used when there is no chunked transfer coding.
  103. //!
  104. //! The function returns the number of bytes consumed from the
  105. //! input buffer. Any input octets not consumed will be
  106. //! presented on subsequent calls.
  107. //!
  108. std::size_t
  109. on_body_impl(
  110. string_view s, // A portion of the body
  111. error_code& ec); // The error returned to the caller, if any
  112. /// Called for each chunk header.
  113. void
  114. on_chunk_header_impl(
  115. std::uint64_t size, // The size of the upcoming chunk,
  116. // or zero for the last chunk
  117. string_view extension, // The chunk extensions (may be empty)
  118. error_code& ec); // The error returned to the caller, if any
  119. /// Called to deliver the chunk body.
  120. //!
  121. //! This is used when there is a chunked transfer coding. The
  122. //! implementation will automatically remove the encoding before
  123. //! calling this function.
  124. //!
  125. //! The function returns the number of bytes consumed from the
  126. //! input buffer. Any input octets not consumed will be
  127. //! presented on subsequent calls.
  128. //!
  129. std::size_t
  130. on_chunk_body_impl(
  131. std::uint64_t remain, // The number of bytes remaining in the chunk,
  132. // including what is being passed here.
  133. // or zero for the last chunk
  134. string_view body, // The next piece of the chunk body
  135. error_code& ec); // The error returned to the caller, if any
  136. /// Called when the complete message is parsed.
  137. void
  138. on_finish_impl(error_code& ec);
  139. public:
  140. derived() = default;
  141. };
  142. @endcode
  143. @tparam isRequest A `bool` indicating whether the parser will be
  144. presented with request or response message.
  145. @tparam Derived The derived class type. This is part of the
  146. Curiously Recurring Template Pattern interface.
  147. @note If the parser encounters a field value with obs-fold
  148. longer than 4 kilobytes in length, an error is generated.
  149. */
  150. template<bool isRequest, class Derived>
  151. class basic_parser
  152. : private detail::basic_parser_base
  153. {
  154. template<bool OtherIsRequest, class OtherDerived>
  155. friend class basic_parser;
  156. // limit on the size of the stack flat buffer
  157. static std::size_t constexpr max_stack_buffer = 8192;
  158. // Message will be complete after reading header
  159. static unsigned constexpr flagSkipBody = 1<< 0;
  160. // Consume input buffers across semantic boundaries
  161. static unsigned constexpr flagEager = 1<< 1;
  162. // The parser has read at least one byte
  163. static unsigned constexpr flagGotSome = 1<< 2;
  164. // Message semantics indicate a body is expected.
  165. // cleared if flagSkipBody set
  166. //
  167. static unsigned constexpr flagHasBody = 1<< 3;
  168. static unsigned constexpr flagHTTP11 = 1<< 4;
  169. static unsigned constexpr flagNeedEOF = 1<< 5;
  170. static unsigned constexpr flagExpectCRLF = 1<< 6;
  171. static unsigned constexpr flagConnectionClose = 1<< 7;
  172. static unsigned constexpr flagConnectionUpgrade = 1<< 8;
  173. static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
  174. static unsigned constexpr flagContentLength = 1<< 10;
  175. static unsigned constexpr flagChunked = 1<< 11;
  176. static unsigned constexpr flagUpgrade = 1<< 12;
  177. static unsigned constexpr flagFinalChunk = 1<< 13;
  178. static constexpr
  179. std::uint64_t
  180. default_body_limit(std::true_type)
  181. {
  182. // limit for requests
  183. return 1 * 1024 * 1024; // 1MB
  184. }
  185. static constexpr
  186. std::uint64_t
  187. default_body_limit(std::false_type)
  188. {
  189. // limit for responses
  190. return 8 * 1024 * 1024; // 8MB
  191. }
  192. std::uint64_t body_limit_ =
  193. default_body_limit(is_request{}); // max payload body
  194. std::uint64_t len_ = 0; // size of chunk or body
  195. std::unique_ptr<char[]> buf_; // temp storage
  196. std::size_t buf_len_ = 0; // size of buf_
  197. std::size_t skip_ = 0; // resume search here
  198. std::uint32_t header_limit_ = 8192; // max header size
  199. unsigned short status_ = 0; // response status
  200. state state_ = state::nothing_yet; // initial state
  201. unsigned f_ = 0; // flags
  202. protected:
  203. /// Default constructor
  204. basic_parser() = default;
  205. /// Move constructor
  206. basic_parser(basic_parser &&) = default;
  207. /// Move assignment
  208. basic_parser& operator=(basic_parser &&) = default;
  209. /** Move constructor
  210. @note
  211. After the move, the only valid operation on the
  212. moved-from object is destruction.
  213. */
  214. template<class OtherDerived>
  215. basic_parser(basic_parser<isRequest, OtherDerived>&&);
  216. public:
  217. /// `true` if this parser parses requests, `false` for responses.
  218. using is_request =
  219. std::integral_constant<bool, isRequest>;
  220. /// Destructor
  221. ~basic_parser() = default;
  222. /// Copy constructor
  223. basic_parser(basic_parser const&) = delete;
  224. /// Copy assignment
  225. basic_parser& operator=(basic_parser const&) = delete;
  226. /** Returns a reference to this object as a `basic_parser`.
  227. This is used to pass a derived class where a base class is
  228. expected, to choose a correct function overload when the
  229. resolution would be ambiguous.
  230. */
  231. basic_parser&
  232. base()
  233. {
  234. return *this;
  235. }
  236. /** Returns a constant reference to this object as a `basic_parser`.
  237. This is used to pass a derived class where a base class is
  238. expected, to choose a correct function overload when the
  239. resolution would be ambiguous.
  240. */
  241. basic_parser const&
  242. base() const
  243. {
  244. return *this;
  245. }
  246. /// Returns `true` if the parser has received at least one byte of input.
  247. bool
  248. got_some() const
  249. {
  250. return state_ != state::nothing_yet;
  251. }
  252. /** Returns `true` if the message is complete.
  253. The message is complete after the full header is prduced
  254. and one of the following is true:
  255. @li The skip body option was set.
  256. @li The semantics of the message indicate there is no body.
  257. @li The semantics of the message indicate a body is expected,
  258. and the entire body was parsed.
  259. */
  260. bool
  261. is_done() const
  262. {
  263. return state_ == state::complete;
  264. }
  265. /** Returns `true` if a the parser has produced the full header.
  266. */
  267. bool
  268. is_header_done() const
  269. {
  270. return state_ > state::fields;
  271. }
  272. /** Returns `true` if the message is an upgrade message.
  273. @note The return value is undefined unless
  274. @ref is_header_done would return `true`.
  275. */
  276. bool
  277. upgrade() const
  278. {
  279. return (f_ & flagConnectionUpgrade) != 0;
  280. }
  281. /** Returns `true` if the last value for Transfer-Encoding is "chunked".
  282. @note The return value is undefined unless
  283. @ref is_header_done would return `true`.
  284. */
  285. bool
  286. chunked() const
  287. {
  288. return (f_ & flagChunked) != 0;
  289. }
  290. /** Returns `true` if the message has keep-alive connection semantics.
  291. This function always returns `false` if @ref need_eof would return
  292. `false`.
  293. @note The return value is undefined unless
  294. @ref is_header_done would return `true`.
  295. */
  296. bool
  297. keep_alive() const;
  298. /** Returns the optional value of Content-Length if known.
  299. @note The return value is undefined unless
  300. @ref is_header_done would return `true`.
  301. */
  302. boost::optional<std::uint64_t>
  303. content_length() const;
  304. /** Returns `true` if the message semantics require an end of file.
  305. Depending on the contents of the header, the parser may
  306. require and end of file notification to know where the end
  307. of the body lies. If this function returns `true` it will be
  308. necessary to call @ref put_eof when there will never be additional
  309. data from the input.
  310. */
  311. bool
  312. need_eof() const
  313. {
  314. return (f_ & flagNeedEOF) != 0;
  315. }
  316. /** Set the limit on the payload body.
  317. This function sets the maximum allowed size of the payload body,
  318. before any encodings except chunked have been removed. Depending
  319. on the message semantics, one of these cases will apply:
  320. @li The Content-Length is specified and exceeds the limit. In
  321. this case the result @ref error::body_limit is returned
  322. immediately after the header is parsed.
  323. @li The Content-Length is unspecified and the chunked encoding
  324. is not specified as the last encoding. In this case the end of
  325. message is determined by the end of file indicator on the
  326. associated stream or input source. If a sufficient number of
  327. body payload octets are presented to the parser to exceed the
  328. configured limit, the parse fails with the result
  329. @ref error::body_limit
  330. @li The Transfer-Encoding specifies the chunked encoding as the
  331. last encoding. In this case, when the number of payload body
  332. octets produced by removing the chunked encoding exceeds
  333. the configured limit, the parse fails with the result
  334. @ref error::body_limit.
  335. Setting the limit after any body octets have been parsed
  336. results in undefined behavior.
  337. The default limit is 1MB for requests and 8MB for responses.
  338. @param v The payload body limit to set
  339. */
  340. void
  341. body_limit(std::uint64_t v)
  342. {
  343. body_limit_ = v;
  344. }
  345. /** Set a limit on the total size of the header.
  346. This function sets the maximum allowed size of the header
  347. including all field name, value, and delimiter characters
  348. and also including the CRLF sequences in the serialized
  349. input. If the end of the header is not found within the
  350. limit of the header size, the error @ref error::header_limit
  351. is returned by @ref put.
  352. Setting the limit after any header octets have been parsed
  353. results in undefined behavior.
  354. */
  355. void
  356. header_limit(std::uint32_t v)
  357. {
  358. header_limit_ = v;
  359. }
  360. /// Returns `true` if the eager parse option is set.
  361. bool
  362. eager() const
  363. {
  364. return (f_ & flagEager) != 0;
  365. }
  366. /** Set the eager parse option.
  367. Normally the parser returns after successfully parsing a structured
  368. element (header, chunk header, or chunk body) even if there are octets
  369. remaining in the input. This is necessary when attempting to parse the
  370. header first, or when the caller wants to inspect information which may
  371. be invalidated by subsequent parsing, such as a chunk extension. The
  372. `eager` option controls whether the parser keeps going after parsing
  373. structured element if there are octets remaining in the buffer and no
  374. error occurs. This option is automatically set or cleared during certain
  375. stream operations to improve performance with no change in functionality.
  376. The default setting is `false`.
  377. @param v `true` to set the eager parse option or `false` to disable it.
  378. */
  379. void
  380. eager(bool v)
  381. {
  382. if(v)
  383. f_ |= flagEager;
  384. else
  385. f_ &= ~flagEager;
  386. }
  387. /// Returns `true` if the skip parse option is set.
  388. bool
  389. skip() const
  390. {
  391. return (f_ & flagSkipBody) != 0;
  392. }
  393. /** Set the skip parse option.
  394. This option controls whether or not the parser expects to see an HTTP
  395. body, regardless of the presence or absence of certain fields such as
  396. Content-Length or a chunked Transfer-Encoding. Depending on the request,
  397. some responses do not carry a body. For example, a 200 response to a
  398. CONNECT request from a tunneling proxy, or a response to a HEAD request.
  399. In these cases, callers may use this function inform the parser that
  400. no body is expected. The parser will consider the message complete
  401. after the header has been received.
  402. @param v `true` to set the skip body option or `false` to disable it.
  403. @note This function must called before any bytes are processed.
  404. */
  405. void
  406. skip(bool v);
  407. /** Write a buffer sequence to the parser.
  408. This function attempts to incrementally parse the HTTP
  409. message data stored in the caller provided buffers. Upon
  410. success, a positive return value indicates that the parser
  411. made forward progress, consuming that number of
  412. bytes.
  413. In some cases there may be an insufficient number of octets
  414. in the input buffer in order to make forward progress. This
  415. is indicated by the code @ref error::need_more. When
  416. this happens, the caller should place additional bytes into
  417. the buffer sequence and call @ref put again.
  418. The error code @ref error::need_more is special. When this
  419. error is returned, a subsequent call to @ref put may succeed
  420. if the buffers have been updated. Otherwise, upon error
  421. the parser may not be restarted.
  422. @param buffers An object meeting the requirements of
  423. @b ConstBufferSequence that represents the next chunk of
  424. message data. If the length of this buffer sequence is
  425. one, the implementation will not allocate additional memory.
  426. The class @ref beast::flat_buffer is provided as one way to
  427. meet this requirement
  428. @param ec Set to the error, if any occurred.
  429. @return The number of octets consumed in the buffer
  430. sequence. The caller should remove these octets even if the
  431. error is set.
  432. */
  433. template<class ConstBufferSequence>
  434. std::size_t
  435. put(ConstBufferSequence const& buffers, error_code& ec);
  436. #if ! BOOST_BEAST_DOXYGEN
  437. std::size_t
  438. put(boost::asio::const_buffer const& buffer,
  439. error_code& ec);
  440. #endif
  441. /** Inform the parser that the end of stream was reached.
  442. In certain cases, HTTP needs to know where the end of
  443. the stream is. For example, sometimes servers send
  444. responses without Content-Length and expect the client
  445. to consume input (for the body) until EOF. Callbacks
  446. and errors will still be processed as usual.
  447. This is typically called when a read from the
  448. underlying stream object sets the error code to
  449. `boost::asio::error::eof`.
  450. @note Only valid after parsing a complete header.
  451. @param ec Set to the error, if any occurred.
  452. */
  453. void
  454. put_eof(error_code& ec);
  455. private:
  456. inline
  457. Derived&
  458. impl()
  459. {
  460. return *static_cast<Derived*>(this);
  461. }
  462. template<class ConstBufferSequence>
  463. std::size_t
  464. put_from_stack(std::size_t size,
  465. ConstBufferSequence const& buffers,
  466. error_code& ec);
  467. void
  468. maybe_need_more(
  469. char const* p, std::size_t n,
  470. error_code& ec);
  471. void
  472. parse_start_line(
  473. char const*& p, char const* last,
  474. error_code& ec, std::true_type);
  475. void
  476. parse_start_line(
  477. char const*& p, char const* last,
  478. error_code& ec, std::false_type);
  479. void
  480. parse_fields(
  481. char const*& p, char const* last,
  482. error_code& ec);
  483. void
  484. finish_header(
  485. error_code& ec, std::true_type);
  486. void
  487. finish_header(
  488. error_code& ec, std::false_type);
  489. void
  490. parse_body(char const*& p,
  491. std::size_t n, error_code& ec);
  492. void
  493. parse_body_to_eof(char const*& p,
  494. std::size_t n, error_code& ec);
  495. void
  496. parse_chunk_header(char const*& p,
  497. std::size_t n, error_code& ec);
  498. void
  499. parse_chunk_body(char const*& p,
  500. std::size_t n, error_code& ec);
  501. void
  502. do_field(field f,
  503. string_view value, error_code& ec);
  504. };
  505. } // http
  506. } // beast
  507. } // boost
  508. #include <boost/beast/http/impl/basic_parser.ipp>
  509. #endif