encode.hpp 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/url
  8. //
  9. #ifndef BOOST_URL_IMPL_ENCODE_HPP
  10. #define BOOST_URL_IMPL_ENCODE_HPP
  11. #include "boost/url/grammar/token_rule.hpp"
  12. #include <boost/assert.hpp>
  13. #include <boost/core/detail/static_assert.hpp>
  14. #include <boost/url/detail/encode.hpp>
  15. #include <boost/url/detail/except.hpp>
  16. #include <boost/url/encoding_opts.hpp>
  17. #include <boost/url/grammar/charset.hpp>
  18. #include <boost/url/grammar/hexdig_chars.hpp>
  19. #include <boost/url/grammar/string_token.hpp>
  20. #include <boost/url/grammar/type_traits.hpp>
  21. namespace boost {
  22. namespace urls {
  23. //------------------------------------------------
  24. template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
  25. std::size_t
  26. encoded_size(
  27. core::string_view s,
  28. CS const& allowed,
  29. encoding_opts opt) noexcept
  30. {
  31. /*
  32. If you get a compilation error here, it
  33. means that the value you passed does
  34. not meet the requirements stated in
  35. the documentation.
  36. */
  37. BOOST_CORE_STATIC_ASSERT(
  38. grammar::is_charset<CS>::value);
  39. std::size_t n = 0;
  40. auto it = s.data();
  41. auto const last = it + s.size();
  42. if (!opt.space_as_plus)
  43. {
  44. while (it != last)
  45. {
  46. char const c = *it;
  47. if (allowed(c))
  48. {
  49. ++n;
  50. }
  51. else
  52. {
  53. n += 3;
  54. }
  55. ++it;
  56. }
  57. }
  58. else
  59. {
  60. // '+' is always encoded (thus
  61. // spending 3 chars) even if
  62. // allowed because "%2B" and
  63. // "+" have different meanings
  64. // when space as plus is enabled
  65. using FNT = bool (*)(CS const& allowed, char);
  66. FNT takes_one_char =
  67. allowed('+') ?
  68. (allowed(' ') ?
  69. FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
  70. FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
  71. (allowed(' ') ?
  72. FNT([](CS const& allowed, char c){ return allowed(c); }) :
  73. FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
  74. while (it != last)
  75. {
  76. char const c = *it;
  77. if (takes_one_char(allowed, c))
  78. {
  79. ++n;
  80. }
  81. else
  82. {
  83. n += 3;
  84. }
  85. ++it;
  86. }
  87. }
  88. return n;
  89. }
  90. //------------------------------------------------
  91. template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
  92. std::size_t
  93. encode(
  94. char* dest,
  95. std::size_t size,
  96. core::string_view s,
  97. CS const& allowed,
  98. encoding_opts opt)
  99. {
  100. /* If you get a compilation error here, it
  101. means that the value you passed does
  102. not meet the requirements stated in
  103. the documentation.
  104. */
  105. BOOST_CORE_STATIC_ASSERT(
  106. grammar::is_charset<CS>::value);
  107. // '%' must be reserved
  108. BOOST_ASSERT(!allowed('%'));
  109. char const* const hex =
  110. detail::hexdigs[opt.lower_case];
  111. auto const encode = [hex](
  112. char*& dest,
  113. unsigned char c) noexcept
  114. {
  115. *dest++ = '%';
  116. *dest++ = hex[c>>4];
  117. *dest++ = hex[c&0xf];
  118. };
  119. auto it = s.data();
  120. auto const end = dest + size;
  121. auto const last = it + s.size();
  122. auto const dest0 = dest;
  123. auto const end3 = end - 3;
  124. if (!opt.space_as_plus)
  125. {
  126. while(it != last)
  127. {
  128. char const c = *it;
  129. if (allowed(c))
  130. {
  131. if(dest == end)
  132. return dest - dest0;
  133. *dest++ = c;
  134. ++it;
  135. continue;
  136. }
  137. if (dest > end3)
  138. return dest - dest0;
  139. encode(dest, c);
  140. ++it;
  141. }
  142. return dest - dest0;
  143. }
  144. else
  145. {
  146. while (it != last)
  147. {
  148. char const c = *it;
  149. if (c == ' ')
  150. {
  151. if(dest == end)
  152. return dest - dest0;
  153. *dest++ = '+';
  154. ++it;
  155. continue;
  156. }
  157. else if (
  158. allowed(c) &&
  159. c != '+')
  160. {
  161. if(dest == end)
  162. return dest - dest0;
  163. *dest++ = c;
  164. ++it;
  165. continue;
  166. }
  167. if(dest > end3)
  168. return dest - dest0;
  169. encode(dest, c);
  170. ++it;
  171. }
  172. }
  173. return dest - dest0;
  174. }
  175. //------------------------------------------------
  176. // unsafe encode just
  177. // asserts on the output buffer
  178. //
  179. template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
  180. std::size_t
  181. encode_unsafe(
  182. char* dest,
  183. std::size_t size,
  184. core::string_view s,
  185. CS const& allowed,
  186. encoding_opts opt)
  187. {
  188. BOOST_CORE_STATIC_ASSERT(
  189. grammar::is_charset<CS>::value);
  190. // '%' must be reserved
  191. BOOST_ASSERT(!allowed('%'));
  192. auto it = s.data();
  193. auto const last = it + s.size();
  194. auto const end = dest + size;
  195. ignore_unused(end);
  196. char const* const hex =
  197. detail::hexdigs[opt.lower_case];
  198. auto const encode = [end, hex](
  199. char*& dest,
  200. unsigned char c) noexcept
  201. {
  202. ignore_unused(end);
  203. *dest++ = '%';
  204. BOOST_ASSERT(dest != end);
  205. *dest++ = hex[c>>4];
  206. BOOST_ASSERT(dest != end);
  207. *dest++ = hex[c&0xf];
  208. };
  209. auto const dest0 = dest;
  210. if (!opt.space_as_plus)
  211. {
  212. while(it != last)
  213. {
  214. BOOST_ASSERT(dest != end);
  215. char const c = *it;
  216. if(allowed(c))
  217. {
  218. *dest++ = c;
  219. }
  220. else
  221. {
  222. encode(dest, c);
  223. }
  224. ++it;
  225. }
  226. }
  227. else
  228. {
  229. while(it != last)
  230. {
  231. BOOST_ASSERT(dest != end);
  232. char const c = *it;
  233. if (c == ' ')
  234. {
  235. *dest++ = '+';
  236. }
  237. else if (
  238. allowed(c) &&
  239. c != '+')
  240. {
  241. *dest++ = c;
  242. }
  243. else
  244. {
  245. encode(dest, c);
  246. }
  247. ++it;
  248. }
  249. }
  250. return dest - dest0;
  251. }
  252. //------------------------------------------------
  253. template<
  254. BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
  255. BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
  256. BOOST_URL_STRTOK_RETURN
  257. encode(
  258. core::string_view s,
  259. CS const& allowed,
  260. encoding_opts opt,
  261. StringToken&& token) noexcept
  262. {
  263. BOOST_CORE_STATIC_ASSERT(
  264. grammar::is_charset<CS>::value);
  265. auto const n = encoded_size(
  266. s, allowed, opt);
  267. auto p = token.prepare(n);
  268. if(n > 0)
  269. encode_unsafe(
  270. p, n, s, allowed, opt);
  271. return token.result();
  272. }
  273. } // urls
  274. } // boost
  275. #endif