lut_chars.hpp 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. //
  2. // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. // Official repository: https://github.com/boostorg/url
  8. //
  9. #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
  10. #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
  11. #include <boost/url/detail/config.hpp>
  12. #include <boost/url/grammar/detail/charset.hpp>
  13. #include <cstdint>
  14. #include <type_traits>
  15. // Credit to Peter Dimov for ideas regarding
  16. // SIMD constexpr, and character set masks.
  17. namespace boost {
  18. namespace urls {
  19. namespace grammar {
  20. #ifndef BOOST_URL_DOCS
  21. namespace detail {
  /** Detect whether a type is usable as a character predicate.

      The primary template derives from `std::false_type`.
      The partial specialization that follows derives from
      `std::true_type`; it is only viable when, for a `const`
      lvalue of type `Callable`, an explicit call to its member
      `operator()` with a `char` argument is well-formed and
      the result can be assigned to a `bool`.
  */
  template<class Callable, class Enable = void>
  struct is_pred
      : std::false_type
  {
  };

  // Selected only when the expression inside decltype is
  // well-formed. Because the call is spelled `.operator()`,
  // types without a member call operator (for example plain
  // function pointers) are rejected.
  // NOTE(review): relies on `void_t` being the usual
  // "map any types to void" alias provided by the
  // included project headers - confirm.
  template<class Callable>
  struct is_pred<
      Callable,
      void_t<decltype(
          std::declval<bool&>() =
              std::declval<Callable const&>().operator()(
                  std::declval<char>()))>>
      : std::true_type
  {
  };
  33. } // detail
  34. #endif
  35. /** A set of characters
  36. The characters defined by instances of
  37. this set are provided upon construction.
  38. The `constexpr` implementation allows
  39. these to become compile-time constants.
  40. @par Example
  41. Character sets are used with rules and the
  42. functions @ref find_if and @ref find_if_not.
  43. @code
  44. constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
  45. system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
  46. @endcode
  47. @see
  48. @ref find_if,
  49. @ref find_if_not,
  50. @ref parse,
  51. @ref token_rule.
  52. */
  53. class lut_chars
  54. {
  55. std::uint64_t mask_[4] = {};
  56. constexpr
  57. static
  58. std::uint64_t
  59. lo(char c) noexcept
  60. {
  61. return static_cast<
  62. unsigned char>(c) & 3;
  63. }
  64. constexpr
  65. static
  66. std::uint64_t
  67. hi(char c) noexcept
  68. {
  69. return 1ULL << (static_cast<
  70. unsigned char>(c) >> 2);
  71. }
  72. constexpr
  73. static
  74. lut_chars
  75. construct(
  76. char const* s) noexcept
  77. {
  78. return *s
  79. ? lut_chars(*s) +
  80. construct(s+1)
  81. : lut_chars();
  82. }
  83. constexpr
  84. static
  85. lut_chars
  86. construct(
  87. unsigned char ch,
  88. bool b) noexcept
  89. {
  90. return b
  91. ? lut_chars(ch)
  92. : lut_chars();
  93. }
  94. template<class Pred>
  95. constexpr
  96. static
  97. lut_chars
  98. construct(
  99. Pred pred,
  100. unsigned char ch) noexcept
  101. {
  102. return ch == 255
  103. ? construct(ch, pred(static_cast<char>(ch)))
  104. : construct(ch, pred(static_cast<char>(ch))) +
  105. construct(pred, ch + 1);
  106. }
  107. constexpr
  108. lut_chars() = default;
  109. constexpr
  110. lut_chars(
  111. std::uint64_t m0,
  112. std::uint64_t m1,
  113. std::uint64_t m2,
  114. std::uint64_t m3) noexcept
  115. : mask_{ m0, m1, m2, m3 }
  116. {
  117. }
  118. public:
  119. /** Constructor
  120. This function constructs a character
  121. set which has as a single member,
  122. the character `ch`.
  123. @par Example
  124. @code
  125. constexpr lut_chars asterisk( '*' );
  126. @endcode
  127. @par Complexity
  128. Constant.
  129. @par Exception Safety
  130. Throws nothing.
  131. @param ch A character.
  132. */
  133. constexpr
  134. lut_chars(char ch) noexcept
  135. : mask_ {
  136. lo(ch) == 0 ? hi(ch) : 0,
  137. lo(ch) == 1 ? hi(ch) : 0,
  138. lo(ch) == 2 ? hi(ch) : 0,
  139. lo(ch) == 3 ? hi(ch) : 0 }
  140. {
  141. }
  142. /** Constructor
  143. This function constructs a character
  144. set which has as members, all of the
  145. characters present in the null-terminated
  146. string `s`.
  147. @par Example
  148. @code
  149. constexpr lut_chars digits = "0123456789";
  150. @endcode
  151. @par Complexity
  152. Linear in `::strlen(s)`, or constant
  153. if `s` is a constant expression.
  154. @par Exception Safety
  155. Throws nothing.
  156. @param s A null-terminated string.
  157. */
  158. constexpr
  159. lut_chars(
  160. char const* s) noexcept
  161. : lut_chars(construct(s))
  162. {
  163. }
  164. /** Constructor.
  165. This function constructs a character
  166. set which has as members, every value
  167. of `char ch` for which the expression
  168. `pred(ch)` returns `true`.
  169. @par Example
  170. @code
  171. struct is_digit
  172. {
  173. constexpr bool
  174. operator()(char c ) const noexcept
  175. {
  176. return c >= '0' && c <= '9';
  177. }
  178. };
  179. constexpr lut_chars digits( is_digit{} );
  180. @endcode
  181. @par Complexity
  182. Linear in `pred`, or constant if
  183. `pred(ch)` is a constant expression.
  184. @par Exception Safety
  185. Throws nothing.
  186. @param pred The function object to
  187. use for determining membership in
  188. the character set.
  189. */
  190. template<class Pred
  191. #ifndef BOOST_URL_DOCS
  192. ,class = typename std::enable_if<
  193. detail::is_pred<Pred>::value &&
  194. ! std::is_base_of<
  195. lut_chars, Pred>::value>::type
  196. #endif
  197. >
  198. constexpr
  199. lut_chars(Pred const& pred) noexcept
  200. : lut_chars(
  201. construct(pred, 0))
  202. {
  203. }
  204. /** Return true if ch is in the character set.
  205. This function returns true if the
  206. character `ch` is in the set, otherwise
  207. it returns false.
  208. @par Complexity
  209. Constant.
  210. @par Exception Safety
  211. Throws nothing.
  212. @param ch The character to test.
  213. @return `true` if `ch` is in the set.
  214. */
  215. constexpr
  216. bool
  217. operator()(
  218. unsigned char ch) const noexcept
  219. {
  220. return operator()(static_cast<char>(ch));
  221. }
  222. /// @copydoc operator()(unsigned char) const
  223. constexpr
  224. bool
  225. operator()(char ch) const noexcept
  226. {
  227. return mask_[lo(ch)] & hi(ch);
  228. }
  229. /** Return the union of two character sets.
  230. This function returns a new character
  231. set which contains all of the characters
  232. in `cs0` as well as all of the characters
  233. in `cs`.
  234. @par Example
  235. This creates a character set which
  236. includes all letters and numbers
  237. @code
  238. constexpr lut_chars alpha_chars(
  239. "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  240. "abcdefghijklmnopqrstuvwxyz");
  241. constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
  242. @endcode
  243. @par Complexity
  244. Constant.
  245. @return The new character set.
  246. @param cs0 A character to join
  247. @param cs1 A character to join
  248. */
  249. friend
  250. constexpr
  251. lut_chars
  252. operator+(
  253. lut_chars const& cs0,
  254. lut_chars const& cs1) noexcept
  255. {
  256. return lut_chars(
  257. cs0.mask_[0] | cs1.mask_[0],
  258. cs0.mask_[1] | cs1.mask_[1],
  259. cs0.mask_[2] | cs1.mask_[2],
  260. cs0.mask_[3] | cs1.mask_[3]);
  261. }
  262. /** Return a new character set by subtracting
  263. This function returns a new character
  264. set which is formed from all of the
  265. characters in `cs0` which are not in `cs`.
  266. @par Example
  267. This statement declares a character set
  268. containing all the lowercase letters
  269. which are not vowels:
  270. @code
  271. constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
  272. @endcode
  273. @par Complexity
  274. Constant.
  275. @return The new character set.
  276. @param cs0 A character set to join.
  277. @param cs1 A character set to join.
  278. */
  279. friend
  280. constexpr
  281. lut_chars
  282. operator-(
  283. lut_chars const& cs0,
  284. lut_chars const& cs1) noexcept
  285. {
  286. return lut_chars(
  287. cs0.mask_[0] & ~cs1.mask_[0],
  288. cs0.mask_[1] & ~cs1.mask_[1],
  289. cs0.mask_[2] & ~cs1.mask_[2],
  290. cs0.mask_[3] & ~cs1.mask_[3]);
  291. }
  292. /** Return a new character set which is the complement of another character set.
  293. This function returns a new character
  294. set which contains all of the characters
  295. that are not in `*this`.
  296. @par Example
  297. This statement declares a character set
  298. containing everything but vowels:
  299. @code
  300. constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
  301. @endcode
  302. @par Complexity
  303. Constant.
  304. @par Exception Safety
  305. Throws nothing.
  306. @return The new character set.
  307. */
  308. constexpr
  309. lut_chars
  310. operator~() const noexcept
  311. {
  312. return lut_chars(
  313. ~mask_[0],
  314. ~mask_[1],
  315. ~mask_[2],
  316. ~mask_[3]
  317. );
  318. }
  319. #ifndef BOOST_URL_DOCS
  320. #ifdef BOOST_URL_USE_SSE2
  321. char const*
  322. find_if(
  323. char const* first,
  324. char const* last) const noexcept
  325. {
  326. return detail::find_if_pred(
  327. *this, first, last);
  328. }
  329. char const*
  330. find_if_not(
  331. char const* first,
  332. char const* last) const noexcept
  333. {
  334. return detail::find_if_not_pred(
  335. *this, first, last);
  336. }
  337. #endif
  338. #endif
  339. };
  340. } // grammar
  341. } // urls
  342. } // boost
  343. #endif