c_regex_traits.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE c_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
  16. */
  17. #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
  19. #ifndef BOOST_REGEX_AS_MODULE
  20. #include <cctype>
  21. #include <cstdint>
  22. #include <cwctype>
  23. #endif
  24. #include <boost/regex/config.hpp>
  25. #include <boost/regex/v5/regex_workaround.hpp>
  26. #include <boost/regex/v5/primary_transform.hpp>
  27. #include <boost/regex/v5/regex_traits_defaults.hpp>
  28. namespace boost{
  29. namespace BOOST_REGEX_DETAIL_NS {
  30. enum
  31. {
  32. char_class_space = 1 << 0,
  33. char_class_print = 1 << 1,
  34. char_class_cntrl = 1 << 2,
  35. char_class_upper = 1 << 3,
  36. char_class_lower = 1 << 4,
  37. char_class_alpha = 1 << 5,
  38. char_class_digit = 1 << 6,
  39. char_class_punct = 1 << 7,
  40. char_class_xdigit = 1 << 8,
  41. char_class_alnum = char_class_alpha | char_class_digit,
  42. char_class_graph = char_class_alnum | char_class_punct,
  43. char_class_blank = 1 << 9,
  44. char_class_word = 1 << 10,
  45. char_class_unicode = 1 << 11,
  46. char_class_horizontal = 1 << 12,
  47. char_class_vertical = 1 << 13
  48. };
  49. }
  50. BOOST_REGEX_MODULE_EXPORT template <class charT>
  51. struct c_regex_traits;
  52. BOOST_REGEX_MODULE_EXPORT template<>
  53. struct c_regex_traits<char>
  54. {
  55. c_regex_traits(){}
  56. typedef char char_type;
  57. typedef std::size_t size_type;
  58. typedef std::string string_type;
  59. struct locale_type{};
  60. typedef std::uint32_t char_class_type;
  61. static size_type length(const char_type* p)
  62. {
  63. return (std::strlen)(p);
  64. }
  65. char translate(char c) const
  66. {
  67. return c;
  68. }
  69. char translate_nocase(char c) const
  70. {
  71. return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
  72. }
  73. static string_type transform(const char* p1, const char* p2);
  74. static string_type transform_primary(const char* p1, const char* p2);
  75. static char_class_type lookup_classname(const char* p1, const char* p2);
  76. static string_type lookup_collatename(const char* p1, const char* p2);
  77. static bool isctype(char, char_class_type);
  78. static int value(char, int);
  79. locale_type imbue(locale_type l)
  80. { return l; }
  81. locale_type getloc()const
  82. { return locale_type(); }
  83. private:
  84. // this type is not copyable:
  85. c_regex_traits(const c_regex_traits&);
  86. c_regex_traits& operator=(const c_regex_traits&);
  87. };
  88. #ifndef BOOST_NO_WREGEX
  89. BOOST_REGEX_MODULE_EXPORT template<>
  90. struct c_regex_traits<wchar_t>
  91. {
  92. c_regex_traits(){}
  93. typedef wchar_t char_type;
  94. typedef std::size_t size_type;
  95. typedef std::wstring string_type;
  96. struct locale_type{};
  97. typedef std::uint32_t char_class_type;
  98. static size_type length(const char_type* p)
  99. {
  100. return (std::wcslen)(p);
  101. }
  102. wchar_t translate(wchar_t c) const
  103. {
  104. return c;
  105. }
  106. wchar_t translate_nocase(wchar_t c) const
  107. {
  108. return (std::towlower)(c);
  109. }
  110. static string_type transform(const wchar_t* p1, const wchar_t* p2);
  111. static string_type transform_primary(const wchar_t* p1, const wchar_t* p2);
  112. static char_class_type lookup_classname(const wchar_t* p1, const wchar_t* p2);
  113. static string_type lookup_collatename(const wchar_t* p1, const wchar_t* p2);
  114. static bool isctype(wchar_t, char_class_type);
  115. static int value(wchar_t, int);
  116. locale_type imbue(locale_type l)
  117. { return l; }
  118. locale_type getloc()const
  119. { return locale_type(); }
  120. private:
  121. // this type is not copyable:
  122. c_regex_traits(const c_regex_traits&);
  123. c_regex_traits& operator=(const c_regex_traits&);
  124. };
  125. #endif // BOOST_NO_WREGEX
  126. inline c_regex_traits<char>::string_type c_regex_traits<char>::transform(const char* p1, const char* p2)
  127. {
  128. std::string result(10, ' ');
  129. std::size_t s = result.size();
  130. std::size_t r;
  131. std::string src(p1, p2);
  132. while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
  133. {
  134. #if defined(_CPPLIB_VER)
  135. //
  136. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  137. // to std::strxfrm, but only for certain locales :-(
  138. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  139. //
  140. if (r == INT_MAX)
  141. {
  142. result.erase();
  143. result.insert(result.begin(), static_cast<char>(0));
  144. return result;
  145. }
  146. #endif
  147. result.append(r - s + 3, ' ');
  148. s = result.size();
  149. }
  150. result.erase(r);
  151. return result;
  152. }
  153. inline c_regex_traits<char>::string_type c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
  154. {
  155. static char s_delim;
  156. static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
  157. std::string result;
  158. //
  159. // What we do here depends upon the format of the sort key returned by
  160. // sort key returned by this->transform:
  161. //
  162. switch (s_collate_type)
  163. {
  164. case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
  165. case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
  166. // the best we can do is translate to lower case, then get a regular sort key:
  167. {
  168. result.assign(p1, p2);
  169. for (std::string::size_type i = 0; i < result.size(); ++i)
  170. result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
  171. result = transform(&*result.begin(), &*result.begin() + result.size());
  172. break;
  173. }
  174. case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
  175. {
  176. // get a regular sort key, and then truncate it:
  177. result = transform(p1, p2);
  178. result.erase(s_delim);
  179. break;
  180. }
  181. case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
  182. // get a regular sort key, and then truncate everything after the delim:
  183. result = transform(p1, p2);
  184. if ((!result.empty()) && (result[0] == s_delim))
  185. break;
  186. std::size_t i;
  187. for (i = 0; i < result.size(); ++i)
  188. {
  189. if (result[i] == s_delim)
  190. break;
  191. }
  192. result.erase(i);
  193. break;
  194. }
  195. if (result.empty())
  196. result = std::string(1, char(0));
  197. return result;
  198. }
  199. inline c_regex_traits<char>::char_class_type c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
  200. {
  201. using namespace BOOST_REGEX_DETAIL_NS;
  202. static const char_class_type masks[] =
  203. {
  204. 0,
  205. char_class_alnum,
  206. char_class_alpha,
  207. char_class_blank,
  208. char_class_cntrl,
  209. char_class_digit,
  210. char_class_digit,
  211. char_class_graph,
  212. char_class_horizontal,
  213. char_class_lower,
  214. char_class_lower,
  215. char_class_print,
  216. char_class_punct,
  217. char_class_space,
  218. char_class_space,
  219. char_class_upper,
  220. char_class_unicode,
  221. char_class_upper,
  222. char_class_vertical,
  223. char_class_alnum | char_class_word,
  224. char_class_alnum | char_class_word,
  225. char_class_xdigit,
  226. };
  227. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  228. if (idx < 0)
  229. {
  230. std::string s(p1, p2);
  231. for (std::string::size_type i = 0; i < s.size(); ++i)
  232. s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
  233. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  234. }
  235. BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
  236. return masks[idx + 1];
  237. }
  238. inline bool c_regex_traits<char>::isctype(char c, char_class_type mask)
  239. {
  240. using namespace BOOST_REGEX_DETAIL_NS;
  241. return
  242. ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
  243. || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
  244. || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
  245. || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
  246. || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
  247. || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
  248. || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
  249. || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
  250. || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
  251. || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
  252. || ((mask & char_class_word) && (c == '_'))
  253. || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
  254. || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
  255. }
  256. inline c_regex_traits<char>::string_type c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
  257. {
  258. std::string s(p1, p2);
  259. s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
  260. if (s.empty() && (p2 - p1 == 1))
  261. s.append(1, *p1);
  262. return s;
  263. }
  264. inline int c_regex_traits<char>::value(char c, int radix)
  265. {
  266. char b[2] = { c, '\0', };
  267. char* ep;
  268. int result = std::strtol(b, &ep, radix);
  269. if (ep == b)
  270. return -1;
  271. return result;
  272. }
  273. #ifndef BOOST_NO_WREGEX
  274. inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
  275. {
  276. std::size_t r;
  277. std::size_t s = 10;
  278. std::wstring src(p1, p2);
  279. std::wstring result(s, L' ');
  280. while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
  281. {
  282. #if defined(_CPPLIB_VER)
  283. //
  284. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  285. // to std::strxfrm, but only for certain locales :-(
  286. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  287. //
  288. if (r == INT_MAX)
  289. {
  290. result.erase();
  291. result.insert(result.begin(), static_cast<wchar_t>(0));
  292. return result;
  293. }
  294. #endif
  295. result.append(r - s + 3, L' ');
  296. s = result.size();
  297. }
  298. result.erase(r);
  299. return result;
  300. }
  301. inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
  302. {
  303. static wchar_t s_delim;
  304. static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
  305. std::wstring result;
  306. //
  307. // What we do here depends upon the format of the sort key returned by
  308. // sort key returned by this->transform:
  309. //
  310. switch (s_collate_type)
  311. {
  312. case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
  313. case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
  314. // the best we can do is translate to lower case, then get a regular sort key:
  315. {
  316. result.assign(p1, p2);
  317. for (std::wstring::size_type i = 0; i < result.size(); ++i)
  318. result[i] = (std::towlower)(result[i]);
  319. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  320. break;
  321. }
  322. case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
  323. {
  324. // get a regular sort key, and then truncate it:
  325. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  326. result.erase(s_delim);
  327. break;
  328. }
  329. case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
  330. // get a regular sort key, and then truncate everything after the delim:
  331. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  332. if ((!result.empty()) && (result[0] == s_delim))
  333. break;
  334. std::size_t i;
  335. for (i = 0; i < result.size(); ++i)
  336. {
  337. if (result[i] == s_delim)
  338. break;
  339. }
  340. result.erase(i);
  341. break;
  342. }
  343. if (result.empty())
  344. result = std::wstring(1, char(0));
  345. return result;
  346. }
  347. inline c_regex_traits<wchar_t>::char_class_type c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
  348. {
  349. using namespace BOOST_REGEX_DETAIL_NS;
  350. static const char_class_type masks[] =
  351. {
  352. 0,
  353. char_class_alnum,
  354. char_class_alpha,
  355. char_class_blank,
  356. char_class_cntrl,
  357. char_class_digit,
  358. char_class_digit,
  359. char_class_graph,
  360. char_class_horizontal,
  361. char_class_lower,
  362. char_class_lower,
  363. char_class_print,
  364. char_class_punct,
  365. char_class_space,
  366. char_class_space,
  367. char_class_upper,
  368. char_class_unicode,
  369. char_class_upper,
  370. char_class_vertical,
  371. char_class_alnum | char_class_word,
  372. char_class_alnum | char_class_word,
  373. char_class_xdigit,
  374. };
  375. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  376. if (idx < 0)
  377. {
  378. std::wstring s(p1, p2);
  379. for (std::wstring::size_type i = 0; i < s.size(); ++i)
  380. s[i] = (std::towlower)(s[i]);
  381. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  382. }
  383. BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
  384. return masks[idx + 1];
  385. }
  386. inline bool c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
  387. {
  388. using namespace BOOST_REGEX_DETAIL_NS;
  389. return
  390. ((mask & char_class_space) && (std::iswspace)(c))
  391. || ((mask & char_class_print) && (std::iswprint)(c))
  392. || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
  393. || ((mask & char_class_upper) && (std::iswupper)(c))
  394. || ((mask & char_class_lower) && (std::iswlower)(c))
  395. || ((mask & char_class_alpha) && (std::iswalpha)(c))
  396. || ((mask & char_class_digit) && (std::iswdigit)(c))
  397. || ((mask & char_class_punct) && (std::iswpunct)(c))
  398. || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
  399. || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
  400. || ((mask & char_class_word) && (c == '_'))
  401. || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
  402. || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
  403. || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
  404. }
  405. inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
  406. {
  407. std::string name;
  408. // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead:
  409. for (const wchar_t* pos = p1; pos != p2; ++pos)
  410. name.push_back((char)*pos);
  411. name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
  412. if (!name.empty())
  413. return string_type(name.begin(), name.end());
  414. if (p2 - p1 == 1)
  415. return string_type(1, *p1);
  416. return string_type();
  417. }
  418. inline int c_regex_traits<wchar_t>::value(wchar_t c, int radix)
  419. {
  420. #ifdef BOOST_BORLANDC
  421. // workaround for broken wcstol:
  422. if ((std::iswxdigit)(c) == 0)
  423. return -1;
  424. #endif
  425. wchar_t b[2] = { c, '\0', };
  426. wchar_t* ep;
  427. int result = std::wcstol(b, &ep, radix);
  428. if (ep == b)
  429. return -1;
  430. return result;
  431. }
  432. #endif
  433. }
  434. #endif