cpp_regex_traits.hpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044
  1. /*
  2. *
  3. * Copyright (c) 2004 John Maddock
  4. * Copyright 2011 Garmin Ltd. or its subsidiaries
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE cpp_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class cpp_regex_traits.
  16. */
  17. #ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  19. #include <boost/regex/config.hpp>
  20. #ifndef BOOST_REGEX_AS_MODULE
  21. #include <cstdint>
  22. #include <locale>
  23. #include <type_traits>
  24. #include <climits>
  25. #include <ios>
  26. #include <istream>
  27. #ifdef BOOST_HAS_THREADS
  28. #include <mutex>
  29. #endif
  30. #endif
  31. #include <boost/regex/pattern_except.hpp>
  32. #include <boost/regex/v5/regex_traits_defaults.hpp>
  33. #include <boost/regex/v5/primary_transform.hpp>
  34. #include <boost/regex/v5/object_cache.hpp>
  35. #ifdef BOOST_REGEX_MSVC
  36. #pragma warning(push)
  37. #pragma warning(disable:4786 4251)
  38. #endif
  39. namespace boost{
  40. //
  41. // forward declaration is needed by some compilers:
  42. //
  43. BOOST_REGEX_MODULE_EXPORT template <class charT>
  44. class cpp_regex_traits;
  45. namespace BOOST_REGEX_DETAIL_NS{
  46. //
  47. // class parser_buf:
  48. // acts as a stream buffer which wraps around a pair of pointers:
  49. //
  // Read-only stream buffer over a caller-supplied character range.
  //
  // The range is installed through setbuf() (reachable via pubsetbuf()); the
  // buffer never copies or owns the characters.  Only the get area is
  // supported: every seek that names ios_base::out fails.
  template <class charT,
            class traits = ::std::char_traits<charT> >
  class parser_buf : public ::std::basic_streambuf<charT, traits>
  {
     typedef ::std::basic_streambuf<charT, traits> base_type;
     typedef typename base_type::int_type int_type;
     typedef typename base_type::char_type char_type;
     typedef typename base_type::pos_type pos_type;
     typedef ::std::streamsize streamsize;
     typedef typename base_type::off_type off_type;
  public:
     // Starts with an empty get area.
     parser_buf() : base_type() { setbuf(nullptr, 0); }
     // Current read position inside the wrapped range.
     const charT* getnext() { return this->gptr(); }
  protected:
     std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n) override;
     typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which) override;
     typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) override;
  private:
     // Not copyable.
     parser_buf& operator=(const parser_buf&) = delete;
     parser_buf(const parser_buf&) = delete;
  };

  // Makes [s, s + n) the get area, with the read position at s.
  template<class charT, class traits>
  std::basic_streambuf<charT, traits>*
     parser_buf<charT, traits>::setbuf(char_type* s, streamsize n)
  {
     this->setg(s, s, s + n);
     return this;
  }

  // Relative seek inside the get area.
  //
  //   beg : off must lie in [0, size].
  //   cur : the resulting position must lie in [0, size].
  //   end : a non-positive off is applied conventionally (size + off); a
  //         positive off keeps this class's historical meaning of "off
  //         characters back from the end" (size - off).
  //
  // Returns the new position, or pos_type(off_type(-1)) when the request is
  // out of range or names ios_base::out.  An unrecognised seekdir leaves the
  // position untouched and reports it.
  template<class charT, class traits>
  typename parser_buf<charT, traits>::pos_type
     parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which)
  {
     const pos_type failure = pos_type(off_type(-1));
     if(which & ::std::ios_base::out)
        return failure;

     charT* const g = this->eback();
     const std::ptrdiff_t size = this->egptr() - g;
     const std::ptrdiff_t pos = this->gptr() - g;
     std::ptrdiff_t target = pos;

     switch(static_cast<std::intmax_t>(way))
     {
     case ::std::ios_base::beg:
        if((off < 0) || (off > size))
           return failure;
        target = static_cast<std::ptrdiff_t>(off);
        break;
     case ::std::ios_base::end:
        if((off > size) || (off < -size))
           return failure;
        target = (off > 0) ? static_cast<std::ptrdiff_t>(size - off)
                           : static_cast<std::ptrdiff_t>(size + off);
        break;
     case ::std::ios_base::cur:
        // Range-check before adding so that pos + off cannot overflow.
        if((off < -pos) || (off > size - pos))
           return failure;
        target = static_cast<std::ptrdiff_t>(pos + off);
        break;
     default:
        break;
     }
     this->setg(g, g + target, g + size);
     return pos_type(static_cast<off_type>(target));
  }

  // Absolute seek inside the get area.
  //
  // Returns the new position on success.  Fails, leaving the read position
  // unchanged, when sp is negative, lies beyond the end of the range, or when
  // `which` names ios_base::out.
  template<class charT, class traits>
  typename parser_buf<charT, traits>::pos_type
     parser_buf<charT, traits>::seekpos(pos_type sp, ::std::ios_base::openmode which)
  {
     const pos_type failure = pos_type(off_type(-1));
     if(which & ::std::ios_base::out)
        return failure;

     const off_type size = static_cast<off_type>(this->egptr() - this->eback());
     const off_type target = off_type(sp);
     if((target < 0) || (target > size))
        return failure;

     charT* const g = this->eback();
     this->setg(g, g + target, g + size);
     return pos_type(target);
  }
  135. //
  136. // class cpp_regex_traits_base:
  137. // acts as a container for locale and the facets we are using.
  138. //
  // Holds a locale together with pointers to the ctype, messages and collate
  // facets obtained from it.  Ordering and equality are defined purely on the
  // three facet pointers; the locale object itself is not compared.
  template <class charT>
  struct cpp_regex_traits_base
  {
     cpp_regex_traits_base(const std::locale& l)
     {
        (void)imbue(l);
     }

     // Switches to locale l and returns the locale that was in use before.
     std::locale imbue(const std::locale& l);

     std::locale m_locale;
     std::ctype<charT> const* m_pctype;
     std::messages<charT> const* m_pmessages;   // null when l has no messages facet
     std::collate<charT> const* m_pcollate;

     // Lexicographic comparison over (m_pctype, m_pmessages, m_pcollate).
     bool operator<(const cpp_regex_traits_base& b)const
     {
        if(m_pctype != b.m_pctype)
           return m_pctype < b.m_pctype;
        if(m_pmessages != b.m_pmessages)
           return m_pmessages < b.m_pmessages;
        return m_pcollate < b.m_pcollate;
     }
     // True when all three facet pointers are identical.
     bool operator==(const cpp_regex_traits_base& b)const
     {
        if(m_pctype != b.m_pctype)
           return false;
        if(m_pmessages != b.m_pmessages)
           return false;
        return m_pcollate == b.m_pcollate;
     }
  };

  template <class charT>
  std::locale cpp_regex_traits_base<charT>::imbue(const std::locale& l)
  {
     std::locale previous(m_locale);
     m_locale = l;
     m_pctype = &std::use_facet<std::ctype<charT> >(l);
     // The messages facet is optional: only fetch it when the locale has one.
     if(std::has_facet<std::messages<charT> >(l))
        m_pmessages = &std::use_facet<std::messages<charT> >(l);
     else
        m_pmessages = nullptr;
     m_pcollate = &std::use_facet<std::collate<charT> >(l);
     return previous;
  }
  178. //
  179. // class cpp_regex_traits_char_layer:
  180. // implements methods that require specialization for narrow characters:
  181. //
  182. template <class charT>
  183. class cpp_regex_traits_char_layer : public cpp_regex_traits_base<charT>
  184. {
  185. typedef std::basic_string<charT> string_type;
  186. typedef std::map<charT, regex_constants::syntax_type> map_type;
  187. typedef typename map_type::const_iterator map_iterator_type;
  188. public:
  189. cpp_regex_traits_char_layer(const std::locale& l)
  190. : cpp_regex_traits_base<charT>(l)
  191. {
  192. init();
  193. }
  194. cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b)
  195. : cpp_regex_traits_base<charT>(b)
  196. {
  197. init();
  198. }
  199. void init();
  200. regex_constants::syntax_type syntax_type(charT c)const
  201. {
  202. map_iterator_type i = m_char_map.find(c);
  203. return ((i == m_char_map.end()) ? 0 : i->second);
  204. }
  205. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  206. {
  207. map_iterator_type i = m_char_map.find(c);
  208. if(i == m_char_map.end())
  209. {
  210. if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class;
  211. if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class;
  212. return 0;
  213. }
  214. return i->second;
  215. }
  216. private:
  217. string_type get_default_message(regex_constants::syntax_type);
  218. // TODO: use a hash table when available!
  219. map_type m_char_map;
  220. };
  221. template <class charT>
  222. void cpp_regex_traits_char_layer<charT>::init()
  223. {
  224. // we need to start by initialising our syntax map so we know which
  225. // character is used for which purpose:
  226. #ifndef __IBMCPP__
  227. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  228. #else
  229. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  230. #endif
  231. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  232. if((!cat_name.empty()) && (this->m_pmessages != 0))
  233. {
  234. cat = this->m_pmessages->open(
  235. cat_name,
  236. this->m_locale);
  237. if((int)cat < 0)
  238. {
  239. std::string m("Unable to open message catalog: ");
  240. std::runtime_error err(m + cat_name);
  241. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  242. }
  243. }
  244. //
  245. // if we have a valid catalog then load our messages:
  246. //
  247. if((int)cat >= 0)
  248. {
  249. #ifndef BOOST_NO_EXCEPTIONS
  250. try{
  251. #endif
  252. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  253. {
  254. string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i));
  255. for(typename string_type::size_type j = 0; j < mss.size(); ++j)
  256. {
  257. m_char_map[mss[j]] = i;
  258. }
  259. }
  260. this->m_pmessages->close(cat);
  261. #ifndef BOOST_NO_EXCEPTIONS
  262. }
  263. catch(...)
  264. {
  265. if(this->m_pmessages)
  266. this->m_pmessages->close(cat);
  267. throw;
  268. }
  269. #endif
  270. }
  271. else
  272. {
  273. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  274. {
  275. const char* ptr = get_default_syntax(i);
  276. while(ptr && *ptr)
  277. {
  278. m_char_map[this->m_pctype->widen(*ptr)] = i;
  279. ++ptr;
  280. }
  281. }
  282. }
  283. }
  284. template <class charT>
  285. typename cpp_regex_traits_char_layer<charT>::string_type
  286. cpp_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i)
  287. {
  288. const char* ptr = get_default_syntax(i);
  289. string_type result;
  290. while(ptr && *ptr)
  291. {
  292. result.append(1, this->m_pctype->widen(*ptr));
  293. ++ptr;
  294. }
  295. return result;
  296. }
  297. //
  298. // specialized version for narrow characters:
  299. //
  300. template <>
  301. class cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
  302. {
  303. typedef std::string string_type;
  304. public:
  305. cpp_regex_traits_char_layer(const std::locale& l)
  306. : cpp_regex_traits_base<char>(l)
  307. {
  308. init();
  309. }
  310. cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l)
  311. : cpp_regex_traits_base<char>(l)
  312. {
  313. init();
  314. }
  315. regex_constants::syntax_type syntax_type(char c)const
  316. {
  317. return m_char_map[static_cast<unsigned char>(c)];
  318. }
  319. regex_constants::escape_syntax_type escape_syntax_type(char c) const
  320. {
  321. return m_char_map[static_cast<unsigned char>(c)];
  322. }
  323. private:
  324. regex_constants::syntax_type m_char_map[1u << CHAR_BIT];
  325. void init();
  326. };
  327. //
  328. // class cpp_regex_traits_implementation:
  329. // provides pimpl implementation for cpp_regex_traits.
  330. //
  331. template <class charT>
  332. class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
  333. {
  334. public:
  335. typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
  336. typedef typename std::ctype<charT>::mask native_mask_type;
  337. typedef typename std::make_unsigned<native_mask_type>::type unsigned_native_mask_type;
  338. static const char_class_type mask_blank = 1u << 24;
  339. static const char_class_type mask_word = 1u << 25;
  340. static const char_class_type mask_unicode = 1u << 26;
  341. static const char_class_type mask_horizontal = 1u << 27;
  342. static const char_class_type mask_vertical = 1u << 28;
  343. typedef std::basic_string<charT> string_type;
  344. typedef charT char_type;
  345. //cpp_regex_traits_implementation();
  346. cpp_regex_traits_implementation(const std::locale& l)
  347. : cpp_regex_traits_char_layer<charT>(l)
  348. {
  349. init();
  350. }
  351. cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l)
  352. : cpp_regex_traits_char_layer<charT>(l)
  353. {
  354. init();
  355. }
  356. std::string error_string(regex_constants::error_type n) const
  357. {
  358. if(!m_error_strings.empty())
  359. {
  360. std::map<int, std::string>::const_iterator p = m_error_strings.find(n);
  361. return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second;
  362. }
  363. return get_default_error_string(n);
  364. }
  365. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  366. {
  367. char_class_type result = lookup_classname_imp(p1, p2);
  368. if(result == 0)
  369. {
  370. string_type temp(p1, p2);
  371. this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size());
  372. result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
  373. }
  374. return result;
  375. }
  376. string_type lookup_collatename(const charT* p1, const charT* p2) const;
  377. string_type transform_primary(const charT* p1, const charT* p2) const;
  378. string_type transform(const charT* p1, const charT* p2) const;
  379. private:
  380. std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
  381. std::map<string_type, char_class_type> m_custom_class_names; // character class names
  382. std::map<string_type, string_type> m_custom_collate_names; // collating element names
  383. unsigned m_collate_type; // the form of the collation string
  384. charT m_collate_delim; // the collation group delimiter
  385. //
  386. // helpers:
  387. //
  388. char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
  389. void init();
  390. };
  391. template <class charT>
  392. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_blank;
  393. template <class charT>
  394. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word;
  395. template <class charT>
  396. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode;
  397. template <class charT>
  398. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical;
  399. template <class charT>
  400. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal;
  401. template <class charT>
  402. typename cpp_regex_traits_implementation<charT>::string_type
  403. cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
  404. {
  405. //
  406. // PRECONDITIONS:
  407. //
  408. // A bug in gcc 3.2 (and maybe other versions as well) treats
  409. // p1 as a null terminated string, for efficiency reasons
  410. // we work around this elsewhere, but just assert here that
  411. // we adhere to gcc's (buggy) preconditions...
  412. //
  413. BOOST_REGEX_ASSERT(*p2 == 0);
  414. string_type result;
  415. #if defined(_CPPLIB_VER)
  416. //
  417. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  418. // to std::collate::transform, but only for certain locales :-(
  419. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  420. //
  421. if(*p1 == 0)
  422. {
  423. return string_type(1, charT(0));
  424. }
  425. #endif
  426. //
  427. // swallowing all exceptions here is a bad idea
  428. // however at least one std lib will always throw
  429. // std::bad_alloc for certain arguments...
  430. //
  431. #ifndef BOOST_NO_EXCEPTIONS
  432. try{
  433. #endif
  434. //
  435. // What we do here depends upon the format of the sort key returned by
  436. // sort key returned by this->transform:
  437. //
  438. switch(m_collate_type)
  439. {
  440. case sort_C:
  441. case sort_unknown:
  442. // the best we can do is translate to lower case, then get a regular sort key:
  443. {
  444. result.assign(p1, p2);
  445. this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size());
  446. result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size());
  447. break;
  448. }
  449. case sort_fixed:
  450. {
  451. // get a regular sort key, and then truncate it:
  452. result.assign(this->m_pcollate->transform(p1, p2));
  453. result.erase(this->m_collate_delim);
  454. break;
  455. }
  456. case sort_delim:
  457. // get a regular sort key, and then truncate everything after the delim:
  458. result.assign(this->m_pcollate->transform(p1, p2));
  459. std::size_t i;
  460. for(i = 0; i < result.size(); ++i)
  461. {
  462. if(result[i] == m_collate_delim)
  463. break;
  464. }
  465. result.erase(i);
  466. break;
  467. }
  468. #ifndef BOOST_NO_EXCEPTIONS
  469. }catch(...){}
  470. #endif
  471. while((!result.empty()) && (charT(0) == *result.rbegin()))
  472. result.erase(result.size() - 1);
  473. if(result.empty())
  474. {
  475. // character is ignorable at the primary level:
  476. result = string_type(1, charT(0));
  477. }
  478. return result;
  479. }
  480. template <class charT>
  481. typename cpp_regex_traits_implementation<charT>::string_type
  482. cpp_regex_traits_implementation<charT>::transform(const charT* p1, const charT* p2) const
  483. {
  484. //
  485. // PRECONDITIONS:
  486. //
  487. // A bug in gcc 3.2 (and maybe other versions as well) treats
  488. // p1 as a null terminated string, for efficiency reasons
  489. // we work around this elsewhere, but just assert here that
  490. // we adhere to gcc's (buggy) preconditions...
  491. //
  492. BOOST_REGEX_ASSERT(*p2 == 0);
  493. //
  494. // swallowing all exceptions here is a bad idea
  495. // however at least one std lib will always throw
  496. // std::bad_alloc for certain arguments...
  497. //
  498. string_type result, result2;
  499. #if defined(_CPPLIB_VER)
  500. //
  501. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  502. // to std::collate::transform, but only for certain locales :-(
  503. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  504. //
  505. if(*p1 == 0)
  506. {
  507. return result;
  508. }
  509. #endif
  510. #ifndef BOOST_NO_EXCEPTIONS
  511. try{
  512. #endif
  513. result = this->m_pcollate->transform(p1, p2);
  514. //
  515. // some implementations (Dinkumware) append unnecessary trailing \0's:
  516. while((!result.empty()) && (charT(0) == *result.rbegin()))
  517. result.erase(result.size() - 1);
  518. //
  519. // We may have NULL's used as separators between sections of the collate string,
  520. // an example would be Boost.Locale. We have no way to detect this case via
  521. // #defines since this can be used with any compiler/platform combination.
  522. // Unfortunately our state machine (which was devised when all implementations
  523. // used underlying C language API's) can't cope with that case. One workaround
  524. // is to replace each character with 2, fortunately this code isn't used that
  525. // much as this is now slower than before :-(
  526. //
  527. typedef typename std::make_unsigned<charT>::type uchar_type;
  528. result2.reserve(result.size() * 2 + 2);
  529. for(unsigned i = 0; i < result.size(); ++i)
  530. {
  531. if(static_cast<uchar_type>(result[i]) == (std::numeric_limits<uchar_type>::max)())
  532. {
  533. result2.append(1, charT((std::numeric_limits<uchar_type>::max)())).append(1, charT('b'));
  534. }
  535. else
  536. {
  537. result2.append(1, static_cast<charT>(1 + static_cast<uchar_type>(result[i]))).append(1, charT('b') - 1);
  538. }
  539. }
  540. BOOST_REGEX_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end());
  541. #ifndef BOOST_NO_EXCEPTIONS
  542. }
  543. catch(...)
  544. {
  545. }
  546. #endif
  547. return result2;
  548. }
  549. template <class charT>
  550. typename cpp_regex_traits_implementation<charT>::string_type
  551. cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
  552. {
  553. typedef typename std::map<string_type, string_type>::const_iterator iter_type;
  554. if(!m_custom_collate_names.empty())
  555. {
  556. iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
  557. if(pos != m_custom_collate_names.end())
  558. return pos->second;
  559. }
  560. std::string name(p1, p2);
  561. name = lookup_default_collate_name(name);
  562. if(!name.empty())
  563. return string_type(name.begin(), name.end());
  564. if(p2 - p1 == 1)
  565. return string_type(1, *p1);
  566. return string_type();
  567. }
  568. template <class charT>
  569. void cpp_regex_traits_implementation<charT>::init()
  570. {
  571. #ifndef __IBMCPP__
  572. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  573. #else
  574. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  575. #endif
  576. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  577. if((!cat_name.empty()) && (this->m_pmessages != 0))
  578. {
  579. cat = this->m_pmessages->open(
  580. cat_name,
  581. this->m_locale);
  582. if((int)cat < 0)
  583. {
  584. std::string m("Unable to open message catalog: ");
  585. std::runtime_error err(m + cat_name);
  586. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  587. }
  588. }
  589. //
  590. // if we have a valid catalog then load our messages:
  591. //
  592. if((int)cat >= 0)
  593. {
  594. //
  595. // Error messages:
  596. //
  597. for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0);
  598. i <= boost::regex_constants::error_unknown;
  599. i = static_cast<boost::regex_constants::error_type>(i + 1))
  600. {
  601. const char* p = get_default_error_string(i);
  602. string_type default_message;
  603. while(*p)
  604. {
  605. default_message.append(1, this->m_pctype->widen(*p));
  606. ++p;
  607. }
  608. string_type s = this->m_pmessages->get(cat, 0, i+200, default_message);
  609. std::string result;
  610. for(std::string::size_type j = 0; j < s.size(); ++j)
  611. {
  612. result.append(1, this->m_pctype->narrow(s[j], 0));
  613. }
  614. m_error_strings[i] = result;
  615. }
  616. //
  617. // Custom class names:
  618. //
  619. static const char_class_type masks[16] =
  620. {
  621. static_cast<unsigned_native_mask_type>(std::ctype<charT>::alnum),
  622. static_cast<unsigned_native_mask_type>(std::ctype<charT>::alpha),
  623. static_cast<unsigned_native_mask_type>(std::ctype<charT>::cntrl),
  624. static_cast<unsigned_native_mask_type>(std::ctype<charT>::digit),
  625. static_cast<unsigned_native_mask_type>(std::ctype<charT>::graph),
  626. cpp_regex_traits_implementation<charT>::mask_horizontal,
  627. static_cast<unsigned_native_mask_type>(std::ctype<charT>::lower),
  628. static_cast<unsigned_native_mask_type>(std::ctype<charT>::print),
  629. static_cast<unsigned_native_mask_type>(std::ctype<charT>::punct),
  630. static_cast<unsigned_native_mask_type>(std::ctype<charT>::space),
  631. static_cast<unsigned_native_mask_type>(std::ctype<charT>::upper),
  632. cpp_regex_traits_implementation<charT>::mask_vertical,
  633. static_cast<unsigned_native_mask_type>(std::ctype<charT>::xdigit),
  634. cpp_regex_traits_implementation<charT>::mask_blank,
  635. cpp_regex_traits_implementation<charT>::mask_word,
  636. cpp_regex_traits_implementation<charT>::mask_unicode,
  637. };
  638. static const string_type null_string;
  639. for(unsigned int j = 0; j <= 13; ++j)
  640. {
  641. string_type s(this->m_pmessages->get(cat, 0, j+300, null_string));
  642. if(!s.empty())
  643. this->m_custom_class_names[s] = masks[j];
  644. }
  645. }
  646. //
  647. // get the collation format used by m_pcollate:
  648. //
  649. m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim);
  650. }
  651. template <class charT>
  652. typename cpp_regex_traits_implementation<charT>::char_class_type
  653. cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
  654. {
  655. static const char_class_type masks[22] =
  656. {
  657. 0,
  658. static_cast<unsigned_native_mask_type>(std::ctype<char>::alnum),
  659. static_cast<unsigned_native_mask_type>(std::ctype<char>::alpha),
  660. cpp_regex_traits_implementation<charT>::mask_blank,
  661. static_cast<unsigned_native_mask_type>(std::ctype<char>::cntrl),
  662. static_cast<unsigned_native_mask_type>(std::ctype<char>::digit),
  663. static_cast<unsigned_native_mask_type>(std::ctype<char>::digit),
  664. static_cast<unsigned_native_mask_type>(std::ctype<char>::graph),
  665. cpp_regex_traits_implementation<charT>::mask_horizontal,
  666. static_cast<unsigned_native_mask_type>(std::ctype<char>::lower),
  667. static_cast<unsigned_native_mask_type>(std::ctype<char>::lower),
  668. static_cast<unsigned_native_mask_type>(std::ctype<char>::print),
  669. static_cast<unsigned_native_mask_type>(std::ctype<char>::punct),
  670. static_cast<unsigned_native_mask_type>(std::ctype<char>::space),
  671. static_cast<unsigned_native_mask_type>(std::ctype<char>::space),
  672. static_cast<unsigned_native_mask_type>(std::ctype<char>::upper),
  673. cpp_regex_traits_implementation<charT>::mask_unicode,
  674. static_cast<unsigned_native_mask_type>(std::ctype<char>::upper),
  675. cpp_regex_traits_implementation<charT>::mask_vertical,
  676. static_cast<unsigned_native_mask_type>(std::ctype<char>::alnum) | cpp_regex_traits_implementation<charT>::mask_word,
  677. static_cast<unsigned_native_mask_type>(std::ctype<char>::alnum) | cpp_regex_traits_implementation<charT>::mask_word,
  678. static_cast<unsigned_native_mask_type>(std::ctype<char>::xdigit),
  679. };
  680. if(!m_custom_class_names.empty())
  681. {
  682. typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
  683. map_iter pos = m_custom_class_names.find(string_type(p1, p2));
  684. if(pos != m_custom_class_names.end())
  685. return pos->second;
  686. }
  687. std::size_t state_id = 1 + BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  688. BOOST_REGEX_ASSERT(state_id < sizeof(masks) / sizeof(masks[0]));
  689. return masks[state_id];
  690. }
  691. template <class charT>
  692. inline std::shared_ptr<const cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l)
  693. {
  694. cpp_regex_traits_base<charT> key(l);
  695. return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5);
  696. }
  697. } // BOOST_REGEX_DETAIL_NS
  698. BOOST_REGEX_MODULE_EXPORT template <class charT>
  699. class cpp_regex_traits
  700. {
  701. private:
  702. typedef std::ctype<charT> ctype_type;
  703. public:
  704. typedef charT char_type;
  705. typedef std::size_t size_type;
  706. typedef std::basic_string<char_type> string_type;
  707. typedef std::locale locale_type;
  708. typedef std::uint_least32_t char_class_type;
  709. struct boost_extensions_tag{};
  710. cpp_regex_traits()
  711. : m_pimpl(BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits<charT>(std::locale()))
  712. { }
  713. static size_type length(const char_type* p)
  714. {
  715. return std::char_traits<charT>::length(p);
  716. }
  717. regex_constants::syntax_type syntax_type(charT c)const
  718. {
  719. return m_pimpl->syntax_type(c);
  720. }
  721. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  722. {
  723. return m_pimpl->escape_syntax_type(c);
  724. }
  725. charT translate(charT c) const
  726. {
  727. return c;
  728. }
  729. charT translate_nocase(charT c) const
  730. {
  731. return m_pimpl->m_pctype->tolower(c);
  732. }
  733. charT translate(charT c, bool icase) const
  734. {
  735. return icase ? m_pimpl->m_pctype->tolower(c) : c;
  736. }
  737. charT tolower(charT c) const
  738. {
  739. return m_pimpl->m_pctype->tolower(c);
  740. }
  741. charT toupper(charT c) const
  742. {
  743. return m_pimpl->m_pctype->toupper(c);
  744. }
  745. string_type transform(const charT* p1, const charT* p2) const
  746. {
  747. return m_pimpl->transform(p1, p2);
  748. }
  749. string_type transform_primary(const charT* p1, const charT* p2) const
  750. {
  751. return m_pimpl->transform_primary(p1, p2);
  752. }
  753. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  754. {
  755. return m_pimpl->lookup_classname(p1, p2);
  756. }
  757. string_type lookup_collatename(const charT* p1, const charT* p2) const
  758. {
  759. return m_pimpl->lookup_collatename(p1, p2);
  760. }
  761. bool isctype(charT c, char_class_type f) const
  762. {
  763. typedef typename std::ctype<charT>::mask ctype_mask;
  764. static const ctype_mask mask_base =
  765. static_cast<ctype_mask>(
  766. std::ctype<charT>::alnum
  767. | std::ctype<charT>::alpha
  768. | std::ctype<charT>::cntrl
  769. | std::ctype<charT>::digit
  770. | std::ctype<charT>::graph
  771. | std::ctype<charT>::lower
  772. | std::ctype<charT>::print
  773. | std::ctype<charT>::punct
  774. | std::ctype<charT>::space
  775. | std::ctype<charT>::upper
  776. | std::ctype<charT>::xdigit);
  777. if((f & mask_base)
  778. && (m_pimpl->m_pctype->is(
  779. static_cast<ctype_mask>(f & mask_base), c)))
  780. return true;
  781. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c))
  782. return true;
  783. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
  784. return true;
  785. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_blank)
  786. && m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
  787. && !BOOST_REGEX_DETAIL_NS::is_separator(c))
  788. return true;
  789. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_vertical)
  790. && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
  791. return true;
  792. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_horizontal)
  793. && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_vertical))
  794. return true;
  795. #ifdef __CYGWIN__
  796. //
  797. // Cygwin has a buggy ctype facet, see https://www.cygwin.com/ml/cygwin/2012-08/msg00178.html:
  798. //
  799. else if((f & std::ctype<charT>::xdigit) == std::ctype<charT>::xdigit)
  800. {
  801. if((c >= 'a') && (c <= 'f'))
  802. return true;
  803. if((c >= 'A') && (c <= 'F'))
  804. return true;
  805. }
  806. #endif
  807. return false;
  808. }
  809. std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const;
  810. int value(charT c, int radix)const
  811. {
  812. const charT* pc = &c;
  813. return (int)toi(pc, pc + 1, radix);
  814. }
  815. locale_type imbue(locale_type l)
  816. {
  817. std::locale result(getloc());
  818. m_pimpl = BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits<charT>(l);
  819. return result;
  820. }
  821. locale_type getloc()const
  822. {
  823. return m_pimpl->m_locale;
  824. }
  825. std::string error_string(regex_constants::error_type n) const
  826. {
  827. return m_pimpl->error_string(n);
  828. }
  829. //
  830. // extension:
  831. // set the name of the message catalog in use (defaults to "boost_regex").
  832. //
  833. static std::string catalog_name(const std::string& name);
  834. static std::string get_catalog_name();
  835. private:
  836. std::shared_ptr<const BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT> > m_pimpl;
  837. //
  838. // catalog name handler:
  839. //
  840. static std::string& get_catalog_name_inst();
  841. #ifdef BOOST_HAS_THREADS
  842. static std::mutex& get_mutex_inst();
  843. #endif
  844. };
  845. template <class charT>
  846. std::intmax_t cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
  847. {
  848. BOOST_REGEX_DETAIL_NS::parser_buf<charT> sbuf; // buffer for parsing numbers.
  849. std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
  850. // we do NOT want to parse any thousands separators inside the stream:
  851. last = std::find(first, last, std::use_facet<std::numpunct<charT>>(is.getloc()).thousands_sep());
  852. sbuf.pubsetbuf(const_cast<charT*>(static_cast<const charT*>(first)), static_cast<std::streamsize>(last-first));
  853. is.clear();
  854. if(std::abs(radix) == 16) is >> std::hex;
  855. else if(std::abs(radix) == 8) is >> std::oct;
  856. else is >> std::dec;
  857. std::intmax_t val;
  858. if(is >> val)
  859. {
  860. first = first + ((last - first) - sbuf.in_avail());
  861. return val;
  862. }
  863. else
  864. return -1;
  865. }
  866. template <class charT>
  867. std::string cpp_regex_traits<charT>::catalog_name(const std::string& name)
  868. {
  869. #ifdef BOOST_HAS_THREADS
  870. std::lock_guard<std::mutex> lk(get_mutex_inst());
  871. #endif
  872. std::string result(get_catalog_name_inst());
  873. get_catalog_name_inst() = name;
  874. return result;
  875. }
  876. template <class charT>
  877. std::string& cpp_regex_traits<charT>::get_catalog_name_inst()
  878. {
  879. static std::string s_name;
  880. return s_name;
  881. }
  882. template <class charT>
  883. std::string cpp_regex_traits<charT>::get_catalog_name()
  884. {
  885. #ifdef BOOST_HAS_THREADS
  886. std::lock_guard<std::mutex> lk(get_mutex_inst());
  887. #endif
  888. std::string result(get_catalog_name_inst());
  889. return result;
  890. }
  #ifdef BOOST_HAS_THREADS
  //
  // get_mutex_inst:
  // Returns a reference to the function-local static mutex that
  // catalog_name() and get_catalog_name() lock around the stored name.
  //
  template <class charT>
  std::mutex& cpp_regex_traits<charT>::get_mutex_inst()
  {
     static std::mutex instance;
     return instance;
  }
  #endif
  899. namespace BOOST_REGEX_DETAIL_NS {
  900. inline void cpp_regex_traits_char_layer<char>::init()
  901. {
  902. // we need to start by initialising our syntax map so we know which
  903. // character is used for which purpose:
  904. std::memset(m_char_map, 0, sizeof(m_char_map));
  905. #ifndef __IBMCPP__
  906. std::messages<char>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  907. #else
  908. std::messages<char>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  909. #endif
  910. std::string cat_name(cpp_regex_traits<char>::get_catalog_name());
  911. if ((!cat_name.empty()) && (m_pmessages != 0))
  912. {
  913. cat = this->m_pmessages->open(
  914. cat_name,
  915. this->m_locale);
  916. if ((int)cat < 0)
  917. {
  918. std::string m("Unable to open message catalog: ");
  919. std::runtime_error err(m + cat_name);
  920. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  921. }
  922. }
  923. //
  924. // if we have a valid catalog then load our messages:
  925. //
  926. if ((int)cat >= 0)
  927. {
  928. #ifndef BOOST_NO_EXCEPTIONS
  929. try {
  930. #endif
  931. for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  932. {
  933. string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i));
  934. for (string_type::size_type j = 0; j < mss.size(); ++j)
  935. {
  936. m_char_map[static_cast<unsigned char>(mss[j])] = i;
  937. }
  938. }
  939. this->m_pmessages->close(cat);
  940. #ifndef BOOST_NO_EXCEPTIONS
  941. }
  942. catch (...)
  943. {
  944. this->m_pmessages->close(cat);
  945. throw;
  946. }
  947. #endif
  948. }
  949. else
  950. {
  951. for (regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j)
  952. {
  953. const char* ptr = get_default_syntax(j);
  954. while (ptr && *ptr)
  955. {
  956. m_char_map[static_cast<unsigned char>(*ptr)] = j;
  957. ++ptr;
  958. }
  959. }
  960. }
  961. //
  962. // finish off by calculating our escape types:
  963. //
  964. unsigned char i = 'A';
  965. do
  966. {
  967. if (m_char_map[i] == 0)
  968. {
  969. if (this->m_pctype->is(std::ctype_base::lower, i))
  970. m_char_map[i] = regex_constants::escape_type_class;
  971. else if (this->m_pctype->is(std::ctype_base::upper, i))
  972. m_char_map[i] = regex_constants::escape_type_not_class;
  973. }
  974. } while (0xFF != i++);
  975. }
  976. } // namespace detail
  977. } // boost
  978. #ifdef BOOST_REGEX_MSVC
  979. #pragma warning(pop)
  980. #endif
  981. #endif