regex_traits_defaults.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_traits_defaults.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares APIs for access to regex_traits default properties.
  16. */
  17. #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  18. #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #endif
  23. #ifdef BOOST_HAS_ABI_HEADERS
  24. # include BOOST_ABI_PREFIX
  25. #endif
  26. #ifdef BOOST_MSVC
  27. #pragma warning(pop)
  28. #endif
  29. #include <boost/regex/config.hpp>
  30. #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
  31. #include <boost/regex/v4/syntax_type.hpp>
  32. #endif
  33. #ifndef BOOST_REGEX_ERROR_TYPE_HPP
  34. #include <boost/regex/v4/error_type.hpp>
  35. #endif
  36. #include <boost/type_traits/make_unsigned.hpp>
  37. #ifdef BOOST_NO_STDC_NAMESPACE
  38. namespace std{
  39. using ::strlen;
  40. }
  41. #endif
  42. namespace boost{ namespace BOOST_REGEX_DETAIL_NS{
  43. //
  44. // helpers to suppress warnings:
  45. //
  46. template <class charT>
  47. inline bool is_extended(charT c)
  48. {
  49. typedef typename make_unsigned<charT>::type unsigned_type;
  50. return (sizeof(charT) > 1) && (static_cast<unsigned_type>(c) >= 256u);
  51. }
  52. inline bool is_extended(char)
  53. { return false; }
  54. BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
  55. BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n);
  56. BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c);
  57. BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c);
  58. // is charT c a combining character?
  59. BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s);
  60. template <class charT>
  61. inline bool is_combining(charT c)
  62. {
  63. return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c)));
  64. }
  65. template <>
  66. inline bool is_combining<char>(char)
  67. {
  68. return false;
  69. }
  70. template <>
  71. inline bool is_combining<signed char>(signed char)
  72. {
  73. return false;
  74. }
  75. template <>
  76. inline bool is_combining<unsigned char>(unsigned char)
  77. {
  78. return false;
  79. }
  #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
  #ifdef _MSC_VER
  //
  // _MSC_VER builds: the value is converted to unsigned short and looked up directly.
  //
  template<>
  inline bool is_combining<wchar_t>(wchar_t c)
  {
     const unsigned short code = static_cast<unsigned short>(c);
     return is_combining_implementation(code);
  }
  #elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
  #if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
  //
  // Unsigned wchar_t no wider than unsigned short: no range check needed.
  //
  template<>
  inline bool is_combining<wchar_t>(wchar_t c)
  {
     const unsigned short code = static_cast<unsigned short>(c);
     return is_combining_implementation(code);
  }
  #else
  //
  // Unsigned wchar_t wider than unsigned short: values at or above the
  // largest uint_least16_t value are rejected before the lookup.
  //
  template<>
  inline bool is_combining<wchar_t>(wchar_t c)
  {
     if(c >= (std::numeric_limits<uint_least16_t>::max)())
        return false;
     return is_combining_implementation(static_cast<unsigned short>(c));
  }
  #endif
  #endif
  #endif
  //
  // is a charT c a line separator?
  //
  // Returns true for '\n', '\r', '\f' and for the code points U+0085,
  // U+2028 and U+2029.
  //
  template <class charT>
  inline bool is_separator(charT c)
  {
     // Widen the value instead of truncating it to 16 bits: truncation
     // made code points such as U+12028 or U+10085 compare equal to
     // U+2028 / U+0085 when charT is wider than 16 bits.  unsigned long
     // is at least 32 bits wide, and negative values of a signed charT
     // still convert to a large value that matches none of the constants.
     const unsigned long code = static_cast<unsigned long>(c);
     // The || chain already has type bool, so no extra conversion is needed.
     return (c == static_cast<charT>('\n'))
        || (c == static_cast<charT>('\r'))
        || (c == static_cast<charT>('\f'))
        || (code == 0x2028ul)
        || (code == 0x2029ul)
        || (code == 0x85ul);
  }
  //
  // Specialisation for plain char: only '\n', '\r' and '\f' are tested.
  //
  template <>
  inline bool is_separator<char>(char c)
  {
     return (c == '\n') || (c == '\r') || (c == '\f');
  }
  122. //
  123. // get a default collating element:
  124. //
  125. BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);
  126. //
  127. // get the state_id of a character classification, the individual
  128. // traits classes then transform that state_id into a bitmask:
  129. //
  130. template <class charT>
  131. struct character_pointer_range
  132. {
  133. const charT* p1;
  134. const charT* p2;
  135. bool operator < (const character_pointer_range& r)const
  136. {
  137. return std::lexicographical_compare(p1, p2, r.p1, r.p2);
  138. }
  139. bool operator == (const character_pointer_range& r)const
  140. {
  141. // Not only do we check that the ranges are of equal size before
  142. // calling std::equal, but there is no other algorithm available:
  143. // not even a non-standard MS one. So forward to unchecked_equal
  144. // in the MS case.
  145. return ((p2 - p1) == (r.p2 - r.p1)) && BOOST_REGEX_DETAIL_NS::equal(p1, p2, r.p1);
  146. }
  147. };
  148. template <class charT>
  149. int get_default_class_id(const charT* p1, const charT* p2)
  150. {
  151. static const charT data[73] = {
  152. 'a', 'l', 'n', 'u', 'm',
  153. 'a', 'l', 'p', 'h', 'a',
  154. 'b', 'l', 'a', 'n', 'k',
  155. 'c', 'n', 't', 'r', 'l',
  156. 'd', 'i', 'g', 'i', 't',
  157. 'g', 'r', 'a', 'p', 'h',
  158. 'l', 'o', 'w', 'e', 'r',
  159. 'p', 'r', 'i', 'n', 't',
  160. 'p', 'u', 'n', 'c', 't',
  161. 's', 'p', 'a', 'c', 'e',
  162. 'u', 'n', 'i', 'c', 'o', 'd', 'e',
  163. 'u', 'p', 'p', 'e', 'r',
  164. 'v',
  165. 'w', 'o', 'r', 'd',
  166. 'x', 'd', 'i', 'g', 'i', 't',
  167. };
  168. static const character_pointer_range<charT> ranges[21] =
  169. {
  170. {data+0, data+5,}, // alnum
  171. {data+5, data+10,}, // alpha
  172. {data+10, data+15,}, // blank
  173. {data+15, data+20,}, // cntrl
  174. {data+20, data+21,}, // d
  175. {data+20, data+25,}, // digit
  176. {data+25, data+30,}, // graph
  177. {data+29, data+30,}, // h
  178. {data+30, data+31,}, // l
  179. {data+30, data+35,}, // lower
  180. {data+35, data+40,}, // print
  181. {data+40, data+45,}, // punct
  182. {data+45, data+46,}, // s
  183. {data+45, data+50,}, // space
  184. {data+57, data+58,}, // u
  185. {data+50, data+57,}, // unicode
  186. {data+57, data+62,}, // upper
  187. {data+62, data+63,}, // v
  188. {data+63, data+64,}, // w
  189. {data+63, data+67,}, // word
  190. {data+67, data+73,}, // xdigit
  191. };
  192. static const character_pointer_range<charT>* ranges_begin = ranges;
  193. static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
  194. character_pointer_range<charT> t = { p1, p2, };
  195. const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
  196. if((p != ranges_end) && (t == *p))
  197. return static_cast<int>(p - ranges);
  198. return -1;
  199. }
  200. //
  201. // helper functions:
  202. //
  //
  // Returns the number of characters before the terminating zero of the
  // string starting at p.
  //
  template <class charT>
  std::ptrdiff_t global_length(const charT* p)
  {
     const charT* cursor = p;
     while(*cursor)
        ++cursor;
     return cursor - p;
  }
  //
  // char strings are measured with strlen:
  //
  template<>
  inline std::ptrdiff_t global_length<char>(const char* p)
  {
     return static_cast<std::ptrdiff_t>((std::strlen)(p));
  }
  #ifndef BOOST_NO_WREGEX
  //
  // wchar_t strings are measured with wcslen:
  //
  template<>
  inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
  {
     return static_cast<std::ptrdiff_t>((std::wcslen)(p));
  }
  #endif
  226. template <class charT>
  227. inline charT BOOST_REGEX_CALL global_lower(charT c)
  228. {
  229. return c;
  230. }
  231. template <class charT>
  232. inline charT BOOST_REGEX_CALL global_upper(charT c)
  233. {
  234. return c;
  235. }
  236. BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c);
  237. BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c);
  238. #ifndef BOOST_NO_WREGEX
  239. BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c);
  240. BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c);
  241. #endif
  242. #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
  243. BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c);
  244. BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c);
  245. #endif
  246. //
  247. // This sucks: declare template specialisations of global_lower/global_upper
  248. // that just forward to the non-template implementation functions. We do
  249. // this because there is one compiler (Compaq Tru64 C++) that doesn't seem
  250. // to differentiate between templates and non-template overloads....
  251. // what's more, the primary template, plus all overloads have to be
  252. // defined in the same translation unit (if one is inline they all must be)
  253. // otherwise the "local template instantiation" compiler option can pick
  254. // the wrong instantiation when linking:
  255. //
  256. template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); }
  257. template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); }
  258. #ifndef BOOST_NO_WREGEX
  259. template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); }
  260. template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); }
  261. #endif
  262. #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
  263. template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); }
  264. template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); }
  265. #endif
  //
  // Returns the numeric value of c when it is one of '0'-'9', 'a'-'f' or
  // 'A'-'F' (letters count as 10 to 15), and -1 for any other character.
  //
  template <class charT>
  int global_value(charT c)
  {
     const charT digit_first = '0';
     const charT digit_last = '9';
     const charT lower_first = 'a';
     const charT lower_last = 'f';
     const charT upper_first = 'A';
     const charT upper_last = 'F';

     // The groups are tested from the highest character codes downwards.
     if(c >= lower_first)
     {
        if(c > lower_last)
           return -1;
        return 10 + (c - lower_first);
     }
     if(c >= upper_first)
     {
        if(c > upper_last)
           return -1;
        return 10 + (c - upper_first);
     }
     if(c >= digit_first)
     {
        if(c > digit_last)
           return -1;
        return c - digit_first;
     }
     return -1;
  }
  283. template <class charT, class traits>
  284. boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
  285. {
  286. (void)t; // warning suppression
  287. boost::intmax_t limit = (std::numeric_limits<boost::intmax_t>::max)() / radix;
  288. boost::intmax_t next_value = t.value(*p1, radix);
  289. if((p1 == p2) || (next_value < 0) || (next_value >= radix))
  290. return -1;
  291. boost::intmax_t result = 0;
  292. while(p1 != p2)
  293. {
  294. next_value = t.value(*p1, radix);
  295. if((next_value < 0) || (next_value >= radix))
  296. break;
  297. result *= radix;
  298. result += next_value;
  299. ++p1;
  300. if (result > limit)
  301. return -1;
  302. }
  303. return result;
  304. }
  //
  // Returns the zero-terminated pattern text used for the \R escape.
  // Two variants exist: one that also lists \x{2028} and \x{2029}, used
  // when charT can hold the value 0x2029, and a shorter one without them.
  //
  template <class charT>
  inline const charT* get_escape_R_string()
  {
  #ifdef BOOST_MSVC
  #  pragma warning(push)
  #  pragma warning(disable:4309 4245)
  #endif
     static const charT with_unicode_breaks[] = { '(', '?', '>', '\\', 'x', '0', 'D', '\\', 'x', '0', 'A', '?',
        '|', '[', '\\', 'x', '0', 'A', '\\', 'x', '0', 'B', '\\', 'x', '0', 'C', static_cast<unsigned char>('\x85'), '\\', 'x', '{', '2', '0', '2', '8', '}',
        '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
     static const charT without_unicode_breaks[] = { '(', '?', '>', '\\', 'x', '0', 'D', '\\', 'x', '0', 'A', '?',
        '|', '[', '\\', 'x', '0', 'A', '\\', 'x', '0', 'B', '\\', 'x', '0', 'C', static_cast<unsigned char>('\x85'), ']', ')', '\0' };

     // Round-trip 0x2029 through charT: the value survives only when charT
     // is able to represent it.
     const charT probe = static_cast<charT>(0x2029u);
     if(static_cast<unsigned>(probe) == 0x2029u)
        return with_unicode_breaks;
     return without_unicode_breaks;
  #ifdef BOOST_MSVC
  #  pragma warning(pop)
  #endif
  }
  //
  // Specialisation for char: the short variant, with 0x85 spelled as the
  // escape sequence \x85 rather than stored as a raw character.
  //
  template <>
  inline const char* get_escape_R_string<char>()
  {
  #ifdef BOOST_MSVC
  #  pragma warning(push)
  #  pragma warning(disable:4309)
  #endif
     static const char narrow_pattern[] = { '(', '?', '>', '\\', 'x', '0', 'D', '\\', 'x', '0', 'A', '?',
        '|', '[', '\\', 'x', '0', 'A', '\\', 'x', '0', 'B', '\\', 'x', '0', 'C', '\\', 'x', '8', '5', ']', ')', '\0' };
     return narrow_pattern;
  #ifdef BOOST_MSVC
  #  pragma warning(pop)
  #endif
  }
  338. } // BOOST_REGEX_DETAIL_NS
  339. } // boost
  340. #ifdef BOOST_MSVC
  341. #pragma warning(push)
  342. #pragma warning(disable: 4103)
  343. #endif
  344. #ifdef BOOST_HAS_ABI_HEADERS
  345. # include BOOST_ABI_SUFFIX
  346. #endif
  347. #ifdef BOOST_MSVC
  348. #pragma warning(pop)
  349. #endif
  350. #endif