perl_matcher_common.hpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef __BORLANDC__
  31. # pragma option push -w-8008 -w-8066
  32. #endif
  33. #ifdef BOOST_MSVC
  34. # pragma warning(push)
  35. # pragma warning(disable: 4800)
  36. #endif
  37. namespace boost{
  38. namespace BOOST_REGEX_DETAIL_NS{
  39. template <class BidiIterator, class Allocator, class traits>
  40. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  41. {
  42. typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
  43. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  44. if(e.empty())
  45. {
  46. // precondition failure: e is not a valid regex.
  47. std::invalid_argument ex("Invalid regular expression object");
  48. boost::throw_exception(ex);
  49. }
  50. pstate = 0;
  51. m_match_flags = f;
  52. estimate_max_state_count(static_cast<category*>(0));
  53. expression_flag_type re_f = re.flags();
  54. icase = re_f & regex_constants::icase;
  55. if(!(m_match_flags & (match_perl|match_posix)))
  56. {
  57. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  58. m_match_flags |= match_perl;
  59. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  60. m_match_flags |= match_perl;
  61. else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
  62. m_match_flags |= match_perl;
  63. else
  64. m_match_flags |= match_posix;
  65. }
  66. if(m_match_flags & match_posix)
  67. {
  68. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  69. m_presult = m_temp_match.get();
  70. }
  71. else
  72. m_presult = &m_result;
  73. #ifdef BOOST_REGEX_NON_RECURSIVE
  74. m_stack_base = 0;
  75. m_backup_state = 0;
  76. #elif defined(BOOST_REGEX_RECURSIVE)
  77. m_can_backtrack = true;
  78. m_have_accept = false;
  79. #endif
  80. // find the value to use for matching word boundaries:
  81. m_word_mask = re.get_data().m_word_mask;
  82. // find bitmask to use for matching '.':
  83. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
  84. // Disable match_any if requested in the state machine:
  85. if(e.get_data().m_disable_match_any)
  86. m_match_flags &= regex_constants::match_not_any;
  87. }
  88. template <class BidiIterator, class Allocator, class traits>
  89. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  90. {
  91. //
  92. // How many states should we allow our machine to visit before giving up?
  93. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  94. // where N is the length of the string, and S is the number of states
  95. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  96. // but these take unreasonably amounts of time to bale out in pathological
  97. // cases.
  98. //
  99. // Calculate NS^2 first:
  100. //
  101. static const std::ptrdiff_t k = 100000;
  102. std::ptrdiff_t dist = boost::BOOST_REGEX_DETAIL_NS::distance(base, last);
  103. if(dist == 0)
  104. dist = 1;
  105. std::ptrdiff_t states = re.size();
  106. if(states == 0)
  107. states = 1;
  108. if ((std::numeric_limits<std::ptrdiff_t>::max)() / states < states)
  109. {
  110. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  111. return;
  112. }
  113. states *= states;
  114. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  115. {
  116. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  117. return;
  118. }
  119. states *= dist;
  120. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  121. {
  122. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  123. return;
  124. }
  125. states += k;
  126. max_state_count = states;
  127. //
  128. // Now calculate N^2:
  129. //
  130. states = dist;
  131. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  132. {
  133. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  134. return;
  135. }
  136. states *= dist;
  137. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  138. {
  139. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  140. return;
  141. }
  142. states += k;
  143. //
  144. // N^2 can be a very large number indeed, to prevent things getting out
  145. // of control, cap the max states:
  146. //
  147. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  148. states = BOOST_REGEX_MAX_STATE_COUNT;
  149. //
  150. // If (the possibly capped) N^2 is larger than our first estimate,
  151. // use this instead:
  152. //
  153. if(states > max_state_count)
  154. max_state_count = states;
  155. }
  156. template <class BidiIterator, class Allocator, class traits>
  157. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  158. {
  159. // we don't know how long the sequence is:
  160. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  161. }
  162. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  163. template <class BidiIterator, class Allocator, class traits>
  164. inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
  165. protected_proc_type proc)
  166. {
  167. ::boost::BOOST_REGEX_DETAIL_NS::concrete_protected_call
  168. <perl_matcher<BidiIterator, Allocator, traits> >
  169. obj(this, proc);
  170. return obj.execute();
  171. }
  172. #endif
  173. template <class BidiIterator, class Allocator, class traits>
  174. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  175. {
  176. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  177. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
  178. #else
  179. return match_imp();
  180. #endif
  181. }
  182. template <class BidiIterator, class Allocator, class traits>
  183. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  184. {
  185. // initialise our stack if we are non-recursive:
  186. #ifdef BOOST_REGEX_NON_RECURSIVE
  187. save_state_init init(&m_stack_base, &m_backup_state);
  188. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  189. #if !defined(BOOST_NO_EXCEPTIONS)
  190. try{
  191. #endif
  192. #endif
  193. // reset our state machine:
  194. position = base;
  195. search_base = base;
  196. state_count = 0;
  197. m_match_flags |= regex_constants::match_all;
  198. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  199. m_presult->set_base(base);
  200. m_presult->set_named_subs(this->re.get_named_subs());
  201. if(m_match_flags & match_posix)
  202. m_result = *m_presult;
  203. verify_options(re.flags(), m_match_flags);
  204. if(0 == match_prefix())
  205. return false;
  206. return (m_result[0].second == last) && (m_result[0].first == base);
  207. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  208. }
  209. catch(...)
  210. {
  211. // unwind all pushed states, apart from anything else this
  212. // ensures that all the states are correctly destructed
  213. // not just the memory freed.
  214. while(unwind(true)){}
  215. throw;
  216. }
  217. #endif
  218. }
  219. template <class BidiIterator, class Allocator, class traits>
  220. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  221. {
  222. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  223. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
  224. #else
  225. return find_imp();
  226. #endif
  227. }
  228. template <class BidiIterator, class Allocator, class traits>
  229. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  230. {
  231. static matcher_proc_type const s_find_vtable[7] =
  232. {
  233. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  234. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  235. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  236. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  237. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  238. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  239. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  240. };
  241. // initialise our stack if we are non-recursive:
  242. #ifdef BOOST_REGEX_NON_RECURSIVE
  243. save_state_init init(&m_stack_base, &m_backup_state);
  244. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  245. #if !defined(BOOST_NO_EXCEPTIONS)
  246. try{
  247. #endif
  248. #endif
  249. state_count = 0;
  250. if((m_match_flags & regex_constants::match_init) == 0)
  251. {
  252. // reset our state machine:
  253. search_base = position = base;
  254. pstate = re.get_first_state();
  255. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  256. m_presult->set_base(base);
  257. m_presult->set_named_subs(this->re.get_named_subs());
  258. m_match_flags |= regex_constants::match_init;
  259. }
  260. else
  261. {
  262. // start again:
  263. search_base = position = m_result[0].second;
  264. // If last match was null and match_not_null was not set then increment
  265. // our start position, otherwise we go into an infinite loop:
  266. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  267. {
  268. if(position == last)
  269. return false;
  270. else
  271. ++position;
  272. }
  273. // reset $` start:
  274. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  275. //if((base != search_base) && (base == backstop))
  276. // m_match_flags |= match_prev_avail;
  277. }
  278. if(m_match_flags & match_posix)
  279. {
  280. m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  281. m_result.set_base(base);
  282. }
  283. verify_options(re.flags(), m_match_flags);
  284. // find out what kind of expression we have:
  285. unsigned type = (m_match_flags & match_continuous) ?
  286. static_cast<unsigned int>(regbase::restart_continue)
  287. : static_cast<unsigned int>(re.get_restart_type());
  288. // call the appropriate search routine:
  289. matcher_proc_type proc = s_find_vtable[type];
  290. return (this->*proc)();
  291. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  292. }
  293. catch(...)
  294. {
  295. // unwind all pushed states, apart from anything else this
  296. // ensures that all the states are correctly destructed
  297. // not just the memory freed.
  298. while(unwind(true)){}
  299. throw;
  300. }
  301. #endif
  302. }
  303. template <class BidiIterator, class Allocator, class traits>
  304. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  305. {
  306. m_has_partial_match = false;
  307. m_has_found_match = false;
  308. pstate = re.get_first_state();
  309. m_presult->set_first(position);
  310. restart = position;
  311. match_all_states();
  312. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  313. {
  314. m_has_found_match = true;
  315. m_presult->set_second(last, 0, false);
  316. position = last;
  317. if((m_match_flags & match_posix) == match_posix)
  318. {
  319. m_result.maybe_assign(*m_presult);
  320. }
  321. }
  322. #ifdef BOOST_REGEX_MATCH_EXTRA
  323. if(m_has_found_match && (match_extra & m_match_flags))
  324. {
  325. //
  326. // we have a match, reverse the capture information:
  327. //
  328. for(unsigned i = 0; i < m_presult->size(); ++i)
  329. {
  330. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  331. std::reverse(seq.begin(), seq.end());
  332. }
  333. }
  334. #endif
  335. if(!m_has_found_match)
  336. position = restart; // reset search postion
  337. #ifdef BOOST_REGEX_RECURSIVE
  338. m_can_backtrack = true; // reset for further searches
  339. #endif
  340. return m_has_found_match;
  341. }
  342. template <class BidiIterator, class Allocator, class traits>
  343. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  344. {
  345. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  346. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  347. //
  348. // compare string with what we stored in
  349. // our records:
  350. for(unsigned int i = 0; i < len; ++i, ++position)
  351. {
  352. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  353. return false;
  354. }
  355. pstate = pstate->next.p;
  356. return true;
  357. }
  358. template <class BidiIterator, class Allocator, class traits>
  359. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  360. {
  361. if(position == backstop)
  362. {
  363. if((m_match_flags & match_prev_avail) == 0)
  364. {
  365. if((m_match_flags & match_not_bol) == 0)
  366. {
  367. pstate = pstate->next.p;
  368. return true;
  369. }
  370. return false;
  371. }
  372. }
  373. else if(m_match_flags & match_single_line)
  374. return false;
  375. // check the previous value character:
  376. BidiIterator t(position);
  377. --t;
  378. if(position != last)
  379. {
  380. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  381. {
  382. pstate = pstate->next.p;
  383. return true;
  384. }
  385. }
  386. else if(is_separator(*t))
  387. {
  388. pstate = pstate->next.p;
  389. return true;
  390. }
  391. return false;
  392. }
  393. template <class BidiIterator, class Allocator, class traits>
  394. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  395. {
  396. if(position != last)
  397. {
  398. if(m_match_flags & match_single_line)
  399. return false;
  400. // we're not yet at the end so *first is always valid:
  401. if(is_separator(*position))
  402. {
  403. if((position != backstop) || (m_match_flags & match_prev_avail))
  404. {
  405. // check that we're not in the middle of \r\n sequence
  406. BidiIterator t(position);
  407. --t;
  408. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  409. {
  410. return false;
  411. }
  412. }
  413. pstate = pstate->next.p;
  414. return true;
  415. }
  416. }
  417. else if((m_match_flags & match_not_eol) == 0)
  418. {
  419. pstate = pstate->next.p;
  420. return true;
  421. }
  422. return false;
  423. }
  424. template <class BidiIterator, class Allocator, class traits>
  425. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  426. {
  427. if(position == last)
  428. return false;
  429. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  430. return false;
  431. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  432. return false;
  433. pstate = pstate->next.p;
  434. ++position;
  435. return true;
  436. }
  437. template <class BidiIterator, class Allocator, class traits>
  438. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  439. {
  440. bool b; // indcates whether next character is a word character
  441. if(position != last)
  442. {
  443. // prev and this character must be opposites:
  444. b = traits_inst.isctype(*position, m_word_mask);
  445. }
  446. else
  447. {
  448. b = (m_match_flags & match_not_eow) ? true : false;
  449. }
  450. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  451. {
  452. if(m_match_flags & match_not_bow)
  453. b ^= true;
  454. else
  455. b ^= false;
  456. }
  457. else
  458. {
  459. --position;
  460. b ^= traits_inst.isctype(*position, m_word_mask);
  461. ++position;
  462. }
  463. if(b)
  464. {
  465. pstate = pstate->next.p;
  466. return true;
  467. }
  468. return false; // no match if we get to here...
  469. }
  470. template <class BidiIterator, class Allocator, class traits>
  471. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  472. {
  473. if(position == last)
  474. return false;
  475. // both prev and this character must be m_word_mask:
  476. bool prev = traits_inst.isctype(*position, m_word_mask);
  477. {
  478. bool b;
  479. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  480. return false;
  481. else
  482. {
  483. --position;
  484. b = traits_inst.isctype(*position, m_word_mask);
  485. ++position;
  486. }
  487. if(b == prev)
  488. {
  489. pstate = pstate->next.p;
  490. return true;
  491. }
  492. }
  493. return false;
  494. }
  495. template <class BidiIterator, class Allocator, class traits>
  496. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  497. {
  498. if(position == last)
  499. return false; // can't be starting a word if we're already at the end of input
  500. if(!traits_inst.isctype(*position, m_word_mask))
  501. return false; // next character isn't a word character
  502. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  503. {
  504. if(m_match_flags & match_not_bow)
  505. return false; // no previous input
  506. }
  507. else
  508. {
  509. // otherwise inside buffer:
  510. BidiIterator t(position);
  511. --t;
  512. if(traits_inst.isctype(*t, m_word_mask))
  513. return false; // previous character not non-word
  514. }
  515. // OK we have a match:
  516. pstate = pstate->next.p;
  517. return true;
  518. }
  519. template <class BidiIterator, class Allocator, class traits>
  520. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  521. {
  522. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  523. return false; // start of buffer can't be end of word
  524. BidiIterator t(position);
  525. --t;
  526. if(traits_inst.isctype(*t, m_word_mask) == false)
  527. return false; // previous character wasn't a word character
  528. if(position == last)
  529. {
  530. if(m_match_flags & match_not_eow)
  531. return false; // end of buffer but not end of word
  532. }
  533. else
  534. {
  535. // otherwise inside buffer:
  536. if(traits_inst.isctype(*position, m_word_mask))
  537. return false; // next character is a word character
  538. }
  539. pstate = pstate->next.p;
  540. return true; // if we fall through to here then we've succeeded
  541. }
  542. template <class BidiIterator, class Allocator, class traits>
  543. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  544. {
  545. if((position != backstop) || (m_match_flags & match_not_bob))
  546. return false;
  547. // OK match:
  548. pstate = pstate->next.p;
  549. return true;
  550. }
  551. template <class BidiIterator, class Allocator, class traits>
  552. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  553. {
  554. if((position != last) || (m_match_flags & match_not_eob))
  555. return false;
  556. // OK match:
  557. pstate = pstate->next.p;
  558. return true;
  559. }
  560. template <class BidiIterator, class Allocator, class traits>
  561. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  562. {
  563. //
  564. // Compare with what we previously matched.
  565. // Note that this succeeds if the backref did not partisipate
  566. // in the match, this is in line with ECMAScript, but not Perl
  567. // or PCRE.
  568. //
  569. int index = static_cast<const re_brace*>(pstate)->index;
  570. if(index >= 10000)
  571. {
  572. named_subexpressions::range_type r = re.get_data().equal_range(index);
  573. BOOST_ASSERT(r.first != r.second);
  574. do
  575. {
  576. index = r.first->index;
  577. ++r.first;
  578. }while((r.first != r.second) && ((*m_presult)[index].matched != true));
  579. }
  580. if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
  581. return false;
  582. BidiIterator i = (*m_presult)[index].first;
  583. BidiIterator j = (*m_presult)[index].second;
  584. while(i != j)
  585. {
  586. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  587. return false;
  588. ++i;
  589. ++position;
  590. }
  591. pstate = pstate->next.p;
  592. return true;
  593. }
  594. template <class BidiIterator, class Allocator, class traits>
  595. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  596. {
  597. typedef typename traits::char_class_type char_class_type;
  598. // let the traits class do the work:
  599. if(position == last)
  600. return false;
  601. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  602. if(t != position)
  603. {
  604. pstate = pstate->next.p;
  605. position = t;
  606. return true;
  607. }
  608. return false;
  609. }
  610. template <class BidiIterator, class Allocator, class traits>
  611. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  612. {
  613. if(position == last)
  614. return false;
  615. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  616. {
  617. pstate = pstate->next.p;
  618. ++position;
  619. return true;
  620. }
  621. return false;
  622. }
  623. template <class BidiIterator, class Allocator, class traits>
  624. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  625. {
  626. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  627. return true;
  628. }
  629. template <class BidiIterator, class Allocator, class traits>
  630. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  631. {
  632. if(position == last)
  633. return false;
  634. if(is_combining(traits_inst.translate(*position, icase)))
  635. return false;
  636. ++position;
  637. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  638. ++position;
  639. pstate = pstate->next.p;
  640. return true;
  641. }
  642. template <class BidiIterator, class Allocator, class traits>
  643. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  644. {
  645. if(m_match_flags & match_not_eob)
  646. return false;
  647. BidiIterator p(position);
  648. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  649. if(p != last)
  650. return false;
  651. pstate = pstate->next.p;
  652. return true;
  653. }
  654. template <class BidiIterator, class Allocator, class traits>
  655. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  656. {
  657. if(position == search_base)
  658. {
  659. pstate = pstate->next.p;
  660. return true;
  661. }
  662. return false;
  663. }
  664. template <class BidiIterator, class Allocator, class traits>
  665. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  666. {
  667. #ifdef BOOST_MSVC
  668. #pragma warning(push)
  669. #pragma warning(disable:4127)
  670. #endif
  671. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  672. {
  673. std::ptrdiff_t maxlen = ::boost::BOOST_REGEX_DETAIL_NS::distance(backstop, position);
  674. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  675. return false;
  676. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  677. }
  678. else
  679. {
  680. int c = static_cast<const re_brace*>(pstate)->index;
  681. while(c--)
  682. {
  683. if(position == backstop)
  684. return false;
  685. --position;
  686. }
  687. }
  688. pstate = pstate->next.p;
  689. return true;
  690. #ifdef BOOST_MSVC
  691. #pragma warning(pop)
  692. #endif
  693. }
  694. template <class BidiIterator, class Allocator, class traits>
  695. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  696. {
  697. // return true if marked sub-expression N has been matched:
  698. int index = static_cast<const re_brace*>(pstate)->index;
  699. bool result = false;
  700. if(index == 9999)
  701. {
  702. // Magic value for a (DEFINE) block:
  703. return false;
  704. }
  705. else if(index > 0)
  706. {
  707. // Have we matched subexpression "index"?
  708. // Check if index is a hash value:
  709. if(index >= 10000)
  710. {
  711. named_subexpressions::range_type r = re.get_data().equal_range(index);
  712. while(r.first != r.second)
  713. {
  714. if((*m_presult)[r.first->index].matched)
  715. {
  716. result = true;
  717. break;
  718. }
  719. ++r.first;
  720. }
  721. }
  722. else
  723. {
  724. result = (*m_presult)[index].matched;
  725. }
  726. pstate = pstate->next.p;
  727. }
  728. else
  729. {
  730. // Have we recursed into subexpression "index"?
  731. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  732. int idx = -(index+1);
  733. if(idx >= 10000)
  734. {
  735. named_subexpressions::range_type r = re.get_data().equal_range(idx);
  736. int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
  737. while(r.first != r.second)
  738. {
  739. result |= (stack_index == r.first->index);
  740. if(result)break;
  741. ++r.first;
  742. }
  743. }
  744. else
  745. {
  746. result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
  747. }
  748. pstate = pstate->next.p;
  749. }
  750. return result;
  751. }
  752. template <class BidiIterator, class Allocator, class traits>
  753. bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
  754. {
  755. // Just force a backtrack:
  756. return false;
  757. }
  758. template <class BidiIterator, class Allocator, class traits>
  759. bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
  760. {
  761. if(!recursion_stack.empty())
  762. {
  763. return skip_until_paren(recursion_stack.back().idx);
  764. }
  765. else
  766. {
  767. return skip_until_paren(INT_MAX);
  768. }
  769. }
  770. template <class BidiIterator, class Allocator, class traits>
  771. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  772. {
  773. #ifdef BOOST_MSVC
  774. #pragma warning(push)
  775. #pragma warning(disable:4127)
  776. #endif
  777. const unsigned char* _map = re.get_map();
  778. while(true)
  779. {
  780. // skip everything we can't match:
  781. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  782. ++position;
  783. if(position == last)
  784. {
  785. // run out of characters, try a null match if possible:
  786. if(re.can_be_null())
  787. return match_prefix();
  788. break;
  789. }
  790. // now try and obtain a match:
  791. if(match_prefix())
  792. return true;
  793. if(position == last)
  794. return false;
  795. ++position;
  796. }
  797. return false;
  798. #ifdef BOOST_MSVC
  799. #pragma warning(pop)
  800. #endif
  801. }
  802. template <class BidiIterator, class Allocator, class traits>
  803. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  804. {
  805. #ifdef BOOST_MSVC
  806. #pragma warning(push)
  807. #pragma warning(disable:4127)
  808. #endif
  809. // do search optimised for word starts:
  810. const unsigned char* _map = re.get_map();
  811. if((m_match_flags & match_prev_avail) || (position != base))
  812. --position;
  813. else if(match_prefix())
  814. return true;
  815. do
  816. {
  817. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  818. ++position;
  819. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  820. ++position;
  821. if(position == last)
  822. break;
  823. if(can_start(*position, _map, (unsigned char)mask_any) )
  824. {
  825. if(match_prefix())
  826. return true;
  827. }
  828. if(position == last)
  829. break;
  830. } while(true);
  831. return false;
  832. #ifdef BOOST_MSVC
  833. #pragma warning(pop)
  834. #endif
  835. }
  836. template <class BidiIterator, class Allocator, class traits>
  837. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  838. {
  839. // do search optimised for line starts:
  840. const unsigned char* _map = re.get_map();
  841. if(match_prefix())
  842. return true;
  843. while(position != last)
  844. {
  845. while((position != last) && !is_separator(*position))
  846. ++position;
  847. if(position == last)
  848. return false;
  849. ++position;
  850. if(position == last)
  851. {
  852. if(re.can_be_null() && match_prefix())
  853. return true;
  854. return false;
  855. }
  856. if( can_start(*position, _map, (unsigned char)mask_any) )
  857. {
  858. if(match_prefix())
  859. return true;
  860. }
  861. if(position == last)
  862. return false;
  863. //++position;
  864. }
  865. return false;
  866. }
  867. template <class BidiIterator, class Allocator, class traits>
  868. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  869. {
  870. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  871. return match_prefix();
  872. return false;
  873. }
  874. template <class BidiIterator, class Allocator, class traits>
  875. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  876. {
  877. #if 0
  878. if(position == last)
  879. return false; // can't possibly match if we're at the end already
  880. unsigned type = (m_match_flags & match_continuous) ?
  881. static_cast<unsigned int>(regbase::restart_continue)
  882. : static_cast<unsigned int>(re.get_restart_type());
  883. const kmp_info<char_type>* info = access::get_kmp(re);
  884. int len = info->len;
  885. const char_type* x = info->pstr;
  886. int j = 0;
  887. while (position != last)
  888. {
  889. while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
  890. j = info->kmp_next[j];
  891. ++position;
  892. ++j;
  893. if(j >= len)
  894. {
  895. if(type == regbase::restart_fixed_lit)
  896. {
  897. std::advance(position, -j);
  898. restart = position;
  899. std::advance(restart, len);
  900. m_result.set_first(position);
  901. m_result.set_second(restart);
  902. position = restart;
  903. return true;
  904. }
  905. else
  906. {
  907. restart = position;
  908. std::advance(position, -j);
  909. if(match_prefix())
  910. return true;
  911. else
  912. {
  913. for(int k = 0; (restart != position) && (k < j); ++k, --restart)
  914. {} // dwa 10/20/2000 - warning suppression for MWCW
  915. if(restart != last)
  916. ++restart;
  917. position = restart;
  918. j = 0; //we could do better than this...
  919. }
  920. }
  921. }
  922. }
  923. if((m_match_flags & match_partial) && (position == last) && j)
  924. {
  925. // we need to check for a partial match:
  926. restart = position;
  927. std::advance(position, -j);
  928. return match_prefix();
  929. }
  930. #endif
  931. return false;
  932. }
  933. } // namespace BOOST_REGEX_DETAIL_NS
  934. } // namespace boost
  935. #ifdef BOOST_MSVC
  936. # pragma warning(pop)
  937. #endif
  938. #ifdef __BORLANDC__
  939. # pragma option pop
  940. #endif
  941. #ifdef BOOST_MSVC
  942. #pragma warning(push)
  943. #pragma warning(disable: 4103)
  944. #endif
  945. #ifdef BOOST_HAS_ABI_HEADERS
  946. # include BOOST_ABI_SUFFIX
  947. #endif
  948. #ifdef BOOST_MSVC
  949. #pragma warning(pop)
  950. #endif
  951. #endif