| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093 |
- //
- // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
- //
- // Distributed under the Boost Software License, Version 1.0. (See
- // accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- //
- #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
- #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
- #include <boost/locale/config.hpp>
- #include <boost/locale/boundary/types.hpp>
- #include <boost/locale/boundary/facets.hpp>
- #include <boost/locale/boundary/segment.hpp>
- #include <boost/locale/boundary/boundary_point.hpp>
- #include <boost/iterator/iterator_facade.hpp>
- #include <boost/type_traits/is_same.hpp>
- #include <boost/shared_ptr.hpp>
- #include <boost/cstdint.hpp>
- #include <boost/assert.hpp>
- #ifdef BOOST_MSVC
- # pragma warning(push)
- # pragma warning(disable : 4275 4251 4231 4660)
- #endif
- #include <string>
- #include <locale>
- #include <vector>
- #include <iterator>
- #include <algorithm>
- #include <stdexcept>
- #include <iostream>
- namespace boost {
- namespace locale {
-
- namespace boundary {
- ///
- /// \defgroup boundary Boundary Analysis
- ///
- /// This module contains all operations required for %boundary analysis of text: character, word, line and sentence boundaries
- ///
- /// @{
- ///
- /// \cond INTERNAL
- namespace details {
- template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
- struct mapping_traits {
- typedef typename std::iterator_traits<IteratorType>::value_type char_type;
- static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
- {
- std::basic_string<char_type> str(b,e);
- return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
- }
- };
- template<typename CharType,typename SomeIteratorType>
- struct linear_iterator_traits {
- static const bool is_linear =
- is_same<SomeIteratorType,CharType*>::value
- || is_same<SomeIteratorType,CharType const*>::value
- || is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value
- || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value
- || is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value
- || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value
- ;
- };
- template<typename IteratorType>
- struct mapping_traits<IteratorType,std::random_access_iterator_tag> {
- typedef typename std::iterator_traits<IteratorType>::value_type char_type;
- static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
- {
- index_type result;
- //
- // Optimize for most common cases
- //
- // C++0x requires that string is continious in memory and all known
- // string implementations
- // do this because of c_str() support.
- //
- if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e)
- {
- char_type const *begin = &*b;
- char_type const *end = begin + (e-b);
- index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end);
- result.swap(tmp);
- }
- else {
- std::basic_string<char_type> str(b,e);
- index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
- result.swap(tmp);
- }
- return result;
- }
- };
- template<typename BaseIterator>
- class mapping {
- public:
- typedef BaseIterator base_iterator;
- typedef typename std::iterator_traits<base_iterator>::value_type char_type;
- mapping(boundary_type type,
- base_iterator begin,
- base_iterator end,
- std::locale const &loc)
- :
- index_(new index_type()),
- begin_(begin),
- end_(end)
- {
- index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc);
- index_->swap(idx);
- }
- mapping()
- {
- }
- index_type const &index() const
- {
- return *index_;
- }
- base_iterator begin() const
- {
- return begin_;
- }
- base_iterator end() const
- {
- return end_;
- }
- private:
- boost::shared_ptr<index_type> index_;
- base_iterator begin_,end_;
- };
- template<typename BaseIterator>
- class segment_index_iterator :
- public boost::iterator_facade<
- segment_index_iterator<BaseIterator>,
- segment<BaseIterator>,
- boost::bidirectional_traversal_tag,
- segment<BaseIterator> const &
- >
- {
- public:
- typedef BaseIterator base_iterator;
- typedef mapping<base_iterator> mapping_type;
- typedef segment<base_iterator> segment_type;
-
- segment_index_iterator() : current_(0,0),map_(0)
- {
- }
- segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) :
- map_(map),
- mask_(mask),
- full_select_(full_select)
- {
- set(p);
- }
- segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) :
- map_(map),
- mask_(mask),
- full_select_(full_select)
- {
- if(is_begin)
- set_begin();
- else
- set_end();
- }
- segment_type const &dereference() const
- {
- return value_;
- }
- bool equal(segment_index_iterator const &other) const
- {
- return map_ == other.map_ && current_.second == other.current_.second;
- }
- void increment()
- {
- std::pair<size_t,size_t> next = current_;
- if(full_select_) {
- next.first = next.second;
- while(next.second < size()) {
- next.second++;
- if(valid_offset(next.second))
- break;
- }
- if(next.second == size())
- next.first = next.second - 1;
- }
- else {
- while(next.second < size()) {
- next.first = next.second;
- next.second++;
- if(valid_offset(next.second))
- break;
- }
- }
- update_current(next);
- }
- void decrement()
- {
- std::pair<size_t,size_t> next = current_;
- if(full_select_) {
- while(next.second >1) {
- next.second--;
- if(valid_offset(next.second))
- break;
- }
- next.first = next.second;
- while(next.first >0) {
- next.first--;
- if(valid_offset(next.first))
- break;
- }
- }
- else {
- while(next.second >1) {
- next.second--;
- if(valid_offset(next.second))
- break;
- }
- next.first = next.second - 1;
- }
- update_current(next);
- }
- private:
- void set_end()
- {
- current_.first = size() - 1;
- current_.second = size();
- value_ = segment_type(map_->end(),map_->end(),0);
- }
- void set_begin()
- {
- current_.first = current_.second = 0;
- value_ = segment_type(map_->begin(),map_->begin(),0);
- increment();
- }
- void set(base_iterator p)
- {
- size_t dist=std::distance(map_->begin(),p);
- index_type::const_iterator b=map_->index().begin(),e=map_->index().end();
- index_type::const_iterator
- boundary_point=std::upper_bound(b,e,break_info(dist));
- while(boundary_point != e && (boundary_point->rule & mask_)==0)
- boundary_point++;
- current_.first = current_.second = boundary_point - b;
-
- if(full_select_) {
- while(current_.first > 0) {
- current_.first --;
- if(valid_offset(current_.first))
- break;
- }
- }
- else {
- if(current_.first > 0)
- current_.first --;
- }
- value_.first = map_->begin();
- std::advance(value_.first,get_offset(current_.first));
- value_.second = value_.first;
- std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first));
- update_rule();
- }
- void update_current(std::pair<size_t,size_t> pos)
- {
- std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
- std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
- std::advance(value_.first,first_diff);
- std::advance(value_.second,second_diff);
- current_ = pos;
- update_rule();
- }
- void update_rule()
- {
- if(current_.second != size()) {
- value_.rule(index()[current_.second].rule);
- }
- }
- size_t get_offset(size_t ind) const
- {
- if(ind == size())
- return index().back().offset;
- return index()[ind].offset;
- }
- bool valid_offset(size_t offset) const
- {
- return offset == 0
- || offset == size() // make sure we not acess index[size]
- || (index()[offset].rule & mask_)!=0;
- }
-
- size_t size() const
- {
- return index().size();
- }
-
- index_type const &index() const
- {
- return map_->index();
- }
-
-
- segment_type value_;
- std::pair<size_t,size_t> current_;
- mapping_type const *map_;
- rule_type mask_;
- bool full_select_;
- };
-
- template<typename BaseIterator>
- class boundary_point_index_iterator :
- public boost::iterator_facade<
- boundary_point_index_iterator<BaseIterator>,
- boundary_point<BaseIterator>,
- boost::bidirectional_traversal_tag,
- boundary_point<BaseIterator> const &
- >
- {
- public:
- typedef BaseIterator base_iterator;
- typedef mapping<base_iterator> mapping_type;
- typedef boundary_point<base_iterator> boundary_point_type;
-
- boundary_point_index_iterator() : current_(0),map_(0)
- {
- }
- boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) :
- map_(map),
- mask_(mask)
- {
- if(is_begin)
- set_begin();
- else
- set_end();
- }
- boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) :
- map_(map),
- mask_(mask)
- {
- set(p);
- }
- boundary_point_type const &dereference() const
- {
- return value_;
- }
- bool equal(boundary_point_index_iterator const &other) const
- {
- return map_ == other.map_ && current_ == other.current_;
- }
- void increment()
- {
- size_t next = current_;
- while(next < size()) {
- next++;
- if(valid_offset(next))
- break;
- }
- update_current(next);
- }
- void decrement()
- {
- size_t next = current_;
- while(next>0) {
- next--;
- if(valid_offset(next))
- break;
- }
- update_current(next);
- }
- private:
- void set_end()
- {
- current_ = size();
- value_ = boundary_point_type(map_->end(),0);
- }
- void set_begin()
- {
- current_ = 0;
- value_ = boundary_point_type(map_->begin(),0);
- }
- void set(base_iterator p)
- {
- size_t dist = std::distance(map_->begin(),p);
- index_type::const_iterator b=index().begin();
- index_type::const_iterator e=index().end();
- index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist));
- if(ptr==index().end())
- current_=size()-1;
- else
- current_=ptr - index().begin();
- while(!valid_offset(current_))
- current_ ++;
- std::ptrdiff_t diff = get_offset(current_) - dist;
- std::advance(p,diff);
- value_.iterator(p);
- update_rule();
- }
- void update_current(size_t pos)
- {
- std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
- base_iterator i=value_.iterator();
- std::advance(i,diff);
- current_ = pos;
- value_.iterator(i);
- update_rule();
- }
- void update_rule()
- {
- if(current_ != size()) {
- value_.rule(index()[current_].rule);
- }
- }
- size_t get_offset(size_t ind) const
- {
- if(ind == size())
- return index().back().offset;
- return index()[ind].offset;
- }
- bool valid_offset(size_t offset) const
- {
- return offset == 0
- || offset + 1 >= size() // last and first are always valid regardless of mark
- || (index()[offset].rule & mask_)!=0;
- }
-
- size_t size() const
- {
- return index().size();
- }
-
- index_type const &index() const
- {
- return map_->index();
- }
-
-
- boundary_point_type value_;
- size_t current_;
- mapping_type const *map_;
- rule_type mask_;
- };
- } // details
- /// \endcond
// Forward declarations: each index class can be created from the other one
template<typename BaseIterator> class segment_index;
template<typename BaseIterator> class boundary_point_index;
-
- ///
- /// \brief This class holds an index of segments in the text range and allows to iterate over them
- ///
- /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators
- /// to the \ref segment objects.
- ///
- /// It provides two options on way of selecting segments:
- ///
- /// - \ref rule(rule_type mask) - a mask that allows to select only specific types of segments according to
- /// various masks %as \ref word_any.
- /// \n
- /// The default is to select any types of boundaries.
- /// \n
- /// For example: using word %boundary analysis, when the provided mask is \ref word_kana then the iterators
- /// would iterate only over the words containing Kana letters and \ref word_any would select all types of
- /// words excluding ranges that consist of white space and punctuation marks. So iterating over the text
- /// "to be or not to be?" with \ref word_any rule would return segments "to", "be", "or", "not", "to", "be", instead
- /// of default "to", " ", "be", " ", "or", " ", "not", " ", "to", " ", "be", "?".
- /// - \ref full_select(bool how) - a flag that defines the way a range is selected if the rule of the previous
- /// %boundary point does not fit the selected rule.
- /// \n
- /// For example: We want to fetch all sentences from the following text: "Hello! How\nare you?".
- /// \n
- /// This text contains three %boundary points separating it to sentences by different rules:
- /// - The exclamation mark "!" ends the sentence "Hello!"
- /// - The line feed that splits the sentence "How\nare you?" into two parts.
- /// - The question mark that ends the second sentence.
- /// \n
- /// If you would only change the \ref rule() to \ref sentence_term then the segment_index would
- /// provide two sentences "Hello!" and "are you?" %as only them actually terminated with required
- /// terminator "!" or "?". But changing \ref full_select() to true, the selected segment would include
- /// all the text up to previous valid %boundary point and would return two expected sentences:
- /// "Hello!" and "How\nare you?".
- ///
- /// This class allows to find a segment according to the given iterator in range using \ref find() member
- /// function.
- ///
- /// \note
- ///
- /// - Changing any of the options - \ref rule() or \ref full_select() and of course re-indexing the text
- /// invalidates existing iterators and they can't be used any more.
- /// - segment_index can be created from boundary_point_index or other segment_index that was created with
- /// same \ref boundary_type. This is very fast operation %as they shared same index
- /// and it does not require its regeneration.
- ///
- /// \see
- ///
- /// - \ref boundary_point_index
- /// - \ref segment
- /// - \ref boundary_point
- ///
- template<typename BaseIterator>
- class segment_index {
- public:
-
- ///
- /// The type of the iterator used to iterate over the original text
- ///
- typedef BaseIterator base_iterator;
- #ifdef BOOST_LOCALE_DOXYGEN
- ///
- /// The bidirectional iterator that iterates over \ref value_type objects.
- ///
- /// - The iterators may be invalidated by use of any non-const member function
- /// including but not limited to \ref rule(rule_type) and \ref full_select(bool).
- /// - The returned value_type object is valid %as long %as iterator points to it.
- /// So this following code is wrong %as t used after p was updated:
- /// \code
- /// segment_index<some_iterator>::iterator p=index.begin();
- /// segment<some_iterator> &t = *p;
- /// ++p;
- /// cout << t.str() << endl;
- /// \endcode
- ///
- typedef unspecified_iterator_type iterator;
- ///
- /// \copydoc iterator
- ///
- typedef unspecified_iterator_type const_iterator;
- #else
- typedef details::segment_index_iterator<base_iterator> iterator;
- typedef details::segment_index_iterator<base_iterator> const_iterator;
- #endif
- ///
- /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
- /// an object that represents selected segment.
- ///
- typedef segment<base_iterator> value_type;
- ///
- /// Default constructor.
- ///
- /// \note
- ///
- /// When this object is constructed by default it does not include a valid index, thus
- /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
- /// behavior
- ///
- segment_index() : mask_(0xFFFFFFFFu),full_select_(false)
- {
- }
- ///
- /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
- /// in range [begin,end) using a rule \a mask for locale \a loc.
- ///
- segment_index(boundary_type type,
- base_iterator begin,
- base_iterator end,
- rule_type mask,
- std::locale const &loc=std::locale())
- :
- map_(type,begin,end,loc),
- mask_(mask),
- full_select_(false)
- {
- }
- ///
- /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
- /// in range [begin,end) selecting all possible segments (full mask) for locale \a loc.
- ///
- segment_index(boundary_type type,
- base_iterator begin,
- base_iterator end,
- std::locale const &loc=std::locale())
- :
- map_(type,begin,end,loc),
- mask_(0xFFFFFFFFu),
- full_select_(false)
- {
- }
- ///
- /// Create a segment_index from a \ref boundary_point_index. It copies all indexing information
- /// and used default rule (all possible segments)
- ///
- /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
- /// range it is much better to create one from another rather then indexing the same
- /// range twice.
- ///
- /// \note \ref rule() flags are not copied
- ///
- segment_index(boundary_point_index<base_iterator> const &);
- ///
- /// Copy an index from a \ref boundary_point_index. It copies all indexing information
- /// and uses the default rule (all possible segments)
- ///
- /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
- /// range it is much better to create one from another rather then indexing the same
- /// range twice.
- ///
- /// \note \ref rule() flags are not copied
- ///
- segment_index const &operator = (boundary_point_index<base_iterator> const &);
-
- ///
- /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
- /// in range [begin,end) for locale \a loc.
- ///
- /// \note \ref rule() and \ref full_select() remain unchanged.
- ///
- void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
- {
- map_ = mapping_type(type,begin,end,loc);
- }
- ///
- /// Get the \ref iterator on the beginning of the segments range.
- ///
- /// Preconditions: the segment_index should have a mapping
- ///
- /// \note
- ///
- /// The returned iterator is invalidated by access to any non-const member functions of this object
- ///
- iterator begin() const
- {
- return iterator(true,&map_,mask_,full_select_);
- }
- ///
- /// Get the \ref iterator on the ending of the segments range.
- ///
- /// Preconditions: the segment_index should have a mapping
- ///
- /// The returned iterator is invalidated by access to any non-const member functions of this object
- ///
- iterator end() const
- {
- return iterator(false,&map_,mask_,full_select_);
- }
- ///
- /// Find a first valid segment following a position \a p.
- ///
- /// If \a p is inside a valid segment this segment is selected:
- ///
- /// For example: For \ref word %boundary analysis with \ref word_any rule():
- ///
- /// - "to| be or ", would point to "be",
- /// - "t|o be or ", would point to "to",
- /// - "to be or| ", would point to end.
- ///
- ///
- /// Preconditions: the segment_index should have a mapping and \a p should be valid iterator
- /// to the text in the mapped range.
- ///
- /// The returned iterator is invalidated by access to any non-const member functions of this object
- ///
- iterator find(base_iterator p) const
- {
- return iterator(p,&map_,mask_,full_select_);
- }
-
- ///
- /// Get the mask of rules that are used
- ///
- rule_type rule() const
- {
- return mask_;
- }
- ///
- /// Set the mask of rules that are used
- ///
- void rule(rule_type v)
- {
- mask_ = v;
- }
- ///
- /// Get the full_select property value - should segment include in the range
- /// values that not belong to specific \ref rule() or not.
- ///
- /// The default value is false.
- ///
- /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
- /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
- /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
- /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
- /// following part "are you?"
- ///
- bool full_select() const
- {
- return full_select_;
- }
- ///
- /// Set the full_select property value - should segment include in the range
- /// values that not belong to specific \ref rule() or not.
- ///
- /// The default value is false.
- ///
- /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
- /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
- /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
- /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
- /// following part "are you?"
- ///
- void full_select(bool v)
- {
- full_select_ = v;
- }
-
- private:
- friend class boundary_point_index<base_iterator>;
- typedef details::mapping<base_iterator> mapping_type;
- mapping_type map_;
- rule_type mask_;
- bool full_select_;
- };
- ///
- /// \brief This class holds an index of \ref boundary_point "boundary points" and allows iterating
- /// over them.
- ///
- /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators
- /// to the \ref boundary_point objects.
- ///
- /// It provides an option that affects selecting %boundary points according to different rules:
- /// using \ref rule(rule_type mask) member function. It allows to set a mask that select only specific
- /// types of %boundary points like \ref sentence_term.
- ///
- /// For example for a sentence %boundary analysis of a text "Hello! How\nare you?" when the default
- /// rule is used the %boundary points would be:
- ///
- /// - "|Hello! How\nare you?"
- /// - "Hello! |How\nare you?"
- /// - "Hello! How\n|are you?"
- /// - "Hello! How\nare you?|"
- ///
- /// However if \ref rule() is set to \ref sentence_term then the selected %boundary points would be:
- ///
- /// - "|Hello! How\nare you?"
- /// - "Hello! |How\nare you?"
- /// - "Hello! How\nare you?|"
- ///
- /// Such that a %boundary point defined by a line feed character would be ignored.
- ///
- /// This class allows to find a boundary_point according to the given iterator in range using \ref find() member
- /// function.
- ///
- /// \note
- /// - Even an empty text range [x,x) considered to have a one %boundary point x.
- /// - \a a and \a b points of the range [a,b) are always considered %boundary points
- /// regardless the rules used.
- /// - Changing any of the option \ref rule() or course re-indexing the text
- /// invalidates existing iterators and they can't be used any more.
- /// - boundary_point_index can be created from segment_index or other boundary_point_index that was created with
- /// same \ref boundary_type. This is very fast operation %as they shared same index
- /// and it does not require its regeneration.
- ///
- /// \see
- ///
- /// - \ref segment_index
- /// - \ref boundary_point
- /// - \ref segment
- ///
- template<typename BaseIterator>
- class boundary_point_index {
- public:
- ///
- /// The type of the iterator used to iterate over the original text
- ///
- typedef BaseIterator base_iterator;
- #ifdef BOOST_LOCALE_DOXYGEN
- ///
- /// The bidirectional iterator that iterates over \ref value_type objects.
- ///
- /// - The iterators may be invalidated by use of any non-const member function
- /// including but not limited to \ref rule(rule_type) member function.
- /// - The returned value_type object is valid %as long %as iterator points to it.
- /// So this following code is wrong %as t used after p was updated:
- /// \code
- /// boundary_point_index<some_iterator>::iterator p=index.begin();
- /// boundary_point<some_iterator> &t = *p;
- /// ++p;
- /// rule_type r = t->rule();
- /// \endcode
- ///
- typedef unspecified_iterator_type iterator;
- ///
- /// \copydoc iterator
- ///
- typedef unspecified_iterator_type const_iterator;
- #else
- typedef details::boundary_point_index_iterator<base_iterator> iterator;
- typedef details::boundary_point_index_iterator<base_iterator> const_iterator;
- #endif
- ///
- /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
- /// an object that represents the selected \ref boundary_point "boundary point".
- ///
- typedef boundary_point<base_iterator> value_type;
-
- ///
- /// Default constructor.
- ///
- /// \note
- ///
- /// When this object is constructed by default it does not include a valid index, thus
- /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
- /// behavior
- ///
- boundary_point_index() : mask_(0xFFFFFFFFu)
- {
- }
-
- ///
- /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
- /// in range [begin,end) using a rule \a mask for locale \a loc.
- ///
- boundary_point_index(boundary_type type,
- base_iterator begin,
- base_iterator end,
- rule_type mask,
- std::locale const &loc=std::locale())
- :
- map_(type,begin,end,loc),
- mask_(mask)
- {
- }
- ///
- /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
- /// in range [begin,end) selecting all possible %boundary points (full mask) for locale \a loc.
- ///
- boundary_point_index(boundary_type type,
- base_iterator begin,
- base_iterator end,
- std::locale const &loc=std::locale())
- :
- map_(type,begin,end,loc),
- mask_(0xFFFFFFFFu)
- {
- }
- ///
- /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information
- /// and uses the default rule (all possible %boundary points)
- ///
- /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
- /// range it is much better to create one from another rather then indexing the same
- /// range twice.
- ///
- /// \note \ref rule() flags are not copied
- ///
- boundary_point_index(segment_index<base_iterator> const &other);
- ///
- /// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information
- /// and keeps the current \ref rule() unchanged
- ///
- /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
- /// range it is much better to create one from another rather then indexing the same
- /// range twice.
- ///
- /// \note \ref rule() flags are not copied
- ///
- boundary_point_index const &operator=(segment_index<base_iterator> const &other);
- ///
- /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
- /// in range [begin,end) for locale \a loc.
- ///
- /// \note \ref rule() remains unchanged.
- ///
- void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
- {
- map_ = mapping_type(type,begin,end,loc);
- }
- ///
- /// Get the \ref iterator on the beginning of the %boundary points range.
- ///
- /// Preconditions: this boundary_point_index should have a mapping
- ///
- /// \note
- ///
- /// The returned iterator is invalidated by access to any non-const member functions of this object
- ///
- iterator begin() const
- {
- return iterator(true,&map_,mask_);
- }
- ///
- /// Get the \ref iterator on the ending of the %boundary points range.
- ///
- /// Preconditions: this boundary_point_index should have a mapping
- ///
- /// \note
- ///
- /// The returned iterator is invalidated by access to any non-const member functions of this object
- ///
- iterator end() const
- {
- return iterator(false,&map_,mask_);
- }
- ///
- /// Find a first valid %boundary point on a position \a p or following it.
- ///
- /// For example: For \ref word %boundary analysis of the text "to be or"
- ///
- /// - "|to be", would return %boundary point at "|to be",
- /// - "t|o be", would point to "to| be"
- ///
- /// Preconditions: the boundary_point_index should have a mapping and \a p should be valid iterator
- /// to the text in the mapped range.
- ///
- /// The returned iterator is invalidated by access to any non-const member functions of this object
- ///
- iterator find(base_iterator p) const
- {
- return iterator(p,&map_,mask_);
- }
-
- ///
- /// Get the mask of rules that are used
- ///
- rule_type rule() const
- {
- return mask_;
- }
- ///
- /// Set the mask of rules that are used
- ///
- void rule(rule_type v)
- {
- mask_ = v;
- }
- private:
- friend class segment_index<base_iterator>;
- typedef details::mapping<base_iterator> mapping_type;
- mapping_type map_;
- rule_type mask_;
- };
-
- /// \cond INTERNAL
- template<typename BaseIterator>
- segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) :
- map_(other.map_),
- mask_(0xFFFFFFFFu),
- full_select_(false)
- {
- }
-
- template<typename BaseIterator>
- boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) :
- map_(other.map_),
- mask_(0xFFFFFFFFu)
- {
- }
- template<typename BaseIterator>
- segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other)
- {
- map_ = other.map_;
- return *this;
- }
-
- template<typename BaseIterator>
- boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other)
- {
- map_ = other.map_;
- return *this;
- }
- /// \endcond
-
- typedef segment_index<std::string::const_iterator> ssegment_index; ///< convenience typedef
- typedef segment_index<std::wstring::const_iterator> wssegment_index; ///< convenience typedef
- #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
- typedef segment_index<std::u16string::const_iterator> u16ssegment_index;///< convenience typedef
- #endif
- #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
- typedef segment_index<std::u32string::const_iterator> u32ssegment_index;///< convenience typedef
- #endif
-
- typedef segment_index<char const *> csegment_index; ///< convenience typedef
- typedef segment_index<wchar_t const *> wcsegment_index; ///< convenience typedef
- #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
- typedef segment_index<char16_t const *> u16csegment_index; ///< convenience typedef
- #endif
- #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
- typedef segment_index<char32_t const *> u32csegment_index; ///< convenience typedef
- #endif
- typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;///< convenience typedef
- typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index;///< convenience typedef
- #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
- typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index;///< convenience typedef
- #endif
- #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
- typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index;///< convenience typedef
- #endif
-
- typedef boundary_point_index<char const *> cboundary_point_index; ///< convenience typedef
- typedef boundary_point_index<wchar_t const *> wcboundary_point_index; ///< convenience typedef
- #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
- typedef boundary_point_index<char16_t const *> u16cboundary_point_index;///< convenience typedef
- #endif
- #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
- typedef boundary_point_index<char32_t const *> u32cboundary_point_index;///< convenience typedef
- #endif
- } // boundary
- } // locale
- } // boost
- ///
- /// \example boundary.cpp
- /// Example of using segment_index
- /// \example wboundary.cpp
- /// Example of using segment_index over wide strings
- ///
- #ifdef BOOST_MSVC
- #pragma warning(pop)
- #endif
- #endif
- // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|