genericvector.h 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221
  1. ///////////////////////////////////////////////////////////////////////
  2. // File: genericvector.h
  3. // Description: Generic vector class
  4. // Author: Daria Antonova
  5. //
  6. // (C) Copyright 2007, Google Inc.
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. //
  17. ///////////////////////////////////////////////////////////////////////
  18. //
  19. #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
  20. #define TESSERACT_CCUTIL_GENERICVECTOR_H_
  21. #include <algorithm>
  22. #include <cassert>
  23. #include <cstdio>
  24. #include <cstdlib>
  25. #include "helpers.h"
  26. #include "serialis.h"
  27. #include "strngs.h"
  28. #include "tesscallback.h"
  29. // Use PointerVector<T> below in preference to GenericVector<T*>, as that
  30. // provides automatic deletion of pointers, [De]Serialize that works, and
  31. // sort that works.
  32. template <typename T>
  33. class GenericVector {
  34. public:
  35. GenericVector() {
  36. init(kDefaultVectorSize);
  37. }
  38. GenericVector(int size, const T& init_val) {
  39. init(size);
  40. init_to_size(size, init_val);
  41. }
  42. // Copy
  43. GenericVector(const GenericVector& other) {
  44. this->init(other.size());
  45. this->operator+=(other);
  46. }
  47. GenericVector<T>& operator+=(const GenericVector& other);
  48. GenericVector<T>& operator=(const GenericVector& other);
  49. ~GenericVector();
  50. // Reserve some memory.
  51. void reserve(int size);
  52. // Double the size of the internal array.
  53. void double_the_size();
  54. // Resizes to size and sets all values to t.
  55. void init_to_size(int size, const T& t);
  56. // Resizes to size without any initialization.
  57. void resize_no_init(int size) {
  58. reserve(size);
  59. size_used_ = size;
  60. }
  61. // Return the size used.
  62. int size() const {
  63. return size_used_;
  64. }
  65. // Workaround to avoid g++ -Wsign-compare warnings.
  66. size_t unsigned_size() const {
  67. static_assert(sizeof(size_used_) <= sizeof(size_t),
  68. "Wow! sizeof(size_t) < sizeof(int32_t)!!");
  69. assert(0 <= size_used_);
  70. return static_cast<size_t>(size_used_);
  71. }
  72. int size_reserved() const {
  73. return size_reserved_;
  74. }
  75. int length() const {
  76. return size_used_;
  77. }
  78. // Return true if empty.
  79. bool empty() const {
  80. return size_used_ == 0;
  81. }
  82. // Return the object from an index.
  83. T& get(int index) const;
  84. T& back() const;
  85. T& operator[](int index) const;
  86. // Returns the last object and removes it.
  87. T pop_back();
  88. // Return the index of the T object.
  89. // This method NEEDS a compare_callback to be passed to
  90. // set_compare_callback.
  91. int get_index(const T& object) const;
  92. // Return true if T is in the array
  93. bool contains(const T& object) const;
  94. // Return true if the index is valid
  95. T contains_index(int index) const;
  96. // Push an element in the end of the array
  97. int push_back(T object);
  98. void operator+=(const T& t);
  99. // Push an element in the end of the array if the same
  100. // element is not already contained in the array.
  101. int push_back_new(const T& object);
  102. // Push an element in the front of the array
  103. // Note: This function is O(n)
  104. int push_front(const T& object);
  105. // Set the value at the given index
  106. void set(const T& t, int index);
  107. // Insert t at the given index, push other elements to the right.
  108. void insert(const T& t, int index);
  109. // Removes an element at the given index and
  110. // shifts the remaining elements to the left.
  111. void remove(int index);
  112. // Truncates the array to the given size by removing the end.
  113. // If the current size is less, the array is not expanded.
  114. void truncate(int size) {
  115. if (size < size_used_) {
  116. size_used_ = size;
  117. }
  118. }
  119. // Add a callback to be called to delete the elements when the array took
  120. // their ownership.
  121. void set_clear_callback(TessCallback1<T>* cb) {
  122. clear_cb_ = cb;
  123. }
  124. // Add a callback to be called to compare the elements when needed (contains,
  125. // get_id, ...)
  126. void set_compare_callback(TessResultCallback2<bool, T const&, T const&>* cb) {
  127. compare_cb_ = cb;
  128. }
  129. // Clear the array, calling the clear callback function if any.
  130. // All the owned callbacks are also deleted.
  131. // If you don't want the callbacks to be deleted, before calling clear, set
  132. // the callback to nullptr.
  133. void clear();
  134. // Delete objects pointed to by data_[i]
  135. void delete_data_pointers();
  136. // This method clears the current object, then, does a shallow copy of
  137. // its argument, and finally invalidates its argument.
  138. // Callbacks are moved to the current object;
  139. void move(GenericVector<T>* from);
  140. // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
  141. // The callback given must be permanent since they will be called more than
  142. // once. The given callback will be deleted at the end.
  143. // If the callbacks are nullptr, then the data is simply read/written using
  144. // fread (and swapping)/fwrite.
  145. // Returns false on error or if the callback returns false.
  146. // DEPRECATED. Use [De]Serialize[Classes] instead.
  147. bool write(FILE* f, TessResultCallback2<bool, FILE*, T const&>* cb) const;
  148. bool read(tesseract::TFile* f,
  149. TessResultCallback2<bool, tesseract::TFile*, T*>* cb);
  150. // Writes a vector of simple types to the given file. Assumes that bitwise
  151. // read/write of T will work. Returns false in case of error.
  152. // TODO(rays) Change all callers to use TFile and remove deprecated methods.
  153. bool Serialize(FILE* fp) const;
  154. bool Serialize(tesseract::TFile* fp) const;
  155. // Reads a vector of simple types from the given file. Assumes that bitwise
  156. // read/write will work with ReverseN according to sizeof(T).
  157. // Returns false in case of error.
  158. // If swap is true, assumes a big/little-endian swap is needed.
  159. // TFile is assumed to know about swapping.
  160. bool DeSerialize(bool swap, FILE* fp);
  161. bool DeSerialize(tesseract::TFile* fp);
  162. // Skips the deserialization of the vector.
  163. static bool SkipDeSerialize(tesseract::TFile* fp);
  164. // Writes a vector of classes to the given file. Assumes the existence of
  165. // bool T::Serialize(FILE* fp) const that returns false in case of error.
  166. // Returns false in case of error.
  167. bool SerializeClasses(FILE* fp) const;
  168. bool SerializeClasses(tesseract::TFile* fp) const;
  169. // Reads a vector of classes from the given file. Assumes the existence of
  170. // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
  171. // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
  172. // this function. Returns false in case of error.
  173. // If swap is true, assumes a big/little-endian swap is needed.
  174. bool DeSerializeClasses(bool swap, FILE* fp);
  175. bool DeSerializeClasses(tesseract::TFile* fp);
  176. // Calls SkipDeSerialize on the elements of the vector.
  177. static bool SkipDeSerializeClasses(tesseract::TFile* fp);
  178. // Allocates a new array of double the current_size, copies over the
  179. // information from data to the new location, deletes data and returns
  180. // the pointed to the new larger array.
  181. // This function uses memcpy to copy the data, instead of invoking
  182. // operator=() for each element like double_the_size() does.
  183. static T* double_the_size_memcpy(int current_size, T* data) {
  184. T* data_new = new T[current_size * 2];
  185. memcpy(data_new, data, sizeof(T) * current_size);
  186. delete[] data;
  187. return data_new;
  188. }
  189. // Reverses the elements of the vector.
  190. void reverse() {
  191. for (int i = 0; i < size_used_ / 2; ++i) {
  192. Swap(&data_[i], &data_[size_used_ - 1 - i]);
  193. }
  194. }
  195. // Sorts the members of this vector using the less than comparator (cmp_lt),
  196. // which compares the values. Useful for GenericVectors to primitive types.
  197. // Will not work so great for pointers (unless you just want to sort some
  198. // pointers). You need to provide a specialization to sort_cmp to use
  199. // your type.
  200. void sort();
  201. // Sort the array into the order defined by the qsort function comparator.
  202. // The comparator function is as defined by qsort, ie. it receives pointers
  203. // to two Ts and returns negative if the first element is to appear earlier
  204. // in the result and positive if it is to appear later, with 0 for equal.
  205. void sort(int (*comparator)(const void*, const void*)) {
  206. qsort(data_, size_used_, sizeof(*data_), comparator);
  207. }
  208. // Searches the array (assuming sorted in ascending order, using sort()) for
  209. // an element equal to target and returns true if it is present.
  210. // Use binary_search to get the index of target, or its nearest candidate.
  211. bool bool_binary_search(const T& target) const {
  212. int index = binary_search(target);
  213. if (index >= size_used_) {
  214. return false;
  215. }
  216. return data_[index] == target;
  217. }
  218. // Searches the array (assuming sorted in ascending order, using sort()) for
  219. // an element equal to target and returns the index of the best candidate.
  220. // The return value is conceptually the largest index i such that
  221. // data_[i] <= target or 0 if target < the whole vector.
  222. // NOTE that this function uses operator> so really the return value is
  223. // the largest index i such that data_[i] > target is false.
  224. int binary_search(const T& target) const {
  225. int bottom = 0;
  226. int top = size_used_;
  227. while (top - bottom > 1) {
  228. int middle = (bottom + top) / 2;
  229. if (data_[middle] > target) {
  230. top = middle;
  231. } else {
  232. bottom = middle;
  233. }
  234. }
  235. return bottom;
  236. }
  237. // Compact the vector by deleting elements using operator!= on basic types.
  238. // The vector must be sorted.
  239. void compact_sorted() {
  240. if (size_used_ == 0) {
  241. return;
  242. }
  243. // First element is in no matter what, hence the i = 1.
  244. int last_write = 0;
  245. for (int i = 1; i < size_used_; ++i) {
  246. // Finds next unique item and writes it.
  247. if (data_[last_write] != data_[i]) {
  248. data_[++last_write] = data_[i];
  249. }
  250. }
  251. // last_write is the index of a valid data cell, so add 1.
  252. size_used_ = last_write + 1;
  253. }
  254. // Compact the vector by deleting elements for which delete_cb returns
  255. // true. delete_cb is a permanent callback and will be deleted.
  256. void compact(TessResultCallback1<bool, int>* delete_cb) {
  257. int new_size = 0;
  258. int old_index = 0;
  259. // Until the callback returns true, the elements stay the same.
  260. while (old_index < size_used_ && !delete_cb->Run(old_index++)) {
  261. ++new_size;
  262. }
  263. // Now just copy anything else that gets false from delete_cb.
  264. for (; old_index < size_used_; ++old_index) {
  265. if (!delete_cb->Run(old_index)) {
  266. data_[new_size++] = data_[old_index];
  267. }
  268. }
  269. size_used_ = new_size;
  270. delete delete_cb;
  271. }
  272. T dot_product(const GenericVector<T>& other) const {
  273. T result = static_cast<T>(0);
  274. for (int i = std::min(size_used_, other.size_used_) - 1; i >= 0; --i) {
  275. result += data_[i] * other.data_[i];
  276. }
  277. return result;
  278. }
  279. // Returns the index of what would be the target_index_th item in the array
  280. // if the members were sorted, without actually sorting. Members are
  281. // shuffled around, but it takes O(n) time.
  282. // NOTE: uses operator< and operator== on the members.
  283. int choose_nth_item(int target_index) {
  284. // Make sure target_index is legal.
  285. if (target_index < 0) {
  286. target_index = 0; // ensure legal
  287. } else if (target_index >= size_used_) {
  288. target_index = size_used_ - 1;
  289. }
  290. unsigned int seed = 1;
  291. return choose_nth_item(target_index, 0, size_used_, &seed);
  292. }
  293. // Swaps the elements with the given indices.
  294. void swap(int index1, int index2) {
  295. if (index1 != index2) {
  296. T tmp = data_[index1];
  297. data_[index1] = data_[index2];
  298. data_[index2] = tmp;
  299. }
  300. }
  301. // Returns true if all elements of *this are within the given range.
  302. // Only uses operator<
  303. bool WithinBounds(const T& rangemin, const T& rangemax) const {
  304. for (int i = 0; i < size_used_; ++i) {
  305. if (data_[i] < rangemin || rangemax < data_[i]) {
  306. return false;
  307. }
  308. }
  309. return true;
  310. }
  311. protected:
  312. // Internal recursive version of choose_nth_item.
  313. int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
  314. // Init the object, allocating size memory.
  315. void init(int size);
  316. // We are assuming that the object generally placed in the
  317. // vector are small enough that for efficiency it makes sense
  318. // to start with a larger initial size.
  319. static const int kDefaultVectorSize = 4;
  320. int32_t size_used_{};
  321. int32_t size_reserved_{};
  322. T* data_;
  323. TessCallback1<T>* clear_cb_;
  324. // Mutable because Run method is not const
  325. mutable TessResultCallback2<bool, T const&, T const&>* compare_cb_;
  326. };
  327. namespace tesseract {
  328. // The default FileReader loads the whole file into the vector of char,
  329. // returning false on error.
  330. inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
  331. bool result = false;
  332. FILE* fp = fopen(filename, "rb");
  333. if (fp != nullptr) {
  334. fseek(fp, 0, SEEK_END);
  335. long size = ftell(fp);
  336. fseek(fp, 0, SEEK_SET);
  337. // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
  338. if (size > 0 && size < LONG_MAX) {
  339. // reserve an extra byte in case caller wants to append a '\0' character
  340. data->reserve(size + 1);
  341. data->resize_no_init(size);
  342. result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
  343. }
  344. fclose(fp);
  345. }
  346. return result;
  347. }
  348. inline bool LoadDataFromFile(const STRING& filename,
  349. GenericVector<char>* data) {
  350. return LoadDataFromFile(filename.string(), data);
  351. }
  352. // The default FileWriter writes the vector of char to the filename file,
  353. // returning false on error.
  354. inline bool SaveDataToFile(const GenericVector<char>& data,
  355. const STRING& filename) {
  356. FILE* fp = fopen(filename.string(), "wb");
  357. if (fp == nullptr) {
  358. return false;
  359. }
  360. bool result =
  361. static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
  362. fclose(fp);
  363. return result;
  364. }
  365. // Reads a file as a vector of STRING.
  366. inline bool LoadFileLinesToStrings(const STRING& filename,
  367. GenericVector<STRING>* lines) {
  368. GenericVector<char> data;
  369. if (!LoadDataFromFile(filename.string(), &data)) {
  370. return false;
  371. }
  372. STRING lines_str(&data[0], data.size());
  373. lines_str.split('\n', lines);
  374. return true;
  375. }
  376. template <typename T>
  377. bool cmp_eq(T const& t1, T const& t2) {
  378. return t1 == t2;
  379. }
  380. // Used by sort()
  381. // return < 0 if t1 < t2
  382. // return 0 if t1 == t2
  383. // return > 0 if t1 > t2
  384. template <typename T>
  385. int sort_cmp(const void* t1, const void* t2) {
  386. const T* a = static_cast<const T*>(t1);
  387. const T* b = static_cast<const T*>(t2);
  388. if (*a < *b) {
  389. return -1;
  390. }
  391. if (*b < *a) {
  392. return 1;
  393. }
  394. return 0;
  395. }
  396. // Used by PointerVector::sort()
  397. // return < 0 if t1 < t2
  398. // return 0 if t1 == t2
  399. // return > 0 if t1 > t2
  400. template <typename T>
  401. int sort_ptr_cmp(const void* t1, const void* t2) {
  402. const T* a = *static_cast<T* const*>(t1);
  403. const T* b = *static_cast<T* const*>(t2);
  404. if (*a < *b) {
  405. return -1;
  406. }
  407. if (*b < *a) {
  408. return 1;
  409. }
  410. return 0;
  411. }
  412. // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
  413. // as it provides automatic deletion and correct serialization, with the
  414. // corollary that all copy operations are deep copies of the pointed-to objects.
  415. template <typename T>
  416. class PointerVector : public GenericVector<T*> {
  417. public:
  418. PointerVector() : GenericVector<T*>() {}
  419. explicit PointerVector(int size) : GenericVector<T*>(size) {}
  420. ~PointerVector() {
  421. // Clear must be called here, even though it is called again by the base,
  422. // as the base will call the wrong clear.
  423. clear();
  424. }
  425. // Copy must be deep, as the pointers will be automatically deleted on
  426. // destruction.
  427. PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
  428. this->init(other.size());
  429. this->operator+=(other);
  430. }
  431. PointerVector<T>& operator+=(const PointerVector& other) {
  432. this->reserve(this->size_used_ + other.size_used_);
  433. for (int i = 0; i < other.size(); ++i) {
  434. this->push_back(new T(*other.data_[i]));
  435. }
  436. return *this;
  437. }
  438. PointerVector<T>& operator=(const PointerVector& other) {
  439. if (&other != this) {
  440. this->truncate(0);
  441. this->operator+=(other);
  442. }
  443. return *this;
  444. }
  445. // Removes an element at the given index and
  446. // shifts the remaining elements to the left.
  447. void remove(int index) {
  448. delete GenericVector<T*>::data_[index];
  449. GenericVector<T*>::remove(index);
  450. }
  451. // Truncates the array to the given size by removing the end.
  452. // If the current size is less, the array is not expanded.
  453. void truncate(int size) {
  454. for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
  455. delete GenericVector<T*>::data_[i];
  456. }
  457. GenericVector<T*>::truncate(size);
  458. }
  459. // Compact the vector by deleting elements for which delete_cb returns
  460. // true. delete_cb is a permanent callback and will be deleted.
  461. void compact(TessResultCallback1<bool, const T*>* delete_cb) {
  462. int new_size = 0;
  463. int old_index = 0;
  464. // Until the callback returns true, the elements stay the same.
  465. while (old_index < GenericVector<T*>::size_used_ &&
  466. !delete_cb->Run(GenericVector<T*>::data_[old_index++])) {
  467. ++new_size;
  468. }
  469. // Now just copy anything else that gets false from delete_cb.
  470. for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
  471. if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
  472. GenericVector<T*>::data_[new_size++] =
  473. GenericVector<T*>::data_[old_index];
  474. } else {
  475. delete GenericVector<T*>::data_[old_index];
  476. }
  477. }
  478. GenericVector<T*>::size_used_ = new_size;
  479. delete delete_cb;
  480. }
  481. // Clear the array, calling the clear callback function if any.
  482. // All the owned callbacks are also deleted.
  483. // If you don't want the callbacks to be deleted, before calling clear, set
  484. // the callback to nullptr.
  485. void clear() {
  486. GenericVector<T*>::delete_data_pointers();
  487. GenericVector<T*>::clear();
  488. }
  489. // Writes a vector of (pointers to) classes to the given file. Assumes the
  490. // existence of bool T::Serialize(FILE*) const that returns false in case of
  491. // error. There is no Serialize for simple types, as you would have a
  492. // normal GenericVector of those.
  493. // Returns false in case of error.
  494. bool Serialize(FILE* fp) const {
  495. int32_t used = GenericVector<T*>::size_used_;
  496. if (fwrite(&used, sizeof(used), 1, fp) != 1) {
  497. return false;
  498. }
  499. for (int i = 0; i < used; ++i) {
  500. int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
  501. if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
  502. return false;
  503. }
  504. if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
  505. return false;
  506. }
  507. }
  508. return true;
  509. }
  510. bool Serialize(TFile* fp) const {
  511. int32_t used = GenericVector<T*>::size_used_;
  512. if (fp->FWrite(&used, sizeof(used), 1) != 1) {
  513. return false;
  514. }
  515. for (int i = 0; i < used; ++i) {
  516. int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
  517. if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
  518. return false;
  519. }
  520. if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
  521. return false;
  522. }
  523. }
  524. return true;
  525. }
  526. // Reads a vector of (pointers to) classes to the given file. Assumes the
  527. // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
  528. // case of error. There is no Serialize for simple types, as you would have a
  529. // normal GenericVector of those.
  530. // If swap is true, assumes a big/little-endian swap is needed.
  531. // Also needs T::T(), as new T is used in this function.
  532. // Returns false in case of error.
  533. bool DeSerialize(bool swap, FILE* fp) {
  534. uint32_t reserved;
  535. if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
  536. return false;
  537. }
  538. if (swap) {
  539. Reverse32(&reserved);
  540. }
  541. // Arbitrarily limit the number of elements to protect against bad data.
  542. assert(reserved <= UINT16_MAX);
  543. if (reserved > UINT16_MAX) {
  544. return false;
  545. }
  546. GenericVector<T*>::reserve(reserved);
  547. truncate(0);
  548. for (uint32_t i = 0; i < reserved; ++i) {
  549. int8_t non_null;
  550. if (fread(&non_null, sizeof(non_null), 1, fp) != 1) {
  551. return false;
  552. }
  553. T* item = nullptr;
  554. if (non_null != 0) {
  555. item = new T;
  556. if (!item->DeSerialize(swap, fp)) {
  557. delete item;
  558. return false;
  559. }
  560. this->push_back(item);
  561. } else {
  562. // Null elements should keep their place in the vector.
  563. this->push_back(nullptr);
  564. }
  565. }
  566. return true;
  567. }
  568. bool DeSerialize(TFile* fp) {
  569. int32_t reserved;
  570. if (!DeSerializeSize(fp, &reserved)) {
  571. return false;
  572. }
  573. GenericVector<T*>::reserve(reserved);
  574. truncate(0);
  575. for (int i = 0; i < reserved; ++i) {
  576. if (!DeSerializeElement(fp)) {
  577. return false;
  578. }
  579. }
  580. return true;
  581. }
  582. // Enables deserialization of a selection of elements. Note that in order to
  583. // retain the integrity of the stream, the caller must call some combination
  584. // of DeSerializeElement and DeSerializeSkip of the exact number returned in
  585. // *size, assuming a true return.
  586. static bool DeSerializeSize(TFile* fp, int32_t* size) {
  587. return fp->FReadEndian(size, sizeof(*size), 1) == 1;
  588. }
  589. // Reads and appends to the vector the next element of the serialization.
  590. bool DeSerializeElement(TFile* fp) {
  591. int8_t non_null;
  592. if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
  593. return false;
  594. }
  595. T* item = nullptr;
  596. if (non_null != 0) {
  597. item = new T;
  598. if (!item->DeSerialize(fp)) {
  599. delete item;
  600. return false;
  601. }
  602. this->push_back(item);
  603. } else {
  604. // Null elements should keep their place in the vector.
  605. this->push_back(nullptr);
  606. }
  607. return true;
  608. }
  609. // Skips the next element of the serialization.
  610. static bool DeSerializeSkip(TFile* fp) {
  611. int8_t non_null;
  612. if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
  613. return false;
  614. }
  615. if (non_null != 0) {
  616. if (!T::SkipDeSerialize(fp)) {
  617. return false;
  618. }
  619. }
  620. return true;
  621. }
  622. // Sorts the items pointed to by the members of this vector using
  623. // t::operator<().
  624. void sort() {
  625. this->GenericVector<T*>::sort(&sort_ptr_cmp<T>);
  626. }
  627. };
  628. } // namespace tesseract
  629. // A useful vector that uses operator== to do comparisons.
  630. template <typename T>
  631. class GenericVectorEqEq : public GenericVector<T> {
  632. public:
  633. GenericVectorEqEq() {
  634. GenericVector<T>::set_compare_callback(
  635. NewPermanentTessCallback(tesseract::cmp_eq<T>));
  636. }
  637. explicit GenericVectorEqEq(int size) : GenericVector<T>(size) {
  638. GenericVector<T>::set_compare_callback(
  639. NewPermanentTessCallback(tesseract::cmp_eq<T>));
  640. }
  641. };
  642. template <typename T>
  643. void GenericVector<T>::init(int size) {
  644. size_used_ = 0;
  645. if (size <= 0) {
  646. data_ = nullptr;
  647. size_reserved_ = 0;
  648. } else {
  649. if (size < kDefaultVectorSize) {
  650. size = kDefaultVectorSize;
  651. }
  652. data_ = new T[size];
  653. size_reserved_ = size;
  654. }
  655. clear_cb_ = nullptr;
  656. compare_cb_ = nullptr;
  657. }
  658. template <typename T>
  659. GenericVector<T>::~GenericVector() {
  660. clear();
  661. }
  662. // Reserve some memory. If the internal array contains elements, they are
  663. // copied.
  664. template <typename T>
  665. void GenericVector<T>::reserve(int size) {
  666. if (size_reserved_ >= size || size <= 0) {
  667. return;
  668. }
  669. if (size < kDefaultVectorSize) {
  670. size = kDefaultVectorSize;
  671. }
  672. T* new_array = new T[size];
  673. for (int i = 0; i < size_used_; ++i) {
  674. new_array[i] = data_[i];
  675. }
  676. delete[] data_;
  677. data_ = new_array;
  678. size_reserved_ = size;
  679. }
  680. template <typename T>
  681. void GenericVector<T>::double_the_size() {
  682. if (size_reserved_ == 0) {
  683. reserve(kDefaultVectorSize);
  684. } else {
  685. reserve(2 * size_reserved_);
  686. }
  687. }
  688. // Resizes to size and sets all values to t.
  689. template <typename T>
  690. void GenericVector<T>::init_to_size(int size, const T& t) {
  691. reserve(size);
  692. size_used_ = size;
  693. for (int i = 0; i < size; ++i) {
  694. data_[i] = t;
  695. }
  696. }
  697. // Return the object from an index.
  698. template <typename T>
  699. T& GenericVector<T>::get(int index) const {
  700. assert(index >= 0 && index < size_used_);
  701. return data_[index];
  702. }
  703. template <typename T>
  704. T& GenericVector<T>::operator[](int index) const {
  705. assert(index >= 0 && index < size_used_);
  706. return data_[index];
  707. }
  708. template <typename T>
  709. T& GenericVector<T>::back() const {
  710. assert(size_used_ > 0);
  711. return data_[size_used_ - 1];
  712. }
  713. // Returns the last object and removes it.
  714. template <typename T>
  715. T GenericVector<T>::pop_back() {
  716. assert(size_used_ > 0);
  717. return data_[--size_used_];
  718. }
  719. // Return the object from an index.
  720. template <typename T>
  721. void GenericVector<T>::set(const T& t, int index) {
  722. assert(index >= 0 && index < size_used_);
  723. data_[index] = t;
  724. }
  725. // Shifts the rest of the elements to the right to make
  726. // space for the new elements and inserts the given element
  727. // at the specified index.
  728. template <typename T>
  729. void GenericVector<T>::insert(const T& t, int index) {
  730. assert(index >= 0 && index <= size_used_);
  731. if (size_reserved_ == size_used_) {
  732. double_the_size();
  733. }
  734. for (int i = size_used_; i > index; --i) {
  735. data_[i] = data_[i - 1];
  736. }
  737. data_[index] = t;
  738. size_used_++;
  739. }
  740. // Removes an element at the given index and
  741. // shifts the remaining elements to the left.
  742. template <typename T>
  743. void GenericVector<T>::remove(int index) {
  744. assert(index >= 0 && index < size_used_);
  745. for (int i = index; i < size_used_ - 1; ++i) {
  746. data_[i] = data_[i + 1];
  747. }
  748. size_used_--;
  749. }
  750. // Return true if the index is valindex
  751. template <typename T>
  752. T GenericVector<T>::contains_index(int index) const {
  753. return index >= 0 && index < size_used_;
  754. }
  755. // Return the index of the T object.
  756. template <typename T>
  757. int GenericVector<T>::get_index(const T& object) const {
  758. for (int i = 0; i < size_used_; ++i) {
  759. assert(compare_cb_ != nullptr);
  760. if (compare_cb_->Run(object, data_[i])) {
  761. return i;
  762. }
  763. }
  764. return -1;
  765. }
  766. // Return true if T is in the array
  767. template <typename T>
  768. bool GenericVector<T>::contains(const T& object) const {
  769. return get_index(object) != -1;
  770. }
  771. // Add an element in the array
  772. template <typename T>
  773. int GenericVector<T>::push_back(T object) {
  774. int index = 0;
  775. if (size_used_ == size_reserved_) {
  776. double_the_size();
  777. }
  778. index = size_used_++;
  779. data_[index] = object;
  780. return index;
  781. }
  782. template <typename T>
  783. int GenericVector<T>::push_back_new(const T& object) {
  784. int index = get_index(object);
  785. if (index >= 0) {
  786. return index;
  787. }
  788. return push_back(object);
  789. }
  790. // Add an element in the array (front)
  791. template <typename T>
  792. int GenericVector<T>::push_front(const T& object) {
  793. if (size_used_ == size_reserved_) {
  794. double_the_size();
  795. }
  796. for (int i = size_used_; i > 0; --i) {
  797. data_[i] = data_[i - 1];
  798. }
  799. data_[0] = object;
  800. ++size_used_;
  801. return 0;
  802. }
  803. template <typename T>
  804. void GenericVector<T>::operator+=(const T& t) {
  805. push_back(t);
  806. }
  807. template <typename T>
  808. GenericVector<T>& GenericVector<T>::operator+=(const GenericVector& other) {
  809. this->reserve(size_used_ + other.size_used_);
  810. for (int i = 0; i < other.size(); ++i) {
  811. this->operator+=(other.data_[i]);
  812. }
  813. return *this;
  814. }
  815. template <typename T>
  816. GenericVector<T>& GenericVector<T>::operator=(const GenericVector& other) {
  817. if (&other != this) {
  818. this->truncate(0);
  819. this->operator+=(other);
  820. }
  821. return *this;
  822. }
  823. // Clear the array, calling the callback function if any.
  824. template <typename T>
  825. void GenericVector<T>::clear() {
  826. if (size_reserved_ > 0 && clear_cb_ != nullptr) {
  827. for (int i = 0; i < size_used_; ++i) {
  828. clear_cb_->Run(data_[i]);
  829. }
  830. }
  831. delete[] data_;
  832. data_ = nullptr;
  833. size_used_ = 0;
  834. size_reserved_ = 0;
  835. delete clear_cb_;
  836. clear_cb_ = nullptr;
  837. delete compare_cb_;
  838. compare_cb_ = nullptr;
  839. }
  840. template <typename T>
  841. void GenericVector<T>::delete_data_pointers() {
  842. for (int i = 0; i < size_used_; ++i) {
  843. delete data_[i];
  844. }
  845. }
  846. template <typename T>
  847. bool GenericVector<T>::write(
  848. FILE* f, TessResultCallback2<bool, FILE*, T const&>* cb) const {
  849. if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
  850. return false;
  851. }
  852. if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
  853. return false;
  854. }
  855. if (cb != nullptr) {
  856. for (int i = 0; i < size_used_; ++i) {
  857. if (!cb->Run(f, data_[i])) {
  858. delete cb;
  859. return false;
  860. }
  861. }
  862. delete cb;
  863. } else {
  864. if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
  865. return false;
  866. }
  867. }
  868. return true;
  869. }
  870. template <typename T>
  871. bool GenericVector<T>::read(
  872. tesseract::TFile* f, TessResultCallback2<bool, tesseract::TFile*, T*>* cb) {
  873. int32_t reserved;
  874. if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
  875. return false;
  876. }
  877. reserve(reserved);
  878. if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
  879. return false;
  880. }
  881. if (cb != nullptr) {
  882. for (int i = 0; i < size_used_; ++i) {
  883. if (!cb->Run(f, data_ + i)) {
  884. delete cb;
  885. return false;
  886. }
  887. }
  888. delete cb;
  889. } else {
  890. if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
  891. return false;
  892. }
  893. }
  894. return true;
  895. }
  896. // Writes a vector of simple types to the given file. Assumes that bitwise
  897. // read/write of T will work. Returns false in case of error.
  898. template <typename T>
  899. bool GenericVector<T>::Serialize(FILE* fp) const {
  900. if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
  901. return false;
  902. }
  903. if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
  904. return false;
  905. }
  906. return true;
  907. }
  908. template <typename T>
  909. bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
  910. if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
  911. return false;
  912. }
  913. if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
  914. return false;
  915. }
  916. return true;
  917. }
  918. // Reads a vector of simple types from the given file. Assumes that bitwise
  919. // read/write will work with ReverseN according to sizeof(T).
  920. // Returns false in case of error.
  921. // If swap is true, assumes a big/little-endian swap is needed.
  922. template <typename T>
  923. bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
  924. uint32_t reserved;
  925. if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
  926. return false;
  927. }
  928. if (swap) {
  929. Reverse32(&reserved);
  930. }
  931. // Arbitrarily limit the number of elements to protect against bad data.
  932. assert(reserved <= UINT16_MAX);
  933. if (reserved > UINT16_MAX) {
  934. return false;
  935. }
  936. reserve(reserved);
  937. size_used_ = reserved;
  938. if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
  939. return false;
  940. }
  941. if (swap) {
  942. for (int i = 0; i < size_used_; ++i) {
  943. ReverseN(&data_[i], sizeof(data_[i]));
  944. }
  945. }
  946. return true;
  947. }
  948. template <typename T>
  949. bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
  950. uint32_t reserved;
  951. if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
  952. return false;
  953. }
  954. // Arbitrarily limit the number of elements to protect against bad data.
  955. const uint32_t limit = 50000000;
  956. assert(reserved <= limit);
  957. if (reserved > limit) {
  958. return false;
  959. }
  960. reserve(reserved);
  961. size_used_ = reserved;
  962. return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
  963. }
  964. template <typename T>
  965. bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
  966. uint32_t reserved;
  967. if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
  968. return false;
  969. }
  970. return (uint32_t)fp->FRead(nullptr, sizeof(T), reserved) == reserved;
  971. }
  972. // Writes a vector of classes to the given file. Assumes the existence of
  973. // bool T::Serialize(FILE* fp) const that returns false in case of error.
  974. // Returns false in case of error.
  975. template <typename T>
  976. bool GenericVector<T>::SerializeClasses(FILE* fp) const {
  977. if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
  978. return false;
  979. }
  980. for (int i = 0; i < size_used_; ++i) {
  981. if (!data_[i].Serialize(fp)) {
  982. return false;
  983. }
  984. }
  985. return true;
  986. }
  987. template <typename T>
  988. bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const {
  989. if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
  990. return false;
  991. }
  992. for (int i = 0; i < size_used_; ++i) {
  993. if (!data_[i].Serialize(fp)) {
  994. return false;
  995. }
  996. }
  997. return true;
  998. }
  999. // Reads a vector of classes from the given file. Assumes the existence of
  1000. // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
  1001. // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
  1002. // this function. Returns false in case of error.
  1003. // If swap is true, assumes a big/little-endian swap is needed.
  1004. template <typename T>
  1005. bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
  1006. int32_t reserved;
  1007. if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
  1008. return false;
  1009. }
  1010. if (swap) {
  1011. Reverse32(&reserved);
  1012. }
  1013. T empty;
  1014. init_to_size(reserved, empty);
  1015. for (int i = 0; i < reserved; ++i) {
  1016. if (!data_[i].DeSerialize(swap, fp)) {
  1017. return false;
  1018. }
  1019. }
  1020. return true;
  1021. }
  1022. template <typename T>
  1023. bool GenericVector<T>::DeSerializeClasses(tesseract::TFile* fp) {
  1024. int32_t reserved;
  1025. if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
  1026. return false;
  1027. }
  1028. T empty;
  1029. init_to_size(reserved, empty);
  1030. for (int i = 0; i < reserved; ++i) {
  1031. if (!data_[i].DeSerialize(fp)) {
  1032. return false;
  1033. }
  1034. }
  1035. return true;
  1036. }
  1037. template <typename T>
  1038. bool GenericVector<T>::SkipDeSerializeClasses(tesseract::TFile* fp) {
  1039. int32_t reserved;
  1040. if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
  1041. return false;
  1042. }
  1043. for (int i = 0; i < reserved; ++i) {
  1044. if (!T::SkipDeSerialize(fp)) {
  1045. return false;
  1046. }
  1047. }
  1048. return true;
  1049. }
  1050. // This method clear the current object, then, does a shallow copy of
  1051. // its argument, and finally invalidates its argument.
  1052. template <typename T>
  1053. void GenericVector<T>::move(GenericVector<T>* from) {
  1054. this->clear();
  1055. this->data_ = from->data_;
  1056. this->size_reserved_ = from->size_reserved_;
  1057. this->size_used_ = from->size_used_;
  1058. this->compare_cb_ = from->compare_cb_;
  1059. this->clear_cb_ = from->clear_cb_;
  1060. from->data_ = nullptr;
  1061. from->clear_cb_ = nullptr;
  1062. from->compare_cb_ = nullptr;
  1063. from->size_used_ = 0;
  1064. from->size_reserved_ = 0;
  1065. }
  1066. template <typename T>
  1067. void GenericVector<T>::sort() {
  1068. sort(&tesseract::sort_cmp<T>);
  1069. }
  1070. // Internal recursive version of choose_nth_item.
  1071. // The algorithm used comes from "Algorithms" by Sedgewick:
  1072. // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
  1073. // The principle is to choose a random pivot, and move everything less than
  1074. // the pivot to its left, and everything greater than the pivot to the end
  1075. // of the array, then recurse on the part that contains the desired index, or
  1076. // just return the answer if it is in the equal section in the middle.
  1077. // The random pivot guarantees average linear time for the same reason that
  1078. // n times vector::push_back takes linear time on average.
  1079. // target_index, start and and end are all indices into the full array.
  1080. // Seed is a seed for rand_r for thread safety purposes. Its value is
  1081. // unimportant as the random numbers do not affect the result except
  1082. // between equal answers.
  1083. template <typename T>
  1084. int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
  1085. unsigned int* seed) {
  1086. // Number of elements to process.
  1087. int num_elements = end - start;
  1088. // Trivial cases.
  1089. if (num_elements <= 1) {
  1090. return start;
  1091. }
  1092. if (num_elements == 2) {
  1093. if (data_[start] < data_[start + 1]) {
  1094. return target_index > start ? start + 1 : start;
  1095. }
  1096. return target_index > start ? start : start + 1;
  1097. }
  1098. // Place the pivot at start.
  1099. #ifndef rand_r // _MSC_VER, ANDROID
  1100. srand(*seed);
  1101. # define rand_r(seed) rand()
  1102. #endif // _MSC_VER
  1103. int pivot = rand_r(seed) % num_elements + start;
  1104. swap(pivot, start);
  1105. // The invariant condition here is that items [start, next_lesser) are less
  1106. // than the pivot (which is at index next_lesser) and items
  1107. // [prev_greater, end) are greater than the pivot, with items
  1108. // [next_lesser, prev_greater) being equal to the pivot.
  1109. int next_lesser = start;
  1110. int prev_greater = end;
  1111. for (int next_sample = start + 1; next_sample < prev_greater;) {
  1112. if (data_[next_sample] < data_[next_lesser]) {
  1113. swap(next_lesser++, next_sample++);
  1114. } else if (data_[next_sample] == data_[next_lesser]) {
  1115. ++next_sample;
  1116. } else {
  1117. swap(--prev_greater, next_sample);
  1118. }
  1119. }
  1120. // Now the invariant is set up, we recurse on just the section that contains
  1121. // the desired index.
  1122. if (target_index < next_lesser) {
  1123. return choose_nth_item(target_index, start, next_lesser, seed);
  1124. }
  1125. if (target_index < prev_greater) {
  1126. return next_lesser; // In equal bracket.
  1127. }
  1128. return choose_nth_item(target_index, prev_greater, end, seed);
  1129. }
  1130. #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_