// renderer.h (9.8 KB)
  1. ///////////////////////////////////////////////////////////////////////
  2. // File: renderer.h
  3. // Description: Rendering interface to inject into TessBaseAPI
  4. //
  5. // (C) Copyright 2011, Google Inc.
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. //
  16. ///////////////////////////////////////////////////////////////////////
  17. #ifndef TESSERACT_API_RENDERER_H_
  18. #define TESSERACT_API_RENDERER_H_
  19. // To avoid collision with other typenames include the ABSOLUTE MINIMUM
  20. // complexity of includes here. Use forward declarations wherever possible
  21. // and hide includes of complex types in baseapi.cpp.
  22. #include <string> // for std::string
  23. #include "genericvector.h"
  24. #include "platform.h"
  25. struct Pix;
  26. namespace tesseract {
  27. class TessBaseAPI;
  28. /**
  29. * Interface for rendering tesseract results into a document, such as text,
  30. * HOCR or pdf. This class is abstract. Specific classes handle individual
  31. * formats. This interface is then used to inject the renderer class into
  32. * tesseract when processing images.
  33. *
  34. * For simplicity implementing this with tesesract version 3.01,
  35. * the renderer contains document state that is cleared from document
  36. * to document just as the TessBaseAPI is. This way the base API can just
  37. * delegate its rendering functionality to injected renderers, and the
  38. * renderers can manage the associated state needed for the specific formats
  39. * in addition to the heuristics for producing it.
  40. */
  41. class TESS_API TessResultRenderer {
  42. public:
  43. virtual ~TessResultRenderer();
  44. // Takes ownership of pointer so must be new'd instance.
  45. // Renderers aren't ordered, but appends the sequences of next parameter
  46. // and existing next(). The renderers should be unique across both lists.
  47. void insert(TessResultRenderer* next);
  48. // Returns the next renderer or nullptr.
  49. TessResultRenderer* next() {
  50. return next_;
  51. }
  52. /**
  53. * Starts a new document with the given title.
  54. * This clears the contents of the output data.
  55. * Title should use UTF-8 encoding.
  56. */
  57. bool BeginDocument(const char* title);
  58. /**
  59. * Adds the recognized text from the source image to the current document.
  60. * Invalid if BeginDocument not yet called.
  61. *
  62. * Note that this API is a bit weird but is designed to fit into the
  63. * current TessBaseAPI implementation where the api has lots of state
  64. * information that we might want to add in.
  65. */
  66. bool AddImage(TessBaseAPI* api);
  67. /**
  68. * Finishes the document and finalizes the output data
  69. * Invalid if BeginDocument not yet called.
  70. */
  71. bool EndDocument();
  72. const char* file_extension() const {
  73. return file_extension_;
  74. }
  75. const char* title() const {
  76. return title_.c_str();
  77. }
  78. // Is everything fine? Otherwise something went wrong.
  79. bool happy() {
  80. return happy_;
  81. }
  82. /**
  83. * Returns the index of the last image given to AddImage
  84. * (i.e. images are incremented whether the image succeeded or not)
  85. *
  86. * This is always defined. It means either the number of the
  87. * current image, the last image ended, or in the completed document
  88. * depending on when in the document lifecycle you are looking at it.
  89. * Will return -1 if a document was never started.
  90. */
  91. int imagenum() const {
  92. return imagenum_;
  93. }
  94. protected:
  95. /**
  96. * Called by concrete classes.
  97. *
  98. * outputbase is the name of the output file excluding
  99. * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
  100. *
  101. * extension indicates the file extension to be used for output
  102. * files. For example "pdf" will produce a .pdf file, and "hocr"
  103. * will produce .hocr files.
  104. */
  105. TessResultRenderer(const char* outputbase, const char* extension);
  106. // Hook for specialized handling in BeginDocument()
  107. virtual bool BeginDocumentHandler();
  108. // This must be overridden to render the OCR'd results
  109. virtual bool AddImageHandler(TessBaseAPI* api) = 0;
  110. // Hook for specialized handling in EndDocument()
  111. virtual bool EndDocumentHandler();
  112. // Renderers can call this to append '\0' terminated strings into
  113. // the output string returned by GetOutput.
  114. // This method will grow the output buffer if needed.
  115. void AppendString(const char* s);
  116. // Renderers can call this to append binary byte sequences into
  117. // the output string returned by GetOutput. Note that s is not necessarily
  118. // '\0' terminated (and can contain '\0' within it).
  119. // This method will grow the output buffer if needed.
  120. void AppendData(const char* s, int len);
  121. private:
  122. const char* file_extension_; // standard extension for generated output
  123. STRING title_; // title of document being renderered
  124. int imagenum_; // index of last image added
  125. FILE* fout_; // output file pointer
  126. TessResultRenderer* next_; // Can link multiple renderers together
  127. bool happy_; // I get grumpy when the disk fills up, etc.
  128. };
  129. /**
  130. * Renders tesseract output into a plain UTF-8 text string
  131. */
  132. class TESS_API TessTextRenderer : public TessResultRenderer {
  133. public:
  134. explicit TessTextRenderer(const char* outputbase);
  135. protected:
  136. bool AddImageHandler(TessBaseAPI* api) override;
  137. };
  138. /**
  139. * Renders tesseract output into an hocr text string
  140. */
  141. class TESS_API TessHOcrRenderer : public TessResultRenderer {
  142. public:
  143. explicit TessHOcrRenderer(const char* outputbase, bool font_info);
  144. explicit TessHOcrRenderer(const char* outputbase);
  145. protected:
  146. bool BeginDocumentHandler() override;
  147. bool AddImageHandler(TessBaseAPI* api) override;
  148. bool EndDocumentHandler() override;
  149. private:
  150. bool font_info_; // whether to print font information
  151. };
  152. /**
  153. * Renders tesseract output into an alto text string
  154. */
  155. class TESS_API TessAltoRenderer : public TessResultRenderer {
  156. public:
  157. explicit TessAltoRenderer(const char* outputbase);
  158. protected:
  159. bool BeginDocumentHandler() override;
  160. bool AddImageHandler(TessBaseAPI* api) override;
  161. bool EndDocumentHandler() override;
  162. };
  163. /**
  164. * Renders Tesseract output into a TSV string
  165. */
  166. class TESS_API TessTsvRenderer : public TessResultRenderer {
  167. public:
  168. explicit TessTsvRenderer(const char* outputbase, bool font_info);
  169. explicit TessTsvRenderer(const char* outputbase);
  170. protected:
  171. bool BeginDocumentHandler() override;
  172. bool AddImageHandler(TessBaseAPI* api) override;
  173. bool EndDocumentHandler() override;
  174. private:
  175. bool font_info_; // whether to print font information
  176. };
  177. /**
  178. * Renders tesseract output into searchable PDF
  179. */
  180. class TESS_API TessPDFRenderer : public TessResultRenderer {
  181. public:
  182. // datadir is the location of the TESSDATA. We need it because
  183. // we load a custom PDF font from this location.
  184. TessPDFRenderer(const char* outputbase, const char* datadir,
  185. bool textonly = false);
  186. protected:
  187. bool BeginDocumentHandler() override;
  188. bool AddImageHandler(TessBaseAPI* api) override;
  189. bool EndDocumentHandler() override;
  190. private:
  191. // We don't want to have every image in memory at once,
  192. // so we store some metadata as we go along producing
  193. // PDFs one page at a time. At the end, that metadata is
  194. // used to make everything that isn't easily handled in a
  195. // streaming fashion.
  196. long int obj_; // counter for PDF objects
  197. GenericVector<long int> offsets_; // offset of every PDF object in bytes
  198. GenericVector<long int> pages_; // object number for every /Page object
  199. std::string datadir_; // where to find the custom font
  200. bool textonly_; // skip images if set
  201. // Bookkeeping only. DIY = Do It Yourself.
  202. void AppendPDFObjectDIY(size_t objectsize);
  203. // Bookkeeping + emit data.
  204. void AppendPDFObject(const char* data);
  205. // Create the /Contents object for an entire page.
  206. char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
  207. // Turn an image into a PDF object. Only transcode if we have to.
  208. static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
  209. char** pdf_object, long int* pdf_object_size,
  210. int jpg_quality);
  211. };
  212. /**
  213. * Renders tesseract output into a plain UTF-8 text string
  214. */
  215. class TESS_API TessUnlvRenderer : public TessResultRenderer {
  216. public:
  217. explicit TessUnlvRenderer(const char* outputbase);
  218. protected:
  219. bool AddImageHandler(TessBaseAPI* api) override;
  220. };
  221. /**
  222. * Renders tesseract output into a plain UTF-8 text string for LSTMBox
  223. */
  224. class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
  225. public:
  226. explicit TessLSTMBoxRenderer(const char* outputbase);
  227. protected:
  228. bool AddImageHandler(TessBaseAPI* api) override;
  229. };
  230. /**
  231. * Renders tesseract output into a plain UTF-8 text string
  232. */
  233. class TESS_API TessBoxTextRenderer : public TessResultRenderer {
  234. public:
  235. explicit TessBoxTextRenderer(const char* outputbase);
  236. protected:
  237. bool AddImageHandler(TessBaseAPI* api) override;
  238. };
  239. /**
  240. * Renders tesseract output into a plain UTF-8 text string in WordStr format
  241. */
  242. class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
  243. public:
  244. explicit TessWordStrBoxRenderer(const char* outputbase);
  245. protected:
  246. bool AddImageHandler(TessBaseAPI* api) override;
  247. };
  248. #ifndef DISABLED_LEGACY_ENGINE
  249. /**
  250. * Renders tesseract output into an osd text string
  251. */
  252. class TESS_API TessOsdRenderer : public TessResultRenderer {
  253. public:
  254. explicit TessOsdRenderer(const char* outputbase);
  255. protected:
  256. bool AddImageHandler(TessBaseAPI* api) override;
  257. };
  258. #endif // ndef DISABLED_LEGACY_ENGINE
  259. } // namespace tesseract.
  260. #endif // TESSERACT_API_RENDERER_H_