thresholder.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
///////////////////////////////////////////////////////////////////////
// File: thresholder.h
// Description: Base API for thresholding images in tesseract.
// Author: Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
  18. #ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
  19. #define TESSERACT_CCMAIN_THRESHOLDER_H_
  20. #include "platform.h"
  21. #include "publictypes.h"
  22. struct Pix;
  23. namespace tesseract {
  24. /// Base class for all tesseract image thresholding classes.
  25. /// Specific classes can add new thresholding methods by
  26. /// overriding ThresholdToPix.
  27. /// Each instance deals with a single image, but the design is intended to
  28. /// be useful for multiple calls to SetRectangle and ThresholdTo* if
  29. /// desired.
  30. class TESS_API ImageThresholder {
  31. public:
  32. ImageThresholder();
  33. virtual ~ImageThresholder();
  34. /// Destroy the Pix if there is one, freeing memory.
  35. virtual void Clear();
  36. /// Return true if no image has been set.
  37. bool IsEmpty() const;
  38. /// SetImage makes a copy of all the image data, so it may be deleted
  39. /// immediately after this call.
  40. /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
  41. /// Palette color images will not work properly and must be converted to
  42. /// 24 bit.
  43. /// Binary images of 1 bit per pixel may also be given but they must be
  44. /// byte packed with the MSB of the first byte being the first pixel, and a
  45. /// one pixel is WHITE. For binary images set bytes_per_pixel=0.
  46. void SetImage(const unsigned char* imagedata, int width, int height,
  47. int bytes_per_pixel, int bytes_per_line);
  48. /// Store the coordinates of the rectangle to process for later use.
  49. /// Doesn't actually do any thresholding.
  50. void SetRectangle(int left, int top, int width, int height);
  51. /// Get enough parameters to be able to rebuild bounding boxes in the
  52. /// original image (not just within the rectangle).
  53. /// Left and top are enough with top-down coordinates, but
  54. /// the height of the rectangle and the image are needed for bottom-up.
  55. virtual void GetImageSizes(int* left, int* top, int* width, int* height,
  56. int* imagewidth, int* imageheight);
  57. /// Return true if the source image is color.
  58. bool IsColor() const {
  59. return pix_channels_ >= 3;
  60. }
  61. /// Returns true if the source image is binary.
  62. bool IsBinary() const {
  63. return pix_channels_ == 0;
  64. }
  65. int GetScaleFactor() const {
  66. return scale_;
  67. }
  68. // Set the resolution of the source image in pixels per inch.
  69. // This should be called right after SetImage(), and will let us return
  70. // appropriate font sizes for the text.
  71. void SetSourceYResolution(int ppi) {
  72. yres_ = ppi;
  73. estimated_res_ = ppi;
  74. }
  75. int GetSourceYResolution() const {
  76. return yres_;
  77. }
  78. int GetScaledYResolution() const {
  79. return scale_ * yres_;
  80. }
  81. // Set the resolution of the source image in pixels per inch, as estimated
  82. // by the thresholder from the text size found during thresholding.
  83. // This value will be used to set internal size thresholds during recognition
  84. // and will not influence the output "point size." The default value is
  85. // the same as the source resolution. (yres_)
  86. void SetEstimatedResolution(int ppi) {
  87. estimated_res_ = ppi;
  88. }
  89. // Returns the estimated resolution, including any active scaling.
  90. // This value will be used to set internal size thresholds during recognition.
  91. int GetScaledEstimatedResolution() const {
  92. return scale_ * estimated_res_;
  93. }
  94. /// Pix vs raw, which to use? Pix is the preferred input for efficiency,
  95. /// since raw buffers are copied.
  96. /// SetImage for Pix clones its input, so the source pix may be pixDestroyed
  97. /// immediately after, but may not go away until after the Thresholder has
  98. /// finished with it.
  99. void SetImage(const Pix* pix);
  100. /// Threshold the source image as efficiently as possible to the output Pix.
  101. /// Creates a Pix and sets pix to point to the resulting pointer.
  102. /// Caller must use pixDestroy to free the created Pix.
  103. /// Returns false on error.
  104. virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
  105. // Gets a pix that contains an 8 bit threshold value at each pixel. The
  106. // returned pix may be an integer reduction of the binary image such that
  107. // the scale factor may be inferred from the ratio of the sizes, even down
  108. // to the extreme of a 1x1 pixel thresholds image.
  109. // Ideally the 8 bit threshold should be the exact threshold used to generate
  110. // the binary image in ThresholdToPix, but this is not a hard constraint.
  111. // Returns nullptr if the input is binary. PixDestroy after use.
  112. virtual Pix* GetPixRectThresholds();
  113. /// Get a clone/copy of the source image rectangle.
  114. /// The returned Pix must be pixDestroyed.
  115. /// This function will be used in the future by the page layout analysis, and
  116. /// the layout analysis that uses it will only be available with Leptonica,
  117. /// so there is no raw equivalent.
  118. Pix* GetPixRect();
  119. // Get a clone/copy of the source image rectangle, reduced to greyscale,
  120. // and at the same resolution as the output binary.
  121. // The returned Pix must be pixDestroyed.
  122. // Provided to the classifier to extract features from the greyscale image.
  123. virtual Pix* GetPixRectGrey();
  124. protected:
  125. // ----------------------------------------------------------------------
  126. // Utility functions that may be useful components for other thresholders.
  127. /// Common initialization shared between SetImage methods.
  128. virtual void Init();
  129. /// Return true if we are processing the full image.
  130. bool IsFullImage() const {
  131. return rect_left_ == 0 && rect_top_ == 0 &&
  132. rect_width_ == image_width_ && rect_height_ == image_height_;
  133. }
  134. // Otsu thresholds the rectangle, taking the rectangle from *this.
  135. void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;
  136. /// Threshold the rectangle, taking everything except the src_pix
  137. /// from the class, using thresholds/hi_values to the output pix.
  138. /// NOTE that num_channels is the size of the thresholds and hi_values
  139. // arrays and also the bytes per pixel in src_pix.
  140. void ThresholdRectToPix(Pix* src_pix, int num_channels,
  141. const int* thresholds, const int* hi_values,
  142. Pix** pix) const;
  143. protected:
  144. /// Clone or other copy of the source Pix.
  145. /// The pix will always be PixDestroy()ed on destruction of the class.
  146. Pix* pix_;
  147. int image_width_; ///< Width of source pix_.
  148. int image_height_; ///< Height of source pix_.
  149. int pix_channels_; ///< Number of 8-bit channels in pix_.
  150. int pix_wpl_; ///< Words per line of pix_.
  151. // Limits of image rectangle to be processed.
  152. int scale_; ///< Scale factor from original image.
  153. int yres_; ///< y pixels/inch in source image.
  154. int estimated_res_; ///< Resolution estimate from text size.
  155. int rect_left_;
  156. int rect_top_;
  157. int rect_width_;
  158. int rect_height_;
  159. };
  160. } // namespace tesseract.
  161. #endif // TESSERACT_CCMAIN_THRESHOLDER_H_