tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
alignedblob.h
1 // File: alignedblob.h
3 // Description: A class to find vertically aligned blobs in a BBGrid,
4 // and a struct to hold control parameters.
5 // Author: Ray Smith
6 // Created: Fri Mar 21 15:03:01 PST 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_
22 #define TESSERACT_TEXTORD_ALIGNEDBLOB_H_
23 
24 #include "bbgrid.h"
25 #include "blobbox.h"
26 #include "strngs.h"
27 #include "tabvector.h"
28 
29 extern INT_VAR_H(textord_debug_bugs, 0,
30  "Turn on output related to bugs in tab finding");
31 extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding");
32 extern BOOL_VAR_H(textord_debug_printable, false,
33  "Make debug windows printable");
34 
35 namespace tesseract {
36 
37 // Simple structure to hold the search parameters for AlignedBlob.
38 // The members are mostly derived from constants, which are
39 // conditioned on the alignment parameter.
40 // For finding vertical lines, a different set of constants is
41 // used, selected by the second (three-argument) constructor.
43  // Constructor to set the parameters for finding aligned and ragged tabs.
44  // Vertical_x and vertical_y are the current estimates of the true vertical
45  // direction (up) in the image. Height is the height of the starter blob.
46  // v_gap_multiple is the multiple of height that will be used as a limit
47  // on vertical gap before giving up and calling the line ended.
48  // resolution is the original image resolution, and alignment0 indicates the
49  // type of tab stop to be found.
50  AlignedBlobParams(int vertical_x, int vertical_y, int height,
51  int v_gap_multiple, int min_gutter_width, int resolution,
52  TabAlignment alignment0);
53  // Constructor to set the parameters for finding vertical lines.
54  // Vertical_x and vertical_y are the current estimates of the true vertical
55  // direction (up) in the image. Width is the width of the starter blob.
56  AlignedBlobParams(int vertical_x, int vertical_y, int width);
57 
58  // Fit the vertical vector into an ICOORD, which is 16 bit.
59  void set_vertical(int vertical_x, int vertical_y);
60 
61  double gutter_fraction; // Multiple of height used for min_gutter.
62  bool right_tab; // We are looking at right edges.
63  bool ragged; // We are looking for a ragged (vs aligned) edge.
64  TabAlignment alignment; // The type we are trying to produce.
65  TabType confirmed_type; // Type to flag blobs if accepted.
66  int max_v_gap; // Max vertical gap to be tolerated.
67  int min_gutter; // Minimum gutter between columns.
68  // Tolerances allowed on horizontal alignment of aligned edges.
69  int l_align_tolerance; // Left edges.
70  int r_align_tolerance; // Right edges.
71  // Conditions for accepting a line.
72  int min_points; // Minimum number of points to be OK.
73  int min_length; // Min length of completed line.
74 
75  ICOORD vertical; // Current estimate of logical vertical.
76 };
77 
78 // The AlignedBlob class contains code to find vertically aligned blobs.
79 // It is factored out into a separate class (derived from BlobGrid), so it
80 // can be used by both vertical line finding (LineFind) and tabstop finding (TabFind).
81 class AlignedBlob : public BlobGrid {
82  public:
    // Constructs the grid from a cell size and two corner coordinates.
    // NOTE(review): presumably these are forwarded unchanged to the BlobGrid
    // base class -- confirm in alignedblob.cpp.
83  AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright);
84  virtual ~AlignedBlob();
85 
86  // Returns true if the given (x, y) coordinates are within the test rectangle
87  // and the debug level is at least the given detail_level.
88  static bool WithinTestRegion(int detail_level, int x, int y);
89 
90  // Displays the tab codes of the BLOBNBOXes in this grid.
91  ScrollView* DisplayTabs(const char* window_name, ScrollView* tab_win);
92 
93  // Finds a vector corresponding to a set of vertically aligned blob edges
94  // running through the given bbox. The type of vector returned and the
95  // search parameters are determined by align_params (taken by value).
96  // *vertical_x and *vertical_y are updated with an estimate of the real
97  // vertical direction. (skew finding.)
98  // Returns nullptr if no decent vector can be found.
99  TabVector* FindVerticalAlignment(AlignedBlobParams align_params,
100  BLOBNBOX* bbox,
101  int* vertical_x, int* vertical_y);
102 
103  private:
104  // Finds a set of blobs that are aligned in the given vertical
105  // direction with the given blob. The aligned blobs are returned in
106  // good_points and the return value is the number in the list.
107  // For other parameters see FindAlignedBlob below.
108  int AlignTabs(const AlignedBlobParams& params,
109  bool top_to_bottom, BLOBNBOX* bbox,
110  BLOBNBOX_CLIST* good_points, int* end_y);
111 
112  // Searches vertically for a blob that is aligned with the input bbox.
113  // The search parameters are determined by the AlignedBlobParams p.
114  // top_to_bottom tells whether to search down or up.
115  // The return value is nullptr if nothing was found in the search box
116  // or if a blob was found in the gutter. On a nullptr return, *end_y
117  // is set to the edge of the search box or the leading edge of the
118  // gutter blob if one was found.
119  BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p,
120  bool top_to_bottom, BLOBNBOX* bbox,
121  int x_start, int* end_y);
122 };
123 
124 } // namespace tesseract.
125 
126 #endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H_
TabType confirmed_type
Definition: alignedblob.h:65
void set_vertical(int vertical_x, int vertical_y)
Definition: alignedblob.cpp:135
Definition: alignedblob.h:81
TabAlignment
Definition: tabvector.h:45
Definition: tabvector.h:112
ICOORD vertical
Definition: alignedblob.h:75
bool right_tab
Definition: alignedblob.h:62
Definition: baseapi.cpp:94
AlignedBlobParams(int vertical_x, int vertical_y, int height, int v_gap_multiple, int min_gutter_width, int resolution, TabAlignment alignment0)
Definition: alignedblob.cpp:74
int min_length
Definition: alignedblob.h:73
Definition: blobbox.h:144
Definition: scrollview.h:102
int max_v_gap
Definition: alignedblob.h:66
Definition: blobgrid.h:33
TabAlignment alignment
Definition: alignedblob.h:64
int r_align_tolerance
Definition: alignedblob.h:70
int min_gutter
Definition: alignedblob.h:67
integer coordinate
Definition: points.h:32
Definition: alignedblob.h:42
bool ragged
Definition: alignedblob.h:63
double gutter_fraction
Definition: alignedblob.h:61
int min_points
Definition: alignedblob.h:72
int l_align_tolerance
Definition: alignedblob.h:69