tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
topitch.h
1 /**********************************************************************
2  * File: topitch.h (Formerly to_pitch.h)
3  * Description: Code to determine fixed pitchness and the pitch if fixed.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1993, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef TOPITCH_H
20 #define TOPITCH_H
21 
22 #include "blobbox.h"
23 
// Forward declaration of tesseract::Tesseract.
// NOTE(review): none of the prototypes declared in this header take or return
// this type, so the forward declaration may be unused here -- confirm before
// removing.
24 namespace tesseract {
25 class Tesseract;
26 }
// Declarations of the textord_* parameters exposed by this header.
// Each BOOL_VAR_H / double_VAR_H invocation passes three arguments: a
// parameter name, a value, and a description string. The macros themselves
// are defined outside this file; presumably the value is the default and the
// string is help text -- confirm against the macro definitions.
27 extern BOOL_VAR_H (textord_debug_pitch_test, FALSE,
28 "Debug on fixed pitch test");
29 extern BOOL_VAR_H (textord_debug_pitch_metric, FALSE,
30 "Write full metric stuff");
31 extern BOOL_VAR_H (textord_show_row_cuts, FALSE, "Draw row-level cuts");
32 extern BOOL_VAR_H (textord_show_page_cuts, FALSE, "Draw page-level cuts");
33 extern BOOL_VAR_H (textord_pitch_cheat, FALSE,
34 "Use correct answer for fixed/prop");
35 extern BOOL_VAR_H (textord_blockndoc_fixed, TRUE,
36 "Attempt whole doc/block fixed pitch");
37 extern BOOL_VAR_H (textord_fast_pitch_test, FALSE,
38 "Do even faster pitch algorithm");
39 extern double_VAR_H (textord_projection_scale, 0.125,
40 "Ding rate for mid-cuts");
41 extern double_VAR_H (textord_balance_factor, 2.0,
42 "Ding rate for unbalanced char cells");
43 
// Declaration only; the definition lives outside this header.
// Takes the page's top-right coordinate, a list of TO_BLOCKs, the page skew
// gradient, a rotation described as "for drawing", and a testing_on flag.
// NOTE(review): the "correct orientation" note on testing_on does not
// obviously describe a flag of that name -- confirm against the definition.
44 void compute_fixed_pitch(ICOORD page_tr, // top right
45  TO_BLOCK_LIST* port_blocks, // input list
46  float gradient, // page skew
47  FCOORD rotation, // for drawing
48  bool testing_on); // correct orientation
// Declaration only. Per the inline notes it operates on bad_row (the "row to
// fix") within bad_block, with `blocks` as the list to scan; row_target and
// block_target are row/block numbers.
// NOTE(review): "get some value" is the only summary available here -- which
// value is obtained, and where it is stored, cannot be told from this header.
49 void fix_row_pitch( //get some value
50  TO_ROW *bad_row, //row to fix
51  TO_BLOCK *bad_block, //block of bad_row
52  TO_BLOCK_LIST *blocks, //blocks to scan
53  int32_t row_target, //number of row
54  int32_t block_target //number of block
55  );
// Declaration only. Per-block counterpart (by name and signature) of the
// list-taking compute_fixed_pitch declared above it: it receives one
// TO_BLOCK plus that block's index.
// NOTE(review): the original note on `block` said "input list" although the
// parameter is a single TO_BLOCK*; reworded below.
56 void compute_block_pitch(TO_BLOCK* block, // input block (a single TO_BLOCK*, not a list)
57  FCOORD rotation, // for drawing
58  int32_t block_index, // block number
59  bool testing_on); // correct orientation
// Declaration only. Takes a block, its index, and a testing_on flag; returns
// bool.
// NOTE(review): the meaning of the bool result is not stated anywhere in this
// header; document it from the definition.
60 bool compute_rows_pitch( //find line stats
61  TO_BLOCK* block, //block to do
62  int32_t block_index, //block number
63  bool testing_on //correct orientation
64 );
// Declaration only. Takes the same first three arguments as
// compute_fixed_pitch (page top-right, block list, skew gradient) and returns
// bool.
// NOTE(review): presumably the result reports whether a document-wide fixed
// pitch was accepted -- this is inferred from the name only; confirm.
65 bool try_doc_fixed( //determine pitch
66  ICOORD page_tr, //top right
67  TO_BLOCK_LIST* port_blocks, //input list
68  float gradient //page skew
69 );
// Declaration only. Takes one block and its index; returns bool.
// NOTE(review): the "find line stats" summary is repeated verbatim on several
// declarations in this header and may not describe this function; the
// meaning of the bool result is likewise not documented here.
70 bool try_block_fixed( //find line stats
71  TO_BLOCK* block, //block to do
72  int32_t block_index //block number
73 );
// Declaration only. Same parameter list as compute_rows_pitch (block, block
// index, testing_on); returns bool.
// NOTE(review): summary comment is the same "find line stats" text used on
// neighbouring declarations -- verify it against the definition.
74 bool try_rows_fixed( //find line stats
75  TO_BLOCK* block, //block to do
76  int32_t block_index, //block number
77  bool testing_on //correct orientation
78 );
// Declaration only. Takes one block and its index; returns nothing.
// NOTE(review): the name suggests it prints per-block counts, while the
// inline summary says "find line stats" (text shared with other
// declarations) -- confirm which is accurate.
79 void print_block_counts( //find line stats
80  TO_BLOCK *block, //block to do
81  int32_t block_index //block number
82  );
// Declaration only. Takes one block and seven int32_t counters passed by
// non-const reference. The "add to counts" note indicates the counters are
// added to, so callers are presumably expected to initialise them first --
// confirm against the definition.
// NOTE(review): the "find line stats" summary is shared with other
// declarations and may be a copy-paste.
83 void count_block_votes( //find line stats
84  TO_BLOCK *block, //block to do
85  int32_t &def_fixed, //add to counts
86  int32_t &def_prop,
87  int32_t &maybe_fixed,
88  int32_t &maybe_prop,
89  int32_t &corr_fixed,
90  int32_t &corr_prop,
91  int32_t &dunno);
// Declaration only. Takes a row, a maxwidth described as being "of spaces",
// and a testing_on flag; returns bool.
// NOTE(review): the meaning of the bool result is not documented here.
92 bool row_pitch_stats( //find line stats
93  TO_ROW* row, //current row
94  int32_t maxwidth, //of spaces
95  bool testing_on //correct orientation
96 );
// Declaration only. Takes a row and the block that contains it, their
// indices, a maximum permitted space width, and a gap size (dm_gap) that the
// inline note calls "ignorable"; returns bool.
// NOTE(review): the "find lines" summary does not mention pitch although the
// name does; the meaning of the bool result is not documented here.
97 bool find_row_pitch( //find lines
98  TO_ROW* row, //row to do
99  int32_t maxwidth, //max permitted space
100  int32_t dm_gap, //ignorable gaps
101  TO_BLOCK* block, //block of row
102  int32_t block_index, //block_number
103  int32_t row_index, //number of row
104  bool testing_on //correct orientation
105 );
// Declaration only. Takes a row, a BLOCK (note: BLOCK*, not the TO_BLOCK*
// used by the surrounding declarations) and a block index; returns bool.
// NOTE(review): the `block` parameter had no comment and the "find lines"
// summary is shared with find_row_pitch -- fill in from the definition.
106 bool fixed_pitch_row( //find lines
107  TO_ROW* row, //row to do
108  BLOCK* block, //NOTE(review): undocumented in the original
109  int32_t block_index //block_number
110 );
// Declaration only. Takes a row plus two STATS pointers (blob gaps and
// centre-to-centre pitch), an initial pitch guess, an estimated space size,
// two flags controlling how oversized objects are treated (discard / split),
// and an ignorable gap size; returns bool.
// NOTE(review): whether gap_stats / pitch_stats are inputs, outputs or both
// is not visible from this header; nor is the meaning of the bool result.
111 bool count_pitch_stats( //find lines
112  TO_ROW* row, //row to do
113  STATS* gap_stats, //blob gaps
114  STATS* pitch_stats, //centre-centre stats
115  float initial_pitch, //guess at pitch
116  float min_space, //estimate space size
117  bool ignore_outsize, //discard big objects
118  bool split_outsize, //split big objects
119  int32_t dm_gap //ignorable gaps
120 );
// Declaration only. Takes a row, its vertical projection with left/right
// edges, a blank size, and a cell list pointer; returns float.
// initial_pitch, best_sp_sd and best_mid_cuts are non-const references, so
// the callee is able to write results back through them.
// NOTE(review): the meaning of the float result is not documented here.
121 float tune_row_pitch( //find fp cells
122  TO_ROW* row, //row to do
123  STATS* projection, //vertical projection
124  int16_t projection_left, //edge of projection
125  int16_t projection_right, //edge of projection
126  float space_size, //size of blank
127  float& initial_pitch, //guess at pitch
128  float& best_sp_sd, //space sd
129  int16_t& best_mid_cuts, //no of cheap cuts
130  ICOORDELT_LIST* best_cells, //row cells
131  bool testing_on //individual words
132 );
// Declaration only. Parameter list is identical to tune_row_pitch (same
// types, names and order); returns float.
// initial_pitch, best_sp_sd and best_mid_cuts are non-const references, so
// the callee is able to write results back through them.
// NOTE(review): how this differs from tune_row_pitch is not visible from the
// header -- document the distinction from the definitions.
133 float tune_row_pitch2( //find fp cells
134  TO_ROW* row, //row to do
135  STATS* projection, //vertical projection
136  int16_t projection_left, //edge of projection
137  int16_t projection_right, //edge of projection
138  float space_size, //size of blank
139  float& initial_pitch, //guess at pitch
140  float& best_sp_sd, //space sd
141  int16_t& best_mid_cuts, //no of cheap cuts
142  ICOORDELT_LIST* best_cells, //row cells
143  bool testing_on //individual words
144 );
// Declaration only. Takes a row, its vertical projection with left/right
// edges, a blank size, a pitch guess (by value here), and a list of chop
// points; returns float.
// sp_sd and mid_cuts are non-const references, so the callee is able to
// write results back through them.
// start and end default to 0, so callers may omit them.
// NOTE(review): what start == end == 0 means for the "good range" is not
// stated in this header -- confirm against the definition.
145 float compute_pitch_sd( //find fp cells
146  TO_ROW* row, //row to do
147  STATS* projection, //vertical projection
148  int16_t projection_left, //edge
149  int16_t projection_right, //edge
150  float space_size, //size of blank
151  float initial_pitch, //guess at pitch
152  float& sp_sd, //space sd
153  int16_t& mid_cuts, //no of free cuts
154  ICOORDELT_LIST* row_cells, //list of chop pts
155  bool testing_on, //individual words
156  int16_t start = 0, //start of good range
157  int16_t end = 0 //end of good range
158 );
// Declaration only. Compared with compute_pitch_sd, this signature has no
// space_size parameter and takes an `occupation` reference (number of
// occupied cells) where that one takes sp_sd; returns float.
// occupation and mid_cuts are non-const references, so the callee is able to
// write results back through them.
// start and end default to 0, so callers may omit them.
// NOTE(review): the meaning of the float result is not documented here.
159 float compute_pitch_sd2( //find fp cells
160  TO_ROW* row, //row to do
161  STATS* projection, //vertical projection
162  int16_t projection_left, //edge
163  int16_t projection_right, //edge
164  float initial_pitch, //guess at pitch
165  int16_t& occupation, //no of occupied cells
166  int16_t& mid_cuts, //no of free cuts
167  ICOORDELT_LIST* row_cells, //list of chop pts
168  bool testing_on, //individual words
169  int16_t start = 0, //start of good range
170  int16_t end = 0 //end of good range
171 );
// Declaration only. Takes a row, its vertical projection with left/right
// edges, a blank size and a pitch guess; returns nothing.
// The "size of blank" note was fused onto the projection_left line in the
// original; it is attached to space_size here, matching the wording used for
// that parameter on the neighbouring declarations.
// NOTE(review): the "find fp cells" summary is shared with the tune_* and
// compute_* declarations and may not describe a print routine.
172 void print_pitch_sd( //find fp cells
173  TO_ROW *row, //row to do
174  STATS *projection, //vertical projection
175  int16_t projection_left, //edges
176  int16_t projection_right,
177  float space_size, //size of blank
178  float initial_pitch //guess at pitch
179  );
// Declaration only. Takes the block to search and a flag the inline note
// describes as "Debug mode"; returns nothing.
// NOTE(review): what is done with any repeated characters found is not
// visible from this header.
180 void find_repeated_chars(TO_BLOCK* block, // Block to search.
181  bool testing_on); // Debug mode.
// Declaration only. Drawing helper per its inline notes: takes the block to
// draw, the pitch to draw with, and a `nonspace` value used "for space
// threshold"; returns nothing.
// NOTE(review): the drawing target (window/debug surface) is not visible
// from this header.
182 void plot_fp_word( //draw block of words
183  TO_BLOCK *block, //block to draw
184  float pitch, //pitch to draw with
185  float nonspace //for space threshold
186  );
187 #endif
Definition: baseapi.cpp:94
double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5, "Max fraction of mean blob width allowed for vertical gaps in vertical text")
Definition: ocrblock.h:30
BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection")
Definition: blobbox.h:556
Definition: statistc.h:33
integer coordinate
Definition: points.h:32
Definition: blobbox.h:705
Definition: points.h:189