42 extern BOOL_VAR_H(textord_heavy_nr, FALSE,
"Vigorously remove noise");
43 extern BOOL_VAR_H (textord_show_initial_rows, FALSE,
44 "Display row accumulation");
45 extern BOOL_VAR_H (textord_show_parallel_rows, FALSE,
46 "Display page correlated rows");
47 extern BOOL_VAR_H (textord_show_expanded_rows, FALSE,
48 "Display rows after expanding");
49 extern BOOL_VAR_H (textord_show_final_rows, FALSE,
50 "Display rows after final fitting");
51 extern BOOL_VAR_H (textord_show_final_blobs, FALSE,
52 "Display blob bounds after pre-ass");
53 extern BOOL_VAR_H (textord_test_landscape, FALSE,
"Tests refer to land/port");
54 extern BOOL_VAR_H (textord_parallel_baselines, TRUE,
55 "Force parallel baselines");
56 extern BOOL_VAR_H (textord_straight_baselines, FALSE,
57 "Force straight baselines");
58 extern BOOL_VAR_H (textord_quadratic_baselines, FALSE,
59 "Use quadratic splines");
60 extern BOOL_VAR_H (textord_old_baselines, TRUE,
"Use old baseline algorithm");
61 extern BOOL_VAR_H (textord_old_xheight, TRUE,
"Use old xheight algorithm");
62 extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE,
"Use spline baseline");
63 extern BOOL_VAR_H (textord_fix_makerow_bug, TRUE,
64 "Prevent multiple baselines");
65 extern BOOL_VAR_H (textord_cblob_blockocc, TRUE,
66 "Use new projection for underlines");
67 extern BOOL_VAR_H (textord_debug_xheights, FALSE,
"Test xheight algorithms");
68 extern INT_VAR_H (textord_test_x, -INT32_MAX,
"coord of test pt");
69 extern INT_VAR_H (textord_test_y, -INT32_MAX,
"coord of test pt");
70 extern INT_VAR_H (textord_min_blobs_in_row, 4,
71 "Min blobs before gradient counted");
72 extern INT_VAR_H (textord_spline_minblobs, 8,
73 "Min blobs in each spline segment");
74 extern INT_VAR_H (textord_spline_medianwin, 6,
75 "Size of window for spline segmentation");
76 extern INT_VAR_H (textord_min_xheight, 10,
"Min credible pixel xheight");
77 extern double_VAR_H (textord_spline_shift_fraction, 0.02,
78 "Fraction of line spacing for quad");
79 extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
80 "Fraction of line spacing for outlier");
81 extern double_VAR_H (textord_skew_ile, 0.5,
"Ile of gradients for page skew");
82 extern double_VAR_H (textord_skew_lag, 0.75,
83 "Lag for skew on row accumulation");
84 extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
85 "Max iqr/median for linespace");
86 extern double_VAR_H (textord_width_limit, 8,
87 "Max width of blobs to make rows");
88 extern double_VAR_H (textord_chop_width, 1.5,
"Max width before chopping");
89 extern double_VAR_H (textord_minxh, 0.25,
90 "fraction of linesize for min xheight");
91 extern double_VAR_H (textord_min_linesize, 1.25,
92 "* blob height for initial linesize");
93 extern double_VAR_H (textord_excess_blobsize, 1.3,
94 "New row made if blob makes row this big");
95 extern double_VAR_H (textord_occupancy_threshold, 0.4,
96 "Fraction of neighbourhood");
97 extern double_VAR_H (textord_underline_width, 2.0,
98 "Multiple of line_size for underline");
99 extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
100 "Min blob height/top to include blob top into xheight stats");
101 extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
102 "Min pile height to make xheight");
103 extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
104 "Min pile height to make ascheight");
105 extern double_VAR_H (textord_ascx_ratio_min, 1.2,
"Min cap/xheight");
106 extern double_VAR_H (textord_ascx_ratio_max, 1.7,
"Max cap/xheight");
107 extern double_VAR_H (textord_descx_ratio_min, 0.15,
"Min desc/xheight");
108 extern double_VAR_H (textord_descx_ratio_max, 0.6,
"Max desc/xheight");
109 extern double_VAR_H (textord_xheight_error_margin, 0.1,
"Accepted variation");
110 extern INT_VAR_H (textord_lms_line_trials, 12,
"Number of linew fits to do");
111 extern BOOL_VAR_H (textord_new_initial_xheight, TRUE,
112 "Use test xheight mechanism");
113 extern BOOL_VAR_H(textord_debug_blob, FALSE,
"Print test blob information");
115 inline void get_min_max_xheight(
int block_linesize,
116 int *min_height,
int *max_height) {
117 *min_height =
static_cast<int32_t
>(floor(block_linesize * textord_minxh));
118 if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
119 *max_height =
static_cast<int32_t
>(ceil(block_linesize * 3.0));
122 inline ROW_CATEGORY get_row_category(
const TO_ROW *row) {
123 if (row->
xheight <= 0)
return ROW_INVALID;
124 return (row->
ascrise > 0) ? ROW_ASCENDERS_FOUND :
125 (row->
descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
/**
 * Tests whether a value lies within a relative margin around a reference.
 *
 * The accepted interval is [num * (1 - margin), num * (1 + margin)], both
 * ends inclusive. Note that for a negative num and a positive margin the two
 * bounds swap, so the test can never succeed.
 *
 * @param test    value being checked
 * @param num     reference value the interval is centred on
 * @param margin  relative half-width of the interval (e.g. 0.1 for +/-10%)
 * @return true when test lies inside the interval, false otherwise
 */
inline bool within_error_margin(float test, float num, float margin) {
  return (test >= num * (1 - margin) && test <= num * (1 + margin));
}
132 void fill_heights(
TO_ROW *row,
float gradient,
int min_height,
133 int max_height,
STATS *heights,
STATS *floating_heights);
135 float make_single_row(
ICOORD page_tr,
bool allow_sub_blobs,
TO_BLOCK* block,
136 TO_BLOCK_LIST* blocks);
137 float make_rows(
ICOORD page_tr,
138 TO_BLOCK_LIST *port_blocks);
139 void make_initial_textrows(
ICOORD page_tr,
143 void fit_lms_line(
TO_ROW *row);
144 void compute_page_skew(TO_BLOCK_LIST *blocks,
147 void vigorous_noise_removal(
TO_BLOCK* block);
148 void cleanup_rows_making(
ICOORD page_tr,
154 void delete_non_dropout_rows(
161 bool find_best_dropout_row(
169 TBOX deskew_block_coords(
173 void compute_line_occupation(
181 void compute_occupation_threshold(
188 void compute_dropout_distances(
201 void adjust_row_limits(
204 void compute_row_stats(
208 float median_block_xheight(
213 int compute_xheight_from_modes(
214 STATS *heights,
STATS *floating_heights,
bool cap_only,
int min_height,
215 int max_height,
float *xheight,
float *ascrise);
217 int32_t compute_row_descdrop(
TO_ROW *row,
219 int xheight_blob_count,
221 int32_t compute_height_modes(
STATS *heights,
226 void correct_row_xheight(
TO_ROW *row,
230 void separate_underlines(
TO_BLOCK* block,
234 void pre_associate_blobs(
ICOORD page_tr,
238 void fit_parallel_rows(
TO_BLOCK* block,
243 void fit_parallel_lms(
float gradient,
245 void make_baseline_spline(
TO_ROW *row,
247 bool segment_baseline(
253 double *linear_spline_baseline (
259 void assign_blobs_to_rows(
268 OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it,
281 int row_spacing_order(
285 void mark_repeated_chars(
TO_ROW *row);
float ascrise
Definition: blobbox.h:672
Definition: blobbox.h:556
Definition: statistc.h:33
integer coordinate
Definition: points.h:32
float descdrop
Definition: blobbox.h:673
Definition: blobbox.h:705
float xheight
Definition: blobbox.h:670