19 #ifndef TESSERACT_WORDREC_WORDREC_H_ 20 #define TESSERACT_WORDREC_WORDREC_H_ 22 #ifdef DISABLED_LEGACY_ENGINE 24 #include "config_auto.h" 32 #include "genericvector.h" 47 namespace tesseract {
class LMPainPoints; }
48 namespace tesseract {
class TessdataManager; }
49 namespace tesseract {
struct BestChoiceBundle; }
63 class Wordrec :
public Classify {
67 BOOL_VAR_H(wordrec_debug_blamer,
false,
"Print blamer debug messages");
69 BOOL_VAR_H(wordrec_run_blamer,
false,
"Try to set the blame for errors");
76 void program_editup(
const char *textbase, TessdataManager *init_classifier,
77 TessdataManager *init_dict);
88 #else // DISABLED_LEGACY_ENGINE not defined 90 #include "associate.h" 93 #include "language_model.h" 115 : classified_row_(-1),
116 revisit_whole_column_(false),
117 column_classified_(false) {}
122 column_classified_ =
true;
128 classified_row_ = row;
134 revisit_whole_column_ =
true;
139 classified_row_ = -1;
140 revisit_whole_column_ =
false;
141 column_classified_ =
false;
147 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
151 return row == classified_row_ || column_classified_;
155 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_;
196 "Merge the fragments in the ratings matrix and delete them " 198 BOOL_VAR_H(wordrec_no_block, FALSE,
"Don't output block information");
199 BOOL_VAR_H(wordrec_enable_assoc, TRUE,
"Associator Enable");
201 "force associator to run regardless of what enable_assoc is." 202 "This is used for CJK where component grouping is necessary.");
203 double_VAR_H(wordrec_worst_state, 1,
"Worst segmentation state");
205 "Use information from fragments to guide chopping process");
206 INT_VAR_H(repair_unchopped_blobs, 1,
"Fix blobs that aren't chopped");
207 double_VAR_H(tessedit_certainty_threshold, -2.25,
"Good blob limit");
210 BOOL_VAR_H(chop_vertical_creep, 0,
"Vertical creep");
211 INT_VAR_H(chop_split_length, 10000,
"Split Length");
212 INT_VAR_H(chop_same_distance, 2,
"Same distance");
213 INT_VAR_H(chop_min_outline_points, 6,
"Min Number of Points on Outline");
214 INT_VAR_H(chop_seam_pile_size, 150,
"Max number of seams in seam_pile");
215 BOOL_VAR_H(chop_new_seam_pile, 1,
"Use new seam_pile");
216 INT_VAR_H(chop_inside_angle, -50,
"Min Inside Angle Bend");
217 INT_VAR_H(chop_min_outline_area, 2000,
"Min Outline Area");
218 double_VAR_H(chop_split_dist_knob, 0.5,
"Split length adjustment");
219 double_VAR_H(chop_overlap_knob, 0.9,
"Split overlap adjustment");
220 double_VAR_H(chop_center_knob, 0.15,
"Split center adjustment");
221 INT_VAR_H(chop_centered_maxwidth, 90,
"Width of (smaller) chopped blobs " 222 "above which we don't care that a chop is not near the center.");
223 double_VAR_H(chop_sharpness_knob, 0.06,
"Split sharpness adjustment");
224 double_VAR_H(chop_width_change_knob, 5.0,
"Width change adjustment");
226 double_VAR_H(chop_good_split, 50.0,
"Good split limit");
227 INT_VAR_H(chop_x_y_weight, 3,
"X / Y length weight");
228 INT_VAR_H(segment_adjust_debug, 0,
"Segmentation adjustment debug");
229 BOOL_VAR_H(assume_fixed_pitch_char_segment, FALSE,
230 "include fixed-pitch heuristics in char segmentation");
231 INT_VAR_H(wordrec_debug_level, 0,
"Debug level for wordrec");
233 "Max number of broken pieces to associate");
234 BOOL_VAR_H(wordrec_skip_no_truth_words,
false,
235 "Only run OCR for words that had truth recorded in BlamerBundle");
236 BOOL_VAR_H(wordrec_debug_blamer,
false,
"Print blamer debug messages");
237 BOOL_VAR_H(wordrec_run_blamer,
false,
"Try to set the blame for errors");
238 INT_VAR_H(segsearch_debug_level, 0,
"SegSearch debug level");
239 INT_VAR_H(segsearch_max_pain_points, 2000,
240 "Maximum number of pain points stored in the queue");
241 INT_VAR_H(segsearch_max_futile_classifications, 10,
242 "Maximum number of pain point classifications per word.");
244 "Maximum character width-to-height ratio");
246 "Save alternative paths found during chopping " 247 "and segmentation search");
265 const WERD_CHOICE_LIST &best_choices,
268 (this->*
fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
370 EDGEPT_CLIST *new_points);
379 WERD_RES *word_res,
int *blob_number);
382 bool split_next_to_fragment,
398 float rating_ceiling,
399 bool split_next_to_fragment);
405 PRIORITY priority,
SEAM **seam_result,
TBLOB *blob,
417 EDGEPT_CLIST *new_points,
434 const char* description,
452 int16_t num_frag_parts,
455 BLOB_CHOICE_LIST *choice_lists);
460 int16_t num_frag_parts,
461 BLOB_CHOICE_LIST *choice_lists,
472 BLOB_CHOICE_LIST *filtered_choices);
486 const WERD_CHOICE_LIST &best_choices,
492 return (language_model_->AcceptableChoiceFound() ||
493 num_futile_classifications >=
494 segsearch_max_futile_classifications);
523 float rating_cert_scale,
535 const char* pain_point_type,
558 #endif // DISABLED_LEGACY_ENGINE 560 #endif // TESSERACT_WORDREC_WORDREC_H_ Definition: genericheap.h:58
void set_pass2()
Definition: tface.cpp:99
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
Definition: wordclass.cpp:54
bool SegSearchDone(int num_futile_classifications)
Definition: wordrec.h:491
Definition: lm_pain_points.h:57
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:485
int SingleRow() const
Definition: wordrec.h:154
void SetColumnClassified()
Definition: wordrec.h:121
void Clear()
Definition: wordrec.h:138
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:73
void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
PRIORITY point_priority(EDGEPT *point)
Definition: chop.cpp:53
Definition: wordrec.h:192
ICOORD tail
Definition: wordrec.h:185
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:112
void DoSegSearch(WERD_RES *word_res)
Definition: segsearch.cpp:37
void try_point_pairs(EDGEPT *points[50], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:305
double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state")
PRIORITY pass2_ok_split
Definition: wordrec.h:477
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:249
void try_vertical_splits(EDGEPT *points[50], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:343
virtual ~Wordrec()=default
Definition: classify.h:103
int end_recog()
Definition: tface.cpp:60
void get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
Definition: pieces.cpp:280
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
Definition: chop.cpp:87
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:137
Definition: unicharset.h:146
bool WorkToDo() const
Definition: wordrec.h:146
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams)
Definition: chopper.cpp:266
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:224
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
Definition: outlines.cpp:45
Definition: baseapi.cpp:94
void SaveAltChoices(const LIST &best_choices, WERD_RES *word)
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM *> &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:55
BLOB_CHOICE_LIST * call_matcher(TBLOB *blob)
Definition: tface.cpp:139
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
Definition: segsearch.cpp:312
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped")
Wordrec()
Definition: wordrec.cpp:47
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
Definition: pieces.cpp:104
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:627
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:217
int select_blob_to_split(const GenericVector< BLOB_CHOICE *> &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:539
void cc_recog(WERD_RES *word)
Definition: tface.cpp:111
void set_pass1()
Definition: tface.cpp:87
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE *> &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition: chopper.cpp:328
int dict_word(const WERD_CHOICE &word)
Definition: tface.cpp:127
int classified_row_
Definition: wordrec.h:163
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:274
Definition: tessdatamanager.h:126
void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
Definition: pieces.cpp:137
void RevisitWholeColumn()
Definition: wordrec.h:133
ICOORD head
Definition: wordrec.h:184
void SetBlobClassified(int row)
Definition: wordrec.h:127
void new_max_point(EDGEPT *local_max, PointHeap *points)
Definition: chop.cpp:243
FRAGMENT()
Definition: wordrec.h:179
Definition: wordrec.h:112
EDGEPT * pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
Definition: chop.cpp:122
EDGEPT * headpt
Definition: wordrec.h:186
integer coordinate
Definition: points.h:32
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:43
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
Definition: tface.cpp:38
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
Definition: chopper.cpp:455
void new_min_point(EDGEPT *local_min, PointHeap *points)
Definition: chop.cpp:219
void program_editdown(int32_t elasped_time)
Definition: tface.cpp:73
Definition: pageres.h:169
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:476
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:181
bool is_inside_angle(EDGEPT *pt)
Definition: chop.cpp:77
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:392
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
Definition: segsearch.cpp:329
bool column_classified_
Definition: wordrec.h:171
Definition: wordrec.h:176
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE *> &blob_choices, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:372
Definition: oldlist.h:124
SegSearchPending()
Definition: wordrec.h:114
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:205
bool revisit_whole_column_
Definition: wordrec.h:167
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:160
BOOL_VAR_H(merge_fragments_in_matrix, TRUE, "Merge the fragments in the ratings matrix and delete them " "after merging")
UNICHARSET unicharset
Definition: ccutil.h:68
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams)
Definition: chopper.cpp:212
PRIORITY grade_sharpness(SPLIT *split)
Definition: gradechop.cpp:74
void merge_fragments(MATRIX *ratings, int16_t num_blobs)
Definition: pieces.cpp:312
bool IsRowJustClassified(int row) const
Definition: wordrec.h:150
GenericVector< int > blame_reasons_
Definition: wordrec.h:483
void add_point_to_list(PointHeap *point_heap, EDGEPT *point)
Definition: chop.cpp:63
EDGEPT * tailpt
Definition: wordrec.h:187
PRIORITY grade_split_length(SPLIT *split)
Definition: gradechop.cpp:51
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:264
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:272