|
| Tesseract () |
|
| ~Tesseract () |
|
Dict & | getDict () override |
|
void | Clear () |
|
void | ResetAdaptiveClassifier () |
|
void | ResetDocumentDictionary () |
|
void | SetEquationDetect (EquationDetect *detector) |
|
const FCOORD & | reskew () const |
|
Pix ** | mutable_pix_binary () |
|
Pix * | pix_binary () const |
|
Pix * | pix_grey () const |
|
void | set_pix_grey (Pix *grey_pix) |
|
Pix * | pix_original () const |
|
void | set_pix_original (Pix *original_pix) |
|
Pix * | BestPix () const |
|
void | set_pix_thresholds (Pix *thresholds) |
|
int | source_resolution () const |
|
void | set_source_resolution (int ppi) |
|
int | ImageWidth () const |
|
int | ImageHeight () const |
|
Pix * | scaled_color () const |
|
int | scaled_factor () const |
|
void | SetScaledColor (int factor, Pix *color) |
|
const Textord & | textord () const |
|
Textord * | mutable_textord () |
|
bool | right_to_left () const |
|
int | num_sub_langs () const |
|
Tesseract * | get_sub_lang (int index) const |
|
bool | AnyTessLang () const |
|
bool | AnyLSTMLang () const |
|
void | SetBlackAndWhitelist () |
|
void | PrepareForPageseg () |
|
void | PrepareForTessOCR (BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr) |
|
int | SegmentPage (const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr) |
|
void | SetupWordScripts (BLOCK_LIST *blocks) |
|
int | AutoPageSeg (PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) |
|
ColumnFinder * | SetupPageSegAndDetectOrientation (PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix) |
|
void | PrerecAllWordsPar (const GenericVector< WordData > &words) |
|
void | TrainLineRecognizer (const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list) |
|
void | TrainFromBoxes (const GenericVector< TBOX > &boxes, const GenericVector< STRING > &texts, BLOCK_LIST *block_list, DocumentData *training_data) |
|
ImageData * | GetLineData (const TBOX &line_box, const GenericVector< TBOX > &boxes, const GenericVector< STRING > &texts, int start_box, int end_box, const BLOCK &block) |
|
ImageData * | GetRectImage (const TBOX &box, const BLOCK &block, int padding, TBOX *revised_box) const |
|
void | LSTMRecognizeWord (const BLOCK &block, ROW *row, WERD_RES *word, PointerVector< WERD_RES > *words) |
|
void | SearchWords (PointerVector< WERD_RES > *words) |
|
bool | ProcessTargetWord (const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass) |
|
void | SetupAllWordsPassN (int pass_n, const TBOX *target_word_box, const char *word_config, PAGE_RES *page_res, GenericVector< WordData > *words) |
|
void | SetupWordPassN (int pass_n, WordData *word) |
|
bool | RecogAllWordsPassN (int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT *pr_it, GenericVector< WordData > *words) |
|
bool | recog_all_words (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses) |
|
void | rejection_passes (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config) |
|
void | bigram_correction_pass (PAGE_RES *page_res) |
|
void | blamer_pass (PAGE_RES *page_res) |
|
void | script_pos_pass (PAGE_RES *page_res) |
|
int | RetryWithLanguage (const WordData &word_data, WordRecognizer recognizer, bool debug, WERD_RES **in_word, PointerVector< WERD_RES > *best_words) |
|
bool | ReassignDiacritics (int pass, PAGE_RES_IT *pr_it, bool *make_next_word_fuzzy) |
|
void | AssignDiacriticsToOverlappingBlobs (const GenericVector< C_OUTLINE *> &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< bool > *overlapped_any_blob, GenericVector< C_BLOB *> *target_blobs) |
|
void | AssignDiacriticsToNewBlobs (const GenericVector< C_OUTLINE *> &outlines, int pass, WERD *real_word, PAGE_RES_IT *pr_it, GenericVector< bool > *word_wanted, GenericVector< C_BLOB *> *target_blobs) |
|
bool | SelectGoodDiacriticOutlines (int pass, float certainty_threshold, PAGE_RES_IT *pr_it, C_BLOB *blob, const GenericVector< C_OUTLINE *> &outlines, int num_outlines, GenericVector< bool > *ok_outlines) |
|
float | ClassifyBlobPlusOutlines (const GenericVector< bool > &ok_outlines, const GenericVector< C_OUTLINE *> &outlines, int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str) |
|
float | ClassifyBlobAsWord (int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str, float *c2) |
|
void | classify_word_and_language (int pass_n, PAGE_RES_IT *pr_it, WordData *word_data) |
|
void | classify_word_pass1 (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) |
|
void | recog_pseudo_word (PAGE_RES *page_res, TBOX &selection_box) |
|
void | fix_rep_char (PAGE_RES_IT *page_res_it) |
|
ACCEPTABLE_WERD_TYPE | acceptable_word_string (const UNICHARSET &char_set, const char *s, const char *lengths) |
|
void | match_word_pass_n (int pass_n, WERD_RES *word, ROW *row, BLOCK *block) |
|
void | classify_word_pass2 (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) |
|
void | ReportXhtFixResult (bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word) |
|
bool | RunOldFixXht (WERD_RES *word, BLOCK *block, ROW *row) |
|
bool | TrainedXheightFix (WERD_RES *word, BLOCK *block, ROW *row) |
|
bool | TestNewNormalization (int original_misfits, float baseline_shift, float new_x_ht, WERD_RES *word, BLOCK *block, ROW *row) |
|
bool | recog_interactive (PAGE_RES_IT *pr_it) |
|
void | set_word_fonts (WERD_RES *word) |
|
void | font_recognition_pass (PAGE_RES *page_res) |
|
void | dictionary_correction_pass (PAGE_RES *page_res) |
|
bool | check_debug_pt (WERD_RES *word, int location) |
|
bool | SubAndSuperscriptFix (WERD_RES *word_res) |
|
void | GetSubAndSuperscriptCandidates (const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold) |
|
WERD_RES * | TrySuperscriptSplits (int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing) |
|
bool | BelievableSuperscript (bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const |
|
void | output_pass (PAGE_RES_IT &page_res_it, const TBOX *target_word_box) |
|
void | write_results (PAGE_RES_IT &page_res_it, char newline_type, bool force_eol) |
|
void | set_unlv_suspects (WERD_RES *word) |
|
UNICHAR_ID | get_rep_char (WERD_RES *word) |
|
bool | acceptable_number_string (const char *s, const char *lengths) |
|
int16_t | count_alphanums (const WERD_CHOICE &word) |
|
int16_t | count_alphas (const WERD_CHOICE &word) |
|
void | read_config_file (const char *filename, SetParamConstraint constraint) |
|
int | init_tesseract (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr) |
|
int | init_tesseract (const char *datapath, const char *language, OcrEngineMode oem) |
|
int | init_tesseract_internal (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr) |
|
void | SetupUniversalFontIds () |
|
int | init_tesseract_lm (const char *arg0, const char *textbase, const char *language, TessdataManager *mgr) |
|
void | recognize_page (STRING &image_name) |
|
void | end_tesseract () |
|
bool | init_tesseract_lang_data (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr) |
|
void | ParseLanguageString (const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load) |
|
SVMenuNode * | build_menu_new () |
|
void | pgeditor_main (int width, int height, PAGE_RES *page_res) |
|
void | process_image_event (const SVEvent &event) |
|
bool | process_cmd_win_event (int32_t cmd_event, char *new_value) |
|
void | debug_word (PAGE_RES *page_res, const TBOX &selection_box) |
|
void | do_re_display (bool(tesseract::Tesseract::*word_painter)(PAGE_RES_IT *pr_it)) |
|
bool | word_display (PAGE_RES_IT *pr_it) |
|
bool | word_bln_display (PAGE_RES_IT *pr_it) |
|
bool | word_blank_and_set_display (PAGE_RES_IT *pr_its) |
|
bool | word_set_display (PAGE_RES_IT *pr_it) |
|
bool | word_dumper (PAGE_RES_IT *pr_it) |
|
void | blob_feature_display (PAGE_RES *page_res, const TBOX &selection_box) |
|
void | make_reject_map (WERD_RES *word, ROW *row, int16_t pass) |
|
bool | one_ell_conflict (WERD_RES *word_res, bool update_map) |
|
int16_t | first_alphanum_index (const char *word, const char *word_lengths) |
|
int16_t | first_alphanum_offset (const char *word, const char *word_lengths) |
|
int16_t | alpha_count (const char *word, const char *word_lengths) |
|
bool | word_contains_non_1_digit (const char *word, const char *word_lengths) |
|
void | dont_allow_1Il (WERD_RES *word) |
|
int16_t | count_alphanums (WERD_RES *word) |
|
void | flip_0O (WERD_RES *word) |
|
bool | non_0_digit (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) |
|
bool | non_O_upper (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) |
|
bool | repeated_nonalphanum_wd (WERD_RES *word, ROW *row) |
|
void | nn_match_word (WERD_RES *word, ROW *row) |
|
void | nn_recover_rejects (WERD_RES *word, ROW *row) |
|
void | set_done (WERD_RES *word, int16_t pass) |
|
int16_t | safe_dict_word (const WERD_RES *werd_res) |
|
void | flip_hyphens (WERD_RES *word) |
|
void | reject_I_1_L (WERD_RES *word) |
|
void | reject_edge_blobs (WERD_RES *word) |
|
void | reject_mostly_rejects (WERD_RES *word) |
|
bool | word_adaptable (WERD_RES *word, uint16_t mode) |
|
void | recog_word_recursive (WERD_RES *word) |
|
void | recog_word (WERD_RES *word) |
|
void | split_and_recog_word (WERD_RES *word) |
|
void | split_word (WERD_RES *word, int split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const |
|
void | join_words (WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const |
|
void | match_current_words (WERD_RES_LIST &words, ROW *row, BLOCK *block) |
|
int16_t | fp_eval_word_spacing (WERD_RES_LIST &word_res_list) |
|
void | dump_words (WERD_RES_LIST &perm, int16_t score, int16_t mode, bool improved) |
|
bool | fixspace_thinks_word_done (WERD_RES *word) |
|
GARBAGE_LEVEL | garbage_word (WERD_RES *word, BOOL8 ok_dict_word) |
|
bool | potential_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level, bool ok_dict_word) |
|
void | tilde_crunch (PAGE_RES_IT &page_res_it) |
|
void | unrej_good_quality_words (PAGE_RES_IT &page_res_it) |
|
void | doc_and_block_rejection (PAGE_RES_IT &page_res_it, bool good_quality_doc) |
|
void | quality_based_rejection (PAGE_RES_IT &page_res_it, bool good_quality_doc) |
|
void | convert_bad_unlv_chs (WERD_RES *word_res) |
|
void | tilde_delete (PAGE_RES_IT &page_res_it) |
|
int16_t | word_blob_quality (WERD_RES *word, ROW *row) |
|
void | word_char_quality (WERD_RES *word, ROW *row, int16_t *match_count, int16_t *accepted_match_count) |
|
void | unrej_good_chs (WERD_RES *word, ROW *row) |
|
int16_t | count_outline_errs (char c, int16_t outline_count) |
|
int16_t | word_outline_errs (WERD_RES *word) |
|
bool | terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level) |
|
CRUNCH_MODE | word_deletable (WERD_RES *word, int16_t &delete_mode) |
|
int16_t | failure_count (WERD_RES *word) |
|
bool | noise_outlines (TWERD *word) |
|
void | tess_segment_pass_n (int pass_n, WERD_RES *word) |
|
PAGE_RES * | ApplyBoxes (const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list) |
|
void | PreenXHeights (BLOCK_LIST *block_list) |
|
PAGE_RES * | SetupApplyBoxes (const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list) |
|
void | MaximallyChopWord (const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res) |
|
bool | ResegmentCharBox (PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX *next_box, const char *correct_text) |
|
bool | ResegmentWordBox (BLOCK_LIST *block_list, const TBOX &box, const TBOX *next_box, const char *correct_text) |
|
void | ReSegmentByClassification (PAGE_RES *page_res) |
|
bool | ConvertStringToUnichars (const char *utf8, GenericVector< UNICHAR_ID > *class_ids) |
|
bool | FindSegmentation (const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res) |
|
void | SearchForText (const GenericVector< BLOB_CHOICE_LIST *> *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation) |
|
void | TidyUp (PAGE_RES *page_res) |
|
void | ReportFailedBox (int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg) |
|
void | CorrectClassifyWords (PAGE_RES *page_res) |
|
void | ApplyBoxTraining (const STRING &fontname, PAGE_RES *page_res) |
|
int | CountMisfitTops (WERD_RES *word_res) |
|
float | ComputeCompatibleXheight (WERD_RES *word_res, float *baseline_shift) |
|
| BOOL_VAR_H (tessedit_resegment_from_boxes, false, "Take segmentation and labeling from box file") |
|
| BOOL_VAR_H (tessedit_resegment_from_line_boxes, false, "Conversion of word/line box file to char box file") |
|
| BOOL_VAR_H (tessedit_train_from_boxes, false, "Generate training data from boxed chars") |
|
| BOOL_VAR_H (tessedit_make_boxes_from_boxes, false, "Generate more boxes from boxed chars") |
|
| BOOL_VAR_H (tessedit_train_line_recognizer, false, "Break input into lines and remap boxes if present") |
|
| BOOL_VAR_H (tessedit_dump_pageseg_images, false, "Dump intermediate images made during page segmentation") |
|
| INT_VAR_H (tessedit_pageseg_mode, PSM_SINGLE_BLOCK, "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," " 5=line, 6=word, 7=char" " (Values from PageSegMode enum in publictypes.h)") |
|
| INT_VAR_H (tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT, "Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults" " to loading and running the most accurate available.") |
|
| STRING_VAR_H (tessedit_char_blacklist, "", "Blacklist of chars not to recognize") |
|
| STRING_VAR_H (tessedit_char_whitelist, "", "Whitelist of chars to recognize") |
|
| STRING_VAR_H (tessedit_char_unblacklist, "", "List of chars to override tessedit_char_blacklist") |
|
| BOOL_VAR_H (tessedit_ambigs_training, false, "Perform training for ambiguities") |
|
| INT_VAR_H (pageseg_devanagari_split_strategy, tesseract::ShiroRekhaSplitter::NO_SPLIT, "Whether to use the top-line splitting process for Devanagari " "documents while performing page-segmentation.") |
|
| INT_VAR_H (ocr_devanagari_split_strategy, tesseract::ShiroRekhaSplitter::NO_SPLIT, "Whether to use the top-line splitting process for Devanagari " "documents while performing ocr.") |
|
| STRING_VAR_H (tessedit_write_params_to_file, "", "Write all parameters to the given file.") |
|
| BOOL_VAR_H (tessedit_adaption_debug, false, "Generate and print debug information for adaption") |
|
| INT_VAR_H (bidi_debug, 0, "Debug level for BiDi") |
|
| INT_VAR_H (applybox_debug, 1, "Debug level") |
|
| INT_VAR_H (applybox_page, 0, "Page number to apply boxes from") |
|
| STRING_VAR_H (applybox_exposure_pattern, ".exp", "Exposure value follows this pattern in the image" " filename. The name of the image files are expected" " to be in the form [lang].[fontname].exp[num].tif") |
|
| BOOL_VAR_H (applybox_learn_chars_and_char_frags_mode, false, "Learn both character fragments (as is done in the" " special low exposure mode) as well as unfragmented" " characters.") |
|
| BOOL_VAR_H (applybox_learn_ngrams_mode, false, "Each bounding box is assumed to contain ngrams. Only" " learn the ngrams whose outlines overlap horizontally.") |
|
| BOOL_VAR_H (tessedit_display_outwords, false, "Draw output words") |
|
| BOOL_VAR_H (tessedit_dump_choices, false, "Dump char choices") |
|
| BOOL_VAR_H (tessedit_timing_debug, false, "Print timing stats") |
|
| BOOL_VAR_H (tessedit_fix_fuzzy_spaces, true, "Try to improve fuzzy spaces") |
|
| BOOL_VAR_H (tessedit_unrej_any_wd, false, "Don't bother with word plausibility") |
|
| BOOL_VAR_H (tessedit_fix_hyphens, true, "Crunch double hyphens?") |
|
| BOOL_VAR_H (tessedit_redo_xheight, true, "Check/Correct x-height") |
|
| BOOL_VAR_H (tessedit_enable_doc_dict, true, "Add words to the document dictionary") |
|
| BOOL_VAR_H (tessedit_debug_fonts, false, "Output font info per char") |
|
| BOOL_VAR_H (tessedit_debug_block_rejection, false, "Block and Row stats") |
|
| BOOL_VAR_H (tessedit_enable_bigram_correction, true, "Enable correction based on the word bigram dictionary.") |
|
| BOOL_VAR_H (tessedit_enable_dict_correction, false, "Enable single word correction based on the dictionary.") |
|
| INT_VAR_H (tessedit_bigram_debug, 0, "Amount of debug output for bigram " "correction.") |
|
| BOOL_VAR_H (enable_noise_removal, true, "Remove and conditionally reassign small outlines when they" " confuse layout analysis, determining diacritics vs noise") |
|
| INT_VAR_H (debug_noise_removal, 0, "Debug reassignment of small outlines") |
|
| double_VAR_H (noise_cert_basechar, -8.0, "Hingepoint for base char certainty") |
|
| double_VAR_H (noise_cert_disjoint, -2.5, "Hingepoint for disjoint certainty") |
|
| double_VAR_H (noise_cert_punc, -2.5, "Threshold for new punc char certainty") |
|
| double_VAR_H (noise_cert_factor, 0.375, "Scaling on certainty diff from Hingepoint") |
|
| INT_VAR_H (noise_maxperblob, 8, "Max diacritics to apply to a blob") |
|
| INT_VAR_H (noise_maxperword, 16, "Max diacritics to apply to a word") |
|
| INT_VAR_H (debug_x_ht_level, 0, "Reestimate debug") |
|
| BOOL_VAR_H (debug_acceptable_wds, false, "Dump word pass/fail chk") |
|
| STRING_VAR_H (chs_leading_punct, "('`\"", "Leading punctuation") |
|
| STRING_VAR_H (chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation") |
|
| STRING_VAR_H (chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation") |
|
| double_VAR_H (quality_rej_pc, 0.08, "good_quality_doc lte rejection limit") |
|
| double_VAR_H (quality_blob_pc, 0.0, "good_quality_doc gte good blobs limit") |
|
| double_VAR_H (quality_outline_pc, 1.0, "good_quality_doc lte outline error limit") |
|
| double_VAR_H (quality_char_pc, 0.95, "good_quality_doc gte good char limit") |
|
| INT_VAR_H (quality_min_initial_alphas_reqd, 2, "alphas in a good word") |
|
| INT_VAR_H (tessedit_tess_adaption_mode, 0x27, "Adaptation decision algorithm for tess") |
|
| BOOL_VAR_H (tessedit_minimal_rej_pass1, false, "Do minimal rejection on pass 1 output") |
|
| BOOL_VAR_H (tessedit_test_adaption, false, "Test adaption criteria") |
|
| BOOL_VAR_H (tessedit_matcher_log, false, "Log matcher activity") |
|
| INT_VAR_H (tessedit_test_adaption_mode, 3, "Adaptation decision algorithm for tess") |
|
| BOOL_VAR_H (test_pt, false, "Test for point") |
|
| double_VAR_H (test_pt_x, 99999.99, "xcoord") |
|
| double_VAR_H (test_pt_y, 99999.99, "ycoord") |
|
| INT_VAR_H (multilang_debug_level, 0, "Print multilang debug info.") |
|
| INT_VAR_H (paragraph_debug_level, 0, "Print paragraph debug info.") |
|
| BOOL_VAR_H (paragraph_text_based, true, "Run paragraph detection on the post-text-recognition " "(more accurate)") |
|
| BOOL_VAR_H (lstm_use_matrix, 1, "Use ratings matrix/beam searct with lstm") |
|
| STRING_VAR_H (outlines_odd, "%| ", "Non standard number of outlines") |
|
| STRING_VAR_H (outlines_2, "ij!?%\":;", "Non standard number of outlines") |
|
| BOOL_VAR_H (docqual_excuse_outline_errs, false, "Allow outline errs in unrejection?") |
|
| BOOL_VAR_H (tessedit_good_quality_unrej, true, "Reduce rejection on good docs") |
|
| BOOL_VAR_H (tessedit_use_reject_spaces, true, "Reject spaces?") |
|
| double_VAR_H (tessedit_reject_doc_percent, 65.00, "%rej allowed before rej whole doc") |
|
| double_VAR_H (tessedit_reject_block_percent, 45.00, "%rej allowed before rej whole block") |
|
| double_VAR_H (tessedit_reject_row_percent, 40.00, "%rej allowed before rej whole row") |
|
| double_VAR_H (tessedit_whole_wd_rej_row_percent, 70.00, "Number of row rejects in whole word rejects" "which prevents whole row rejection") |
|
| BOOL_VAR_H (tessedit_preserve_blk_rej_perfect_wds, true, "Only rej partially rejected words in block rejection") |
|
| BOOL_VAR_H (tessedit_preserve_row_rej_perfect_wds, true, "Only rej partially rejected words in row rejection") |
|
| BOOL_VAR_H (tessedit_dont_blkrej_good_wds, false, "Use word segmentation quality metric") |
|
| BOOL_VAR_H (tessedit_dont_rowrej_good_wds, false, "Use word segmentation quality metric") |
|
| INT_VAR_H (tessedit_preserve_min_wd_len, 2, "Only preserve wds longer than this") |
|
| BOOL_VAR_H (tessedit_row_rej_good_docs, true, "Apply row rejection to good docs") |
|
| double_VAR_H (tessedit_good_doc_still_rowrej_wd, 1.1, "rej good doc wd if more than this fraction rejected") |
|
| BOOL_VAR_H (tessedit_reject_bad_qual_wds, true, "Reject all bad quality wds") |
|
| BOOL_VAR_H (tessedit_debug_doc_rejection, false, "Page stats") |
|
| BOOL_VAR_H (tessedit_debug_quality_metrics, false, "Output data to debug file") |
|
| BOOL_VAR_H (bland_unrej, false, "unrej potential with no checks") |
|
| double_VAR_H (quality_rowrej_pc, 1.1, "good_quality_doc gte good char limit") |
|
| BOOL_VAR_H (unlv_tilde_crunching, false, "Mark v.bad words for tilde crunch") |
|
| BOOL_VAR_H (hocr_font_info, false, "Add font info to hocr output") |
|
| BOOL_VAR_H (crunch_early_merge_tess_fails, true, "Before word crunch?") |
|
| BOOL_VAR_H (crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?") |
|
| double_VAR_H (crunch_terrible_rating, 80.0, "crunch rating lt this") |
|
| BOOL_VAR_H (crunch_terrible_garbage, true, "As it says") |
|
| double_VAR_H (crunch_poor_garbage_cert, -9.0, "crunch garbage cert lt this") |
|
| double_VAR_H (crunch_poor_garbage_rate, 60, "crunch garbage rating lt this") |
|
| double_VAR_H (crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this") |
|
| double_VAR_H (crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this") |
|
| BOOL_VAR_H (crunch_pot_garbage, true, "POTENTIAL crunch garbage") |
|
| double_VAR_H (crunch_del_rating, 60, "POTENTIAL crunch rating lt this") |
|
| double_VAR_H (crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this") |
|
| double_VAR_H (crunch_del_min_ht, 0.7, "Del if word ht lt xht x this") |
|
| double_VAR_H (crunch_del_max_ht, 3.0, "Del if word ht gt xht x this") |
|
| double_VAR_H (crunch_del_min_width, 3.0, "Del if word width lt xht x this") |
|
| double_VAR_H (crunch_del_high_word, 1.5, "Del if word gt xht x this above bl") |
|
| double_VAR_H (crunch_del_low_word, 0.5, "Del if word gt xht x this below bl") |
|
| double_VAR_H (crunch_small_outlines_size, 0.6, "Small if lt xht x this") |
|
| INT_VAR_H (crunch_rating_max, 10, "For adj length in rating per ch") |
|
| INT_VAR_H (crunch_pot_indicators, 1, "How many potential indicators needed") |
|
| BOOL_VAR_H (crunch_leave_ok_strings, true, "Don't touch sensible strings") |
|
| BOOL_VAR_H (crunch_accept_ok, true, "Use acceptability in okstring") |
|
| BOOL_VAR_H (crunch_leave_accept_strings, false, "Don't pot crunch sensible strings") |
|
| BOOL_VAR_H (crunch_include_numerals, false, "Fiddle alpha figures") |
|
| INT_VAR_H (crunch_leave_lc_strings, 4, "Don't crunch words with long lower case strings") |
|
| INT_VAR_H (crunch_leave_uc_strings, 4, "Don't crunch words with long lower case strings") |
|
| INT_VAR_H (crunch_long_repetitions, 3, "Crunch words with long repetitions") |
|
| INT_VAR_H (crunch_debug, 0, "As it says") |
|
| INT_VAR_H (fixsp_non_noise_limit, 1, "How many non-noise blbs either side?") |
|
| double_VAR_H (fixsp_small_outlines_size, 0.28, "Small if lt xht x this") |
|
| BOOL_VAR_H (tessedit_prefer_joined_punct, false, "Reward punctuation joins") |
|
| INT_VAR_H (fixsp_done_mode, 1, "What constitues done for spacing") |
|
| INT_VAR_H (debug_fix_space_level, 0, "Contextual fixspace debug") |
|
| STRING_VAR_H (numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers") |
|
| INT_VAR_H (x_ht_acceptance_tolerance, 8, "Max allowed deviation of blob top outside of font data") |
|
| INT_VAR_H (x_ht_min_change, 8, "Min change in xht before actually trying it") |
|
| INT_VAR_H (superscript_debug, 0, "Debug level for sub & superscript fixer") |
|
| double_VAR_H (superscript_worse_certainty, 2.0, "How many times worse " "certainty does a superscript position glyph need to be for us " "to try classifying it as a char with a different baseline?") |
|
| double_VAR_H (superscript_bettered_certainty, 0.97, "What reduction in " "badness do we think sufficient to choose a superscript over " "what we'd thought. For example, a value of 0.6 means we want " "to reduce badness of certainty by 40%") |
|
| double_VAR_H (superscript_scaledown_ratio, 0.4, "A superscript scaled down more than this is unbelievably " "small. For example, 0.3 means we expect the font size to " "be no smaller than 30% of the text line font size.") |
|
| double_VAR_H (subscript_max_y_top, 0.5, "Maximum top of a character measured as a multiple of x-height " "above the baseline for us to reconsider whether it's a " "subscript.") |
|
| double_VAR_H (superscript_min_y_bottom, 0.3, "Minimum bottom of a character measured as a multiple of " "x-height above the baseline for us to reconsider whether it's " "a superscript.") |
|
| BOOL_VAR_H (tessedit_write_block_separators, false, "Write block separators in output") |
|
| BOOL_VAR_H (tessedit_write_rep_codes, false, "Write repetition char code") |
|
| BOOL_VAR_H (tessedit_write_unlv, false, "Write .unlv output file") |
|
| BOOL_VAR_H (tessedit_create_txt, false, "Write .txt output file") |
|
| BOOL_VAR_H (tessedit_create_hocr, false, "Write .html hOCR output file") |
|
| BOOL_VAR_H (tessedit_create_tsv, false, "Write .tsv output file") |
|
| BOOL_VAR_H (tessedit_create_pdf, false, "Write .pdf output file") |
|
| BOOL_VAR_H (textonly_pdf, false, "Create PDF with only one invisible text layer") |
|
| INT_VAR_H (jpg_quality, 85, "Set JPEG quality level") |
|
| INT_VAR_H (user_defined_dpi, 0, "Specify DPI for input image") |
|
| INT_VAR_H (min_characters_to_try, 50, "Specify minimum characters to try during OSD") |
|
| STRING_VAR_H (unrecognised_char, "|", "Output char for unidentified blobs") |
|
| INT_VAR_H (suspect_level, 99, "Suspect marker level") |
|
| INT_VAR_H (suspect_space_level, 100, "Min suspect level for rejecting spaces") |
|
| INT_VAR_H (suspect_short_words, 2, "Don't Suspect dict wds longer than this") |
|
| BOOL_VAR_H (suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected") |
|
| double_VAR_H (suspect_rating_per_ch, 999.9, "Don't touch bad rating limit") |
|
| double_VAR_H (suspect_accept_rating, -999.9, "Accept good rating limit") |
|
| BOOL_VAR_H (tessedit_minimal_rejection, false, "Only reject tess failures") |
|
| BOOL_VAR_H (tessedit_zero_rejection, false, "Don't reject ANYTHING") |
|
| BOOL_VAR_H (tessedit_word_for_word, false, "Make output have exactly one word per WERD") |
|
| BOOL_VAR_H (tessedit_zero_kelvin_rejection, false, "Don't reject ANYTHING AT ALL") |
|
| BOOL_VAR_H (tessedit_consistent_reps, true, "Force all rep chars the same") |
|
| INT_VAR_H (tessedit_reject_mode, 0, "Rejection algorithm") |
|
| BOOL_VAR_H (tessedit_rejection_debug, false, "Adaption debug") |
|
| BOOL_VAR_H (tessedit_flip_0O, true, "Contextual 0O O0 flips") |
|
| double_VAR_H (tessedit_lower_flip_hyphen, 1.5, "Aspect ratio dot/hyphen test") |
|
| double_VAR_H (tessedit_upper_flip_hyphen, 1.8, "Aspect ratio dot/hyphen test") |
|
| BOOL_VAR_H (rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector") |
|
| BOOL_VAR_H (rej_1Il_use_dict_word, false, "Use dictword test") |
|
| BOOL_VAR_H (rej_1Il_trust_permuter_type, true, "Don't double check") |
|
| BOOL_VAR_H (rej_use_tess_accepted, true, "Individual rejection control") |
|
| BOOL_VAR_H (rej_use_tess_blanks, true, "Individual rejection control") |
|
| BOOL_VAR_H (rej_use_good_perm, true, "Individual rejection control") |
|
| BOOL_VAR_H (rej_use_sensible_wd, false, "Extend permuter check") |
|
| BOOL_VAR_H (rej_alphas_in_number_perm, false, "Extend permuter check") |
|
| double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract") |
|
| INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit") |
|
| STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075", "Allow NN to unrej") |
|
| STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set") |
|
| INT_VAR_H (min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this") |
|
| BOOL_VAR_H (tessedit_create_boxfile, false, "Output text with boxes") |
|
| INT_VAR_H (tessedit_page_number, -1, "-1 -> All pages, else specific page to process") |
|
| BOOL_VAR_H (tessedit_write_images, false, "Capture the image from the IPE") |
|
| BOOL_VAR_H (interactive_display_mode, false, "Run interactively?") |
|
| STRING_VAR_H (file_type, ".tif", "Filename extension") |
|
| BOOL_VAR_H (tessedit_override_permuter, true, "According to dict_word") |
|
| STRING_VAR_H (tessedit_load_sublangs, "", "List of languages to load with this one") |
|
| BOOL_VAR_H (tessedit_use_primary_params_model, false, "In multilingual mode use params model of the primary language") |
|
| double_VAR_H (min_orientation_margin, 7.0, "Min acceptable orientation margin") |
|
| BOOL_VAR_H (textord_tabfind_show_vlines, false, "Debug line finding") |
|
| BOOL_VAR_H (textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model") |
|
| BOOL_VAR_H (poly_allow_detailed_fx, false, "Allow feature extractors to see the original outline") |
|
| BOOL_VAR_H (tessedit_init_config_only, false, "Only initialize with the config file. Useful if the instance is " "not going to be used for OCR but say only for layout analysis.") |
|
| BOOL_VAR_H (textord_equation_detect, false, "Turn on equation detector") |
|
| BOOL_VAR_H (textord_tabfind_vertical_text, true, "Enable vertical detection") |
|
| BOOL_VAR_H (textord_tabfind_force_vertical_text, false, "Force using vertical text page mode") |
|
| double_VAR_H (textord_tabfind_vertical_text_ratio, 0.5, "Fraction of textlines deemed vertical to use vertical page " "mode") |
|
| double_VAR_H (textord_tabfind_aligned_gap_fraction, 0.75, "Fraction of height used as a minimum gap for aligned blobs.") |
|
| INT_VAR_H (tessedit_parallelize, 0, "Run in parallel where possible") |
|
| BOOL_VAR_H (preserve_interword_spaces, false, "Preserve multiple interword spaces") |
|
| STRING_VAR_H (page_separator, "\f", "Page separator(default is form feed control character)") |
|
| INT_VAR_H (lstm_choice_mode, 0, "Allows to include alternative symbols choices in the hOCR output. " "Valid input values are 0, 1 and 2. 0 is the default value. " "With 1 the alternative symbol choices per timestep are included. " "With 2 the alternative symbol choices are accumulated per character.") |
|
FILE * | init_recog_training (const STRING &fname) |
|
void | recog_training_segmented (const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) |
|
void | ambigs_classify_and_output (const char *label, PAGE_RES_IT *pr_it, FILE *output_file) |
|
|
The basic measure is the number of characters in contextually confirmed words (i.e. words that are done). If all words are contextually confirmed, the evaluation is deemed perfect.
Some fiddles are done to handle "1"s as these are VERY frequent causes of fuzzy spaces. The problem with the basic measure is that "561 63" would score the same as "56163", though given our knowledge that the space is fuzzy, and that there is a "1" next to the fuzzy space, we need to ensure that "56163" is preferred.
The solution is to NOT COUNT the score of any word which has a digit at one end and a "1Il" as the character the other side of the space.
Conversely, any character next to a "1" within a word is counted as a positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of the "1" joined). "56163" would score 7 - all chars in a numeric word + 2 sides of a "1" joined.
The joined 1 rule is applied to any word REGARDLESS of contextual confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually confirmed; the only score is from the joined 1). "PS7a713/7a" scores 2.
|
bool | digit_or_numeric_punct (WERD_RES *word, int char_position) |
|
int16_t | eval_word_spacing (WERD_RES_LIST &word_res_list) |
|
|
Test the current word to see if it can be split by deleting noise blobs. If so, do the business. Return with the iterator pointing to the same place if the word is unchanged, or the last of the replacement words.
|
void | fix_noisy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) |
|
void | fix_sp_fp_word (WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) |
|
int16_t | worst_noise_blob (WERD_RES *word_res, float *worst_noise_score) |
|
float | blob_noise_score (TBLOB *blob) |
|
void | break_noisiest_blob_word (WERD_RES_LIST &words) |
|
|
Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces then replace the sublist in the ROW_RES.
- Parameters
-
| monitor | progress monitor |
| word_count | count of words in doc |
[out] | page_res | |
|
void | fix_fuzzy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) |
|
void | fix_fuzzy_spaces (ETEXT_DESC *monitor, int32_t word_count, PAGE_RES *page_res) |
|
|
Walk the current block list applying the specified word processor function to each word that overlaps the selection_box.
|
void | process_selected_words (PAGE_RES *page_res, TBOX &selection_box, bool(tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it)) |
|
|
Add the given word to the document dictionary
|
void | tess_add_doc_word (WERD_CHOICE *word_choice) |
|
|
- Returns
- true if the word is regarded as "good enough".
- Parameters
-
word_choice | after context |
raw_choice | before context |
|
bool | tess_acceptable_word (WERD_RES *word) |
|
| BOOL_VAR_H (merge_fragments_in_matrix, TRUE, "Merge the fragments in the ratings matrix and delete them " "after merging") |
|
| BOOL_VAR_H (wordrec_no_block, FALSE, "Don't output block information") |
|
| BOOL_VAR_H (wordrec_enable_assoc, TRUE, "Associator Enable") |
|
| BOOL_VAR_H (force_word_assoc, FALSE, "force associator to run regardless of what enable_assoc is." "This is used for CJK where component grouping is necessary.") |
|
| double_VAR_H (wordrec_worst_state, 1, "Worst segmentation state") |
|
| BOOL_VAR_H (fragments_guide_chopper, FALSE, "Use information from fragments to guide chopping process") |
|
| INT_VAR_H (repair_unchopped_blobs, 1, "Fix blobs that aren't chopped") |
|
| double_VAR_H (tessedit_certainty_threshold, -2.25, "Good blob limit") |
|
| INT_VAR_H (chop_debug, 0, "Chop debug") |
|
| BOOL_VAR_H (chop_enable, 1, "Chop enable") |
|
| BOOL_VAR_H (chop_vertical_creep, 0, "Vertical creep") |
|
| INT_VAR_H (chop_split_length, 10000, "Split Length") |
|
| INT_VAR_H (chop_same_distance, 2, "Same distance") |
|
| INT_VAR_H (chop_min_outline_points, 6, "Min Number of Points on Outline") |
|
| INT_VAR_H (chop_seam_pile_size, 150, "Max number of seams in seam_pile") |
|
| BOOL_VAR_H (chop_new_seam_pile, 1, "Use new seam_pile") |
|
| INT_VAR_H (chop_inside_angle, -50, "Min Inside Angle Bend") |
|
| INT_VAR_H (chop_min_outline_area, 2000, "Min Outline Area") |
|
| double_VAR_H (chop_split_dist_knob, 0.5, "Split length adjustment") |
|
| double_VAR_H (chop_overlap_knob, 0.9, "Split overlap adjustment") |
|
| double_VAR_H (chop_center_knob, 0.15, "Split center adjustment") |
|
| INT_VAR_H (chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " "above which we don't care that a chop is not near the center.") |
|
| double_VAR_H (chop_sharpness_knob, 0.06, "Split sharpness adjustment") |
|
| double_VAR_H (chop_width_change_knob, 5.0, "Width change adjustment") |
|
| double_VAR_H (chop_ok_split, 100.0, "OK split limit") |
|
| double_VAR_H (chop_good_split, 50.0, "Good split limit") |
|
| INT_VAR_H (chop_x_y_weight, 3, "X / Y length weight") |
|
| INT_VAR_H (segment_adjust_debug, 0, "Segmentation adjustment debug") |
|
| BOOL_VAR_H (assume_fixed_pitch_char_segment, FALSE, "include fixed-pitch heuristics in char segmentation") |
|
| INT_VAR_H (wordrec_debug_level, 0, "Debug level for wordrec") |
|
| INT_VAR_H (wordrec_max_join_chunks, 4, "Max number of broken pieces to associate") |
|
| BOOL_VAR_H (wordrec_skip_no_truth_words, false, "Only run OCR for words that had truth recorded in BlamerBundle") |
|
| BOOL_VAR_H (wordrec_debug_blamer, false, "Print blamer debug messages") |
|
| BOOL_VAR_H (wordrec_run_blamer, false, "Try to set the blame for errors") |
|
| INT_VAR_H (segsearch_debug_level, 0, "SegSearch debug level") |
|
| INT_VAR_H (segsearch_max_pain_points, 2000, "Maximum number of pain points stored in the queue") |
|
| INT_VAR_H (segsearch_max_futile_classifications, 10, "Maximum number of pain point classifications per word.") |
|
| double_VAR_H (segsearch_max_char_wh_ratio, 2.0, "Maximum character width-to-height ratio") |
|
| BOOL_VAR_H (save_alt_choices, true, "Save alternative paths found during chopping " "and segmentation search") |
|
| Wordrec () |
|
virtual | ~Wordrec ()=default |
|
void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) |
|
void | FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
|
void | CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
|
void | SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) |
|
void | InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) |
|
void | DoSegSearch (WERD_RES *word_res) |
|
void | add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams) |
|
void | choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) |
|
void | combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue) |
|
SEAM * | pick_good_seam (TBLOB *blob) |
|
void | try_point_pairs (EDGEPT *points[50], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) |
|
void | try_vertical_splits (EDGEPT *points[50], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob) |
|
PRIORITY | grade_split_length (SPLIT *split) |
|
PRIORITY | grade_sharpness (SPLIT *split) |
|
bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) |
|
virtual BLOB_CHOICE_LIST * | classify_piece (const GenericVector< SEAM *> &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle) |
|
void | merge_fragments (MATRIX *ratings, int16_t num_blobs) |
|
void | get_fragment_lists (int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) |
|
void | merge_and_put_fragment_lists (int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) |
|
void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) |
|
void | program_editup (const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict) |
|
void | cc_recog (WERD_RES *word) |
|
void | program_editdown (int32_t elasped_time) |
|
void | set_pass1 () |
|
void | set_pass2 () |
|
int | end_recog () |
|
BLOB_CHOICE_LIST * | call_matcher (TBLOB *blob) |
|
int | dict_word (const WERD_CHOICE &word) |
|
BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle) |
|
PRIORITY | point_priority (EDGEPT *point) |
|
void | add_point_to_list (PointHeap *point_heap, EDGEPT *point) |
|
bool | is_inside_angle (EDGEPT *pt) |
|
int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) |
|
EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) |
|
void | prioritize_points (TESSLINE *outline, PointHeap *points) |
|
void | new_min_point (EDGEPT *local_min, PointHeap *points) |
|
void | new_max_point (EDGEPT *local_max, PointHeap *points) |
|
void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) |
|
SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams) |
|
SEAM * | chop_numbered_blob (TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams) |
|
SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) |
|
SEAM * | improve_one_blob (const GenericVector< BLOB_CHOICE *> &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number) |
|
SEAM * | chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE *> &blob_choices, WERD_RES *word_res, int *blob_number) |
|
void | chop_word_main (WERD_RES *word) |
|
void | improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending) |
|
int | select_blob_to_split (const GenericVector< BLOB_CHOICE *> &blob_choices, float rating_ceiling, bool split_next_to_fragment) |
|
int | select_blob_to_split_from_fixpt (DANGERR *fixpt) |
|
| Classify () |
|
virtual | ~Classify () |
|
const ShapeTable * | shape_table () const |
|
void | SetStaticClassifier (ShapeClassifier *static_classifier) |
|
void | AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices) |
|
bool | LargeSpeckle (const TBLOB &blob) |
|
ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) |
|
int | GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId) |
|
int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results) |
|
void | ReadNewCutoffs (TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs) |
|
void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
|
void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
|
ADAPT_TEMPLATES | ReadAdaptedTemplates (TFile *File) |
|
float | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch) |
|
void | FreeNormProtos () |
|
NORM_PROTOS * | ReadNormProtos (TFile *fp) |
|
void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) |
|
INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) |
|
void | LearnWord (const char *fontname, WERD_RES *word) |
|
void | LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) |
|
void | InitAdaptiveClassifier (TessdataManager *mgr) |
|
void | InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) |
|
void | AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results) |
|
void | MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results) |
|
void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results) |
|
double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors) |
|
void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) |
|
void | AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results) |
|
int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) |
|
void | DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results) |
|
PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) |
|
int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) |
|
void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob) |
|
void | PrintAdaptiveMatchResults (const ADAPT_RESULTS &results) |
|
void | RemoveExtraPuncs (ADAPT_RESULTS *Results) |
|
void | RemoveBadMatches (ADAPT_RESULTS *Results) |
|
void | SetAdaptiveThreshold (float Threshold) |
|
void | ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features) |
|
STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const |
|
int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const |
|
int | ShapeIDToClassID (int shape_id) const |
|
UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) |
|
int | CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results) |
|
int | CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results) |
|
UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass) |
|
void | DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results) |
|
void | AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates) |
|
void | DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class) |
|
bool | AdaptableWord (WERD_RES *word) |
|
void | EndAdaptiveClassifier () |
|
void | SettupPass1 () |
|
void | SettupPass2 () |
|
void | AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices) |
|
void | ClassifyAsNoise (ADAPT_RESULTS *Results) |
|
void | ResetAdaptiveClassifierInternal () |
|
void | SwitchAdaptiveClassifier () |
|
void | StartBackupAdaptiveClassifier () |
|
int | GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array) |
|
void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array) |
|
bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) |
|
void | UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob) |
|
bool | AdaptiveClassifierIsFull () const |
|
bool | AdaptiveClassifierIsEmpty () const |
|
bool | LooksLikeGarbage (TBLOB *blob) |
|
void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) |
|
void | ClearCharNormArray (uint8_t *char_norm_array) |
|
void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array) |
|
void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) |
|
INT_TEMPLATES | ReadIntTemplates (TFile *fp) |
|
void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) |
|
CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) |
|
void | ShowMatchDisplay () |
|
UnicityTable< FontInfo > & | get_fontinfo_table () |
|
const UnicityTable< FontInfo > & | get_fontinfo_table () const |
|
UnicityTable< FontSet > & | get_fontset_table () |
|
void | NormalizeOutlines (LIST Outlines, float *XScale, float *YScale) |
|
FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) |
|
FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) |
|
FEATURE_SET | ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) |
|
FEATURE_SET | ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info) |
|
void | LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) |
|
bool | WriteTRFile (const STRING &filename) |
|
| BOOL_VAR_H (allow_blob_division, true, "Use divisible blobs chopping") |
|
| BOOL_VAR_H (prioritize_division, FALSE, "Prioritize blob division over chopping") |
|
| INT_VAR_H (tessedit_single_match, FALSE, "Top choice only from CP") |
|
| BOOL_VAR_H (classify_enable_learning, true, "Enable adaptive classifier") |
|
| INT_VAR_H (classify_debug_level, 0, "Classify debug level") |
|
| INT_VAR_H (classify_norm_method, character, "Normalization Method ...") |
|
| double_VAR_H (classify_char_norm_range, 0.2, "Character Normalization Range ...") |
|
| double_VAR_H (classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...") |
|
| double_VAR_H (classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...") |
|
| double_VAR_H (classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...") |
|
| double_VAR_H (classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...") |
|
| double_VAR_H (classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings") |
|
| double_VAR_H (classify_max_certainty_margin, 5.5, "Veto difference between classifier certainties") |
|
| BOOL_VAR_H (tess_cn_matching, 0, "Character Normalized Matching") |
|
| BOOL_VAR_H (tess_bn_matching, 0, "Baseline Normalized Matching") |
|
| BOOL_VAR_H (classify_enable_adaptive_matcher, 1, "Enable adaptive classifier") |
|
| BOOL_VAR_H (classify_use_pre_adapted_templates, 0, "Use pre-adapted classifier templates") |
|
| BOOL_VAR_H (classify_save_adapted_templates, 0, "Save adapted templates to a file") |
|
| BOOL_VAR_H (classify_enable_adaptive_debugger, 0, "Enable match debugger") |
|
| BOOL_VAR_H (classify_nonlinear_norm, 0, "Non-linear stroke-density normalization") |
|
| INT_VAR_H (matcher_debug_level, 0, "Matcher Debug Level") |
|
| INT_VAR_H (matcher_debug_flags, 0, "Matcher Debug Flags") |
|
| INT_VAR_H (classify_learning_debug_level, 0, "Learning Debug Level: ") |
|
| double_VAR_H (matcher_good_threshold, 0.125, "Good Match (0-1)") |
|
| double_VAR_H (matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)") |
|
| double_VAR_H (matcher_perfect_threshold, 0.02, "Perfect Match (0-1)") |
|
| double_VAR_H (matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)") |
|
| double_VAR_H (matcher_rating_margin, 0.1, "New template margin (0-1)") |
|
| double_VAR_H (matcher_avg_noise_size, 12.0, "Avg. noise blob length: ") |
|
| INT_VAR_H (matcher_permanent_classes_min, 1, "Min # of permanent classes") |
|
| INT_VAR_H (matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold") |
|
| INT_VAR_H (matcher_sufficient_examples_for_prototyping, 5, "Enable adaption even if the ambiguities have not been seen") |
|
| double_VAR_H (matcher_clustering_max_angle_delta, 0.015, "Maximum angle delta for prototype clustering") |
|
| double_VAR_H (classify_misfit_junk_penalty, 0.0, "Penalty to apply when a non-alnum is vertically out of " "its expected textline position") |
|
| double_VAR_H (rating_scale, 1.5, "Rating scaling factor") |
|
| double_VAR_H (certainty_scale, 20.0, "Certainty scaling factor") |
|
| double_VAR_H (tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used") |
|
| double_VAR_H (classify_adapted_pruning_factor, 2.5, "Prune poor adapted results this much worse than best result") |
|
| double_VAR_H (classify_adapted_pruning_threshold, -1.0, "Threshold at which classify_adapted_pruning_factor starts") |
|
| INT_VAR_H (classify_adapt_proto_threshold, 230, "Threshold for good protos during adaptive 0-255") |
|
| INT_VAR_H (classify_adapt_feature_threshold, 230, "Threshold for good features during adaptive 0-255") |
|
| BOOL_VAR_H (disable_character_fragments, TRUE, "Do not include character fragments in the" " results of the classifier") |
|
| double_VAR_H (classify_character_fragments_garbage_certainty_threshold, -3.0, "Exclude fragments that do not match any whole character" " with at least this certainty") |
|
| BOOL_VAR_H (classify_debug_character_fragments, FALSE, "Bring up graphical debugging windows for fragments training") |
|
| BOOL_VAR_H (matcher_debug_separate_windows, FALSE, "Use two different windows for debugging the matching: " "One for the protos and one for the features.") |
|
| STRING_VAR_H (classify_learn_debug_str, "", "Class str to debug learning") |
|
| INT_VAR_H (classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255") |
|
| INT_VAR_H (classify_class_pruner_multiplier, 15, "Class Pruner Multiplier 0-255: ") |
|
| INT_VAR_H (classify_cp_cutoff_strength, 7, "Class Pruner CutoffStrength: ") |
|
| INT_VAR_H (classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ") |
|
| INT_VAR_H (il1_adaption_test, 0, "Don't adapt to i/I at beginning of word") |
|
| BOOL_VAR_H (classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].") |
|
| double_VAR_H (speckle_large_max_size, 0.30, "Max large speckle size") |
|
| double_VAR_H (speckle_rating_penalty, 10.0, "Penalty to add to worst rating for noise") |
|
| CCStruct ()=default |
|
virtual | ~CCStruct () |
|
| CUtil ()=default |
|
virtual | ~CUtil () |
|
void | read_variables (const char *filename, bool global_only) |
|
| CCUtil () |
|
virtual | ~CCUtil () |
|
void | main_setup (const char *argv0, const char *basename) |
| CCUtil::main_setup - set location of tessdata and name of image. More...
|
|
ParamsVectors * | params () |
|