26 #include <sys/types.h> 30 #include "genericvector.h" 38 #include "unicharset.h" 98 prev_word_best_choice =
nullptr;
104 PAGE_RES(
bool merge_similar_words,
105 BLOCK_LIST *block_list,
108 ~PAGE_RES () =
default;
151 ROW_RES(
bool merge_similar_words,
ROW *the_row);
224 std::vector<std::vector<std::pair<const char*, float>>>
timesteps;
361 const char*
BestUTF8(
int blob_index,
bool in_rtl_context)
const {
362 if (blob_index < 0 || best_choice ==
nullptr ||
363 blob_index >= best_choice->
length())
365 UNICHAR_ID
id = best_choice->
unichar_id(blob_index);
366 if (id < 0 || id >= uch_set->
size())
368 UNICHAR_ID mirrored = uch_set->
get_mirror(
id);
369 if (in_rtl_context && mirrored > 0)
375 if (blob_index < 0 || blob_index >= raw_choice->
length())
377 UNICHAR_ID
id = raw_choice->
unichar_id(blob_index);
378 if (id < 0 || id >= uch_set->
size())
384 if (best_choice ==
nullptr ||
385 blob_index >= best_choice->
length() ||
392 if (uch_set ==
nullptr || best_choice ==
nullptr || best_choice->
length() < 1)
394 for (
int id = 0;
id < best_choice->
length();
id++) {
396 if (unichar_id < 0 || unichar_id >= uch_set->
size())
409 if (uch_set ==
nullptr || best_choice ==
nullptr || best_choice->
length() < 1)
411 for (
int id = 0;
id < best_choice->
length();
id++) {
413 if (unichar_id < 0 || unichar_id >= uch_set->
size())
429 void InitNonPointers();
433 void ClearWordChoices();
440 void CopySimpleFields(
const WERD_RES& source);
446 void InitForRetryRecognition(
const WERD_RES& source);
463 bool SetupForRecognition(
const UNICHARSET& unicharset_in,
466 const TBOX* norm_box,
bool numeric_mode,
467 bool use_body_size,
bool allow_detailed_fx,
473 void SetupBasicsFromChoppedWord(
const UNICHARSET &unicharset_in);
480 void SetupWordScript(
const UNICHARSET& unicharset_in);
483 void SetupBlamerBundle();
486 void SetupBlobWidthsAndGaps();
491 void InsertSeam(
int blob_number,
SEAM* seam);
495 bool AlternativeChoiceAdjustmentsWorseThan(
float threshold)
const;
503 bool StatesAllValid();
507 void DebugWordChoices(
bool debug,
const char* word_to_debug);
510 void DebugTopChoice(
const char* msg)
const;
514 void FilterWordChoices(
int debug_level);
531 void ComputeAdaptionThresholds(
float certainty_scale,
546 bool LogNewCookedChoice(
int max_num_choices,
bool debug,
550 void PrintBestChoices()
const;
554 int GetBlobsWidth(
int start_blob,
int last_blob);
556 int GetBlobsGap(
int blob_index);
567 BLOB_CHOICE_LIST* GetBlobChoices(
int index)
const;
577 void ConsumeWordResults(
WERD_RES* word);
585 void RebuildBestState();
589 void CloneChoppedToRebuild();
596 void SetScriptPositions();
608 void FakeClassifyWord(
int blob_count,
BLOB_CHOICE** choices);
612 void FakeWordFromRatings(PermuterType permuter);
615 void BestChoiceToCorrectText();
622 bool ConditionalBlobMerge(
628 void MergeAdjacentBlobs(
int index);
632 UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2);
637 UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2);
640 bool HyphenBoxesOverlap(
const TBOX& box1,
const TBOX& box2);
645 UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2);
646 void merge_tess_fails();
661 word->set_flag(W_BOL, word->flag(W_BOL) || word_res->
word->
flag(W_BOL));
662 word->set_flag(W_EOL, word->flag(W_EOL) || word_res->
word->
flag(W_EOL));
668 bool PiecesAllNatural(
int start,
int count)
const;
682 page_res = the_page_res;
690 bool operator !=(
const PAGE_RES_IT &other)
const {
return !(*
this == other); }
699 return start_page(
false);
702 return start_page(
true);
704 WERD_RES *start_page(
bool empty_ok);
725 void DeleteCurrentWord();
729 void MakeCurrentWordFuzzy();
732 return internal_forward(
false,
false);
736 return internal_forward(
false,
true);
743 return prev_word_res;
749 return prev_block_res;
761 return next_word_res;
767 return next_block_res;
769 void rej_stat_word();
770 void ResetWordIterator();
773 WERD_RES *internal_forward(
bool new_block,
bool empty_ok);
int32_t char_count
Definition: pageres.h:79
Definition: unicharset.h:159
bool unichars_in_script_order() const
Definition: ratngs.h:535
ROW_RES * prev_row_res
Definition: pageres.h:776
Definition: pageres.h:115
int32_t char_count
Definition: pageres.h:118
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:310
GenericVector< int > best_state
Definition: pageres.h:271
GenericVector< STRING > correct_text
Definition: pageres.h:275
WERD_CHOICE ** prev_word_best_choice
Definition: pageres.h:85
bool bold
Definition: pageres.h:125
ROW_RES * row_res
Definition: pageres.h:780
int8_t bold
Definition: pageres.h:302
Definition: fontinfo.h:62
int32_t char_count
Definition: pageres.h:144
ROW_RES_LIST row_res_list
Definition: pageres.h:128
ROW_RES * row() const
Definition: pageres.h:754
const char * BestUTF8(int blob_index, bool in_rtl_context) const
Definition: pageres.h:361
GenericVector< SEAM * > seam_array
Definition: pageres.h:217
float x_height
Definition: pageres.h:122
bool guessed_caps_ht
Definition: pageres.h:309
tesseract::Tesseract * tesseract
Definition: pageres.h:282
float space_certainty
Definition: pageres.h:316
WERD_CHOICE * raw_choice
Definition: pageres.h:240
bool reject_spaces
Definition: pageres.h:336
ROW * blob_row
Definition: pageres.h:200
TWERD * chopped_word
Definition: pageres.h:215
WERD_CHOICE * best_choice
Definition: pageres.h:235
Direction
Definition: unicharset.h:157
WERD_RES(const WERD_RES &source)
Definition: pageres.h:349
UNICHARSET::Direction SymbolDirection(int blob_index) const
Definition: pageres.h:383
Definition: unicharset.h:146
ROW_RES_IT row_res_it
Definition: pageres.h:788
WERD_RES * restart_page()
Definition: pageres.h:698
int size() const
Definition: unicharset.h:336
WERD_RES()
Definition: pageres.h:338
bool italic
Definition: pageres.h:126
BLOCK_RES * block_res
Definition: pageres.h:781
WERD * word
Definition: pageres.h:189
Definition: unicharset.h:163
ROW * row
Definition: pageres.h:143
int8_t italic
Definition: pageres.h:301
int32_t whole_word_rej_count
Definition: pageres.h:146
BlamerBundle * blamer_bundle
Definition: pageres.h:246
Definition: baseapi.cpp:94
bool font_assigned
Definition: pageres.h:123
MATRIX * ratings
Definition: pageres.h:231
PAGE_RES()
Definition: pageres.h:102
bool AnyRtlCharsInWord() const
Definition: pageres.h:391
Definition: unicharset.h:158
const char * RawUTF8(int blob_index) const
Definition: pageres.h:374
ROW_RES * next_row_res
Definition: pageres.h:784
Definition: rejctmap.h:201
const FontInfo * fontinfo2
Definition: pageres.h:305
int16_t font_class
Definition: pageres.h:120
WERD_RES * next_word() const
Definition: pageres.h:760
bool UnicharsInReadingOrder() const
Definition: pageres.h:425
WERD_RES * forward_with_empties()
Definition: pageres.h:735
int length() const
Definition: ratngs.h:303
bool tess_failed
Definition: pageres.h:288
int32_t rej_count
Definition: pageres.h:119
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:685
bool small_caps
Definition: pageres.h:299
ScriptPos
Definition: ratngs.h:262
void copy_on(WERD_RES *word_res)
Definition: pageres.h:660
Definition: pageres.h:675
Definition: unicharset.h:171
Definition: ocrblock.h:30
bool tess_accepted
Definition: pageres.h:296
Definition: unicharset.h:168
ROW_RES * prev_row() const
Definition: pageres.h:745
WERD_RES * prev_word_res
Definition: pageres.h:775
int32_t rej_count
Definition: pageres.h:145
WERD_RES * word_res
Definition: pageres.h:779
bool combination
Definition: pageres.h:334
DENORM denorm
Definition: pageres.h:204
WERD_RES_LIST word_res_list
Definition: pageres.h:147
float x_height
Definition: pageres.h:311
BLOCK_RES * next_block() const
Definition: pageres.h:766
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:692
TWERD * rebuild_word
Definition: pageres.h:260
int16_t row_count
Definition: pageres.h:121
BLOCK_RES * prev_block_res
Definition: pageres.h:777
WERD_RES * prev_word() const
Definition: pageres.h:742
WERD_RES * forward()
Definition: pageres.h:731
float baseline_shift
Definition: pageres.h:313
const UNICHARSET * uch_set
Definition: pageres.h:206
WERD_RES * restart_page_with_empties()
Definition: pageres.h:701
GenericVector< int > blame_reasons
Definition: pageres.h:87
bool guessed_x_ht
Definition: pageres.h:308
int8_t fontinfo_id_count
Definition: pageres.h:306
bool tess_would_adapt
Definition: pageres.h:297
BLOCK_RES_LIST block_res_list
Definition: pageres.h:81
Definition: normalis.h:50
static WERD_RES * deep_copy(const WERD_RES *src)
Definition: pageres.h:649
Definition: pageres.h:169
bool AnyLtrCharsInWord() const
Definition: pageres.h:408
WERD_RES_IT word_res_it
Definition: pageres.h:789
GenericVector< int > blob_widths
Definition: pageres.h:219
bool odd_size
Definition: pageres.h:300
PAGE_RES_IT(PAGE_RES *the_page_res)
Definition: pageres.h:681
WERD_CHOICE * ep_choice
Definition: pageres.h:286
BLOCK_RES * prev_block() const
Definition: pageres.h:748
bool done
Definition: pageres.h:298
Definition: tesseractclass.h:173
void init_to_size(int size, const T &t)
Definition: genericvector.h:708
const FontInfo * fontinfo
Definition: pageres.h:304
int32_t rej_count
Definition: pageres.h:80
tesseract::BoxWord * box_word
Definition: pageres.h:266
BLOCK_RES * block() const
Definition: pageres.h:757
void Init()
Definition: pageres.h:94
GenericVector< int > blob_gaps
Definition: pageres.h:222
bool part_of_combo
Definition: pageres.h:335
WERD_RES * next_word_res
Definition: pageres.h:783
BLOCK_RES * next_block_res
Definition: pageres.h:785
WERD_RES(WERD *the_word)
Definition: pageres.h:342
bool rejected
Definition: pageres.h:82
tesseract::BoxWord * bln_boxes
Definition: pageres.h:198
MATRIX * DeepCopy() const
Definition: matrix.cpp:99
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:298
int8_t fontinfo_id2_count
Definition: pageres.h:307
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:224
BLOCK * block
Definition: pageres.h:117
BLOCK_RES_IT block_res_it
Definition: pageres.h:787
REJMAP reject_map
Definition: pageres.h:287
WERD_RES * word() const
Definition: pageres.h:751
WERD_CHOICE_LIST best_choices
Definition: pageres.h:243
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
float caps_height
Definition: pageres.h:312
PAGE_RES * page_res
Definition: pageres.h:677
Definition: pageres.h:141
GenericVector< STRING > misadaption_log
Definition: pageres.h:92
ROW_RES * next_row() const
Definition: pageres.h:763
bool flag(WERD_FLAGS mask) const
Definition: werd.h:126