22 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ 23 #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ 25 #include "associate.h" 28 #include "genericvector.h" 29 #include "lm_consistency.h" 34 #include "unicharset.h" 74 : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc),
75 ngram_and_classifier_cost(ncc) {}
101 const char *debug_uch)
102 : cost(c), curr_b(b), parent_vse(pe), competing_vse(nullptr),
103 ratings_sum(b->rating()),
104 min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1),
105 outline_length(ol), consistency_info(ci), associate_stats(as),
106 top_choice_flags(tcf), dawg_info(d), ngram_info(n),
108 debug_str = (debug_uch ==
nullptr) ?
nullptr :
new STRING();
117 if (debug_uch !=
nullptr) *debug_str += *(pe->
debug_str);
119 if (debug_str !=
nullptr && debug_uch !=
nullptr) *debug_str += debug_uch;
128 static int Compare(
const void *e1,
const void *e2) {
133 return (ve1->
cost < ve2->
cost) ? -1 : 1;
136 if (dawg_info !=
nullptr && consistency_info.NumInconsistentCase() == 0) {
139 return consistency_info.Consistent();
144 if (curr_b ==
nullptr)
return false;
145 UNICHAR_ID unichar_id = curr_b->unichar_id();
151 void Print(
const char *msg)
const;
197 viterbi_state_entries_prunable_length(0),
198 viterbi_state_entries_prunable_max_cost(FLT_MAX),
199 viterbi_state_entries_length(0) {}
205 void Print(
const char *msg);
219 : updated(false), best_vse(nullptr) {
220 beam.reserve(matrix_dimension);
221 for (
int i = 0; i < matrix_dimension; ++i)
240 #endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_ Definition: lm_state.h:63
LanguageModelNgramInfo * ngram_info
Definition: lm_state.h:184
PermuterType permuter
Definition: lm_state.h:67
LMConsistencyInfo consistency_info
Definition: lm_state.h:171
~ViterbiStateEntry()
Definition: lm_state.h:121
LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
Definition: lm_state.h:73
~LanguageModelState()
Definition: lm_state.h:200
float min_certainty
Definition: lm_state.h:167
Definition: lm_state.h:93
STRING context
Definition: lm_state.h:76
int viterbi_state_entries_length
Total number of entries in viterbi_state_entries.
Definition: lm_state.h:213
Struct to store information maintained by various language model components.
Definition: lm_state.h:195
float ngram_cost
-ln(P_ngram_model(path))
Definition: lm_state.h:86
Definition: unicharset.h:146
bool Consistent() const
Definition: lm_state.h:135
LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt)
Definition: lm_state.h:64
float outline_length
Definition: lm_state.h:170
Definition: lm_consistency.h:39
int context_unichar_step_len
Definition: lm_state.h:79
BestChoiceBundle(int matrix_dimension)
Definition: lm_state.h:218
Definition: baseapi.cpp:94
int length
Definition: lm_state.h:169
int viterbi_state_entries_prunable_length
Number and max cost of prunable paths in viterbi_state_entries.
Definition: lm_state.h:210
int adapted
Definition: lm_state.h:168
LanguageModelFlagsType top_choice_flags
Definition: lm_state.h:176
BLOB_CHOICE * curr_b
Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
Definition: lm_state.h:158
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:486
ViterbiStateEntry * best_vse
Best ViterbiStateEntry and BLOB_CHOICE.
Definition: lm_state.h:235
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:217
bool updated
Definition: lm_state.h:186
bool updated
Flag to indicate whether anything was changed.
Definition: lm_state.h:227
ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
Definition: lm_state.h:94
ViterbiStateEntry * competing_vse
Definition: lm_state.h:162
static int Compare(const void *e1, const void *e2)
Definition: lm_state.h:128
ViterbiStateEntry_LIST viterbi_state_entries
Storage for the Viterbi state.
Definition: lm_state.h:208
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
Definition: lm_state.h:229
bool pruned
Definition: lm_state.h:84
~BestChoiceBundle()
Definition: lm_state.h:224
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:507
AssociateStats associate_stats
Definition: lm_state.h:172
DawgPositionVector active_dawgs
Definition: lm_state.h:66
STRING * debug_str
Definition: lm_state.h:189
PointerVector< LanguageModelState > beam
Definition: lm_state.h:233
ViterbiStateEntry * parent_vse
Definition: lm_state.h:159
float ngram_and_classifier_cost
-[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
Definition: lm_state.h:88
Definition: genericvector.h:457
LanguageModelState()
Definition: lm_state.h:196
unsigned char LanguageModelFlagsType
Used for expressing various language model flags.
Definition: lm_state.h:39
LanguageModelDawgInfo * dawg_info
Definition: lm_state.h:180
bool HasAlnumChoice(const UNICHARSET &unicharset)
Definition: lm_state.h:143
float viterbi_state_entries_prunable_max_cost
Definition: lm_state.h:211
float ratings_sum
Definition: lm_state.h:166
float cost
Definition: lm_state.h:155
Definition: lm_state.h:72
Definition: associate.h:36