21 #ifndef TESSERACT_CCUTIL_AMBIGS_H_ 22 #define TESSERACT_CCUTIL_AMBIGS_H_ 27 #include "unicharset.h" 28 #include "genericvector.h" 30 #define MAX_AMBIG_SIZE 10 40 "Illegal ambiguity specification on line %d\n";
42 "Illegal unichar %s in ambiguity specification\n";
62 static inline int compare(
const UNICHAR_ID *ptr1,
const UNICHAR_ID *ptr2) {
64 const UNICHAR_ID val1 = *ptr1++;
65 const UNICHAR_ID val2 = *ptr2++;
67 if (val1 == INVALID_UNICHAR_ID)
return -1;
68 if (val2 == INVALID_UNICHAR_ID)
return 1;
69 if (val1 < val2)
return -1;
72 if (val1 == INVALID_UNICHAR_ID)
return 0;
79 const UNICHAR_ID uid) {
80 for (
int i = 0; i < uid_vec.
size(); ++i)
81 if (uid_vec[i] == uid)
return i;
88 static inline int copy(
const UNICHAR_ID src[], UNICHAR_ID dst[]) {
92 }
while (dst[i++] != INVALID_UNICHAR_ID);
98 static inline void print(
const UNICHAR_ID array[],
100 const UNICHAR_ID *ptr = array;
101 if (*ptr == INVALID_UNICHAR_ID) tprintf(
"[Empty]");
102 while (*ptr != INVALID_UNICHAR_ID) {
107 while (*ptr != INVALID_UNICHAR_ID) tprintf(
"%d ", *ptr++);
126 if (result != 0)
return result;
131 UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
132 UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE + 1];
145 UnicharAmbigs() =
default;
147 replace_ambigs_.delete_data_pointers();
148 dang_ambigs_.delete_data_pointers();
149 one_to_one_definite_ambigs_.delete_data_pointers();
// Declaration only; the definition is not part of this listing.
// Parameters:
//   unicharset               - character set, passed by const reference.
//   use_ambigs_for_adaption  - boolean flag forwarded by the caller.
// Returns nothing.
// NOTE(review): presumably prepares the ambiguity tables for `unicharset`;
// confirm against the definition.
156 void InitUnicharAmbigs(
const UNICHARSET& unicharset,
157 bool use_ambigs_for_adaption);
// Declaration only; the definition is not part of this listing.
// Parameters:
//   encoder_set              - character set, passed by const reference.
//   ambigs_file              - pointer to the TFile handed in by the caller.
//   debug_level              - integer verbosity/debug setting.
//   use_ambigs_for_adaption  - boolean flag forwarded by the caller.
//   unicharset               - non-const pointer, so the callee is able to
//                              modify it; whether it does is not visible here.
// Returns nothing.
// NOTE(review): presumably reads ambiguity specifications from `ambigs_file`;
// confirm against the definition.
174 void LoadUnicharAmbigs(
const UNICHARSET& encoder_set,
175 TFile *ambigs_file,
int debug_level,
176 bool use_ambigs_for_adaption,
UNICHARSET *unicharset);
180 UNICHAR_ID unichar_id)
const {
181 if (one_to_one_definite_ambigs_.empty())
return nullptr;
182 return one_to_one_definite_ambigs_[unichar_id];
191 UNICHAR_ID unichar_id)
const {
192 if (ambigs_for_adaption_.empty())
return nullptr;
193 return ambigs_for_adaption_[unichar_id];
200 UNICHAR_ID unichar_id)
const {
201 if (reverse_ambigs_for_adaption_.empty())
return nullptr;
202 return reverse_ambigs_for_adaption_[unichar_id];
// Declaration only; the definition is not part of this listing.
// NOTE(review): the embedded listing numbers jump from 206 to 208, so one
// source line of this parameter list appears to be missing here; check the
// real header before relying on this signature.
// Parameters visible in this listing:
//   line_num, version, debug_level - integers passed by value.
//   test_ambig_part_size, test_unichar_ids, replacement_ambig_part_size,
//   replacement_string, type       - non-const pointers; presumably outputs
//                                    filled in by the callee (TODO confirm).
// Returns a bool; presumably true when the line parsed successfully
// (TODO confirm against the definition).
206 bool ParseAmbiguityLine(
int line_num,
int version,
int debug_level,
208 int *test_ambig_part_size,
209 UNICHAR_ID *test_unichar_ids,
210 int *replacement_ambig_part_size,
211 char *replacement_string,
int *type);
213 int test_ambig_part_size, UNICHAR_ID *test_unichar_ids,
214 int replacement_ambig_part_size,
215 const char *replacement_string,
int type,
227 #endif // TESSERACT_CCUTIL_AMBIGS_H_
static int find_in(const UnicharIdVector &uid_vec, const UNICHAR_ID uid)
Definition: ambigs.h:78
UNICHAR_ID correct_fragments[10+1]
Definition: ambigs.h:132
static const char kAmbigDelimiters[]
Definition: ambigs.h:38
int wrong_ngram_size
Definition: ambigs.h:135
UnicharAmbigsVector replace_ambigs_
Definition: ambigs.h:219
AmbigType
Definition: ambigs.h:44
AmbigType type
Definition: ambigs.h:134
GenericVector< UnicharIdVector * > one_to_one_definite_ambigs_
Definition: ambigs.h:220
static int compare_ambig_specs(const void *spec1, const void *spec2)
Definition: ambigs.h:122
Definition: unicharset.h:146
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
Definition: ambigs.h:62
static int copy(const UNICHAR_ID src[], UNICHAR_ID dst[])
Definition: ambigs.h:88
Definition: serialis.h:77
UnicharAmbigsVector dang_ambigs_
Definition: ambigs.h:218
Definition: baseapi.cpp:94
static const char kAmbigNgramSeparator[]
Definition: ambigs.h:37
static const int kUnigramAmbigsBufferSize
Definition: ambigs.h:36
~UnicharAmbigs()
Definition: ambigs.h:146
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:199
const UnicharIdVector * OneToOneDefiniteAmbigs(UNICHAR_ID unichar_id) const
Definition: ambigs.h:179
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:98
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:190
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:153
GenericVector< UnicharIdVector * > reverse_ambigs_for_adaption_
Definition: ambigs.h:222
int size() const
Definition: genericvector.h:71
UNICHAR_ID wrong_ngram[10+1]
Definition: ambigs.h:131
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:152
GenericVector< UnicharIdVector * > ambigs_for_adaption_
Definition: ambigs.h:221
static const char kIllegalUnicharMsg[]
Definition: ambigs.h:41
static const char kIllegalMsg[]
Definition: ambigs.h:39
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:290
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:133