22 #ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 23 #define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ 25 #include <unordered_map> 29 #include "unicharset.h" 44 void Set(
int index,
int value) {
50 void Set3(
int code0,
int code1,
int code2) {
74 for (
int i = 0; i <
length_; ++i) {
83 for (
int i = 0; i < code.
length_; ++i) {
84 result ^= code(i) << (7 * i);
136 static const int kFirstHangul = 0xac00;
138 static const int kNumHangul = 11172;
141 static const int kLCount = 19;
142 static const int kVCount = 21;
143 static const int kTCount = 28;
149 bool ComputeEncoding(
const UNICHARSET& unicharset,
int null_id,
150 STRING* radical_stroke_table);
153 void SetupPassThrough(
const UNICHARSET& unicharset);
165 int EncodeUnichar(
int unichar_id,
RecodedCharID* code)
const;
174 auto it = next_codes_.find(code);
175 return it == next_codes_.end() ? nullptr : it->second;
180 auto it = final_codes_.find(code);
181 return it == final_codes_.end() ? nullptr : it->second;
202 static bool DecomposeHangul(
int unicode,
int* leading,
int* vowel,
207 void DefragmentCodeValues(
int encoded_null);
209 void ComputeCodeRange();
219 std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash>
225 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
230 std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*,
239 #endif // TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
Definition: unicharcompress.h:179
std::unordered_map< RecodedCharID, GenericVectorEqEq< int > *, RecodedCharID::RecodedCharIDHash > next_codes_
Definition: unicharcompress.h:227
int code_range() const
Definition: unicharcompress.h:161
int operator()(int index) const
Definition: unicharcompress.h:58
size_t operator()(const RecodedCharID &code) const
Definition: unicharcompress.h:81
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
Definition: unicharcompress.h:173
Definition: unicharset.h:146
Definition: serialis.h:77
Definition: baseapi.cpp:94
Definition: unicharcompress.h:128
bool operator==(const RecodedCharID &other) const
Definition: unicharcompress.h:72
bool DeSerialize(char *data, size_t count=1)
Definition: serialis.cpp:103
bool IsValidFirstCode(int code) const
Definition: unicharcompress.h:170
static const int kMaxCodeLen
Definition: unicharcompress.h:37
int32_t code_[kMaxCodeLen]
Definition: unicharcompress.h:97
int32_t length_
Definition: unicharcompress.h:95
std::unordered_map< RecodedCharID, GenericVectorEqEq< int > *, RecodedCharID::RecodedCharIDHash > final_codes_
Definition: unicharcompress.h:232
RecodedCharID()
Definition: unicharcompress.h:39
int8_t self_normalized_
Definition: unicharcompress.h:93
void Set(int index, int value)
Definition: unicharcompress.h:44
std::unordered_map< RecodedCharID, int, RecodedCharID::RecodedCharIDHash > decoder_
Definition: unicharcompress.h:220
Definition: unicharcompress.h:80
GenericVector< RecodedCharID > encoder_
Definition: unicharcompress.h:217
GenericVector< bool > is_valid_start_
Definition: unicharcompress.h:222
bool Serialize(TFile *fp) const
Definition: unicharcompress.h:61
Definition: unicharcompress.h:34
int length() const
Definition: unicharcompress.h:57
bool Serialize(const char *data, size_t count=1)
Definition: serialis.cpp:147
bool DeSerialize(TFile *fp)
Definition: unicharcompress.h:67
void Set3(int code0, int code1, int code2)
Definition: unicharcompress.h:50
int code_range_
Definition: unicharcompress.h:234
void Truncate(int length)
Definition: unicharcompress.h:42