20 #ifndef TESSERACT_CCUTIL_UNICHAR_H_ 21 #define TESSERACT_CCUTIL_UNICHAR_H_ 31 #define UNICHAR_LEN 30 35 using UNICHAR_ID = int;
38 static const int INVALID_UNICHAR_ID = -1;
40 static const char INVALID_UNICHAR[] =
"__INVALID_UNICHAR__";
42 enum StrongScriptDirection {
44 DIR_LEFT_TO_RIGHT = 1,
45 DIR_RIGHT_TO_LEFT = 2,
61 memset(
chars, 0, UNICHAR_LEN);
79 int len =
chars[UNICHAR_LEN - 1];
80 return len >=0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
92 static int utf8_step(
const char* utf8_str);
140 return lhs.
it_ == rhs.
it_;
143 return !(lhs == rhs);
161 static std::vector<char32>
UTF8ToUTF32(
const char* utf8_str);
164 static std::string
UTF32ToUTF8(
const std::vector<char32>& str32);
175 #endif // TESSERACT_CCUTIL_UNICHAR_H_ const_iterator(const char *it)
Definition: unichar.h:148
const_iterator & operator++()
Definition: unichar.cpp:151
signed int char32
Definition: unichar.h:52
bool is_legal() const
Definition: unichar.cpp:198
Definition: unichar.h:108
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:136
static const_iterator begin(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:202
int operator*() const
Definition: unichar.cpp:165
const char * utf8_data() const
Definition: unichar.h:136
char * utf8_str() const
Definition: unichar.cpp:127
int utf8_len() const
Definition: unichar.cpp:188
Definition: baseapi.cpp:94
const char * utf8() const
Definition: unichar.h:84
static std::vector< char32 > UTF8ToUTF32(const char *utf8_str)
Definition: unichar.cpp:213
int get_utf8(char *buf) const
Definition: unichar.cpp:176
const char * it_
Definition: unichar.h:150
friend bool operator==(const CI &lhs, const CI &rhs)
Definition: unichar.h:139
friend bool operator!=(const CI &lhs, const CI &rhs)
Definition: unichar.h:142
static const_iterator end(const char *utf8_str, const int byte_length)
Definition: unichar.cpp:206
int utf8_len() const
Definition: unichar.h:78
static std::string UTF32ToUTF8(const std::vector< char32 > &str32)
Definition: unichar.cpp:230
UNICHAR()
Definition: unichar.h:60
char chars[30]
Definition: unichar.h:170
int first_uni() const
Definition: unichar.cpp:99