21 #ifndef TESSERACT_CCUTIL_NORMSTRNGS_H_
22 #define TESSERACT_CCUTIL_NORMSTRNGS_H_
27 #include "validator.h"
59 std::string* normalized);
67 std::vector<std::string>* graphemes);
103 #endif // TESSERACT_CCUTIL_NORMSTRNGS_H_
OCRNorm
Definition: normstrngs.h:41
signed int char32
Definition: unichar.h:52
bool IsInterchangeValid(const char32 ch)
Definition: normstrngs.cpp:253
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:229
GraphemeNormMode
Definition: validator.h:34
Definition: baseapi.cpp:94
GraphemeNorm
Definition: normstrngs.h:49
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:147
char32 FullwidthToHalfwidth(const char32 ch)
Definition: normstrngs.cpp:282
bool IsInterchangeValid7BitAscii(const char32 ch)
Definition: normstrngs.cpp:276
unsigned int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:243
UnicodeNormMode
Definition: normstrngs.h:32
unsigned int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:233
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:223
bool IsValidCodepoint(const char32 ch)
Definition: normstrngs.cpp:218
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
Definition: normstrngs.cpp:172
bool IsOCREquivalent(char32 ch1, char32 ch2)
Definition: normstrngs.cpp:214
char32 OCRNormalize(char32 ch)
Definition: normstrngs.cpp:204