#include <validator.h>
|
enum | CharClass {
CharClass::kConsonant = 'C',
CharClass::kVowel = 'V',
CharClass::kVirama = 'H',
CharClass::kMatra = 'M',
CharClass::kMatraPiece = 'P',
CharClass::kVowelModifier = 'D',
CharClass::kZeroWidthNonJoiner = 'z',
CharClass::kZeroWidthJoiner = 'Z',
CharClass::kVedicMark = 'v',
CharClass::kNukta = 'N',
CharClass::kRobat = 'R',
CharClass::kOther = 'O',
CharClass::kWhitespace = ' ',
CharClass::kCombiner = 'c'
} |
|
using | IndicPair = std::pair< CharClass, char32 > |
|
◆ IndicPair
◆ CharClass
Enumerator |
---|
kConsonant | |
kVowel | |
kVirama | |
kMatra | |
kMatraPiece | |
kVowelModifier | |
kZeroWidthNonJoiner | |
kZeroWidthJoiner | |
kVedicMark | |
kNukta | |
kRobat | |
kOther | |
kWhitespace | |
kCombiner | |
◆ ~Validator()
tesseract::Validator::~Validator |
( |
| ) |
|
|
virtual default |
◆ Validator()
tesseract::Validator::Validator |
( |
ViramaScript |
script, |
|
|
bool |
report_errors |
|
) |
| |
|
inline protected |
◆ Clear()
void tesseract::Validator::Clear |
( |
| ) |
|
|
protected |
◆ CodeOnlyToOutput()
bool tesseract::Validator::CodeOnlyToOutput |
( |
| ) |
|
|
inline protected |
◆ ComputeClassCodes()
void tesseract::Validator::ComputeClassCodes |
( |
const std::vector< char32 > & |
text | ) |
|
|
protected |
◆ ConsumeGraphemeIfValid()
virtual bool tesseract::Validator::ConsumeGraphemeIfValid |
( |
| ) |
|
|
protected pure virtual |
◆ IsSubscriptScript()
bool tesseract::Validator::IsSubscriptScript |
( |
| ) |
const |
|
protected |
◆ IsVedicAccent()
bool tesseract::Validator::IsVedicAccent |
( |
char32 |
unicode | ) |
|
|
static protected |
◆ IsVirama()
bool tesseract::Validator::IsVirama |
( |
char32 |
unicode | ) |
|
|
static protected |
◆ IsZeroWidthMark()
static bool tesseract::Validator::IsZeroWidthMark |
( |
char32 |
ch | ) |
|
|
inline static |
◆ MostFrequentViramaScript()
ViramaScript tesseract::Validator::MostFrequentViramaScript |
( |
const std::vector< char32 > & |
utf32 | ) |
|
|
static protected |
◆ MoveResultsToDest()
void tesseract::Validator::MoveResultsToDest |
( |
GraphemeNormMode |
g_mode, |
|
|
std::vector< std::vector< char32 >> * |
dest |
|
) |
| |
|
protected |
◆ MultiCodePart()
void tesseract::Validator::MultiCodePart |
( |
int |
length | ) |
|
|
inline protected |
◆ ScriptValidator()
std::unique_ptr< Validator > tesseract::Validator::ScriptValidator |
( |
ViramaScript |
script, |
|
|
bool |
report_errors |
|
) |
| |
|
static protected |
◆ UnicodeToCharClass()
virtual CharClass tesseract::Validator::UnicodeToCharClass |
( |
char32 |
ch | ) |
const |
|
protected pure virtual |
◆ UseMultiCode()
bool tesseract::Validator::UseMultiCode |
( |
int |
length | ) |
|
|
inline protected |
◆ ValidateCleanAndSegment()
bool tesseract::Validator::ValidateCleanAndSegment |
( |
GraphemeNormMode |
g_mode, |
|
|
bool |
report_errors, |
|
|
const std::vector< char32 > & |
src, |
|
|
std::vector< std::vector< char32 >> * |
dest |
|
) |
| |
|
static |
◆ ValidateCleanAndSegmentInternal()
bool tesseract::Validator::ValidateCleanAndSegmentInternal |
( |
GraphemeNormMode |
g_mode, |
|
|
const std::vector< char32 > & |
src, |
|
|
std::vector< std::vector< char32 >> * |
dest |
|
) |
| |
|
protected |
◆ codes_
std::vector<IndicPair> tesseract::Validator::codes_ |
|
protected |
◆ codes_used_
int tesseract::Validator::codes_used_ |
|
protected |
◆ kIndicCodePageSize
const int tesseract::Validator::kIndicCodePageSize = 128 |
|
static protected |
◆ kInvalid
const char32 tesseract::Validator::kInvalid = 0xfffd |
|
static |
◆ kJavaneseVirama
const char32 tesseract::Validator::kJavaneseVirama = 0xa9c0 |
|
static protected |
◆ kKhmerVirama
const char32 tesseract::Validator::kKhmerVirama = 0x17d2 |
|
static protected |
◆ kLeftToRightMark
const char32 tesseract::Validator::kLeftToRightMark = 0x200E |
|
static |
◆ kMaxJavaneseUnicode
const char32 tesseract::Validator::kMaxJavaneseUnicode = 0xa9df |
|
static protected |
◆ kMaxSinhalaUnicode
const char32 tesseract::Validator::kMaxSinhalaUnicode = 0xdff |
|
static protected |
◆ kMaxViramaScriptUnicode
const char32 tesseract::Validator::kMaxViramaScriptUnicode = 0x17ff |
|
static protected |
◆ kMinIndicUnicode
const char32 tesseract::Validator::kMinIndicUnicode = 0x900 |
|
static protected |
◆ kMyanmarVirama
const char32 tesseract::Validator::kMyanmarVirama = 0x1039 |
|
static protected |
◆ kRightToLeftMark
const char32 tesseract::Validator::kRightToLeftMark = 0x200F |
|
static |
◆ kSinhalaVirama
const char32 tesseract::Validator::kSinhalaVirama = 0xdca |
|
static protected |
◆ kZeroWidthJoiner
const char32 tesseract::Validator::kZeroWidthJoiner = 0x200D |
|
static |
◆ kZeroWidthNonJoiner
const char32 tesseract::Validator::kZeroWidthNonJoiner = 0x200C |
|
static |
◆ kZeroWidthSpace
const char32 tesseract::Validator::kZeroWidthSpace = 0x200B |
|
static |
◆ output_
std::vector<char32> tesseract::Validator::output_ |
|
protected |
◆ output_used_
int tesseract::Validator::output_used_ |
|
protected |
◆ parts_
std::vector<std::vector<char32> > tesseract::Validator::parts_ |
|
protected |
◆ report_errors_
bool tesseract::Validator::report_errors_ |
|
protected |
◆ script_
ViramaScript tesseract::Validator::script_ |
|
protected |
The documentation for this class was generated from the following files:
- /home/stephane/src/tesseract/src/training/validator.h
- /home/stephane/src/tesseract/src/training/validator.cpp