tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::ValidateMyanmar Class Reference

#include <validate_myanmar.h>

Inheritance diagram for tesseract::ValidateMyanmar:
Collaboration diagram for tesseract::ValidateMyanmar:

Public Member Functions

 ValidateMyanmar (ViramaScript script, bool report_errors)
 
 ~ValidateMyanmar ()
 
- Public Member Functions inherited from tesseract::Validator
virtual ~Validator ()
 

Protected Member Functions

bool ConsumeGraphemeIfValid () override
 
Validator::CharClass UnicodeToCharClass (char32 ch) const override
 
- Protected Member Functions inherited from tesseract::Validator
 Validator (ViramaScript script, bool report_errors)
 
bool ValidateCleanAndSegmentInternal (GraphemeNormMode g_mode, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
 
void MoveResultsToDest (GraphemeNormMode g_mode, std::vector< std::vector< char32 >> *dest)
 
bool IsSubscriptScript () const
 
bool CodeOnlyToOutput ()
 
void MultiCodePart (int length)
 
bool UseMultiCode (int length)
 
void ComputeClassCodes (const std::vector< char32 > &text)
 
void Clear ()
 

Private Member Functions

bool ConsumeSubscriptIfPresent ()
 
bool ConsumeOptionalSignsIfPresent ()
 

Static Private Member Functions

static bool IsMyanmarLetter (char32 ch)
 
static bool IsMyanmarOther (char32 ch)
 

Static Private Attributes

static const char32 kMyanmarAsat = 0x103a
 
static const char32 kMyanmarMedialYa = 0x103b
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::Validator
static bool ValidateCleanAndSegment (GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
 
static bool IsZeroWidthMark (char32 ch)
 
- Static Public Attributes inherited from tesseract::Validator
static const char32 kZeroWidthSpace = 0x200B
 
static const char32 kZeroWidthNonJoiner = 0x200C
 
static const char32 kZeroWidthJoiner = 0x200D
 
static const char32 kLeftToRightMark = 0x200E
 
static const char32 kRightToLeftMark = 0x200F
 
static const char32 kInvalid = 0xfffd
 
- Protected Types inherited from tesseract::Validator
enum  CharClass {
  CharClass::kConsonant = 'C', CharClass::kVowel = 'V', CharClass::kVirama = 'H', CharClass::kMatra = 'M',
  CharClass::kMatraPiece = 'P', CharClass::kVowelModifier = 'D', CharClass::kZeroWidthNonJoiner = 'z', CharClass::kZeroWidthJoiner = 'Z',
  CharClass::kVedicMark = 'v', CharClass::kNukta = 'N', CharClass::kRobat = 'R', CharClass::kOther = 'O',
  CharClass::kWhitespace = ' ', CharClass::kCombiner = 'c'
}
 
using IndicPair = std::pair< CharClass, char32 >
 
- Static Protected Member Functions inherited from tesseract::Validator
static std::unique_ptr< Validator > ScriptValidator (ViramaScript script, bool report_errors)
 
static ViramaScript MostFrequentViramaScript (const std::vector< char32 > &utf32)
 
static bool IsVirama (char32 unicode)
 
static bool IsVedicAccent (char32 unicode)
 
- Protected Attributes inherited from tesseract::Validator
ViramaScript script_
 
std::vector< IndicPair > codes_
 
std::vector< std::vector< char32 > > parts_
 
std::vector< char32 > output_
 
int codes_used_
 
int output_used_
 
bool report_errors_
 
- Static Protected Attributes inherited from tesseract::Validator
static const int kIndicCodePageSize = 128
 
static const char32 kMinIndicUnicode = 0x900
 
static const char32 kMaxSinhalaUnicode = 0xdff
 
static const char32 kMaxViramaScriptUnicode = 0x17ff
 
static const char32 kSinhalaVirama = 0xdca
 
static const char32 kMyanmarVirama = 0x1039
 
static const char32 kKhmerVirama = 0x17d2
 
static const char32 kJavaneseVirama = 0xa9c0
 
static const char32 kMaxJavaneseUnicode = 0xa9df
 

Constructor & Destructor Documentation

◆ ValidateMyanmar()

tesseract::ValidateMyanmar::ValidateMyanmar ( ViramaScript  script,
bool  report_errors 
)
inline

◆ ~ValidateMyanmar()

tesseract::ValidateMyanmar::~ValidateMyanmar ( )
inline

Member Function Documentation

◆ ConsumeGraphemeIfValid()

bool tesseract::ValidateMyanmar::ConsumeGraphemeIfValid ( )
override protected virtual

Implements tesseract::Validator.

◆ ConsumeOptionalSignsIfPresent()

bool tesseract::ValidateMyanmar::ConsumeOptionalSignsIfPresent ( )
private

◆ ConsumeSubscriptIfPresent()

bool tesseract::ValidateMyanmar::ConsumeSubscriptIfPresent ( )
private

◆ IsMyanmarLetter()

bool tesseract::ValidateMyanmar::IsMyanmarLetter ( char32  ch)
static private

◆ IsMyanmarOther()

bool tesseract::ValidateMyanmar::IsMyanmarOther ( char32  ch)
static private

◆ UnicodeToCharClass()

Validator::CharClass tesseract::ValidateMyanmar::UnicodeToCharClass ( char32  ch) const
override protected virtual

Implements tesseract::Validator.

Member Data Documentation

◆ kMyanmarAsat

const char32 tesseract::ValidateMyanmar::kMyanmarAsat = 0x103a
static private

◆ kMyanmarMedialYa

const char32 tesseract::ValidateMyanmar::kMyanmarMedialYa = 0x103b
static private

The documentation for this class was generated from the following files: