tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::TrainingSampleSet Class Reference

#include <trainingsampleset.h>

Collaboration diagram for tesseract::TrainingSampleSet:

Classes

struct  FontClassDistance
 
struct  FontClassInfo
 

Public Member Functions

 TrainingSampleSet (const FontInfoTable &fontinfo_table)
 
 ~TrainingSampleSet ()
 
bool Serialize (FILE *fp) const
 
bool DeSerialize (bool swap, FILE *fp)
 
int num_samples () const
 
int num_raw_samples () const
 
int NumFonts () const
 
const UNICHARSET & unicharset () const
 
int charsetsize () const
 
const FontInfoTable & fontinfo_table () const
 
void LoadUnicharset (const char *filename)
 
int AddSample (const char *unichar, TrainingSample *sample)
 
void AddSample (int unichar_id, TrainingSample *sample)
 
int NumClassSamples (int font_id, int class_id, bool randomize) const
 
const TrainingSample * GetSample (int index) const
 
const TrainingSample * GetSample (int font_id, int class_id, int index) const
 
TrainingSample * MutableSample (int font_id, int class_id, int index)
 
STRING SampleToString (const TrainingSample &sample) const
 
const BitVector & GetCloudFeatures (int font_id, int class_id) const
 
const GenericVector< int > & GetCanonicalFeatures (int font_id, int class_id) const
 
float UnicharDistance (const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map)
 
float ClusterDistance (int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map)
 
float ComputeClusterDistance (int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) const
 
int ReliablySeparable (int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map, bool thorough) const
 
int GlobalSampleIndex (int font_id, int class_id, int index) const
 
const TrainingSample * GetCanonicalSample (int font_id, int class_id) const
 
float GetCanonicalDist (int font_id, int class_id) const
 
TrainingSample * mutable_sample (int index)
 
TrainingSample * extract_sample (int index)
 
void IndexFeatures (const IntFeatureSpace &feature_space)
 
void KillSample (TrainingSample *sample)
 
void DeleteDeadSamples ()
 
bool DeleteableSample (const TrainingSample *sample)
 
void OrganizeByFontAndClass ()
 
void SetupFontIdMap ()
 
void ComputeCanonicalSamples (const IntFeatureMap &map, bool debug)
 
void ReplicateAndRandomizeSamples ()
 
void ComputeCanonicalFeatures ()
 
void ComputeCloudFeatures (int feature_space_size)
 
void AddAllFontsForClass (int class_id, Shape *shape) const
 
void DisplaySamplesWithFeature (int f_index, const Shape &shape, const IntFeatureSpace &feature_space, ScrollView::Color color, ScrollView *window) const
 

Private Attributes

PointerVector< TrainingSample > samples_
 
int num_raw_samples_
 
UNICHARSET unicharset_
 
int unicharset_size_
 
IndexMapBiDi font_id_map_
 
GENERIC_2D_ARRAY< FontClassInfo > * font_class_array_
 
const FontInfoTable & fontinfo_table_
 

Constructor & Destructor Documentation

◆ TrainingSampleSet()

tesseract::TrainingSampleSet::TrainingSampleSet ( const FontInfoTable & fontinfo_table)
explicit

◆ ~TrainingSampleSet()

tesseract::TrainingSampleSet::~TrainingSampleSet ( )

Member Function Documentation

◆ AddAllFontsForClass()

void tesseract::TrainingSampleSet::AddAllFontsForClass ( int  class_id,
Shape * shape 
) const

◆ AddSample() [1/2]

int tesseract::TrainingSampleSet::AddSample ( const char *  unichar,
TrainingSample * sample 
)

◆ AddSample() [2/2]

void tesseract::TrainingSampleSet::AddSample ( int  unichar_id,
TrainingSample * sample 
)

◆ charsetsize()

int tesseract::TrainingSampleSet::charsetsize ( ) const
inline

◆ ClusterDistance()

float tesseract::TrainingSampleSet::ClusterDistance ( int  font_id1,
int  class_id1,
int  font_id2,
int  class_id2,
const IntFeatureMap & feature_map 
)

◆ ComputeCanonicalFeatures()

void tesseract::TrainingSampleSet::ComputeCanonicalFeatures ( )

◆ ComputeCanonicalSamples()

void tesseract::TrainingSampleSet::ComputeCanonicalSamples ( const IntFeatureMap & map,
bool  debug 
)

◆ ComputeCloudFeatures()

void tesseract::TrainingSampleSet::ComputeCloudFeatures ( int  feature_space_size)

◆ ComputeClusterDistance()

float tesseract::TrainingSampleSet::ComputeClusterDistance ( int  font_id1,
int  class_id1,
int  font_id2,
int  class_id2,
const IntFeatureMap & feature_map 
) const

◆ DeleteableSample()

bool tesseract::TrainingSampleSet::DeleteableSample ( const TrainingSample * sample)

◆ DeleteDeadSamples()

void tesseract::TrainingSampleSet::DeleteDeadSamples ( )

◆ DeSerialize()

bool tesseract::TrainingSampleSet::DeSerialize ( bool  swap,
FILE *  fp 
)

◆ DisplaySamplesWithFeature()

void tesseract::TrainingSampleSet::DisplaySamplesWithFeature ( int  f_index,
const Shape & shape,
const IntFeatureSpace & feature_space,
ScrollView::Color  color,
ScrollView * window 
) const

◆ extract_sample()

TrainingSample* tesseract::TrainingSampleSet::extract_sample ( int  index)
inline

◆ fontinfo_table()

const FontInfoTable& tesseract::TrainingSampleSet::fontinfo_table ( ) const
inline

◆ GetCanonicalDist()

float tesseract::TrainingSampleSet::GetCanonicalDist ( int  font_id,
int  class_id 
) const

◆ GetCanonicalFeatures()

const GenericVector< int > & tesseract::TrainingSampleSet::GetCanonicalFeatures ( int  font_id,
int  class_id 
) const

◆ GetCanonicalSample()

const TrainingSample * tesseract::TrainingSampleSet::GetCanonicalSample ( int  font_id,
int  class_id 
) const

◆ GetCloudFeatures()

const BitVector & tesseract::TrainingSampleSet::GetCloudFeatures ( int  font_id,
int  class_id 
) const

◆ GetSample() [1/2]

const TrainingSample * tesseract::TrainingSampleSet::GetSample ( int  index) const

◆ GetSample() [2/2]

const TrainingSample * tesseract::TrainingSampleSet::GetSample ( int  font_id,
int  class_id,
int  index 
) const

◆ GlobalSampleIndex()

int tesseract::TrainingSampleSet::GlobalSampleIndex ( int  font_id,
int  class_id,
int  index 
) const

◆ IndexFeatures()

void tesseract::TrainingSampleSet::IndexFeatures ( const IntFeatureSpace & feature_space)

◆ KillSample()

void tesseract::TrainingSampleSet::KillSample ( TrainingSample * sample)

◆ LoadUnicharset()

void tesseract::TrainingSampleSet::LoadUnicharset ( const char *  filename)

◆ mutable_sample()

TrainingSample* tesseract::TrainingSampleSet::mutable_sample ( int  index)
inline

◆ MutableSample()

TrainingSample * tesseract::TrainingSampleSet::MutableSample ( int  font_id,
int  class_id,
int  index 
)

◆ num_raw_samples()

int tesseract::TrainingSampleSet::num_raw_samples ( ) const
inline

◆ num_samples()

int tesseract::TrainingSampleSet::num_samples ( ) const
inline

◆ NumClassSamples()

int tesseract::TrainingSampleSet::NumClassSamples ( int  font_id,
int  class_id,
bool  randomize 
) const

◆ NumFonts()

int tesseract::TrainingSampleSet::NumFonts ( ) const
inline

◆ OrganizeByFontAndClass()

void tesseract::TrainingSampleSet::OrganizeByFontAndClass ( )

◆ ReliablySeparable()

int tesseract::TrainingSampleSet::ReliablySeparable ( int  font_id1,
int  class_id1,
int  font_id2,
int  class_id2,
const IntFeatureMap & feature_map,
bool  thorough 
) const

◆ ReplicateAndRandomizeSamples()

void tesseract::TrainingSampleSet::ReplicateAndRandomizeSamples ( )

◆ SampleToString()

STRING tesseract::TrainingSampleSet::SampleToString ( const TrainingSample & sample) const

◆ Serialize()

bool tesseract::TrainingSampleSet::Serialize ( FILE *  fp) const

◆ SetupFontIdMap()

void tesseract::TrainingSampleSet::SetupFontIdMap ( )

◆ UnicharDistance()

float tesseract::TrainingSampleSet::UnicharDistance ( const UnicharAndFonts & uf1,
const UnicharAndFonts & uf2,
bool  matched_fonts,
const IntFeatureMap & feature_map 
)

◆ unicharset()

const UNICHARSET& tesseract::TrainingSampleSet::unicharset ( ) const
inline

Member Data Documentation

◆ font_class_array_

GENERIC_2D_ARRAY<FontClassInfo>* tesseract::TrainingSampleSet::font_class_array_
private

◆ font_id_map_

IndexMapBiDi tesseract::TrainingSampleSet::font_id_map_
private

◆ fontinfo_table_

const FontInfoTable& tesseract::TrainingSampleSet::fontinfo_table_
private

◆ num_raw_samples_

int tesseract::TrainingSampleSet::num_raw_samples_
private

◆ samples_

PointerVector<TrainingSample> tesseract::TrainingSampleSet::samples_
private

◆ unicharset_

UNICHARSET tesseract::TrainingSampleSet::unicharset_
private

◆ unicharset_size_

int tesseract::TrainingSampleSet::unicharset_size_
private

The documentation for this class was generated from the following files: