16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLESET_H_ 17 #define TESSERACT_TRAINING_TRAININGSAMPLESET_H_ 19 #include "bitvector.h" 20 #include "genericvector.h" 21 #include "indexmapbidi.h" 23 #include "shapetable.h" 24 #include "trainingsample.h" 33 class IntFeatureSpace;
35 struct UnicharAndFonts;
125 int font_id2,
int class_id2,
130 int font_id2,
int class_id2,
144 int font_id2,
int class_id2,
146 bool thorough)
const;
289 #endif // TRAININGSAMPLESETSET_H_ const FontInfoTable & fontinfo_table_
Definition: trainingsampleset.h:283
int num_raw_samples_
Definition: trainingsampleset.h:268
int unichar_id
Definition: trainingsampleset.h:226
virtual int SparseSize() const
Definition: indexmapbidi.h:142
Definition: intfeaturespace.h:38
bool DeleteableSample(const TrainingSample *sample)
Definition: trainingsampleset.cpp:506
TrainingSampleSet(const FontInfoTable &fontinfo_table)
Definition: trainingsampleset.cpp:70
int num_samples() const
Definition: trainingsampleset.h:55
void KillSample(TrainingSample *sample)
Definition: trainingsampleset.cpp:492
void LoadUnicharset(const char *filename)
Definition: trainingsampleset.cpp:113
void IndexFeatures(const IntFeatureSpace &feature_space)
Definition: trainingsampleset.cpp:485
bool DeSerialize(bool swap, FILE *fp)
Definition: trainingsampleset.cpp:94
int GlobalSampleIndex(int font_id, int class_id, int index) const
Definition: trainingsampleset.cpp:452
Definition: shapetable.h:185
float distance
Definition: trainingsampleset.h:228
float ClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map)
Definition: trainingsampleset.cpp:296
void OrganizeByFontAndClass()
Definition: trainingsampleset.cpp:511
int num_raw_samples() const
Definition: trainingsampleset.h:58
Definition: trainingsampleset.h:43
int charsetsize() const
Definition: trainingsampleset.h:67
const TrainingSample * GetSample(int index) const
Definition: trainingsampleset.cpp:174
float GetCanonicalDist(int font_id, int class_id) const
Definition: trainingsampleset.cpp:474
void SetupFontIdMap()
Definition: trainingsampleset.cpp:548
void ComputeCloudFeatures(int feature_space_size)
Definition: trainingsampleset.cpp:711
UNICHARSET unicharset_
Definition: trainingsampleset.h:270
Definition: intsimdmatrix.h:25
Definition: unicharset.h:146
STRING SampleToString(const TrainingSample &sample) const
Definition: trainingsampleset.cpp:202
Definition: shapetable.h:160
float ComputeClusterDistance(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map) const
Definition: trainingsampleset.cpp:367
int32_t canonical_sample
Definition: trainingsampleset.h:243
int NumFonts() const
Definition: trainingsampleset.h:61
Definition: intfeaturemap.h:48
GenericVector< FontClassDistance > distance_cache
Definition: trainingsampleset.h:263
Definition: baseapi.cpp:94
const GenericVector< int > & GetCanonicalFeatures(int font_id, int class_id) const
Definition: trainingsampleset.cpp:219
void ComputeCanonicalFeatures()
Definition: trainingsampleset.cpp:693
int font_id
Definition: trainingsampleset.h:227
GenericVector< float > font_distance_cache
Definition: trainingsampleset.h:258
IndexMapBiDi font_id_map_
Definition: trainingsampleset.h:276
void ReplicateAndRandomizeSamples()
Definition: trainingsampleset.cpp:664
int NumClassSamples(int font_id, int class_id, bool randomize) const
Definition: trainingsampleset.cpp:156
~TrainingSampleSet()
Definition: trainingsampleset.cpp:75
const FontInfoTable & fontinfo_table() const
Definition: trainingsampleset.h:70
TrainingSample * mutable_sample(int index)
Definition: trainingsampleset.h:161
int ReliablySeparable(int font_id1, int class_id1, int font_id2, int class_id2, const IntFeatureMap &feature_map, bool thorough) const
Definition: trainingsampleset.cpp:413
Definition: indexmapbidi.h:102
float UnicharDistance(const UnicharAndFonts &uf1, const UnicharAndFonts &uf2, bool matched_fonts, const IntFeatureMap &feature_map)
Definition: trainingsampleset.cpp:230
GenericVector< int > canonical_features
Definition: trainingsampleset.h:251
const UNICHARSET & unicharset() const
Definition: trainingsampleset.h:64
const BitVector & GetCloudFeatures(int font_id, int class_id) const
Definition: trainingsampleset.cpp:211
Definition: bitvector.h:33
void DeleteDeadSamples()
Definition: trainingsampleset.cpp:497
GenericVector< int32_t > samples
Definition: trainingsampleset.h:247
Definition: fontinfo.h:146
GenericVector< float > unichar_distance_cache
Definition: trainingsampleset.h:260
PointerVector< TrainingSample > samples_
Definition: trainingsampleset.h:266
Definition: trainingsampleset.h:231
void DisplaySamplesWithFeature(int f_index, const Shape &shape, const IntFeatureSpace &feature_space, ScrollView::Color color, ScrollView *window) const
Definition: trainingsampleset.cpp:742
TrainingSample * extract_sample(int index)
Definition: trainingsampleset.h:165
bool Serialize(FILE *fp) const
Definition: trainingsampleset.cpp:80
Definition: genericvector.h:457
float canonical_dist
Definition: trainingsampleset.h:245
int32_t num_raw_samples
Definition: trainingsampleset.h:241
Definition: trainingsampleset.h:225
void ComputeCanonicalSamples(const IntFeatureMap &map, bool debug)
Definition: trainingsampleset.cpp:568
int unicharset_size_
Definition: trainingsampleset.h:272
void AddAllFontsForClass(int class_id, Shape *shape) const
Definition: trainingsampleset.cpp:733
BitVector cloud_features
Definition: trainingsampleset.h:253
Definition: trainingsample.h:53
GENERIC_2D_ARRAY< FontClassInfo > * font_class_array_
Definition: trainingsampleset.h:279
TrainingSample * MutableSample(int font_id, int class_id, int index)
Definition: trainingsampleset.cpp:191
const TrainingSample * GetCanonicalSample(int font_id, int class_id) const
Definition: trainingsampleset.cpp:462
int AddSample(const char *unichar, TrainingSample *sample)
Definition: trainingsampleset.cpp:129