22 #ifndef TESSERACT_TRAINING_MASTERTRAINER_H_ 23 #define TESSERACT_TRAINING_MASTERTRAINER_H_ 32 #include "errorcounter.h" 35 #include "indexmapbidi.h" 36 #include "intfeaturespace.h" 37 #include "intfeaturemap.h" 38 #include "intmatcher.h" 40 #include "shapetable.h" 41 #include "trainingsample.h" 42 #include "trainingsampleset.h" 43 #include "unicharset.h" 47 class ShapeClassifier;
72 bool replicate_samples,
int debug_level);
79 void LoadUnicharset(
const char* filename);
84 feature_map_.
Init(fs);
90 void ReadTrainingSamples(
const char* page_name,
96 void AddSample(
bool verification,
const char* unichar_str,
102 void LoadPageImages(
const char* filename);
109 void PostLoadCleanup();
114 void PreTrainingSetup();
118 void SetupMasterShapes();
138 void ReplicateAndRandomizeSamplesIfRequired();
142 bool LoadFontInfo(
const char* filename);
146 bool LoadXHeights(
const char* filename);
150 bool AddSpacingInfo(
const char *filename);
154 int GetFontInfoId(
const char* font_name);
158 int GetBestMatchingFontInfoId(
const char* filename);
163 return tr_filenames_[index];
167 void SetupFlatShapeTable(
ShapeTable* shape_table);
173 int shape_id,
int* num_samples);
179 void WriteInttempAndPFFMTable(
const UNICHARSET& unicharset,
183 const char* inttemp_file,
184 const char* pffmtable_file);
187 return samples_.unicharset();
193 return master_shapes_;
198 void DebugCanonical(
const char* unichar_str1,
const char* unichar_str2);
199 #ifndef GRAPHICS_DISABLED 210 void DisplaySamples(
const char* unichar_str1,
int cloud_font,
211 const char* unichar_str2,
int canonical_font);
212 #endif // GRAPHICS_DISABLED 214 void TestClassifierVOld(
bool replicate_samples,
220 void TestClassifierOnSamples(
CountTypes error_mode,
222 bool replicate_samples,
240 bool replicate_samples,
248 float ShapeDistance(
const ShapeTable& shapes,
int s1,
int s2);
253 void ReplaceFragmentedSamples();
260 void ClusterShapes(
int min_shapes,
int max_shape_unichars,
309 #endif // TESSERACT_TRAINING_MASTERTRAINER_H_ Definition: mastertrainer.h:69
ShapeDist(int s1, int s2, float dist)
Definition: mastertrainer.h:52
ShapeTable master_shapes_
Definition: mastertrainer.h:273
int * fragments_
Definition: mastertrainer.h:291
int prev_unichar_id_
Definition: mastertrainer.h:293
Definition: intfeaturespace.h:38
Definition: mastertrainer.h:50
GenericVector< STRING > tr_filenames_
Definition: mastertrainer.h:304
const ShapeTable & master_shapes() const
Definition: mastertrainer.h:192
TrainingSampleSet verify_samples_
Definition: mastertrainer.h:271
Definition: trainingsampleset.h:43
int charsetsize_
Definition: mastertrainer.h:284
TrainingSampleSet samples_
Definition: mastertrainer.h:269
const STRING & GetTRFileName(int index) const
Definition: mastertrainer.h:162
Definition: unicharset.h:146
GenericVector< int32_t > xheights_
Definition: mastertrainer.h:279
ShapeTable flat_shapes_
Definition: mastertrainer.h:275
int debug_level_
Definition: mastertrainer.h:295
Definition: intfeaturemap.h:48
bool enable_shape_analysis_
Definition: mastertrainer.h:287
Definition: baseapi.cpp:94
TrainingSampleSet junk_samples_
Definition: mastertrainer.h:270
int shape2
Definition: mastertrainer.h:61
int shape1
Definition: mastertrainer.h:60
IntFeatureMap feature_map_
Definition: mastertrainer.h:298
void SetFeatureSpace(const IntFeatureSpace &fs)
Definition: mastertrainer.h:82
float distance
Definition: mastertrainer.h:62
bool Serialize(FILE *fp, const char *data, size_t n)
Definition: serialis.cpp:59
Definition: featdefs.h:46
IntFeatureSpace feature_space_
Definition: mastertrainer.h:268
TrainingSampleSet * GetSamples()
Definition: mastertrainer.h:189
void Init(uint8_t xbuckets, uint8_t ybuckets, uint8_t thetabuckets)
Definition: intfeaturespace.cpp:29
Definition: shapetable.h:262
Definition: fontinfo.h:146
Definition: shapeclassifier.h:43
NormalizationMode norm_mode_
Definition: mastertrainer.h:264
NormalizationMode
Definition: normalis.h:42
bool operator<(const ShapeDist &other) const
Definition: mastertrainer.h:56
bool enable_replication_
Definition: mastertrainer.h:289
FontInfoTable fontinfo_table_
Definition: mastertrainer.h:277
GenericVector< Pix * > page_images_
Definition: mastertrainer.h:302
const UNICHARSET & unicharset() const
Definition: mastertrainer.h:186
Definition: trainingsample.h:53
CountTypes
Definition: errorcounter.h:69
ShapeDist()
Definition: mastertrainer.h:51
UNICHARSET unicharset_
Definition: mastertrainer.h:266