tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
unicharset_training_utils.h
1 // File: unicharset_training_utils.h
3 // Description: Training utilities for UNICHARSET.
4 // Author: Ray Smith
5 // Created: Fri Oct 17 17:14:01 PDT 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
21 #define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
22 
23 #include <string>
24 
25 #include "platform.h"
26 
// Forward declarations. This header only names UNICHARSET through pointers
// and references, so its full definition is not required here.
// NOTE(review): no declaration visible in this header refers to STATS;
// confirm whether this forward declaration is still needed.
27 class STATS;
28 class UNICHARSET;
29 
30 namespace tesseract {
31 
32 // Helper sets the character attribute properties and sets up the script table
// of the given unicharset, which is written to through the pointer.
33 // Does not set tops and bottoms.
//   report_errors - NOTE(review): presumably enables reporting of problems
//                   found while setting the properties; confirm in the .cpp.
//   decompose     - the two-argument overload below passes false and calls
//                   that "compose", so true presumably selects decomposed
//                   forms; confirm in the .cpp.
//   unicharset    - the character set whose properties are set.
34 void SetupBasicProperties(bool report_errors, bool decompose,
35  UNICHARSET* unicharset);
36 // Convenience overload: forwards to the three-argument SetupBasicProperties
// with decompose fixed to false.
// Default behavior is to compose, until it is proven that decomposed benefits
37 // at least one language.
38 inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) {
// Pass false for decompose, i.e. the compose default described above.
39  SetupBasicProperties(report_errors, false, unicharset);
40 }
41 // Helper sets the properties from universal script unicharsets, if found.
//   script_dir - directory in which the script unicharsets are looked for.
//   unicharset - the character set whose properties are set (written to
//                through the pointer).
// NOTE(review): what happens when no script unicharset is found is not
// visible from this header; confirm in the .cpp.
42 void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset);
43 // Helper gets the combined x-heights string.
//   script_dir - directory passed through for locating script data
//                (NOTE(review): exact use is not visible here; confirm).
//   unicharset - the character set to read; taken by const reference, so it
//                is not modified.
// Returns the x-heights string by value. The format of the string is not
// described in this header.
44 std::string GetXheightString(const std::string& script_dir, const UNICHARSET& unicharset);
45 
46 // Helper to set the properties for the unicharset in input_unicharset_file;
// the result is written to output_unicharset_file.
47 // If an appropriate script unicharset can be found in the
48 // script_dir directory, then the tops and bottoms are expanded using the
49 // script unicharset.
50 // If non-empty, xheight data for the fonts are written to
// output_xheights_file.
// NOTE(review): "non-empty" presumably refers to the output_xheights_file
// name rather than to the xheight data; confirm in the .cpp.
51 void SetPropertiesForInputFile(const std::string& script_dir,
52  const std::string& input_unicharset_file,
53  const std::string& output_unicharset_file,
54  const std::string& output_xheights_file);
55 
56 } // namespace tesseract.
57 
58 #endif // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
void SetPropertiesForInputFile(const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)
Definition: unicharset_training_utils.cpp:183
Definition: unicharset.h:146
Definition: baseapi.cpp:94
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
Definition: unicharset_training_utils.cpp:40
std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset)
Definition: unicharset_training_utils.cpp:164
Definition: statistc.h:33
void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset)
Definition: unicharset_training_utils.cpp:143