Typedefs | |
typedef int(Dict::* | DictFunc) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const |
typedef double(Dict::* | ProbabilityInContextFunc) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
typedef float(Dict::* | ParamsModelClassifyFunc) (const char *lang, void *path) |
typedef void(Wordrec::* | FillLatticeFunc) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
typedef TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > | TruthCallback |
using | SetOfModels = GenericVectorEqEq< const ParagraphModel * > |
typedef void(Tesseract::* | WordRecognizer) (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) |
using | ParamsTrainingHypothesisList = GenericVector< ParamsTrainingHypothesis > |
using | UnicharIdVector = GenericVector< UNICHAR_ID > |
using | UnicharAmbigsVector = GenericVector< AmbigSpec_LIST * > |
typedef bool(* | FileReader) (const STRING &filename, GenericVector< char > *data) |
typedef bool(* | FileWriter) (const GenericVector< char > &data, const STRING &filename) |
using | IntKDPair = KDPairInc< int, int > |
using | char32 = signed int |
using | RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > >> |
using | RSCounts = std::unordered_map< int, int > |
using | ShapeQueue = GenericHeap< ShapeQueueEntry > |
using | NodeChildVector = GenericVector< NodeChild > |
using | SuccessorList = GenericVector< int > |
using | SuccessorListsVector = GenericVector< SuccessorList * > |
using | DawgVector = GenericVector< Dawg * > |
typedef TessResultCallback2< bool, const GenericVector< char > &, LSTMTrainer * > * | CheckPointReader |
typedef TessResultCallback3< bool, SerializeAmount, const LSTMTrainer *, GenericVector< char > * > * | CheckPointWriter |
typedef TessResultCallback4< STRING, int, const double *, const TessdataManager &, int > * | TestCallback |
using | RecodePair = KDPairInc< double, RecodeNode > |
using | RecodeHeap = GenericHeap< RecodePair > |
using | BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > |
using | ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > |
using | PartSetVector = GenericVector< ColPartitionSet * > |
using | WidthCallback = TessResultCallback1< bool, int > |
using | ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT > |
using | ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT > |
using | WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > |
using | WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT > |
using | LigHash = std::unordered_map< std::string, std::string, StringHash > |
using | PainPointHeap = GenericHeap< MatrixCoordPair > |
using | LanguageModelFlagsType = unsigned char |
Used for expressing various language model flags. | |
Functions | |
static void | addAvailableLanguages (const STRING &datadir, const STRING &base, GenericVector< STRING > *langs) |
static int | CompareSTRING (const void *p1, const void *p2) |
static tesseract::Orientation | GetBlockTextOrientation (const PageIterator *it) |
static void | AddBaselineCoordsTohOCR (const PageIterator *it, PageIteratorLevel level, STRING *hocr_str) |
static void | AddIdTohOCR (STRING *hocr_str, const std::string base, int num1, int num2) |
static void | AddIdTohOCR (STRING *hocr_str, const std::string base, int num1, int num2, int num3) |
static void | AddBoxTohOCR (const ResultIterator *it, PageIteratorLevel level, STRING *hocr_str) |
static void | AddBoxToTSV (const PageIterator *it, PageIteratorLevel level, STRING *hocr_str) |
STRING | HOcrEscape (const char *text) |
static TBLOB * | make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix) |
static void | add_space (TESS_CHAR_IT *it) |
static float | rating_to_cost (float rating) |
static void | extract_result (TESS_CHAR_IT *out, PAGE_RES *page_res) |
static double | prec (double x) |
static long | dist2 (int x1, int y1, int x2, int y2) |
static void | GetWordBaseline (int writing_direction, int ppi, int height, int word_x1, int word_y1, int word_x2, int word_y2, int line_x1, int line_y1, int line_x2, int line_y2, double *x0, double *y0, double *length) |
static void | AffineMatrix (int writing_direction, int line_x1, int line_y1, int line_x2, int line_y2, double *a, double *b, double *c, double *d) |
static void | ClipBaseline (int ppi, int x1, int y1, int x2, int y2, int *line_x1, int *line_y1, int *line_x2, int *line_y2) |
static bool | CodepointToUtf16be (int code, char utf16[kMaxBytesPerCodepoint]) |
double | DotProductAVX (const double *u, const double *v, int n) |
double | DotProductSSE (const double *u, const double *v, int n) |
int32_t | IntDotProductSSE (const int8_t *u, const int8_t *v, int n) |
static void | clear_any_old_text (BLOCK_LIST *block_list) |
static double | MedianXHeight (BLOCK_LIST *block_list) |
static double | BoxMissMetric (const TBOX &box1, const TBOX &box2) |
static void | WordGap (const PointerVector< WERD_RES > &words, int index, int *right, int *next_left) |
static void | EvaluateWordSpan (const PointerVector< WERD_RES > &words, int first_index, int end_index, float *rating, float *certainty, bool *bad, bool *valid_permuter) |
static int | SelectBestWords (double rating_ratio, double certainty_margin, bool debug, PointerVector< WERD_RES > *new_words, PointerVector< WERD_RES > *best_words) |
static bool | WordsAcceptable (const PointerVector< WERD_RES > &words) |
static BLOB_CHOICE * | FindBestMatchingChoice (UNICHAR_ID char_id, WERD_RES *word_res) |
static void | CorrectRepcharChoices (BLOB_CHOICE *blob_choice, WERD_RES *word_res) |
static void | find_modal_font (STATS *fonts, int16_t *font_out, int8_t *font_count) |
static int | SortCPByTopReverse (const void *p1, const void *p2) |
static int | SortCPByBottom (const void *p1, const void *p2) |
static int | SortCPByHeight (const void *p1, const void *p2) |
bool | IsTextOrEquationType (PolyBlockType type) |
bool | IsLeftIndented (const EquationDetect::IndentType type) |
bool | IsRightIndented (const EquationDetect::IndentType type) |
static int | c_blob_comparator (const void *blob1p, const void *blob2p) |
static Pix * | RemoveEnclosingCircle (Pix *pixs) |
static void | AddAllScriptsConverted (const UNICHARSET &sid_set, const UNICHARSET &osd_set, GenericVector< int > *allowed_ids) |
static bool | LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j) |
static int | Epsilon (int space_pix) |
static bool | AcceptableRowArgs (int debug_level, int min_num_rows, const char *function_name, const GenericVector< RowScratchRegisters > *rows, int row_start, int row_end) |
static STRING | StrOf (int num) |
static void | PrintTable (const GenericVector< GenericVector< STRING > > &rows, const STRING &colsep) |
static STRING | RtlEmbed (const STRING &word, bool rtlify) |
static void | PrintDetectorState (const ParagraphTheory &theory, const GenericVector< RowScratchRegisters > &rows) |
static void | DebugDump (bool should_print, const STRING &phase, const ParagraphTheory &theory, const GenericVector< RowScratchRegisters > &rows) |
static void | PrintRowRange (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end) |
static bool | IsLatinLetter (int ch) |
static bool | IsDigitLike (int ch) |
static bool | IsOpeningPunct (int ch) |
static bool | IsTerminalPunct (int ch) |
static const char * | SkipChars (const char *str, const char *toskip) |
static const char * | SkipChars (const char *str, bool(*skip)(int)) |
static const char * | SkipOne (const char *str, const char *toskip) |
static bool | LikelyListNumeral (const STRING &word) |
static bool | LikelyListMark (const STRING &word) |
bool | AsciiLikelyListItem (const STRING &word) |
int | UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos) |
static bool | LikelyListMarkUnicode (int ch) |
static bool | UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd) |
void | LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea) |
void | RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea) |
static int | ClosestCluster (const GenericVector< Cluster > &clusters, int value) |
static void | CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs) |
static void | MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold) |
static void | GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory) |
static void | GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
bool | ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model) |
bool | ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model) |
bool | CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model) |
static void | DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory) |
static void | DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows) |
void | RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile) |
int | InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end) |
bool | FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification) |
bool | FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after) |
static bool | TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after) |
static ParagraphModel | InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent) |
static ParagraphModel | ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance) |
bool | RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model) |
static void | MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end) |
static void | ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory) |
static void | StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
static void | SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
static void | ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA *> *row_owners, ParagraphTheory *theory) |
static bool | RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row) |
static void | LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end) |
void | CanonicalizeDetectionResults (GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs) |
void | DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models) |
static void | InitializeTextAndBoxesPreRecognition (const MutableIterator &it, RowInfo *info) |
static void | InitializeRowInfo (bool after_recognition, const MutableIterator &it, RowInfo *info) |
void | DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel *> *models) |
bool | StrongModel (const ParagraphModel *model) |
static bool | read_t (PAGE_RES_IT *page_res_it, TBOX *tbox) |
static void | PrintPath (int length, const BLOB_CHOICE **blob_choices, const UNICHARSET &unicharset, const char *label, FILE *output_file) |
static void | PrintMatrixPaths (int col, int dim, const MATRIX &ratings, int length, const BLOB_CHOICE **blob_choices, const UNICHARSET &unicharset, const char *label, FILE *output_file) |
static void | PrintScriptDirs (const GenericVector< StrongScriptDirection > &dirs) |
static void | YOutlierPieces (WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers) |
static bool | IsStrInList (const STRING &str, const GenericVector< STRING > &str_list) |
static void | CollectFonts (const UnicityTable< FontInfo > &new_fonts, UnicityTable< FontInfo > *all_fonts) |
static void | AssignIds (const UnicityTable< FontInfo > &all_fonts, UnicityTable< FontInfo > *lang_fonts) |
bool | CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2) |
bool | CompareFontSet (const FontSet &fs1, const FontSet &fs2) |
void | FontInfoDeleteCallback (FontInfo f) |
void | FontSetDeleteCallback (FontSet fs) |
bool | read_info (TFile *f, FontInfo *fi) |
bool | write_info (FILE *f, const FontInfo &fi) |
bool | read_spacing_info (TFile *f, FontInfo *fi) |
bool | write_spacing_info (FILE *f, const FontInfo &fi) |
bool | read_set (TFile *f, FontSet *fs) |
bool | write_set (FILE *f, const FontSet &fs) |
void * | ReCachePagesFunc (void *data) |
int | OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values) |
void | HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram) |
int | OtsuStats (const int *histogram, int *H_out, int *omega0_out) |
int | ParamsTrainingFeatureByName (const char *name) |
bool | PSM_OSD_ENABLED (int pageseg_mode) |
bool | PSM_ORIENTATION_ENABLED (int pageseg_mode) |
bool | PSM_COL_FIND_ENABLED (int pageseg_mode) |
bool | PSM_SPARSE (int pageseg_mode) |
bool | PSM_BLOCK_FIND_ENABLED (int pageseg_mode) |
bool | PSM_LINE_FIND_ENABLED (int pageseg_mode) |
bool | PSM_WORD_FIND_ENABLED (int pageseg_mode) |
const char * | ScriptPosToString (enum ScriptPos script_pos) |
void | AmbigSpec_zapper (ELIST_LINK *link) |
bool | LoadDataFromFile (const char *filename, GenericVector< char > *data) |
bool | LoadDataFromFile (const STRING &filename, GenericVector< char > *data) |
bool | SaveDataToFile (const GenericVector< char > &data, const STRING &filename) |
bool | LoadFileLinesToStrings (const STRING &filename, GenericVector< STRING > *lines) |
template<typename T > | |
bool | cmp_eq (T const &t1, T const &t2) |
template<typename T > | |
int | sort_cmp (const void *t1, const void *t2) |
template<typename T > | |
int | sort_ptr_cmp (const void *t1, const void *t2) |
bool | DeSerialize (FILE *fp, char *data, size_t n) |
bool | DeSerialize (FILE *fp, float *data, size_t n) |
bool | DeSerialize (FILE *fp, int8_t *data, size_t n) |
bool | DeSerialize (FILE *fp, int16_t *data, size_t n) |
bool | DeSerialize (FILE *fp, int32_t *data, size_t n) |
bool | DeSerialize (FILE *fp, uint8_t *data, size_t n) |
bool | DeSerialize (FILE *fp, uint16_t *data, size_t n) |
bool | DeSerialize (FILE *fp, uint32_t *data, size_t n) |
bool | Serialize (FILE *fp, const char *data, size_t n) |
bool | Serialize (FILE *fp, const float *data, size_t n) |
bool | Serialize (FILE *fp, const int8_t *data, size_t n) |
bool | Serialize (FILE *fp, const int16_t *data, size_t n) |
bool | Serialize (FILE *fp, const int32_t *data, size_t n) |
bool | Serialize (FILE *fp, const uint8_t *data, size_t n) |
bool | Serialize (FILE *fp, const uint16_t *data, size_t n) |
bool | Serialize (FILE *fp, const uint32_t *data, size_t n) |
template<typename T , size_t N> | |
constexpr size_t | countof (T const (&)[N]) noexcept |
static int | RadicalPreHash (const std::vector< int > &rs) |
static bool | DecodeRadicalLine (STRING *radical_data_line, RSMap *radical_map) |
static bool | DecodeRadicalTable (STRING *radical_data, RSMap *radical_map) |
void | ExtractFontName (const STRING &filename, STRING *fontname) |
TrainingSample * | BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features) |
static uint8_t | NormalizeDirection (uint8_t dir, const FCOORD &unnormed_pos, const DENORM &denorm, const DENORM *root_denorm) |
static FCOORD | MeanDirectionVector (const LLSQ &point_diffs, const LLSQ &dirs, const FCOORD &start_pt, const FCOORD &end_pt) |
static int | ComputeFeatures (const FCOORD &start_pt, const FCOORD &end_pt, double feature_length, GenericVector< INT_FEATURE_STRUCT > *features) |
static int | GatherPoints (const C_OUTLINE *outline, double feature_length, const DENORM &denorm, const DENORM *root_denorm, int start_index, int end_index, ICOORD *pos, FCOORD *pos_normed, LLSQ *points, LLSQ *dirs) |
static void | ExtractFeaturesFromRun (const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm, double feature_length, bool force_poly, GenericVector< INT_FEATURE_STRUCT > *features) |
void | ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window) |
static void | AddNearFeatures (const IntFeatureMap &feature_map, int f, int levels, GenericVector< int > *good_features) |
static void | CallWithUTF8 (TessCallback1< const char *> *cb, const WERD_CHOICE *wc) |
static int | sort_strings_by_dec_length (const void *v1, const void *v2) |
static int | BestLabel (const GENERIC_2D_ARRAY< float > &outputs, int t) |
static double | LogSumExp (double ln_x, double ln_y) |
double | Tanh (double x) |
double | Logistic (double x) |
template<class Func > | |
void | FuncInplace (int n, double *inout) |
template<class Func > | |
void | FuncMultiply (const double *u, const double *v, int n, double *out) |
template<typename T > | |
void | SoftmaxInPlace (int n, T *inout) |
void | CopyVector (int n, const double *src, double *dest) |
void | AccumulateVector (int n, const double *src, double *dest) |
void | MultiplyVectorsInPlace (int n, const double *src, double *inout) |
void | MultiplyAccumulate (int n, const double *u, const double *v, double *out) |
void | SumVectors (int n, const double *v1, const double *v2, const double *v3, const double *v4, const double *v5, double *sum) |
template<typename T > | |
void | ZeroVector (int n, T *vec) |
template<typename T > | |
void | ClipVector (int n, T lower, T upper, T *vec) |
void | CodeInBinary (int n, int nf, double *vec) |
static uint32_t | ceil_log2 (uint32_t n) |
static void | SkipWhitespace (char **str) |
static NetworkType | NonLinearity (char func) |
static Network * | BuildFullyConnected (const StaticShape &input_shape, NetworkType type, const STRING &name, int depth) |
static void | ComputeBlackWhite (Pix *pix, float *black, float *white) |
static void | HistogramWeight (double weight, STATS *histogram) |
static bool | AtLeast2LineCrossings (BLOBNBOX_CLIST *blobs) |
static Pix * | GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) |
template<class BBC > | |
int | SortByBoxLeft (const void *void1, const void *void2) |
template<class BBC > | |
int | SortRightToLeft (const void *void1, const void *void2) |
template<class BBC > | |
int | SortByBoxBottom (const void *void1, const void *void2) |
static TBOX | AttemptBoxExpansion (const TBOX &box, const IntGrid &noise_density, int pad) |
BOOL_VAR (textord_tabfind_show_initial_partitions, false, "Show partition bounds") | |
BOOL_VAR (textord_tabfind_show_reject_blobs, false, "Show blobs rejected as noise") | |
INT_VAR (textord_tabfind_show_partitions, 0, "Show partition bounds, waiting if >1") | |
BOOL_VAR (textord_tabfind_show_columns, false, "Show column bounds") | |
BOOL_VAR (textord_tabfind_show_blocks, false, "Show final block bounds") | |
BOOL_VAR (textord_tabfind_find_tables, true, "run table detection") | |
static void | ReleaseAllBlobsAndDeleteUnused (BLOBNBOX_LIST *blobs) |
static TBOX | BoxFromHLine (const TabVector *hline) |
static void | ReflectBlobList (BLOBNBOX_LIST *bblobs) |
static void | RotateAndExplodeBlobList (const FCOORD &blob_rotation, BLOBNBOX_LIST *bblobs, STATS *widths, STATS *heights) |
BOOL_VAR_H (textord_tabfind_find_tables, false, "run table detection") | |
static void | ClipCoord (const ICOORD &bleft, const ICOORD &tright, ICOORD *pos) |
static TO_BLOCK * | MoveBlobsToBlock (bool vertical_text, int line_spacing, BLOCK *block, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
static int | MedianSpacing (int page_height, ColPartition_IT it) |
static bool | UpdateLeftMargin (const ColPartition &part, int *margin_left, int *margin_right) |
static bool | UpdateRightMargin (const ColPartition &part, int *margin_left, int *margin_right) |
BOOL_VAR (textord_tabfind_show_color_fit, false, "Show stroke widths") | |
static bool | OKMergeCandidate (const ColPartition *part, const ColPartition *candidate, bool debug) |
static int | IncreaseInOverlap (const ColPartition *merge1, const ColPartition *merge2, int ok_overlap, ColPartition_CLIST *parts) |
static bool | TestCompatibleCandidates (const ColPartition &part, bool debug, ColPartition_CLIST *candidates) |
static void | RemoveBadBox (BLOBNBOX *box, ColPartition *part, ColPartition_LIST *part_list) |
static void | ComputeSearchBoxAndScaling (BlobNeighbourDir direction, const TBOX &part_box, int min_padding, TBOX *search_box, ICOORD *dist_scaling) |
static bool | HScanForEdge (uint32_t *data, int wpl, int x_start, int x_end, int min_count, int mid_width, int max_count, int y_end, int y_step, int *y_start) |
static bool | VScanForEdge (uint32_t *data, int wpl, int y_start, int y_end, int min_count, int mid_width, int max_count, int x_end, int x_step, int *x_start) |
static void | AttemptToShrinkBox (const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box, Pix *pix, TBOX *slice) |
static void | CutChunkFromParts (const TBOX &box, const TBOX &im_box, const FCOORD &rotation, const FCOORD &rerotation, Pix *pix, ColPartition_LIST *part_list) |
static void | DivideImageIntoParts (const TBOX &im_box, const FCOORD &rotation, const FCOORD &rerotation, Pix *pix, ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) |
static int | ExpandImageLeft (const TBOX &box, int left_limit, ColPartitionGrid *part_grid) |
static int | ExpandImageRight (const TBOX &box, int right_limit, ColPartitionGrid *part_grid) |
static int | ExpandImageBottom (const TBOX &box, int bottom_limit, ColPartitionGrid *part_grid) |
static int | ExpandImageTop (const TBOX &box, int top_limit, ColPartitionGrid *part_grid) |
static int | ExpandImageDir (BlobNeighbourDir dir, const TBOX &im_box, const TBOX &limit_box, ColPartitionGrid *part_grid, TBOX *expanded_box) |
static void | MaximalImageBoundingBox (ColPartitionGrid *part_grid, TBOX *im_box) |
static void | DeletePartition (ColPartition *part) |
static bool | ExpandImageIntoParts (const TBOX &max_image_box, ColPartitionGridSearch *rectsearch, ColPartitionGrid *part_grid, ColPartition **part_ptr) |
static int | IntersectArea (const TBOX &box, ColPartition_LIST *part_list) |
static bool | TestWeakIntersectedPart (const TBOX &im_box, ColPartition_LIST *part_list, ColPartition *part) |
static void | EliminateWeakParts (const TBOX &im_box, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts, ColPartition_LIST *part_list) |
static bool | ScanForOverlappingText (ColPartitionGrid *part_grid, TBOX *box) |
static void | MarkAndDeleteImageParts (const FCOORD &rerotate, ColPartitionGrid *part_grid, ColPartition_LIST *image_parts, Pix *image_pix) |
static void | DeleteSmallImages (ColPartitionGrid *part_grid) |
static void | RemoveUnusedLineSegments (bool horizontal_lines, BLOBNBOX_LIST *line_bblobs, Pix *line_pix) |
static void | SubtractLinesAndResidue (Pix *line_pix, Pix *non_line_pix, int resolution, Pix *src_pix) |
static int | MaxStrokeWidth (Pix *pix) |
static int | NumTouchingIntersections (Box *line_box, Pix *intersection_pix) |
static int | CountPixelsAdjacentToLine (int line_width, Box *line_box, Pix *nonline_pix) |
static int | FilterFalsePositives (int resolution, Pix *nonline_pix, Pix *intersection_pix, Pix *line_pix) |
static Pix * | FilterMusic (int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pix_hline, l_int32 *v_empty, l_int32 *h_empty) |
INT_VAR (textord_tabfind_show_strokewidths, 0, "Show stroke widths") | |
BOOL_VAR (textord_tabfind_only_strokewidths, false, "Only run stroke widths") | |
static void | CollectHorizVertBlobs (BLOBNBOX_LIST *input_blobs, int *num_vertical_blobs, int *num_horizontal_blobs, BLOBNBOX_CLIST *vertical_blobs, BLOBNBOX_CLIST *horizontal_blobs, BLOBNBOX_CLIST *nondescript_blobs) |
static void | PrintBoxWidths (BLOBNBOX *neighbour) |
static int | UpperQuartileCJKSize (int gridsize, BLOBNBOX_LIST *blobs) |
static bool | AcceptableCJKMerge (const TBOX &bbox, const TBOX &nbox, bool debug, int max_size, int max_dist, int *x_gap, int *y_gap) |
static void | ListNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) |
static void | List2ndNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) |
static void | List3rdNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours) |
static void | CountNeighbourGaps (bool debug, BLOBNBOX_CLIST *neighbours, int *pure_h_count, int *pure_v_count) |
static void | CountNeighbourTypes (BLOBNBOX_CLIST *neighbours, int *pure_h_count, int *pure_v_count) |
static BLOBNBOX * | MutualUnusedVNeighbour (const BLOBNBOX *blob, BlobNeighbourDir dir) |
static BLOBNBOX * | MutualUnusedHNeighbour (const BLOBNBOX *blob, BlobNeighbourDir dir) |
static void | DrawDiacriticJoiner (const BLOBNBOX *blob, ScrollView *window) |
BOOL_VAR (textord_tabfind_show_initialtabs, false, "Show tab candidates") | |
BOOL_VAR (textord_tabfind_show_finaltabs, false, "Show tab vectors") | |
static void | DisplayBoxVector (const GenericVector< BLOBNBOX *> &boxes, ScrollView *win) |
BOOL_VAR (textord_show_tables, false, "Show table regions") | |
BOOL_VAR (textord_tablefind_show_mark, false, "Debug table marking steps in detail") | |
BOOL_VAR (textord_tablefind_show_stats, false, "Show page stats used in table finding") | |
BOOL_VAR (textord_tablefind_recognize_tables, false, "Enables the table recognizer for table layout and filtering.") | |
template<typename T > | |
void | DeleteObject (T *object) |
double_VAR (textord_tabvector_vertical_gap_fraction, 0.5, "max fraction of mean blob width allowed for vertical gaps in vertical text") | |
double_VAR (textord_tabvector_vertical_box_ratio, 0.5, "Fraction of box matches required to declare a line vertical") | |
double_VAR_H (textord_tabvector_vertical_gap_fraction, 0.5, "Max fraction of mean blob width allowed for vertical gaps in vertical text") | |
double_VAR_H (textord_tabvector_vertical_box_ratio, 0.5, "Fraction of box matches required to declare a line vertical") | |
static TBOX | BoundsWithinBox (Pix *pix, const TBOX &box) |
static void | TruncateBoxToMissNonText (int x_middle, int y_middle, bool split_on_x, Pix *nontext_map, TBOX *bbox) |
void | SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob) |
void | assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks) |
static bool | IntFlagExists (const char *flag_name, int32_t *value) |
static bool | DoubleFlagExists (const char *flag_name, double *value) |
static bool | BoolFlagExists (const char *flag_name, bool *value) |
static bool | StringFlagExists (const char *flag_name, const char **value) |
static void | SetIntFlagValue (const char *flag_name, const int32_t new_val) |
static void | SetDoubleFlagValue (const char *flag_name, const double new_val) |
static void | SetBoolFlagValue (const char *flag_name, const bool new_val) |
static void | SetStringFlagValue (const char *flag_name, const char *new_val) |
static bool | SafeAtoi (const char *str, int *val) |
static bool | SafeAtod (const char *str, double *val) |
static void | PrintCommandLineFlags () |
void | ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags) |
ShapeTable * | LoadShapeTable (const STRING &file_prefix) |
void | WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table) |
MasterTrainer * | LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix) |
static void | CheckSharedLibraryVersion () |
Pix * | DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation) |
Pix * | PrepareDistortedPix (const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes) |
void | GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes) |
int | ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs) |
bool | WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer) |
STRING | ReadFile (const std::string &filename, FileReader reader) |
bool | WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata) |
bool | WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata) |
static bool | WriteDawg (const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy, TessdataType file_type, TessdataManager *traineddata) |
static bool | WriteDawgs (const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, const UNICHARSET &unicharset, TessdataManager *traineddata) |
int | CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer) |
static std::string | EncodeAsUTF8 (const char32 ch32) |
static bool | is_hyphen_punc (const char32 ch) |
static bool | is_single_quote (const char32 ch) |
static bool | is_double_quote (const char32 ch) |
static void | NormalizeUTF8ToUTF32 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, const char *str8, std::vector< char32 > *normed32) |
static void | StripJoiners (std::vector< char32 > *str32) |
bool | NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized) |
bool | NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes) |
char32 | OCRNormalize (char32 ch) |
bool | IsOCREquivalent (char32 ch1, char32 ch2) |
bool | IsValidCodepoint (const char32 ch) |
bool | IsWhitespace (const char32 ch) |
bool | IsUTF8Whitespace (const char *text) |
unsigned int | SpanUTF8Whitespace (const char *text) |
unsigned int | SpanUTF8NotWhitespace (const char *text) |
bool | IsInterchangeValid (const char32 ch) |
bool | IsInterchangeValid7BitAscii (const char32 ch) |
char32 | FullwidthToHalfwidth (const char32 ch) |
static void | ListFontFamilies (PangoFontFamily ***families, int *n_families) |
static char * | my_strnmove (char *dest, const char *src, size_t n) |
static bool | ShouldIgnoreFontFamilyName (const char *query) |
static void | CharCoverageMapToBitmap (PangoCoverage *coverage, std::vector< bool > *unichar_bitmap) |
static bool | IsCombiner (int ch) |
static std::string | EncodeAsUTF8 (const char32 ch32) |
static bool | RandBool (const double prob, TRand *rand) |
static Pix * | CairoARGB32ToPixFormat (cairo_surface_t *surface) |
static void | MergeBoxCharsToWords (std::vector< BoxChar *> *boxchars) |
static bool | IsWhitespaceBox (const BoxChar *boxchar) |
static std::string | StringReplace (const std::string &in, const std::string &oldsub, const std::string &newsub) |
static void | ExtractFontProperties (const std::string &utf8_text, StringRenderer *render, const std::string &output_base) |
static bool | MakeIndividualGlyphs (Pix *pix, const std::vector< BoxChar *> &vbox, const int input_tiff_page) |
static void | AddStringsToUnicharset (const GenericVector< STRING > &strings, int norm_mode, UNICHARSET *unicharset) |
static int | Main (int argc, char **argv) |
void | SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset) |
void | SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset) |
std::string | GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset) |
void | SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file) |
void | SetupBasicProperties (bool report_errors, UNICHARSET *unicharset) |
static bool | IsThaiConsonant (char32 ch) |
static bool | IsThaiBeforeConsonantVowel (char32 ch) |
static bool | IsThaiToneMark (char32 ch) |
static bool | IsThaiTonableVowel (char32 ch) |
static bool | CmpPairSecond (const std::pair< int, int > &p1, const std::pair< int, int > &p2) |
static void | ScanParentsForCaseMix (const UNICHARSET &unicharset, LanguageModelState *parent_node) |
static bool | HasBetterCaseVariant (const UNICHARSET &unicharset, const BLOB_CHOICE *choice, BLOB_CHOICE_LIST *choices) |
template<class BLOB_CHOICE > | |
int | SortByUnicharID (const void *void1, const void *void2) |
template<class BLOB_CHOICE > | |
int | SortByRating (const void *void1, const void *void2) |
Variables | |
const int | kMinRectSize = 10 |
const char | kTesseractReject = '~' |
const char | kUNLVReject = '~' |
const char | kUNLVSuspect = '^' |
const char * | kInputFile = "noname.tif" |
const char * | kOldVarsFile = "failed_vars.txt" |
const int | kMaxIntSize = 22 |
const int | kNumbersPerBlob = 5 |
const int | kBytesPerNumber = 5 |
const int | kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1 |
const int | kBytesPer64BitNumber = 20 |
const int | kMaxBytesPerLine |
const int | kUniChs [] |
const int | kLatinChs [] |
static const int | kBasicBufSize = 2048 |
static const int | kCharWidth = 2 |
static const int | kMaxBytesPerCodepoint = 20 |
const float | kMathDigitDensityTh1 = 0.25 |
const float | kMathDigitDensityTh2 = 0.1 |
const float | kMathItalicDensityTh = 0.5 |
const float | kUnclearDensityTh = 0.25 |
const int | kSeedBlobsCountTh = 10 |
const int | kLeftIndentAlignmentCountTh = 1 |
const int | kMaxCharTopRange = 48 |
const float | kCertaintyScale = 7.0f |
const float | kWorstDictCertainty = -25.0f |
const int | kMaxCircleErosions = 8 |
const ParagraphModel * | kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F) |
const ParagraphModel * | kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F) |
const int16_t | kMaxBoxEdgeDiff = 2 |
const int | kBoxClipTolerance = 2 |
const int | kNumEndPoints = 3 |
const int | kMinPointsForErrorCount = 16 |
const int | kMaxRealDistance = 2.0 |
const int | kFeaturePadding = 2 |
const int | kImagePadding = 4 |
const int | kHistogramSize = 256 |
static const int | kMaxSmallWordUnichars = 3 |
static const int | kMaxMediumWordUnichars = 6 |
static const char *const | kParamsTrainingFeatureTypeName [] |
const int | kMaxAmbigStringSize = 30 * ( 10 + 1) |
static const int | kUnigramAmbigsBufferSize = 1000 |
static const char | kAmbigNgramSeparator [] = { ' ', '\0' } |
static const char | kAmbigDelimiters [] = "\t " |
static const char | kIllegalMsg [] |
static const char | kIllegalUnicharMsg [] |
CCUtilMutex | tprintfMutex |
static const char *const | kTessdataFileSuffixes [] |
static const int | kMaxNumTessdataEntries = 1000 |
const char * | kNullChar = "<nul>" |
const int | kRadicalRadix = 29 |
const char * | kUTF8LineSeparator = "\u2028" |
const char * | kUTF8ParagraphSeparator = "\u2029" |
const char * | kLRM = "\u200E" |
const char * | kRLM = "\u200F" |
const char * | kRLE = "\u202A" |
const char * | kPDF = "\u202C" |
const char * | kHyphenLikeUTF8 [] |
const char * | kApostropheLikeUTF8 [] |
const char | kUniversalAmbigsFile [] |
const int | ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile) |
const double | kRatingEpsilon = 1.0 / 32 |
const int | kMaxOffsetDist = 32 |
static const int | kNumOffsetMaps = 2 |
const int | kMinClusteredShapes = 1 |
const int | kMaxUnicharsPerCluster = 2000 |
const float | kFontMergeDistance = 0.025 |
const float | kInfiniteDist = 999.0f |
const int | kRandomizingCenter = 128 |
static const int | kNumCNParams = 4 |
static const int | kSampleYShiftSize = 5 |
static const int | kSampleScaleSize = 3 |
static const int | kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2 |
const int | kTestChar = -1 |
const int | kSquareLimit = 25 |
const int | kPrime1 = 17 |
const int | kPrime2 = 13 |
static const int | kMinAbsoluteGarbageWordLength = 10 |
static const float | kMinAbsoluteGarbageAlphanumFrac = 0.5f |
const int | case_state_table [6][4] |
static const bool | kDawgSuccessors [DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] |
static const char | kWildcard [] = "*" |
static const int | kRatingPad = 4 |
static const char | kDictWildcard [] = "\u2606" |
static const int | kDictMaxWildcards = 2 |
static const char | kHyphenSymbol [] = "-" |
static const char | kSlashSymbol [] = "/" |
static const char | kQuestionSymbol [] = "?" |
static const char | kApostropheSymbol [] = "'" |
static const float | kSimCertaintyScale = -10.0 |
static const float | kSimCertaintyOffset = -10.0 |
static const float | kSimilarityFloor = 100.0 |
static const int | kDocDictMaxRepChars = 4 |
const char | kDoNotReverse [] = "RRP_DO_NO_REVERSE" |
const char | kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL" |
const char | kForceReverse [] = "RRP_FORCE_REVERSE" |
const char *const | RTLReversePolicyNames [] |
double | TanhTable [kTableSize] |
double | LogisticTable [kTableSize] |
const int | kTableSize = 4096 |
const double | kScaleFactor = 256.0 |
const int | kMaxInputHeight = 48 |
const double | kStateClip = 100.0 |
const double | kErrClip = 1.0f |
const double | kDictRatio = 2.25 |
const double | kCertOffset = -0.085 |
const double | kMinDivergenceRate = 50.0 |
const int | kMinStallIterations = 10000 |
const double | kSubTrainerMarginFraction = 3.0 / 128 |
const double | kLearningRateDecay = sqrt(0.5) |
const int | kNumAdjustmentIterations = 100 |
const int | kErrorGraphInterval = 1000 |
const int | kNumPagesPerBatch = 100 |
const int | kMinStartedErrorRate = 75 |
const double | kStageTransitionThreshold = 10.0 |
const double | kHighConfidence = 0.9375 |
const double | kImprovementFraction = 15.0 / 16.0 |
const double | kBestCheckpointFraction = 31.0 / 32.0 |
const int | kTargetXScale = 5 |
const int | kTargetYScale = 100 |
const int | kMinWinSize = 500 |
const int | kMaxWinSize = 2000 |
const int | kXWinFrameSize = 30 |
const int | kYWinFrameSize = 80 |
const float | kMinCertainty = -20.0f |
const float | kMinProb = exp(kMinCertainty) |
const char * | kNodeContNames [] = {"Anything", "OnlyDup", "NoDup"} |
const int | kAdamCorrectionIterations = 200000 |
const double | kAdamEpsilon = 1e-8 |
const int | kInt8Flag = 1 |
const int | kAdamFlag = 4 |
const int | kDoubleFlag = 128 |
const int | kHistogramBuckets = 16 |
const double | kAlignedFraction = 0.03125 |
const double | kRaggedFraction = 2.5 |
const double | kAlignedGapFraction = 0.75 |
const double | kRaggedGapFraction = 1.0 |
const int | kVLineAlignment = 3 |
const int | kVLineGutter = 1 |
const int | kVLineSearchSize = 150 |
const int | kMinRaggedTabs = 5 |
const int | kMinAlignedTabs = 4 |
const int | kVLineMinLength = 500 |
const double | kMinTabGradient = 4.0 |
const int | kMaxSkewFactor = 15 |
const double | kMaxSmallNeighboursPerPix = 1.0 / 32 |
const int | kMaxLargeOverlapsWithSmall = 3 |
const int | kMaxMediumOverlapsWithSmall = 12 |
const int | kMaxLargeOverlapsWithMedium = 12 |
const int | kOriginalNoiseMultiple = 8 |
const int | kNoisePadding = 4 |
const double | kPhotoOffsetFraction = 0.375 |
const double | kMinGoodTextPARatio = 1.5 |
const int | kMaxIncompatibleColumnCount = 2 |
const double | kHorizontalGapMergeFraction = 0.5 |
const double | kMinGutterWidthGrid = 0.5 |
const double | kMaxDistToPartSizeRatio = 1.5 |
const double | kMaxSpacingDrift = 1.0 / 72 |
const double | kMaxTopSpacingFraction = 0.25 |
const double | kMaxSameBlockLineSpacing = 3 |
const double | kMaxSizeRatio = 1.5 |
const double | kMaxLeaderGapFractionOfMax = 0.25 |
const double | kMaxLeaderGapFractionOfMin = 0.5 |
const int | kMinLeaderCount = 5 |
const int | kMinStrongTextValue = 6 |
const int | kMinChainTextValue = 3 |
const int | kHorzStrongTextlineCount = 8 |
const int | kHorzStrongTextlineHeight = 10 |
const int | kHorzStrongTextlineAspect = 5 |
const double | kMaxBaselineError = 0.4375 |
const double | kMinBaselineCoverage = 0.5 |
const int | kMaxRMSColorNoise = 128 |
const int | kMaxColorDistance = 900 |
static char | kBlobTypes [BRT_COUNT+1] = "NHSRIUVT" |
const int | kRGBRMSColors = 4 |
const int | kMaxPadFactor = 6 |
const int | kMaxNeighbourDistFactor = 4 |
const int | kMaxCaptionLines = 7 |
const double | kMinCaptionGapRatio = 2.0 |
const double | kMinCaptionGapHeightRatio = 0.5 |
const double | kMarginOverlapFraction = 0.25 |
const double | kBigPartSizeRatio = 1.75 |
const double | kTinyEnoughTextlineOverlapFraction = 0.25 |
const double | kMaxPartitionSpacing = 1.75 |
const int | kSmoothDecisionMargin = 4 |
const double | kMinColumnWidth = 2.0 / 3 |
const double | kMinRectangularFraction = 0.125 |
const double | kMaxRectangularFraction = 0.75 |
const double | kMaxRectangularGradient = 0.1 |
const int | kMinImageFindSize = 100 |
const double | kRMSFitScaling = 8.0 |
const int | kMinColorDifference = 16 |
const int | kThinLineFraction = 20 |
Denominator of resolution makes max pixel width to allow thin lines. More... | |
const int | kMinLineLengthFraction = 4 |
Denominator of resolution makes min pixels to demand line lengths to be. More... | |
const int | kCrackSpacing = 100 |
Spacing of cracks across the page to break up tall vertical lines. More... | |
const int | kLineFindGridSize = 50 |
Grid size used by line finder. Not very critical. More... | |
const int | kMinThickLineWidth = 12 |
const int | kMaxLineResidue = 6 |
const double | kThickLengthMultiple = 0.75 |
const double | kMaxNonLineDensity = 0.25 |
const double | kMaxStaveHeight = 1.0 |
const double | kMinMusicPixelFraction = 0.75 |
const double | kStrokeWidthFractionTolerance = 0.125 |
const double | kStrokeWidthTolerance = 1.5 |
const double | kStrokeWidthFractionCJK = 0.25 |
const double | kStrokeWidthCJK = 2.0 |
const int | kCJKRadius = 2 |
const double | kCJKBrokenDistanceFraction = 0.25 |
const int | kCJKMaxComponents = 8 |
const double | kCJKAspectRatio = 1.25 |
const double | kCJKAspectRatioIncrease = 1.0625 |
const int | kMaxCJKSizeRatio = 5 |
const double | kBrokenCJKIterationFraction = 0.125 |
const double | kDiacriticXPadRatio = 7.0 |
const double | kDiacriticYPadRatio = 1.75 |
const double | kMinDiacriticSizeRatio = 1.0625 |
const double | kMaxDiacriticDistanceRatio = 1.25 |
const double | kMaxDiacriticGapToBaseCharHeight = 1.0 |
const int | kLineTrapLongest = 4 |
const int | kLineTrapShortest = 2 |
const int | kMostlyOneDirRatio = 3 |
const double | kLineResidueAspectRatio = 8.0 |
const int | kLineResiduePadRatio = 3 |
const double | kLineResidueSizeRatio = 1.75 |
const float | kSizeRatioToReject = 2.0 |
const double | kNeighbourSearchFactor = 2.5 |
const double | kNoiseOverlapGrowthFactor = 4.0 |
const double | kNoiseOverlapAreaFactor = 1.0 / 512 |
const int | kTabRadiusFactor = 5 |
const int | kMinVerticalSearch = 3 |
const int | kMaxVerticalSearch = 12 |
const int | kMaxRaggedSearch = 25 |
const int | kMinLinesInColumn = 10 |
const double | kMinFractionalLinesInColumn = 0.125 |
const double | kMaxGutterWidthAbsolute = 2.00 |
const int | kRaggedGutterMultiple = 5 |
const double | kLineFragmentAspectRatio = 10.0 |
const int | kMinEvaluatedTabs = 3 |
const double | kCosMaxSkewAngle = 0.866025 |
const int | kColumnWidthFactor = 20 |
const int | kMaxVerticalSpacing = 500 |
const int | kMaxBlobWidth = 500 |
const double | kSplitPartitionSize = 2.0 |
const double | kAllowTextHeight = 0.5 |
const double | kAllowTextWidth = 0.6 |
const double | kAllowTextArea = 0.8 |
const double | kAllowBlobHeight = 0.3 |
const double | kAllowBlobWidth = 0.4 |
const double | kAllowBlobArea = 0.05 |
const int | kMinBoxesInTextPartition = 10 |
const int | kMaxBoxesInDataPartition = 20 |
const double | kMaxGapInTextPartition = 4.0 |
const double | kMinMaxGapInTextPartition = 0.5 |
const double | kMaxBlobOverlapFactor = 4.0 |
const double | kMaxTableCellXheight = 2.0 |
const int | kMaxColumnHeaderDistance = 4 |
const double | kTableColumnThreshold = 3.0 |
const int | kRulingVerticalMargin = 3 |
const double | kMinOverlapWithTable = 0.6 |
const int | kSideSpaceMargin = 10 |
const double | kSmallTableProjectionThreshold = 0.35 |
const double | kLargeTableProjectionThreshold = 0.45 |
const int | kLargeTableRowCount = 6 |
const int | kMinRowsInTable = 3 |
const int | kAdjacentLeaderSearchPadding = 2 |
const double | kParagraphEndingPreviousLineRatio = 1.3 |
const double | kMaxParagraphEndingLeftSpaceMultiple = 3.0 |
const double | kMinParagraphEndingTextToWhitespaceRatio = 3.0 |
const double | kMaxXProjectionGapFactor = 2.0 |
const double | kStrokeWidthFractionalTolerance = 0.25 |
const double | kStrokeWidthConstantTolerance = 2.0 |
const double | kHorizontalSpacing = 0.30 |
const double | kVerticalSpacing = -0.2 |
const int | kCellSplitRowThreshold = 0 |
const int | kCellSplitColumnThreshold = 0 |
const int | kLinedTableMinVerticalLines = 3 |
const int | kLinedTableMinHorizontalLines = 3 |
const double | kRequiredColumns = 0.7 |
const double | kMarginFactor = 1.1 |
const double | kMaxRowSize = 2.5 |
const double | kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 } |
const int | kGoodRowNumberOfColumnsSmallSize |
const double | kGoodRowNumberOfColumnsLarge = 0.7 |
const double | kMinFilledArea = 0.35 |
const int | kGutterMultiple = 4 |
const int | kGutterToNeighbourRatio = 3 |
const int | kSimilarVectorDist = 10 |
const int | kSimilarRaggedDist = 50 |
const int | kMaxFillinMultiple = 11 |
const double | kMinGutterFraction = 0.5 |
const double | kLineCountReciprocal = 4.0 |
const double | kMinAlignedGutter = 0.25 |
const double | kMinRaggedGutter = 1.5 |
const char * | kAlignmentNames [] |
const int | kMaxLineLength = 1024 |
const float | kRotationRange = 0.02f |
const int | kExposureFactor = 16 |
const int | kSaltnPepper = 5 |
const int | kMinRampSize = 1000 |
const int | kMinLigature = 0xfb00 |
const int | kMaxLigature = 0xfb17 |
const int | kDefaultResolution = 300 |
static const int | kDefaultOutputResolution = 300 |
static const char * | kWordJoinerUTF8 = "\u2060" |
static const char *const | XHeightConsistencyEnumName [] |
static const char *const | LMPainPointsTypeName [] |
static const float | kScoreScaleFactor = 100.0f |
static const float | kMinFinalCost = 0.001f |
static const float | kMaxFinalCost = 100.0f |
The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:
* <UTF8 str> <left> <bottom> <right> <top> <page id> *
and for word/line-level boxes:
* WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str> *
NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.
<page id> is 0-based, and the page number is used for multipage input (tiff).
In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)
In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. Eg.
* word -> #w o r d * multi word line -> #m u l t i w o r d l i n e *
The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, eg Hindi.
In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.
Make a word from the selected blobs and run Tess on them.
page_res | recognise blobs |
selection_box | within this box |
fp_eval_word_spacing() Evaluation function for fixed pitch word lists.
Basically, count the number of "nice" characters - those which are in tess-acceptable words or in dict words and are not rejected. Penalise any potential noise chars.
build_menu()
Construct the menu tree used by the command window
process_cmd_win_event()
Process a command returned from the command window (Just call the appropriate command handler)
word_blank_and_set_display() Word processor
Blank display of word then redisplay word according to current display mode settings
using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> |
using tesseract::char32 = typedef signed int |
typedef TessResultCallback2<bool, const GenericVector<char>&, LSTMTrainer*>* tesseract::CheckPointReader |
typedef TessResultCallback3<bool, SerializeAmount, const LSTMTrainer*, GenericVector<char>*>* tesseract::CheckPointWriter |
using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> |
using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT> |
using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> |
using tesseract::DawgVector = typedef GenericVector<Dawg *> |
typedef int(Dict::* tesseract::DictFunc) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const |
typedef bool(* tesseract::FileReader)(const STRING &filename, GenericVector< char > *data) |
typedef bool(* tesseract::FileWriter)(const GenericVector< char > &data, const STRING &filename) |
typedef void(Wordrec::* tesseract::FillLatticeFunc) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
using tesseract::IntKDPair = typedef KDPairInc<int, int> |
using tesseract::LanguageModelFlagsType = typedef unsigned char |
Used for expressing various language model flags.
using tesseract::LigHash = typedef std::unordered_map<std::string, std::string, StringHash> |
using tesseract::NodeChildVector = typedef GenericVector<NodeChild> |
using tesseract::PainPointHeap = typedef GenericHeap<MatrixCoordPair> |
typedef float(Dict::* tesseract::ParamsModelClassifyFunc) (const char *lang, void *path) |
using tesseract::ParamsTrainingHypothesisList = typedef GenericVector<ParamsTrainingHypothesis> |
using tesseract::PartSetVector = typedef GenericVector<ColPartitionSet*> |
typedef double(Dict::* tesseract::ProbabilityInContextFunc) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
using tesseract::RecodeHeap = typedef GenericHeap<RecodePair> |
using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode> |
using tesseract::RSCounts = typedef std::unordered_map<int, int> |
using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >> |
using tesseract::SetOfModels = typedef GenericVectorEqEq<const ParagraphModel *> |
using tesseract::ShapeQueue = typedef GenericHeap<ShapeQueueEntry> |
using tesseract::SuccessorList = typedef GenericVector<int> |
using tesseract::SuccessorListsVector = typedef GenericVector<SuccessorList *> |
typedef TessResultCallback4<STRING, int, const double*, const TessdataManager&, int>* tesseract::TestCallback |
typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *> tesseract::TruthCallback |
using tesseract::UnicharAmbigsVector = typedef GenericVector<AmbigSpec_LIST *> |
using tesseract::UnicharIdVector = typedef GenericVector<UNICHAR_ID> |
using tesseract::WidthCallback = typedef TessResultCallback1<bool, int> |
using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> |
typedef void(Tesseract::* tesseract::WordRecognizer) (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words) |
using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> |
enum tesseract::AmbigType |
enum tesseract::DawgType |
|
strong |
|
strong |
enum tesseract::LineType |
enum tesseract::LossType |
When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.
ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.
Enumerator | |
---|---|
OEM_TESSERACT_ONLY | |
OEM_LSTM_ONLY | |
OEM_TESSERACT_LSTM_COMBINED | |
OEM_DEFAULT | |
OEM_COUNT |
|
strong |
+------------------+ Orientation Example: | 1 Aaaa Aaaa Aaaa | ==================== | Aaa aa aaa aa | To left is a diagram of some (1) English and | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit. | 2 | | ####### c c C | Upright Latin characters are represented as A and a. | ####### c c c | '<' represents a latin character rotated | < ####### c c c | anti-clockwise 90 degrees. | < ####### c c | | < ####### . c | Upright Chinese characters are represented C and c. | 3 ####### c | +------------------+ NOTA BENE: enum values here should match goodoc.proto
If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.
In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).
The values of this enum match the convention of Tesseract's osdetect.h
Enumerator | |
---|---|
ORIENTATION_PAGE_UP | |
ORIENTATION_PAGE_RIGHT | |
ORIENTATION_PAGE_DOWN | |
ORIENTATION_PAGE_LEFT |
enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.
Enumerator | |
---|---|
RIL_BLOCK | |
RIL_PARA | |
RIL_TEXTLINE | |
RIL_WORD | |
RIL_SYMBOL |
Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.
JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.
NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.
Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.
JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.
JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.
JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.
Enumerator | |
---|---|
JUSTIFICATION_UNKNOWN | |
JUSTIFICATION_LEFT | |
JUSTIFICATION_CENTER | |
JUSTIFICATION_RIGHT |
enum tesseract::ScriptPos |
The text lines are read in the given sequence.
In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.
Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.
Enumerator | |
---|---|
TEXTLINE_ORDER_LEFT_TO_RIGHT | |
TEXTLINE_ORDER_RIGHT_TO_LEFT | |
TEXTLINE_ORDER_TOP_TO_BOTTOM |
enum tesseract::TopNState |
|
strong |
|
strong |
The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".
For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.
Enumerator | |
---|---|
WRITING_DIRECTION_LEFT_TO_RIGHT | |
WRITING_DIRECTION_RIGHT_TO_LEFT | |
WRITING_DIRECTION_TOP_TO_BOTTOM |
|
static |
|
static |
|
inline |
|
static |
|
static |
|
static |
|
static |
Fits a line to the baseline at the given level, and appends its coefficients to the hOCR string. NOTE: The hOCR spec is unclear on how to specify baseline coefficients for rotated textlines. For this reason, on textlines that are not upright, this method currently only inserts a 'textangle' property to indicate the rotation direction and does not add any baseline information to the hOCR string.
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
void tesseract::AmbigSpec_zapper | ( | ELIST_LINK * | link | ) |
bool tesseract::AsciiLikelyListItem | ( | const STRING & | word | ) |
void tesseract::assign_blobs_to_blocks2 | ( | Pix * | pix, |
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | port_blocks | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
TrainingSample * tesseract::BlobToTrainingSample | ( | const TBLOB & | blob, |
bool | nonlinear_norm, | ||
INT_FX_RESULT_STRUCT * | fx_info, | ||
GenericVector< INT_FEATURE_STRUCT > * | bl_features | ||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_color_fit | , |
false | , | ||
"Show stroke widths" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_only_strokewidths | , |
false | , | ||
"Only run stroke widths" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_initial_partitions | , |
false | , | ||
"Show partition bounds" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_reject_blobs | , |
false | , | ||
"Show blobs rejected as noise" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_initialtabs | , |
false | , | ||
"Show tab candidates" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_columns | , |
false | , | ||
"Show column bounds" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_finaltabs | , |
false | , | ||
"Show tab vectors" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_show_blocks | , |
false | , | ||
"Show final block bounds" | |||
) |
tesseract::BOOL_VAR | ( | textord_tabfind_find_tables | , |
true | , | ||
"run table detection" | |||
) |
tesseract::BOOL_VAR | ( | textord_show_tables | , |
false | , | ||
"Show table regions" | |||
) |
tesseract::BOOL_VAR | ( | textord_tablefind_show_mark | , |
false | , | ||
"Debug table marking steps in detail" | |||
) |
tesseract::BOOL_VAR | ( | textord_tablefind_show_stats | , |
false | , | ||
"Show page stats used in table finding" | |||
) |
tesseract::BOOL_VAR | ( | textord_tablefind_recognize_tables | , |
false | , | ||
"Enables the table recognizer for table layout and filtering." | |||
) |
tesseract::BOOL_VAR_H | ( | textord_tabfind_find_tables | , |
false | , | ||
"run table detection" | |||
) |
|
static |
Helper to compute the dispute resolution metric. Disputed blob resolution. The aim is to give the blob to the most appropriate boxfile box. Most of the time it is obvious, but if two boxfile boxes overlap significantly it is not. If a small boxfile box takes most of the blob, and a large boxfile box does too, then we want the small boxfile box to get it, but if the small box is much smaller than the blob, we don't want it to get it. Details of the disputed blob resolution: Given a box with area A, and a blob with area B, with overlap area C, then the miss metric is (A-C)(B-C)/(AB) and the box with minimum miss metric gets the blob.
|
static |
|
static |
|
static |
|
static |
|
static |
void tesseract::CanonicalizeDetectionResults | ( | GenericVector< PARA *> * | row_owners, |
PARA_LIST * | paragraphs | ||
) |
|
inline static |
|
static |
|
inline static |
|
static |
void tesseract::ClearFeatureSpaceWindow | ( | NORM_METHOD | norm_method, |
ScrollView * | window | ||
) |
Clears the given window and draws the featurespace guides for the appropriate normalization method.
|
static |
|
static |
|
inline |
|
static |
bool tesseract::cmp_eq | ( | T const & | t1, |
T const & | t2 | ||
) |
|
static |
|
inline |
|
static |
|
static |
|
static |
int tesseract::CombineLangModel | ( | const UNICHARSET & | unicharset, |
const std::string & | script_dir, | ||
const std::string & | version_str, | ||
const std::string & | output_dir, | ||
const std::string & | lang, | ||
bool | pass_through_recoder, | ||
const GenericVector< STRING > & | words, | ||
const GenericVector< STRING > & | puncs, | ||
const GenericVector< STRING > & | numbers, | ||
bool | lang_is_rtl, | ||
FileReader | reader, | ||
FileWriter | writer | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline |
|
static |
|
static |
|
static |
|
noexcept |
|
static |
bool tesseract::CrownCompatible | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | a, | ||
int | b, | ||
const ParagraphModel * | model | ||
) |
|
static |
|
static |
struct Pix * tesseract::DegradeImage | ( | Pix * | input, |
int | exposure, | ||
TRand * | randomizer, | ||
float * | rotation | ||
) |
void tesseract::DeleteObject | ( | T * | object | ) |
|
static |
|
static |
bool tesseract::DeSerialize | ( | FILE * | fp, |
char * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
float * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
int8_t * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
int16_t * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
int32_t * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
uint8_t * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
uint16_t * | data, | ||
size_t | n | ||
) |
bool tesseract::DeSerialize | ( | FILE * | fp, |
uint32_t * | data, | ||
size_t | n | ||
) |
void tesseract::DetectParagraphs | ( | int | debug_level, |
GenericVector< RowInfo > * | row_infos, | ||
GenericVector< PARA *> * | row_owners, | ||
PARA_LIST * | paragraphs, | ||
GenericVector< ParagraphModel *> * | models | ||
) |
void tesseract::DetectParagraphs | ( | int | debug_level, |
bool | after_text_recognition, | ||
const MutableIterator * | block_start, | ||
GenericVector< ParagraphModel *> * | models | ||
) |
|
static |
|
static |
|
static |
|
static |
double tesseract::DotProductAVX | ( | const double * | u, |
const double * | v, | ||
int | n | ||
) |
double tesseract::DotProductSSE | ( | const double * | u, |
const double * | v, | ||
int | n | ||
) |
tesseract::double_VAR | ( | textord_tabvector_vertical_gap_fraction | , |
0.5 | , | ||
"max fraction of mean blob width allowed for vertical gaps in vertical text" | |||
) |
tesseract::double_VAR | ( | textord_tabvector_vertical_box_ratio | , |
0.5 | , | ||
"Fraction of box matches required to declare a line vertical" | |||
) |
tesseract::double_VAR_H | ( | textord_tabvector_vertical_gap_fraction | , |
0.5 | , | ||
"Max fraction of mean blob width allowed for vertical gaps in vertical text" | |||
) |
tesseract::double_VAR_H | ( | textord_tabvector_vertical_box_ratio | , |
0.5 | , | ||
"Fraction of box matches required to declare a line vertical" | |||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.
|
static |
|
static |
|
static |
|
static |
|
static |
find_modal_font
Find the modal font and remove it from the stats.
|
static |
bool tesseract::FirstWordWouldHaveFit | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after, | ||
tesseract::ParagraphJustification | justification | ||
) |
bool tesseract::FirstWordWouldHaveFit | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
void tesseract::FontInfoDeleteCallback | ( | FontInfo | f | ) |
void tesseract::FontSetDeleteCallback | ( | FontSet | fs | ) |
|
inline |
|
inline |
|
static |
void tesseract::GeneratePerspectiveDistortion | ( | int | width, |
int | height, | ||
TRand * | randomizer, | ||
Pix ** | pix, | ||
GenericVector< TBOX > * | boxes | ||
) |
|
static |
|
static |
|
static |
Gets the block orientation at the current iterator position.
|
static |
std::string tesseract::GetXheightString | ( | const std::string & | script_dir, |
const UNICHARSET & | unicharset | ||
) |
|
static |
|
static |
Helper returns true if the given choice has a better case variant before it in the choice_list that is not distinguishable by size.
void tesseract::HistogramRect | ( | Pix * | src_pix, |
int | channel, | ||
int | left, | ||
int | top, | ||
int | width, | ||
int | height, | ||
int * | histogram | ||
) |
|
static |
STRING tesseract::HOcrEscape | ( | const char * | text | ) |
Escape a char string - replace <>&"' with HTML codes.
Escape a char string - replace &<>"' with HTML codes.
|
static |
|
static |
|
static |
|
static |
tesseract::INT_VAR | ( | textord_tabfind_show_strokewidths | , |
0 | , | ||
"Show stroke widths" | |||
) |
tesseract::INT_VAR | ( | textord_tabfind_show_partitions | , |
0 | , | ||
"Show partition bounds, waiting if >1" | |||
) |
int32_t tesseract::IntDotProductSSE | ( | const int8_t * | u, |
const int8_t * | v, | ||
int | n | ||
) |
|
static |
|
static |
int tesseract::InterwordSpace | ( | const GenericVector< RowScratchRegisters > & | rows, |
int | row_start, | ||
int | row_end | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
bool tesseract::IsInterchangeValid | ( | const char32 | ch | ) |
bool tesseract::IsInterchangeValid7BitAscii | ( | const char32 | ch | ) |
|
static |
|
inline |
|
static |
|
inline |
|
static |
|
static |
|
inline |
|
static |
|
static |
|
static |
|
static |
bool tesseract::IsUTF8Whitespace | ( | const char * | text | ) |
bool tesseract::IsValidCodepoint | ( | const char32 | ch | ) |
bool tesseract::IsWhitespace | ( | const char32 | ch | ) |
|
static |
|
static |
void tesseract::LeftWordAttributes | ( | const UNICHARSET * | unicharset, |
const WERD_CHOICE * | werd, | ||
const STRING & | utf8, | ||
bool * | is_list, | ||
bool * | starts_idea, | ||
bool * | ends_idea | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline |
|
inline |
|
inline |
ShapeTable * tesseract::LoadShapeTable | ( | const STRING & | file_prefix | ) |
MasterTrainer * tesseract::LoadTrainingData | ( | int | argc, |
const char *const * | argv, | ||
bool | replication, | ||
ShapeTable ** | shape_table, | ||
STRING * | file_prefix | ||
) |
Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:
|
inline |
|
static |
|
static |
|
static |
Return a TBLOB * from the whole pix. To be freed later with delete.
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline |
|
inline |
|
static |
|
static |
|
static |
|
static |
bool tesseract::NormalizeCleanAndSegmentUTF8 | ( | UnicodeNormMode | u_mode, |
OCRNorm | ocr_normalize, | ||
GraphemeNormMode | g_mode, | ||
bool | report_errors, | ||
const char * | str8, | ||
std::vector< std::string > * | graphemes | ||
) |
|
static |
bool tesseract::NormalizeUTF8String | ( | UnicodeNormMode | u_mode, |
OCRNorm | ocr_normalize, | ||
GraphemeNorm | grapheme_normalize, | ||
const char * | str8, | ||
std::string * | normalized | ||
) |
|
static |
|
static |
|
static |
int tesseract::OtsuStats | ( | const int * | histogram, |
int * | H_out, | ||
int * | omega0_out | ||
) |
int tesseract::OtsuThreshold | ( | Pix * | src_pix, |
int | left, | ||
int | top, | ||
int | width, | ||
int | height, | ||
int ** | thresholds, | ||
int ** | hi_values | ||
) |
|
static |
int tesseract::ParamsTrainingFeatureByName | ( | const char * | name | ) |
void tesseract::ParseCommandLineFlags | ( | const char * | usage, |
int * | argc, | ||
char *** | argv, | ||
const bool | remove_flags | ||
) |
|
static |
Pix * tesseract::PrepareDistortedPix | ( | const Pix * | pix, |
bool | perspective, | ||
bool | invert, | ||
bool | white_noise, | ||
bool | smooth_noise, | ||
bool | blur, | ||
int | box_reduction, | ||
TRand * | randomizer, | ||
GenericVector< TBOX > * | boxes | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
int tesseract::ProjectiveCoeffs | ( | int | width, |
int | height, | ||
TRand * | randomizer, | ||
float ** | im_coeffs, | ||
float ** | box_coeffs | ||
) |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. Depend critically on the order of elements of PageSegMode. NOTE that arg is an int for compatibility with INT_PARAM.
|
inline |
|
inline |
|
static |
|
static |
|
static |
|
static |
STRING tesseract::ReadFile | ( | const std::string & | filename, |
FileReader | reader | ||
) |
void* tesseract::ReCachePagesFunc | ( | void * | data | ) |
void tesseract::RecomputeMarginsAndClearHypotheses | ( | GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
int | percentile | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
void tesseract::RightWordAttributes | ( | const UNICHARSET * | unicharset, |
const WERD_CHOICE * | werd, | ||
const STRING & | utf8, | ||
bool * | is_list, | ||
bool * | starts_idea, | ||
bool * | ends_idea | ||
) |
|
static |
|
static |
bool tesseract::RowsFitModel | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
const ParagraphModel * | model | ||
) |
|
static |
|
static |
|
inline |
|
static |
|
static |
Helper scans the collection of predecessors for competing siblings that have the same letter with the opposite case, setting competing_vse.
const char * tesseract::ScriptPosToString | ( | enum ScriptPos | script_pos | ) |
|
static |
|
static |
bool tesseract::Serialize | ( | FILE * | fp, |
const char * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const float * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const int8_t * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const int16_t * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const int32_t * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const uint8_t * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const uint16_t * | data, | ||
size_t | n | ||
) |
bool tesseract::Serialize | ( | FILE * | fp, |
const uint32_t * | data, | ||
size_t | n | ||
) |
void tesseract::SetBlobStrokeWidth | ( | Pix * | pix, |
BLOBNBOX * | blob | ||
) |
|
static |
|
static |
|
static |
void tesseract::SetPropertiesForInputFile | ( | const std::string & | script_dir, |
const std::string & | input_unicharset_file, | ||
const std::string & | output_unicharset_file, | ||
const std::string & | output_xheights_file | ||
) |
void tesseract::SetScriptProperties | ( | const std::string & | script_dir, |
UNICHARSET * | unicharset | ||
) |
|
static |
|
inline |
void tesseract::SetupBasicProperties | ( | bool | report_errors, |
bool | decompose, | ||
UNICHARSET * | unicharset | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline |
int tesseract::sort_cmp | ( | const void * | t1, |
const void * | t2 | ||
) |
int tesseract::sort_ptr_cmp | ( | const void * | t1, |
const void * | t2 | ||
) |
|
static |
int tesseract::SortByBoxBottom | ( | const void * | void1, |
const void * | void2 | ||
) |
int tesseract::SortByBoxLeft | ( | const void * | void1, |
const void * | void2 | ||
) |
int tesseract::SortByRating | ( | const void * | void1, |
const void * | void2 | ||
) |
int tesseract::SortByUnicharID | ( | const void * | void1, |
const void * | void2 | ||
) |
|
static |
|
static |
|
static |
int tesseract::SortRightToLeft | ( | const void * | void1, |
const void * | void2 | ||
) |
unsigned int tesseract::SpanUTF8NotWhitespace | ( | const char * | text | ) |
unsigned int tesseract::SpanUTF8Whitespace | ( | const char * | text | ) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline |
|
static |
|
inline |
|
inline |
|
static |
|
static |
|
static |
Pix * tesseract::TraceBlockOnReducedPix | ( | BLOCK * | block, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Pix * tesseract::TraceOutlineOnReducedPix | ( | C_OUTLINE * | outline, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
|
static |
int tesseract::UnicodeFor | ( | const UNICHARSET * | u, |
const WERD_CHOICE * | werd, | ||
int | pos | ||
) |
|
static |
|
static |
|
static |
|
static |
bool tesseract::ValidBodyLine | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | row, | ||
const ParagraphModel * | model | ||
) |
bool tesseract::ValidFirstLine | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | row, | ||
const ParagraphModel * | model | ||
) |
|
static |
|
static |
|
static |
bool tesseract::write_info | ( | FILE * | f, |
const FontInfo & | fi | ||
) |
bool tesseract::write_set | ( | FILE * | f, |
const FontSet & | fs | ||
) |
bool tesseract::write_spacing_info | ( | FILE * | f, |
const FontInfo & | fi | ||
) |
|
static |
|
static |
bool tesseract::WriteFile | ( | const std::string & | output_dir, |
const std::string & | lang, | ||
const std::string & | suffix, | ||
const GenericVector< char > & | data, | ||
FileWriter | writer | ||
) |
bool tesseract::WriteRecoder | ( | const UNICHARSET & | unicharset, |
bool | pass_through, | ||
const std::string & | output_dir, | ||
const std::string & | lang, | ||
FileWriter | writer, | ||
STRING * | radical_table_data, | ||
TessdataManager * | traineddata | ||
) |
void tesseract::WriteShapeTable | ( | const STRING & | file_prefix, |
const ShapeTable & | shape_table | ||
) |
bool tesseract::WriteUnicharset | ( | const UNICHARSET & | unicharset, |
const std::string & | output_dir, | ||
const std::string & | lang, | ||
FileWriter | writer, | ||
TessdataManager * | traineddata | ||
) |
|
static |
Given a recognized blob, see if a contiguous collection of sub-pieces (chopped blobs) starting at its left might qualify as being a subscript or superscript letter based only on y position. Also do this for the right side.
|
inline |
const int tesseract::case_state_table[6][4] |
const int tesseract::kAdamCorrectionIterations = 200000 |
const double tesseract::kAdamEpsilon = 1e-8 |
const int tesseract::kAdamFlag = 4 |
const int tesseract::kAdjacentLeaderSearchPadding = 2 |
const double tesseract::kAlignedFraction = 0.03125 |
const double tesseract::kAlignedGapFraction = 0.75 |
const char* tesseract::kAlignmentNames[] |
const double tesseract::kAllowBlobArea = 0.05 |
const double tesseract::kAllowBlobHeight = 0.3 |
const double tesseract::kAllowBlobWidth = 0.4 |
const double tesseract::kAllowTextArea = 0.8 |
const double tesseract::kAllowTextHeight = 0.5 |
const double tesseract::kAllowTextWidth = 0.6 |
|
static |
|
static |
const char * tesseract::kApostropheLikeUTF8 |
|
static |
|
static |
const double tesseract::kBestCheckpointFraction = 31.0 / 32.0 |
const double tesseract::kBigPartSizeRatio = 1.75 |
|
static |
const int tesseract::kBoxClipTolerance = 2 |
const double tesseract::kBrokenCJKIterationFraction = 0.125 |
const int tesseract::kBytesPer64BitNumber = 20 |
Max bytes in the decimal representation of int64_t.
const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1 |
Multiplier for max expected text length; assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline.
const int tesseract::kBytesPerNumber = 5 |
The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.
const int tesseract::kCellSplitColumnThreshold = 0 |
const int tesseract::kCellSplitRowThreshold = 0 |
const float tesseract::kCertaintyScale = 7.0f |
const double tesseract::kCertOffset = -0.085 |
|
static |
const double tesseract::kCJKAspectRatio = 1.25 |
const double tesseract::kCJKAspectRatioIncrease = 1.0625 |
const double tesseract::kCJKBrokenDistanceFraction = 0.25 |
const int tesseract::kCJKMaxComponents = 8 |
const int tesseract::kCJKRadius = 2 |
const int tesseract::kColumnWidthFactor = 20 |
Pixel resolution of column width estimates.
const double tesseract::kCosMaxSkewAngle = 0.866025 |
const int tesseract::kCrackSpacing = 100 |
Spacing of cracks across the page to break up tall vertical lines.
const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F) |
const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F) |
|
static |
|
static |
const int tesseract::kDefaultResolution = 300 |
const double tesseract::kDiacriticXPadRatio = 7.0 |
const double tesseract::kDiacriticYPadRatio = 1.75 |
|
static |
const double tesseract::kDictRatio = 2.25 |
|
static |
|
static |
const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE" |
const int tesseract::kDoubleFlag = 128 |
const double tesseract::kErrClip = 1.0f |
const int tesseract::kErrorGraphInterval = 1000 |
const int tesseract::kExposureFactor = 16 |
const int tesseract::kFeaturePadding = 2 |
const float tesseract::kFontMergeDistance = 0.025 |
const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE" |
const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7 |
const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 } |
const int tesseract::kGoodRowNumberOfColumnsSmallSize |
const int tesseract::kGutterMultiple = 4 |
const int tesseract::kGutterToNeighbourRatio = 3 |
const double tesseract::kHighConfidence = 0.9375 |
const int tesseract::kHistogramBuckets = 16 |
const int tesseract::kHistogramSize = 256 |
const double tesseract::kHorizontalGapMergeFraction = 0.5 |
const double tesseract::kHorizontalSpacing = 0.30 |
const int tesseract::kHorzStrongTextlineAspect = 5 |
const int tesseract::kHorzStrongTextlineCount = 8 |
const int tesseract::kHorzStrongTextlineHeight = 10 |
const char * tesseract::kHyphenLikeUTF8 |
The following are confusable internal word punctuation symbols which we normalize to the first variant when matching in dawgs.
|
static |
|
static |
|
static |
const int tesseract::kImagePadding = 4 |
const double tesseract::kImprovementFraction = 15.0 / 16.0 |
const float tesseract::kInfiniteDist = 999.0f |
const char* tesseract::kInputFile = "noname.tif" |
Filename used for input image file, from which to derive a name to search for a possible UNLV zone file, if none is specified by SetInputName.
const int tesseract::kInt8Flag = 1 |
const double tesseract::kLargeTableProjectionThreshold = 0.45 |
const int tesseract::kLargeTableRowCount = 6 |
const int tesseract::kLatinChs[] |
Latin chars corresponding to the unicode chars above.
const double tesseract::kLearningRateDecay = sqrt(0.5) |
const int tesseract::kLeftIndentAlignmentCountTh = 1 |
const double tesseract::kLineCountReciprocal = 4.0 |
const int tesseract::kLinedTableMinHorizontalLines = 3 |
const int tesseract::kLinedTableMinVerticalLines = 3 |
const int tesseract::kLineFindGridSize = 50 |
Grid size used by line finder. Not very critical.
const double tesseract::kLineFragmentAspectRatio = 10.0 |
const double tesseract::kLineResidueAspectRatio = 8.0 |
const int tesseract::kLineResiduePadRatio = 3 |
const double tesseract::kLineResidueSizeRatio = 1.75 |
const int tesseract::kLineTrapLongest = 4 |
const int tesseract::kLineTrapShortest = 2 |
const char * tesseract::kLRM = "\u200E" |
const double tesseract::kMarginFactor = 1.1 |
const double tesseract::kMarginOverlapFraction = 0.25 |
const float tesseract::kMathDigitDensityTh1 = 0.25 |
const float tesseract::kMathDigitDensityTh2 = 0.1 |
const float tesseract::kMathItalicDensityTh = 0.5 |
const int tesseract::kMaxAmbigStringSize = 30 * ( 10 + 1) |
const double tesseract::kMaxBaselineError = 0.4375 |
const double tesseract::kMaxBlobOverlapFactor = 4.0 |
const int tesseract::kMaxBlobWidth = 500 |
const int16_t tesseract::kMaxBoxEdgeDiff = 2 |
const int tesseract::kMaxBoxesInDataPartition = 20 |
|
static |
const int tesseract::kMaxBytesPerLine |
A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.
const int tesseract::kMaxCaptionLines = 7 |
const int tesseract::kMaxCharTopRange = 48 |
const int tesseract::kMaxCircleErosions = 8 |
const int tesseract::kMaxCJKSizeRatio = 5 |
const int tesseract::kMaxColorDistance = 900 |
const int tesseract::kMaxColumnHeaderDistance = 4 |
const double tesseract::kMaxDiacriticDistanceRatio = 1.25 |
const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0 |
const double tesseract::kMaxDistToPartSizeRatio = 1.5 |
const int tesseract::kMaxFillinMultiple = 11 |
|
static |
const double tesseract::kMaxGapInTextPartition = 4.0 |
const double tesseract::kMaxGutterWidthAbsolute = 2.00 |
const int tesseract::kMaxIncompatibleColumnCount = 2 |
const int tesseract::kMaxInputHeight = 48 |
const int tesseract::kMaxIntSize = 22 |
Max string length of an int.
const int tesseract::kMaxLargeOverlapsWithMedium = 12 |
const int tesseract::kMaxLargeOverlapsWithSmall = 3 |
const double tesseract::kMaxLeaderGapFractionOfMax = 0.25 |
const double tesseract::kMaxLeaderGapFractionOfMin = 0.5 |
const int tesseract::kMaxLigature = 0xfb17 |
const int tesseract::kMaxLineLength = 1024 |
const int tesseract::kMaxLineResidue = 6 |
const int tesseract::kMaxMediumOverlapsWithSmall = 12 |
|
static |
const int tesseract::kMaxNeighbourDistFactor = 4 |
const double tesseract::kMaxNonLineDensity = 0.25 |
|
static |
TessdataType could be updated to contain more entries, however we do not expect that number to be astronomically high. In order to automatically detect endianness TessdataManager will flip the bits if actual_tessdata_num_entries_ is larger than kMaxNumTessdataEntries.
const int tesseract::kMaxOffsetDist = 32 |
const int tesseract::kMaxPadFactor = 6 |
const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0 |
const double tesseract::kMaxPartitionSpacing = 1.75 |
const int tesseract::kMaxRaggedSearch = 25 |
const int tesseract::kMaxRealDistance = 2.0 |
const double tesseract::kMaxRectangularFraction = 0.75 |
const double tesseract::kMaxRectangularGradient = 0.1 |
const int tesseract::kMaxRMSColorNoise = 128 |
const double tesseract::kMaxRowSize = 2.5 |
const double tesseract::kMaxSameBlockLineSpacing = 3 |
const double tesseract::kMaxSizeRatio = 1.5 |
const int tesseract::kMaxSkewFactor = 15 |
const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32 |
|
static |
const double tesseract::kMaxSpacingDrift = 1.0 / 72 |
const double tesseract::kMaxStaveHeight = 1.0 |
const double tesseract::kMaxTableCellXheight = 2.0 |
const double tesseract::kMaxTopSpacingFraction = 0.25 |
const int tesseract::kMaxUnicharsPerCluster = 2000 |
const int tesseract::kMaxVerticalSearch = 12 |
const int tesseract::kMaxVerticalSpacing = 500 |
const int tesseract::kMaxWinSize = 2000 |
const double tesseract::kMaxXProjectionGapFactor = 2.0 |
|
static |
|
static |
const double tesseract::kMinAlignedGutter = 0.25 |
const int tesseract::kMinAlignedTabs = 4 |
const double tesseract::kMinBaselineCoverage = 0.5 |
const int tesseract::kMinBoxesInTextPartition = 10 |
const double tesseract::kMinCaptionGapHeightRatio = 0.5 |
const double tesseract::kMinCaptionGapRatio = 2.0 |
const float tesseract::kMinCertainty = -20.0f |
const int tesseract::kMinChainTextValue = 3 |
const int tesseract::kMinClusteredShapes = 1 |
const int tesseract::kMinColorDifference = 16 |
const int tesseract::kMinColumnWidth = 2.0 / 3 |
const double tesseract::kMinDiacriticSizeRatio = 1.0625 |
const double tesseract::kMinDivergenceRate = 50.0 |
const int tesseract::kMinEvaluatedTabs = 3 |
const double tesseract::kMinFilledArea = 0.35 |
|
static |
const double tesseract::kMinFractionalLinesInColumn = 0.125 |
const double tesseract::kMinGoodTextPARatio = 1.5 |
const double tesseract::kMinGutterFraction = 0.5 |
const double tesseract::kMinGutterWidthGrid = 0.5 |
const int tesseract::kMinImageFindSize = 100 |
const int tesseract::kMinLeaderCount = 5 |
const int tesseract::kMinLigature = 0xfb00 |
const int tesseract::kMinLineLengthFraction = 4 |
Denominator of resolution; gives the minimum length in pixels that lines are required to have.
const int tesseract::kMinLinesInColumn = 10 |
const double tesseract::kMinMaxGapInTextPartition = 0.5 |
const double tesseract::kMinMusicPixelFraction = 0.75 |
const double tesseract::kMinOverlapWithTable = 0.6 |
const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0 |
const int tesseract::kMinPointsForErrorCount = 16 |
const float tesseract::kMinProb = exp(kMinCertainty) |
const double tesseract::kMinRaggedGutter = 1.5 |
const int tesseract::kMinRaggedTabs = 5 |
const int tesseract::kMinRampSize = 1000 |
const double tesseract::kMinRectangularFraction = 0.125 |
const int tesseract::kMinRectSize = 10 |
Minimum sensible image size to be worth running tesseract.
const int tesseract::kMinRowsInTable = 3 |
const int tesseract::kMinStallIterations = 10000 |
const int tesseract::kMinStartedErrorRate = 75 |
const int tesseract::kMinStrongTextValue = 6 |
const double tesseract::kMinTabGradient = 4.0 |
const int tesseract::kMinThickLineWidth = 12 |
const int tesseract::kMinVerticalSearch = 3 |
const int tesseract::kMinWinSize = 500 |
const int tesseract::kMostlyOneDirRatio = 3 |
const double tesseract::kNeighbourSearchFactor = 2.5 |
const char* tesseract::kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"} |
const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512 |
const double tesseract::kNoiseOverlapGrowthFactor = 4.0 |
const int tesseract::kNoisePadding = 4 |
const char* tesseract::kNullChar = "<nul>" |
const int tesseract::kNumAdjustmentIterations = 100 |
const int tesseract::kNumbersPerBlob = 5 |
The 5 numbers output for each box (the usual 4 and a page number.)
|
static |
const int tesseract::kNumEndPoints = 3 |
|
static |
const int tesseract::kNumPagesPerBatch = 100 |
const char* tesseract::kOldVarsFile = "failed_vars.txt" |
Temp file used for storing current parameters before applying retry values.
const int tesseract::kOriginalNoiseMultiple = 8 |
const double tesseract::kParagraphEndingPreviousLineRatio = 1.3 |
|
static |
const char * tesseract::kPDF = "\u202C" |
const double tesseract::kPhotoOffsetFraction = 0.375 |
const int tesseract::kPrime1 = 17 |
const int tesseract::kPrime2 = 13 |
|
static |
const int tesseract::kRadicalRadix = 29 |
const double tesseract::kRaggedFraction = 2.5 |
const double tesseract::kRaggedGapFraction = 1.0 |
const int tesseract::kRaggedGutterMultiple = 5 |
const int tesseract::kRandomizingCenter = 128 |
const double tesseract::kRatingEpsilon = 1.0 / 32 |
|
static |
const double tesseract::kRequiredColumns = 0.7 |
const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL" |
const int tesseract::kRGBRMSColors = 4 |
const char * tesseract::kRLE = "\u202A" |
const char * tesseract::kRLM = "\u200F" |
const double tesseract::kRMSFitScaling = 8.0 |
const float tesseract::kRotationRange = 0.02f |
const int tesseract::kRulingVerticalMargin = 3 |
const int tesseract::kSaltnPepper = 5 |
|
static |
|
static |
|
static |
const double tesseract::kScaleFactor = 256.0 |
|
static |
const int tesseract::kSeedBlobsCountTh = 10 |
const int tesseract::kSideSpaceMargin = 10 |
|
static |
|
static |
|
static |
const int tesseract::kSimilarRaggedDist = 50 |
const int tesseract::kSimilarVectorDist = 10 |
const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile) |
const float tesseract::kSizeRatioToReject = 2.0 |
|
static |
const double tesseract::kSmallTableProjectionThreshold = 0.35 |
const int tesseract::kSmoothDecisionMargin = 4 |
const double tesseract::kSplitPartitionSize = 2.0 |
const int tesseract::kSquareLimit = 25 |
const double tesseract::kStageTransitionThreshold = 10.0 |
const double tesseract::kStateClip = 100.0 |
const double tesseract::kStrokeWidthCJK = 2.0 |
const double tesseract::kStrokeWidthConstantTolerance = 2.0 |
const double tesseract::kStrokeWidthFractionalTolerance = 0.25 |
const double tesseract::kStrokeWidthFractionCJK = 0.25 |
const double tesseract::kStrokeWidthFractionTolerance = 0.125 |
Allowed proportional change in stroke width to be the same font.
const double tesseract::kStrokeWidthTolerance = 1.5 |
Allowed constant change in stroke width to be the same font. Really 1.5 pixels.
const double tesseract::kSubTrainerMarginFraction = 3.0 / 128 |
const double tesseract::kTableColumnThreshold = 3.0 |
const int tesseract::kTableSize = 4096 |
const int tesseract::kTabRadiusFactor = 5 |
const int tesseract::kTargetXScale = 5 |
const int tesseract::kTargetYScale = 100 |
|
static |
kTessdataFileSuffixes[i] indicates the file suffix for tessdata of type i (from TessdataType enum).
const char tesseract::kTesseractReject = '~' |
Character returned when Tesseract couldn't recognize as anything.
const int tesseract::kTestChar = -1 |
const double tesseract::kThickLengthMultiple = 0.75 |
const int tesseract::kThinLineFraction = 20 |
Denominator of resolution; gives the maximum pixel width allowed for thin lines.
const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25 |
const float tesseract::kUnclearDensityTh = 0.25 |
const int tesseract::kUniChs[] |
Conversion table for non-Latin characters. Maps characters outside the Latin set into the Latin set. TODO(rays) incorporate this translation into unicharset.
|
static |
const char tesseract::kUniversalAmbigsFile |
const char tesseract::kUNLVReject = '~' |
Character used by UNLV error counter as a reject.
const char tesseract::kUNLVSuspect = '^' |
Character used by UNLV as a suspect marker.
const char * tesseract::kUTF8LineSeparator = "\u2028" |
const char * tesseract::kUTF8ParagraphSeparator = "\u2029" |
const double tesseract::kVerticalSpacing = -0.2 |
const int tesseract::kVLineAlignment = 3 |
const int tesseract::kVLineGutter = 1 |
const int tesseract::kVLineMinLength = 500 |
const int tesseract::kVLineSearchSize = 150 |
|
static |
|
static |
const float tesseract::kWorstDictCertainty = -25.0f |
const int tesseract::kXWinFrameSize = 30 |
const int tesseract::kYWinFrameSize = 80 |
|
static |
double tesseract::LogisticTable |
const char* const tesseract::RTLReversePolicyNames[] |
double tesseract::TanhTable |
CCUtilMutex tesseract::tprintfMutex |
|
static |