Classes
class	AlignedBlob

struct	AlignedBlobParams

class	AmbigSpec

class	AmbigSpec_IT

class	AmbigSpec_LIST

struct	AssociateStats

class	AssociateUtils

class	BaselineBlock

class	BaselineDetect

class	BaselineRow

class	BBGrid

struct	BestChoiceBundle
	Bundle together all the things pertaining to the best choice/state. More...

class	BitVector

struct	BlobData

class	BlobGrid

struct	BlockGroup

class	BoolParam

class	BoxChar

struct	BoxCharPtrSort

class	BoxWord

class	CCNonTextDetect

class	CCStruct

class	CCUtil

class	CCUtilMutex

class	ChoiceIterator

class	Classify

class	ClassPruner

struct	ClipFFunc

struct	ClipFPrime

struct	ClipGFunc

struct	ClipGPrime

struct	Cluster

class	ColPartition

class	ColPartitionGrid

class	ColPartitionSet

class	ColSegment

class	ColumnFinder

class	Convolve

class	CTC

class	CUtil

class	Dawg

struct	DawgArgs

class	DawgCache

struct	DawgLoader

struct	DawgPosition

class	DawgPositionVector

class	DebugPixa

class	DetLineFit

class	Dict

struct	DocQualCallbacks

class	DocumentCache

class	DocumentData

class	DoubleParam

class	DoublePtr

class	DPPoint

class	EquationDetect

class	EquationDetectBase

class	ErrorCounter

struct	FFunc

class	File

struct	FloatWordFeature

struct	FontInfo

class	FontInfoTable

struct	FontSet

struct	FontSpacingInfo

class	FontUtils

struct	FPrime

class	FRAGMENT

class	FullyConnected

class	GenericHeap

struct	GeometricClassifierState

struct	GFunc

struct	GPrime

class	GridBase

class	GridSearch

struct	HFunc

struct	HPrime

class	IcuErrorCode

struct	IdentityFunc

class	ImageData

class	ImageFind

class	ImageThresholder

class	IndexMap

class	IndexMapBiDi

class	Input

class	InputBuffer

struct	Interval

class	IntFeatureDist

class	IntFeatureMap

class	IntFeatureSpace

class	IntGrid

class	IntParam

class	IntSimdMatrix

class	IntSimdMatrixAVX2

class	IntSimdMatrixSSE

struct	KDPair

struct	KDPairDec

struct	KDPairInc

class	KDPtrPair

struct	KDPtrPairDec

struct	KDPtrPairInc

class	KDVector

class	LanguageModel

struct	LanguageModelDawgInfo

struct	LanguageModelNgramInfo

struct	LanguageModelState
	Struct to store information maintained by various language model components. More...

class	LigatureTable

class	LineFinder

struct	LineHypothesis

struct	LMConsistencyInfo

class	LMPainPoints

class	LSTM

class	LSTMRecognizer

class	LSTMTester

class	LSTMTrainer

class	LTRResultIterator

class	MasterTrainer

class	Maxpool

class	MutableIterator

class	Network

class	NetworkBuilder

class	NetworkIO

class	NetworkScratch

struct	NodeChild

class	ObjectCache

class	OutputBuffer

class	PageIterator

class	PangoFontInfo

class	ParagraphModelSmearer

class	ParagraphTheory

class	Parallel

class	Param

class	ParamsModel

class	ParamsTrainingBundle

struct	ParamsTrainingHypothesis

struct	ParamsVectors

class	ParamUtils

class	PixelHistogram

class	Plumbing

class	PointerVector

struct	PtrHash

class	RecodeBeamSearch

class	RecodedCharID

struct	RecodeNode

class	Reconfig

struct	Relu

struct	ReluPrime

class	ResultIterator

class	Reversed

class	RowInfo

class	RowScratchRegisters

class	SampleIterator

struct	ScoredFont

class	SegSearchPending

class	Series

class	Shape

class	ShapeClassifier

struct	ShapeDist

struct	ShapeQueueEntry

struct	ShapeRating

class	ShapeTable

class	ShiroRekhaSplitter

class	SimpleClusterer

struct	SpacingProperties

class	SquishedDawg

class	StaticShape

class	StrideMap

class	StringParam

class	StringRenderer

class	StrokeWidth

class	StructuredTable

class	TabConstraint

class	TabEventHandler

class	TabFind

class	TableFinder

class	TableRecognizer

class	TabVector

struct	TESS_CHAR

class	TessBaseAPI

class	TessBoxTextRenderer

class	TessClassifier

class	TessdataManager

class	Tesseract

struct	TesseractStats

class	TessHOcrRenderer

class	TessOsdRenderer

class	TessPDFRenderer

class	TessResultRenderer

class	TessTextRenderer

class	TessTsvRenderer

class	TessUnlvRenderer

class	TextlineProjection

class	Textord

class	TFile

class	TrainingSample

class	TrainingSampleSet

class	TRand

class	TransposedArray

class	Trie

class	UNICHAR

class	UnicharAmbigs

struct	UnicharAndFonts

class	UnicharCompress

class	UnicharIdArrayUtils

struct	UnicharRating

class	UnicodeSpanSkipper

struct	UnityFunc

class	ValidateGrapheme

class	ValidateIndic

class	ValidateJavanese

class	ValidateKhmer

class	ValidateMyanmar

class	Validator

struct	ViterbiStateEntry

class	WeightMatrix

struct	WordData

class	WordFeature

class	Wordrec

class	WordWithBox

class	WorkingPartSet

Typedefs
typedef int(Dict::*	DictFunc) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const

typedef double(Dict::*	ProbabilityInContextFunc) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)

typedef float(Dict::*	ParamsModelClassifyFunc) (const char *lang, void *path)

typedef void(Wordrec::*	FillLatticeFunc) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

typedef TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * >	TruthCallback

using	SetOfModels = GenericVectorEqEq< const ParagraphModel * >

typedef void(Tesseract::*	WordRecognizer) (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)

using	ParamsTrainingHypothesisList = GenericVector< ParamsTrainingHypothesis >

using	UnicharIdVector = GenericVector< UNICHAR_ID >

using	UnicharAmbigsVector = GenericVector< AmbigSpec_LIST * >

typedef bool(*	FileReader) (const STRING &filename, GenericVector< char > *data)

typedef bool(*	FileWriter) (const GenericVector< char > &data, const STRING &filename)

using	IntKDPair = KDPairInc< int, int >

using	char32 = signed int

using	RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > >>

using	RSCounts = std::unordered_map< int, int >

using	ShapeQueue = GenericHeap< ShapeQueueEntry >

using	NodeChildVector = GenericVector< NodeChild >

using	SuccessorList = GenericVector< int >

using	SuccessorListsVector = GenericVector< SuccessorList * >

using	DawgVector = GenericVector< Dawg * >

typedef TessResultCallback2< bool, const GenericVector< char > &, LSTMTrainer * > *	CheckPointReader

typedef TessResultCallback3< bool, SerializeAmount, const LSTMTrainer *, GenericVector< char > * > *	CheckPointWriter

typedef TessResultCallback4< STRING, int, const double *, const TessdataManager &, int > *	TestCallback

using	RecodePair = KDPairInc< double, RecodeNode >

using	RecodeHeap = GenericHeap< RecodePair >

using	BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >

using	ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >

using	PartSetVector = GenericVector< ColPartitionSet * >

using	WidthCallback = TessResultCallback1< bool, int >

using	ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >

using	ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >

using	WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >

using	WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >

using	LigHash = std::unordered_map< std::string, std::string, StringHash >

using	PainPointHeap = GenericHeap< MatrixCoordPair >

using	LanguageModelFlagsType = unsigned char
	Used for expressing various language model flags. More...

Enumerations
enum	LineType { LT_START = 'S', LT_BODY = 'C', LT_UNKNOWN = 'U', LT_MULTIPLE = 'M' }

enum	CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }

enum	CachingStrategy { CS_SEQUENTIAL, CS_ROUND_ROBIN }

enum	NormalizationMode { NM_BASELINE = -3, NM_CHAR_ISOTROPIC = -2, NM_CHAR_ANISOTROPIC = -1 }

enum	kParamsTrainingFeatureType { PTRAIN_DIGITS_SHORT, PTRAIN_DIGITS_MED, PTRAIN_DIGITS_LONG, PTRAIN_NUM_SHORT, PTRAIN_NUM_MED, PTRAIN_NUM_LONG, PTRAIN_DOC_SHORT, PTRAIN_DOC_MED, PTRAIN_DOC_LONG, PTRAIN_DICT_SHORT, PTRAIN_DICT_MED, PTRAIN_DICT_LONG, PTRAIN_FREQ_SHORT, PTRAIN_FREQ_MED, PTRAIN_FREQ_LONG, PTRAIN_SHAPE_COST_PER_CHAR, PTRAIN_NGRAM_COST_PER_CHAR, PTRAIN_NUM_BAD_PUNC, PTRAIN_NUM_BAD_CASE, PTRAIN_XHEIGHT_CONSISTENCY, PTRAIN_NUM_BAD_CHAR_TYPE, PTRAIN_NUM_BAD_SPACING, PTRAIN_NUM_BAD_FONT, PTRAIN_RATING_PER_CHAR, PTRAIN_NUM_FEATURE_TYPES }

enum	Orientation { ORIENTATION_PAGE_UP = 0, ORIENTATION_PAGE_RIGHT = 1, ORIENTATION_PAGE_DOWN = 2, ORIENTATION_PAGE_LEFT = 3 }

enum	WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }

enum	TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }

enum	PageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT, PSM_SPARSE_TEXT_OSD, PSM_RAW_LINE, PSM_COUNT }

enum	PageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL }

enum	ParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT }

enum	OcrEngineMode { OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT, OEM_COUNT }

enum	ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }

enum	AmbigType { NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG, CASE_AMBIG, AMBIG_TYPE_COUNT }

enum	SetParamConstraint { SET_PARAM_CONSTRAINT_NONE, SET_PARAM_CONSTRAINT_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_INIT_ONLY }

enum	TessdataType { TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP, TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG, TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_FIXED_LENGTH_DAWGS, TESSDATA_CUBE_UNICHARSET, TESSDATA_CUBE_SYSTEM_DAWG, TESSDATA_SHAPE_TABLE, TESSDATA_BIGRAM_DAWG, TESSDATA_UNAMBIG_DAWG, TESSDATA_PARAMS_MODEL, TESSDATA_LSTM, TESSDATA_LSTM_PUNC_DAWG, TESSDATA_LSTM_SYSTEM_DAWG, TESSDATA_LSTM_NUMBER_DAWG, TESSDATA_LSTM_UNICHARSET, TESSDATA_LSTM_RECODER, TESSDATA_VERSION, TESSDATA_NUM_ENTRIES }

enum	CharSegmentationType { CST_FRAGMENT, CST_WHOLE, CST_IMPROPER, CST_NGRAM }

enum	CountTypes { CT_UNICHAR_TOP_OK, CT_UNICHAR_TOP1_ERR, CT_UNICHAR_TOP2_ERR, CT_UNICHAR_TOPN_ERR, CT_UNICHAR_TOPTOP_ERR, CT_OK_MULTI_UNICHAR, CT_OK_JOINED, CT_OK_BROKEN, CT_REJECT, CT_FONT_ATTR_ERR, CT_OK_MULTI_FONT, CT_NUM_RESULTS, CT_RANK, CT_REJECTED_JUNK, CT_ACCEPTED_JUNK, CT_SIZE }

enum	DawgType { DAWG_TYPE_PUNCTUATION, DAWG_TYPE_WORD, DAWG_TYPE_NUMBER, DAWG_TYPE_PATTERN, DAWG_TYPE_COUNT }

enum	XHeightConsistencyEnum { XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT }

enum	TrainingFlags { TF_INT_MODE = 1, TF_COMPRESS_UNICHARSET = 64 }

enum	ErrorTypes { ET_RMS, ET_DELTA, ET_WORD_RECERR, ET_CHAR_ERROR, ET_SKIP_RATIO, ET_COUNT }

enum	Trainability { TRAINABLE, PERFECT, UNENCODABLE, HI_PRECISION_ERR, NOT_BOXED }

enum	SerializeAmount { LIGHT, NO_BEST_TRAINER, FULL }

enum	SubTrainerResult { STR_NONE, STR_UPDATED, STR_REPLACED }

enum	NetworkType { NT_NONE, NT_INPUT, NT_CONVOLVE, NT_MAXPOOL, NT_PARALLEL, NT_REPLICATED, NT_PAR_RL_LSTM, NT_PAR_UD_LSTM, NT_PAR_2D_LSTM, NT_SERIES, NT_RECONFIG, NT_XREVERSED, NT_YREVERSED, NT_XYTRANSPOSE, NT_LSTM, NT_LSTM_SUMMARY, NT_LOGISTIC, NT_POSCLIP, NT_SYMCLIP, NT_TANH, NT_RELU, NT_LINEAR, NT_SOFTMAX, NT_SOFTMAX_NO_CTC, NT_LSTM_SOFTMAX, NT_LSTM_SOFTMAX_ENCODED, NT_TENSORFLOW, NT_COUNT }

enum	NetworkFlags { NF_LAYER_SPECIFIC_LR = 64, NF_ADAM = 128 }

enum	TrainingState { TS_DISABLED, TS_ENABLED, TS_TEMP_DISABLE, TS_RE_ENABLE }

enum	NodeContinuation { NC_ANYTHING, NC_ONLY_DUP, NC_NO_DUP, NC_COUNT }

enum	TopNState { TN_TOP2, TN_TOPN, TN_ALSO_RAN, TN_COUNT }

enum	LossType { LT_NONE, LT_CTC, LT_SOFTMAX, LT_LOGISTIC }

enum	FlexDimensions { FD_BATCH, FD_HEIGHT, FD_WIDTH, FD_DIMSIZE }

enum	ColumnSpanningType { CST_NOISE, CST_FLOWING, CST_HEADING, CST_PULLOUT, CST_COUNT }

enum	NeighbourPartitionType { NPT_HTEXT, NPT_VTEXT, NPT_WEAK_HTEXT, NPT_WEAK_VTEXT, NPT_IMAGE, NPT_COUNT }

enum	LeftOrRight { LR_LEFT, LR_RIGHT }

enum	PartitionFindResult { PFR_OK, PFR_SKEW, PFR_NOISE }

enum	ColSegType { COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED, COL_COUNT }

enum	TabAlignment { TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED, TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT }

enum	FactorNames { FN_INCOLOR, FN_Y0, FN_Y1, FN_Y2, FN_Y3, FN_X0, FN_X1, FN_SHEAR, FN_NUM_FACTORS }

enum	UnicodeNormMode { UnicodeNormMode::kNFD, UnicodeNormMode::kNFC, UnicodeNormMode::kNFKD, UnicodeNormMode::kNFKC }

enum	OCRNorm { OCRNorm::kNone, OCRNorm::kNormalize }

enum	GraphemeNorm { GraphemeNorm::kNone, GraphemeNorm::kNormalize }

enum	GraphemeNormMode { GraphemeNormMode::kSingleString, GraphemeNormMode::kCombined, GraphemeNormMode::kGlyphSplit, GraphemeNormMode::kIndividualUnicodes }

enum	ViramaScript : char32 { ViramaScript::kNonVirama = 0, ViramaScript::kDevanagari = 0x900, ViramaScript::kBengali = 0x980, ViramaScript::kGurmukhi = 0xa00, ViramaScript::kGujarati = 0xa80, ViramaScript::kOriya = 0xb00, ViramaScript::kTamil = 0xb80, ViramaScript::kTelugu = 0xc00, ViramaScript::kKannada = 0xc80, ViramaScript::kMalayalam = 0xd00, ViramaScript::kSinhala = 0xd80, ViramaScript::kMyanmar = 0x1000, ViramaScript::kKhmer = 0x1780, ViramaScript::kJavanese = 0xa980 }

enum	LMPainPointsType { LM_PPTYPE_BLAMER, LM_PPTYPE_AMBIG, LM_PPTYPE_PATH, LM_PPTYPE_SHAPE, LM_PPTYPE_NUM }

Functions
static void	addAvailableLanguages (const STRING &datadir, const STRING &base, GenericVector< STRING > *langs)

static int	CompareSTRING (const void *p1, const void *p2)

static tesseract::Orientation	GetBlockTextOrientation (const PageIterator *it)

static void	AddBaselineCoordsTohOCR (const PageIterator *it, PageIteratorLevel level, STRING *hocr_str)

static void	AddIdTohOCR (STRING *hocr_str, const std::string base, int num1, int num2)

static void	AddIdTohOCR (STRING *hocr_str, const std::string base, int num1, int num2, int num3)

static void	AddBoxTohOCR (const ResultIterator *it, PageIteratorLevel level, STRING *hocr_str)

static void	AddBoxToTSV (const PageIterator *it, PageIteratorLevel level, STRING *hocr_str)

STRING	HOcrEscape (const char *text)

static TBLOB *	make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)

static void	add_space (TESS_CHAR_IT *it)

static float	rating_to_cost (float rating)

static void	extract_result (TESS_CHAR_IT *out, PAGE_RES *page_res)

static double	prec (double x)

static long	dist2 (int x1, int y1, int x2, int y2)

static void	GetWordBaseline (int writing_direction, int ppi, int height, int word_x1, int word_y1, int word_x2, int word_y2, int line_x1, int line_y1, int line_x2, int line_y2, double *x0, double *y0, double *length)

static void	AffineMatrix (int writing_direction, int line_x1, int line_y1, int line_x2, int line_y2, double *a, double *b, double *c, double *d)

static void	ClipBaseline (int ppi, int x1, int y1, int x2, int y2, int *line_x1, int *line_y1, int *line_x2, int *line_y2)

static bool	CodepointToUtf16be (int code, char utf16[kMaxBytesPerCodepoint])

double	DotProductAVX (const double *u, const double *v, int n)

double	DotProductSSE (const double *u, const double *v, int n)

int32_t	IntDotProductSSE (const int8_t *u, const int8_t *v, int n)

static void	clear_any_old_text (BLOCK_LIST *block_list)

static double	MedianXHeight (BLOCK_LIST *block_list)

static double	BoxMissMetric (const TBOX &box1, const TBOX &box2)

static void	WordGap (const PointerVector< WERD_RES > &words, int index, int *right, int *next_left)

static void	EvaluateWordSpan (const PointerVector< WERD_RES > &words, int first_index, int end_index, float *rating, float *certainty, bool *bad, bool *valid_permuter)

static int	SelectBestWords (double rating_ratio, double certainty_margin, bool debug, PointerVector< WERD_RES > *new_words, PointerVector< WERD_RES > *best_words)

static bool	WordsAcceptable (const PointerVector< WERD_RES > &words)

static BLOB_CHOICE *	FindBestMatchingChoice (UNICHAR_ID char_id, WERD_RES *word_res)

static void	CorrectRepcharChoices (BLOB_CHOICE *blob_choice, WERD_RES *word_res)

static void	find_modal_font (STATS *fonts, int16_t *font_out, int8_t *font_count)

static int	SortCPByTopReverse (const void *p1, const void *p2)

static int	SortCPByBottom (const void *p1, const void *p2)

static int	SortCPByHeight (const void *p1, const void *p2)

bool	IsTextOrEquationType (PolyBlockType type)

bool	IsLeftIndented (const EquationDetect::IndentType type)

bool	IsRightIndented (const EquationDetect::IndentType type)

static int	c_blob_comparator (const void *blob1p, const void *blob2p)

static Pix *	RemoveEnclosingCircle (Pix *pixs)

static void	AddAllScriptsConverted (const UNICHARSET &sid_set, const UNICHARSET &osd_set, GenericVector< int > *allowed_ids)

static bool	LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j)

static int	Epsilon (int space_pix)

static bool	AcceptableRowArgs (int debug_level, int min_num_rows, const char *function_name, const GenericVector< RowScratchRegisters > *rows, int row_start, int row_end)

static STRING	StrOf (int num)

static void	PrintTable (const GenericVector< GenericVector< STRING > > &rows, const STRING &colsep)

static STRING	RtlEmbed (const STRING &word, bool rtlify)

static void	PrintDetectorState (const ParagraphTheory &theory, const GenericVector< RowScratchRegisters > &rows)

static void	DebugDump (bool should_print, const STRING &phase, const ParagraphTheory &theory, const GenericVector< RowScratchRegisters > &rows)

static void	PrintRowRange (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)

static bool	IsLatinLetter (int ch)

static bool	IsDigitLike (int ch)

static bool	IsOpeningPunct (int ch)

static bool	IsTerminalPunct (int ch)

static const char *	SkipChars (const char *str, const char *toskip)

static const char *	SkipChars (const char *str, bool(*skip)(int))

static const char *	SkipOne (const char *str, const char *toskip)

static bool	LikelyListNumeral (const STRING &word)

static bool	LikelyListMark (const STRING &word)

bool	AsciiLikelyListItem (const STRING &word)

int	UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)

static bool	LikelyListMarkUnicode (int ch)

static bool	UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd)

void	LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)

void	RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)

static int	ClosestCluster (const GenericVector< Cluster > &clusters, int value)

static void	CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs)

static void	MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold)

static void	GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory)

static void	GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)

bool	ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

bool	ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)

bool	CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)

static void	DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory)

static void	DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows)

void	RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)

int	InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)

bool	FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)

bool	FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)

static bool	TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after)

static ParagraphModel	InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent)

static ParagraphModel	ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance)

bool	RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)

static void	MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end)

static void	ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory)

static void	StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)

static void	SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)

static void	ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA * > *row_owners, ParagraphTheory *theory)

static bool	RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row)

static void	LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end)

void	CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)

void	DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)

static void	InitializeTextAndBoxesPreRecognition (const MutableIterator &it, RowInfo *info)

static void	InitializeRowInfo (bool after_recognition, const MutableIterator &it, RowInfo *info)

void	DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)

bool	StrongModel (const ParagraphModel *model)

static bool	read_t (PAGE_RES_IT *page_res_it, TBOX *tbox)

static void	PrintPath (int length, const BLOB_CHOICE **blob_choices, const UNICHARSET &unicharset, const char *label, FILE *output_file)

static void	PrintMatrixPaths (int col, int dim, const MATRIX &ratings, int length, const BLOB_CHOICE **blob_choices, const UNICHARSET &unicharset, const char *label, FILE *output_file)

static void	PrintScriptDirs (const GenericVector< StrongScriptDirection > &dirs)

static void	YOutlierPieces (WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers)

static bool	IsStrInList (const STRING &str, const GenericVector< STRING > &str_list)

static void	CollectFonts (const UnicityTable< FontInfo > &new_fonts, UnicityTable< FontInfo > *all_fonts)

static void	AssignIds (const UnicityTable< FontInfo > &all_fonts, UnicityTable< FontInfo > *lang_fonts)

bool	CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2)

bool	CompareFontSet (const FontSet &fs1, const FontSet &fs2)

void	FontInfoDeleteCallback (FontInfo f)

void	FontSetDeleteCallback (FontSet fs)

bool	read_info (TFile *f, FontInfo *fi)

bool	write_info (FILE *f, const FontInfo &fi)

bool	read_spacing_info (TFile *f, FontInfo *fi)

bool	write_spacing_info (FILE *f, const FontInfo &fi)

bool	read_set (TFile *f, FontSet *fs)

bool	write_set (FILE *f, const FontSet &fs)

void *	ReCachePagesFunc (void *data)

int	OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)

void	HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)

int	OtsuStats (const int *histogram, int *H_out, int *omega0_out)

int	ParamsTrainingFeatureByName (const char *name)

bool	PSM_OSD_ENABLED (int pageseg_mode)

bool	PSM_ORIENTATION_ENABLED (int pageseg_mode)

bool	PSM_COL_FIND_ENABLED (int pageseg_mode)

bool	PSM_SPARSE (int pageseg_mode)

bool	PSM_BLOCK_FIND_ENABLED (int pageseg_mode)

bool	PSM_LINE_FIND_ENABLED (int pageseg_mode)

bool	PSM_WORD_FIND_ENABLED (int pageseg_mode)

const char *	ScriptPosToString (enum ScriptPos script_pos)

void	AmbigSpec_zapper (ELIST_LINK *link)

bool	LoadDataFromFile (const char *filename, GenericVector< char > *data)

bool	LoadDataFromFile (const STRING &filename, GenericVector< char > *data)

bool	SaveDataToFile (const GenericVector< char > &data, const STRING &filename)

bool	LoadFileLinesToStrings (const STRING &filename, GenericVector< STRING > *lines)

template<typename T >
bool	cmp_eq (T const &t1, T const &t2)

template<typename T >
int	sort_cmp (const void *t1, const void *t2)

template<typename T >
int	sort_ptr_cmp (const void *t1, const void *t2)

bool	DeSerialize (FILE *fp, char *data, size_t n)

bool	DeSerialize (FILE *fp, float *data, size_t n)

bool	DeSerialize (FILE *fp, int8_t *data, size_t n)

bool	DeSerialize (FILE *fp, int16_t *data, size_t n)

bool	DeSerialize (FILE *fp, int32_t *data, size_t n)

bool	DeSerialize (FILE *fp, uint8_t *data, size_t n)

bool	DeSerialize (FILE *fp, uint16_t *data, size_t n)

bool	DeSerialize (FILE *fp, uint32_t *data, size_t n)

bool	Serialize (FILE *fp, const char *data, size_t n)

bool	Serialize (FILE *fp, const float *data, size_t n)

bool	Serialize (FILE *fp, const int8_t *data, size_t n)

bool	Serialize (FILE *fp, const int16_t *data, size_t n)

bool	Serialize (FILE *fp, const int32_t *data, size_t n)

bool	Serialize (FILE *fp, const uint8_t *data, size_t n)

bool	Serialize (FILE *fp, const uint16_t *data, size_t n)

bool	Serialize (FILE *fp, const uint32_t *data, size_t n)

template<typename T , size_t N>
constexpr size_t	countof (T const (&)[N]) noexcept

static int	RadicalPreHash (const std::vector< int > &rs)

static bool	DecodeRadicalLine (STRING *radical_data_line, RSMap *radical_map)

static bool	DecodeRadicalTable (STRING *radical_data, RSMap *radical_map)

void	ExtractFontName (const STRING &filename, STRING *fontname)

TrainingSample *	BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)

static uint8_t	NormalizeDirection (uint8_t dir, const FCOORD &unnormed_pos, const DENORM &denorm, const DENORM *root_denorm)

static FCOORD	MeanDirectionVector (const LLSQ &point_diffs, const LLSQ &dirs, const FCOORD &start_pt, const FCOORD &end_pt)

static int	ComputeFeatures (const FCOORD &start_pt, const FCOORD &end_pt, double feature_length, GenericVector< INT_FEATURE_STRUCT > *features)

static int	GatherPoints (const C_OUTLINE *outline, double feature_length, const DENORM &denorm, const DENORM *root_denorm, int start_index, int end_index, ICOORD *pos, FCOORD *pos_normed, LLSQ *points, LLSQ *dirs)

static void	ExtractFeaturesFromRun (const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm, double feature_length, bool force_poly, GenericVector< INT_FEATURE_STRUCT > *features)

void	ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)

static void	AddNearFeatures (const IntFeatureMap &feature_map, int f, int levels, GenericVector< int > *good_features)

static void	CallWithUTF8 (TessCallback1< const char * > *cb, const WERD_CHOICE *wc)

static int	sort_strings_by_dec_length (const void *v1, const void *v2)

static int	BestLabel (const GENERIC_2D_ARRAY< float > &outputs, int t)

static double	LogSumExp (double ln_x, double ln_y)

double	Tanh (double x)

double	Logistic (double x)

template<class Func >
void	FuncInplace (int n, double *inout)

template<class Func >
void	FuncMultiply (const double *u, const double *v, int n, double *out)

template<typename T >
void	SoftmaxInPlace (int n, T *inout)

void	CopyVector (int n, const double *src, double *dest)

void	AccumulateVector (int n, const double *src, double *dest)

void	MultiplyVectorsInPlace (int n, const double *src, double *inout)

void	MultiplyAccumulate (int n, const double *u, const double *v, double *out)

void	SumVectors (int n, const double *v1, const double *v2, const double *v3, const double *v4, const double *v5, double *sum)

template<typename T >
void	ZeroVector (int n, T *vec)

template<typename T >
void	ClipVector (int n, T lower, T upper, T *vec)

void	CodeInBinary (int n, int nf, double *vec)

static uint32_t	ceil_log2 (uint32_t n)

static void	SkipWhitespace (char **str)

static NetworkType	NonLinearity (char func)

static Network *	BuildFullyConnected (const StaticShape &input_shape, NetworkType type, const STRING &name, int depth)

static void	ComputeBlackWhite (Pix *pix, float *black, float *white)

static void	HistogramWeight (double weight, STATS *histogram)

static bool	AtLeast2LineCrossings (BLOBNBOX_CLIST *blobs)

static Pix *	GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom)

Pix *	TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)

Pix *	TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)

template<class BBC >
int	SortByBoxLeft (const void *void1, const void *void2)

template<class BBC >
int	SortRightToLeft (const void *void1, const void *void2)

template<class BBC >
int	SortByBoxBottom (const void *void1, const void *void2)

static TBOX	AttemptBoxExpansion (const TBOX &box, const IntGrid &noise_density, int pad)

	BOOL_VAR (textord_tabfind_show_initial_partitions, false, "Show partition bounds")

	BOOL_VAR (textord_tabfind_show_reject_blobs, false, "Show blobs rejected as noise")

	INT_VAR (textord_tabfind_show_partitions, 0, "Show partition bounds, waiting if >1")

	BOOL_VAR (textord_tabfind_show_columns, false, "Show column bounds")

	BOOL_VAR (textord_tabfind_show_blocks, false, "Show final block bounds")

	BOOL_VAR (textord_tabfind_find_tables, true, "run table detection")

static void	ReleaseAllBlobsAndDeleteUnused (BLOBNBOX_LIST *blobs)

static TBOX	BoxFromHLine (const TabVector *hline)

static void	ReflectBlobList (BLOBNBOX_LIST *bblobs)

static void	RotateAndExplodeBlobList (const FCOORD &blob_rotation, BLOBNBOX_LIST *bblobs, STATS *widths, STATS *heights)

	BOOL_VAR_H (textord_tabfind_find_tables, false, "run table detection")

static void	ClipCoord (const ICOORD &bleft, const ICOORD &tright, ICOORD *pos)

static TO_BLOCK *	MoveBlobsToBlock (bool vertical_text, int line_spacing, BLOCK *block, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)

static int	MedianSpacing (int page_height, ColPartition_IT it)

static bool	UpdateLeftMargin (const ColPartition &part, int *margin_left, int *margin_right)

static bool	UpdateRightMargin (const ColPartition &part, int *margin_left, int *margin_right)

	BOOL_VAR (textord_tabfind_show_color_fit, false, "Show stroke widths")

static bool	OKMergeCandidate (const ColPartition *part, const ColPartition *candidate, bool debug)

static int	IncreaseInOverlap (const ColPartition *merge1, const ColPartition *merge2, int ok_overlap, ColPartition_CLIST *parts)

static bool	TestCompatibleCandidates (const ColPartition &part, bool debug, ColPartition_CLIST *candidates)

static void	RemoveBadBox (BLOBNBOX *box, ColPartition *part, ColPartition_LIST *part_list)

static void	ComputeSearchBoxAndScaling (BlobNeighbourDir direction, const TBOX &part_box, int min_padding, TBOX *search_box, ICOORD *dist_scaling)

static bool	HScanForEdge (uint32_t *data, int wpl, int x_start, int x_end, int min_count, int mid_width, int max_count, int y_end, int y_step, int *y_start)

static bool	VScanForEdge (uint32_t *data, int wpl, int y_start, int y_end, int min_count, int mid_width, int max_count, int x_end, int x_step, int *x_start)

static void	AttemptToShrinkBox (const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box, Pix *pix, TBOX *slice)

static void	CutChunkFromParts (const TBOX &box, const TBOX &im_box, const FCOORD &rotation, const FCOORD &rerotation, Pix *pix, ColPartition_LIST *part_list)

static void	DivideImageIntoParts (const TBOX &im_box, const FCOORD &rotation, const FCOORD &rerotation, Pix *pix, ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list)

static int	ExpandImageLeft (const TBOX &box, int left_limit, ColPartitionGrid *part_grid)

static int	ExpandImageRight (const TBOX &box, int right_limit, ColPartitionGrid *part_grid)

static int	ExpandImageBottom (const TBOX &box, int bottom_limit, ColPartitionGrid *part_grid)

static int	ExpandImageTop (const TBOX &box, int top_limit, ColPartitionGrid *part_grid)

static int	ExpandImageDir (BlobNeighbourDir dir, const TBOX &im_box, const TBOX &limit_box, ColPartitionGrid *part_grid, TBOX *expanded_box)

static void	MaximalImageBoundingBox (ColPartitionGrid *part_grid, TBOX *im_box)

static void	DeletePartition (ColPartition *part)

static bool	ExpandImageIntoParts (const TBOX &max_image_box, ColPartitionGridSearch *rectsearch, ColPartitionGrid *part_grid, ColPartition **part_ptr)

static int	IntersectArea (const TBOX &box, ColPartition_LIST *part_list)

static bool	TestWeakIntersectedPart (const TBOX &im_box, ColPartition_LIST *part_list, ColPartition *part)

static void	EliminateWeakParts (const TBOX &im_box, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts, ColPartition_LIST *part_list)

static bool	ScanForOverlappingText (ColPartitionGrid *part_grid, TBOX *box)

static void	MarkAndDeleteImageParts (const FCOORD &rerotate, ColPartitionGrid *part_grid, ColPartition_LIST *image_parts, Pix *image_pix)

static void	DeleteSmallImages (ColPartitionGrid *part_grid)

static void	RemoveUnusedLineSegments (bool horizontal_lines, BLOBNBOX_LIST *line_bblobs, Pix *line_pix)

static void	SubtractLinesAndResidue (Pix *line_pix, Pix *non_line_pix, int resolution, Pix *src_pix)

static int	MaxStrokeWidth (Pix *pix)

static int	NumTouchingIntersections (Box *line_box, Pix *intersection_pix)

static int	CountPixelsAdjacentToLine (int line_width, Box *line_box, Pix *nonline_pix)

static int	FilterFalsePositives (int resolution, Pix *nonline_pix, Pix *intersection_pix, Pix *line_pix)

static Pix *	FilterMusic (int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pix_hline, l_int32 *v_empty, l_int32 *h_empty)

	INT_VAR (textord_tabfind_show_strokewidths, 0, "Show stroke widths")

	BOOL_VAR (textord_tabfind_only_strokewidths, false, "Only run stroke widths")

static void	CollectHorizVertBlobs (BLOBNBOX_LIST *input_blobs, int *num_vertical_blobs, int *num_horizontal_blobs, BLOBNBOX_CLIST *vertical_blobs, BLOBNBOX_CLIST *horizontal_blobs, BLOBNBOX_CLIST *nondescript_blobs)

static void	PrintBoxWidths (BLOBNBOX *neighbour)

static int	UpperQuartileCJKSize (int gridsize, BLOBNBOX_LIST *blobs)

static bool	AcceptableCJKMerge (const TBOX &bbox, const TBOX &nbox, bool debug, int max_size, int max_dist, int x_gap, int y_gap)

static void	ListNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours)

static void	List2ndNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours)

static void	List3rdNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours)

static void	CountNeighbourGaps (bool debug, BLOBNBOX_CLIST *neighbours, int *pure_h_count, int *pure_v_count)

static void	CountNeighbourTypes (BLOBNBOX_CLIST *neighbours, int *pure_h_count, int *pure_v_count)

static BLOBNBOX *	MutualUnusedVNeighbour (const BLOBNBOX *blob, BlobNeighbourDir dir)

static BLOBNBOX *	MutualUnusedHNeighbour (const BLOBNBOX *blob, BlobNeighbourDir dir)

static void	DrawDiacriticJoiner (const BLOBNBOX *blob, ScrollView *window)

	BOOL_VAR (textord_tabfind_show_initialtabs, false, "Show tab candidates")

	BOOL_VAR (textord_tabfind_show_finaltabs, false, "Show tab vectors")

static void	DisplayBoxVector (const GenericVector< BLOBNBOX *> &boxes, ScrollView *win)

	BOOL_VAR (textord_show_tables, false, "Show table regions")

	BOOL_VAR (textord_tablefind_show_mark, false, "Debug table marking steps in detail")

	BOOL_VAR (textord_tablefind_show_stats, false, "Show page stats used in table finding")

	BOOL_VAR (textord_tablefind_recognize_tables, false, "Enables the table recognizer for table layout and filtering.")

template<typename T >
void	DeleteObject (T *object)

	double_VAR (textord_tabvector_vertical_gap_fraction, 0.5, "max fraction of mean blob width allowed for vertical gaps in vertical text")

	double_VAR (textord_tabvector_vertical_box_ratio, 0.5, "Fraction of box matches required to declare a line vertical")

	double_VAR_H (textord_tabvector_vertical_gap_fraction, 0.5, "Max fraction of mean blob width allowed for vertical gaps in vertical text")

	double_VAR_H (textord_tabvector_vertical_box_ratio, 0.5, "Fraction of box matches required to declare a line vertical")

static TBOX	BoundsWithinBox (Pix *pix, const TBOX &box)

static void	TruncateBoxToMissNonText (int x_middle, int y_middle, bool split_on_x, Pix *nontext_map, TBOX *bbox)

void	SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)

void	assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)

static bool	IntFlagExists (const char *flag_name, int32_t *value)

static bool	DoubleFlagExists (const char *flag_name, double *value)

static bool	BoolFlagExists (const char *flag_name, bool *value)

static bool	StringFlagExists (const char *flag_name, const char **value)

static void	SetIntFlagValue (const char *flag_name, const int32_t new_val)

static void	SetDoubleFlagValue (const char *flag_name, const double new_val)

static void	SetBoolFlagValue (const char *flag_name, const bool new_val)

static void	SetStringFlagValue (const char *flag_name, const char *new_val)

static bool	SafeAtoi (const char *str, int *val)

static bool	SafeAtod (const char *str, double *val)

static void	PrintCommandLineFlags ()

void	ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)

ShapeTable *	LoadShapeTable (const STRING &file_prefix)

void	WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)

MasterTrainer *	LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)

static void	CheckSharedLibraryVersion ()

Pix *	DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation)

Pix *	PrepareDistortedPix (const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)

void	GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)

int	ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)

bool	WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)

STRING	ReadFile (const std::string &filename, FileReader reader)

bool	WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)

bool	WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)

static bool	WriteDawg (const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy, TessdataType file_type, TessdataManager *traineddata)

static bool	WriteDawgs (const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, const UNICHARSET &unicharset, TessdataManager *traineddata)

int	CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)

static std::string	EncodeAsUTF8 (const char32 ch32)

static bool	is_hyphen_punc (const char32 ch)

static bool	is_single_quote (const char32 ch)

static bool	is_double_quote (const char32 ch)

static void	NormalizeUTF8ToUTF32 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, const char *str8, std::vector< char32 > *normed32)

static void	StripJoiners (std::vector< char32 > *str32)

bool	NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)

bool	NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)

char32	OCRNormalize (char32 ch)

bool	IsOCREquivalent (char32 ch1, char32 ch2)

bool	IsValidCodepoint (const char32 ch)

bool	IsWhitespace (const char32 ch)

bool	IsUTF8Whitespace (const char *text)

unsigned int	SpanUTF8Whitespace (const char *text)

unsigned int	SpanUTF8NotWhitespace (const char *text)

bool	IsInterchangeValid (const char32 ch)

bool	IsInterchangeValid7BitAscii (const char32 ch)

char32	FullwidthToHalfwidth (const char32 ch)

static void	ListFontFamilies (PangoFontFamily **families, int n_families)

static char *	my_strnmove (char *dest, const char *src, size_t n)

static bool	ShouldIgnoreFontFamilyName (const char *query)

static void	CharCoverageMapToBitmap (PangoCoverage *coverage, std::vector< bool > *unichar_bitmap)

static bool	IsCombiner (int ch)

static std::string	EncodeAsUTF8 (const char32 ch32)

static bool	RandBool (const double prob, TRand *rand)

static Pix *	CairoARGB32ToPixFormat (cairo_surface_t *surface)

static void	MergeBoxCharsToWords (std::vector< BoxChar *> *boxchars)

static bool	IsWhitespaceBox (const BoxChar *boxchar)

static std::string	StringReplace (const std::string &in, const std::string &oldsub, const std::string &newsub)

static void	ExtractFontProperties (const std::string &utf8_text, StringRenderer *render, const std::string &output_base)

static bool	MakeIndividualGlyphs (Pix *pix, const std::vector< BoxChar *> &vbox, const int input_tiff_page)

static void	AddStringsToUnicharset (const GenericVector< STRING > &strings, int norm_mode, UNICHARSET *unicharset)

static int	Main (int argc, char **argv)

void	SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)

void	SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)

std::string	GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)

void	SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)

void	SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)

static bool	IsThaiConsonant (char32 ch)

static bool	IsThaiBeforeConsonantVowel (char32 ch)

static bool	IsThaiToneMark (char32 ch)

static bool	IsThaiTonableVowel (char32 ch)

static bool	CmpPairSecond (const std::pair< int, int > &p1, const std::pair< int, int > &p2)

static void	ScanParentsForCaseMix (const UNICHARSET &unicharset, LanguageModelState *parent_node)

static bool	HasBetterCaseVariant (const UNICHARSET &unicharset, const BLOB_CHOICE *choice, BLOB_CHOICE_LIST *choices)

template<class BLOB_CHOICE >
int	SortByUnicharID (const void *void1, const void *void2)

template<class BLOB_CHOICE >
int	SortByRating (const void *void1, const void *void2)

Variables
const int	kMinRectSize = 10

const char	kTesseractReject = '~'

const char	kUNLVReject = '~'

const char	kUNLVSuspect = '^'

const char *	kInputFile = "noname.tif"

const char *	kOldVarsFile = "failed_vars.txt"

const int	kMaxIntSize = 22

const int	kNumbersPerBlob = 5

const int	kBytesPerNumber = 5

const int	kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

const int	kBytesPer64BitNumber = 20

const int	kMaxBytesPerLine

const int	kUniChs []

const int	kLatinChs []

static const int	kBasicBufSize = 2048

static const int	kCharWidth = 2

static const int	kMaxBytesPerCodepoint = 20

const float	kMathDigitDensityTh1 = 0.25

const float	kMathDigitDensityTh2 = 0.1

const float	kMathItalicDensityTh = 0.5

const float	kUnclearDensityTh = 0.25

const int	kSeedBlobsCountTh = 10

const int	kLeftIndentAlignmentCountTh = 1

const int	kMaxCharTopRange = 48

const float	kCertaintyScale = 7.0f

const float	kWorstDictCertainty = -25.0f

const int	kMaxCircleErosions = 8

const ParagraphModel *	kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)

const ParagraphModel *	kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)

const int16_t	kMaxBoxEdgeDiff = 2

const int	kBoxClipTolerance = 2

const int	kNumEndPoints = 3

const int	kMinPointsForErrorCount = 16

const int	kMaxRealDistance = 2.0

const int	kFeaturePadding = 2

const int	kImagePadding = 4

const int	kHistogramSize = 256

static const int	kMaxSmallWordUnichars = 3

static const int	kMaxMediumWordUnichars = 6

static const char *const	kParamsTrainingFeatureTypeName []

const int	kMaxAmbigStringSize = 30 * ( 10 + 1)

static const int	kUnigramAmbigsBufferSize = 1000

static const char	kAmbigNgramSeparator [] = { ' ', '\0' }

static const char	kAmbigDelimiters [] = "\t "

static const char	kIllegalMsg []

static const char	kIllegalUnicharMsg []

CCUtilMutex	tprintfMutex

static const char *const	kTessdataFileSuffixes []

static const int	kMaxNumTessdataEntries = 1000

const char *	kNullChar = "<nul>"

const int	kRadicalRadix = 29

const char *	kUTF8LineSeparator = "\u2028"

const char *	kUTF8ParagraphSeparator = "\u2029"

const char *	kLRM = "\u200E"

const char *	kRLM = "\u200F"

const char *	kRLE = "\u202A"

const char *	kPDF = "\u202C"

const char *	kHyphenLikeUTF8 []

const char *	kApostropheLikeUTF8 []

const char	kUniversalAmbigsFile []

const int	ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

const double	kRatingEpsilon = 1.0 / 32

const int	kMaxOffsetDist = 32

static const int	kNumOffsetMaps = 2

const int	kMinClusteredShapes = 1

const int	kMaxUnicharsPerCluster = 2000

const float	kFontMergeDistance = 0.025

const float	kInfiniteDist = 999.0f

const int	kRandomizingCenter = 128

static const int	kNumCNParams = 4

static const int	kSampleYShiftSize = 5

static const int	kSampleScaleSize = 3

static const int	kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2

const int	kTestChar = -1

const int	kSquareLimit = 25

const int	kPrime1 = 17

const int	kPrime2 = 13

static const int	kMinAbsoluteGarbageWordLength = 10

static const float	kMinAbsoluteGarbageAlphanumFrac = 0.5f

const int	case_state_table [6][4]

static const bool	kDawgSuccessors [DAWG_TYPE_COUNT][DAWG_TYPE_COUNT]

static const char	kWildcard [] = "*"

static const int	kRatingPad = 4

static const char	kDictWildcard [] = "\u2606"

static const int	kDictMaxWildcards = 2

static const char	kHyphenSymbol [] = "-"

static const char	kSlashSymbol [] = "/"

static const char	kQuestionSymbol [] = "?"

static const char	kApostropheSymbol [] = "'"

static const float	kSimCertaintyScale = -10.0

static const float	kSimCertaintyOffset = -10.0

static const float	kSimilarityFloor = 100.0

static const int	kDocDictMaxRepChars = 4

const char	kDoNotReverse [] = "RRP_DO_NO_REVERSE"

const char	kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"

const char	kForceReverse [] = "RRP_FORCE_REVERSE"

const char *const	RTLReversePolicyNames []

double	TanhTable [kTableSize]

double	LogisticTable [kTableSize]

const int	kTableSize = 4096

const double	kScaleFactor = 256.0

const int	kMaxInputHeight = 48

const double	kStateClip = 100.0

const double	kErrClip = 1.0f

const double	kDictRatio = 2.25

const double	kCertOffset = -0.085

const double	kMinDivergenceRate = 50.0

const int	kMinStallIterations = 10000

const double	kSubTrainerMarginFraction = 3.0 / 128

const double	kLearningRateDecay = sqrt(0.5)

const int	kNumAdjustmentIterations = 100

const int	kErrorGraphInterval = 1000

const int	kNumPagesPerBatch = 100

const int	kMinStartedErrorRate = 75

const double	kStageTransitionThreshold = 10.0

const double	kHighConfidence = 0.9375

const double	kImprovementFraction = 15.0 / 16.0

const double	kBestCheckpointFraction = 31.0 / 32.0

const int	kTargetXScale = 5

const int	kTargetYScale = 100

const int	kMinWinSize = 500

const int	kMaxWinSize = 2000

const int	kXWinFrameSize = 30

const int	kYWinFrameSize = 80

const float	kMinCertainty = -20.0f

const float	kMinProb = exp(kMinCertainty)

const char *	kNodeContNames [] = {"Anything", "OnlyDup", "NoDup"}

const int	kAdamCorrectionIterations = 200000

const double	kAdamEpsilon = 1e-8

const int	kInt8Flag = 1

const int	kAdamFlag = 4

const int	kDoubleFlag = 128

const int	kHistogramBuckets = 16

const double	kAlignedFraction = 0.03125

const double	kRaggedFraction = 2.5

const double	kAlignedGapFraction = 0.75

const double	kRaggedGapFraction = 1.0

const int	kVLineAlignment = 3

const int	kVLineGutter = 1

const int	kVLineSearchSize = 150

const int	kMinRaggedTabs = 5

const int	kMinAlignedTabs = 4

const int	kVLineMinLength = 500

const double	kMinTabGradient = 4.0

const int	kMaxSkewFactor = 15

const double	kMaxSmallNeighboursPerPix = 1.0 / 32

const int	kMaxLargeOverlapsWithSmall = 3

const int	kMaxMediumOverlapsWithSmall = 12

const int	kMaxLargeOverlapsWithMedium = 12

const int	kOriginalNoiseMultiple = 8

const int	kNoisePadding = 4

const double	kPhotoOffsetFraction = 0.375

const double	kMinGoodTextPARatio = 1.5

const int	kMaxIncompatibleColumnCount = 2

const double	kHorizontalGapMergeFraction = 0.5

const double	kMinGutterWidthGrid = 0.5

const double	kMaxDistToPartSizeRatio = 1.5

const double	kMaxSpacingDrift = 1.0 / 72

const double	kMaxTopSpacingFraction = 0.25

const double	kMaxSameBlockLineSpacing = 3

const double	kMaxSizeRatio = 1.5

const double	kMaxLeaderGapFractionOfMax = 0.25

const double	kMaxLeaderGapFractionOfMin = 0.5

const int	kMinLeaderCount = 5

const int	kMinStrongTextValue = 6

const int	kMinChainTextValue = 3

const int	kHorzStrongTextlineCount = 8

const int	kHorzStrongTextlineHeight = 10

const int	kHorzStrongTextlineAspect = 5

const double	kMaxBaselineError = 0.4375

const double	kMinBaselineCoverage = 0.5

const int	kMaxRMSColorNoise = 128

const int	kMaxColorDistance = 900

static char	kBlobTypes [BRT_COUNT+1] = "NHSRIUVT"

const int	kRGBRMSColors = 4

const int	kMaxPadFactor = 6

const int	kMaxNeighbourDistFactor = 4

const int	kMaxCaptionLines = 7

const double	kMinCaptionGapRatio = 2.0

const double	kMinCaptionGapHeightRatio = 0.5

const double	kMarginOverlapFraction = 0.25

const double	kBigPartSizeRatio = 1.75

const double	kTinyEnoughTextlineOverlapFraction = 0.25

const double	kMaxPartitionSpacing = 1.75

const int	kSmoothDecisionMargin = 4

const double	kMinColumnWidth = 2.0 / 3

const double	kMinRectangularFraction = 0.125

const double	kMaxRectangularFraction = 0.75

const double	kMaxRectangularGradient = 0.1

const int	kMinImageFindSize = 100

const double	kRMSFitScaling = 8.0

const int	kMinColorDifference = 16

const int	kThinLineFraction = 20
	Denominator of resolution makes max pixel width to allow thin lines. More...

const int	kMinLineLengthFraction = 4
	Denominator of resolution makes min pixels to demand line lengths to be. More...

const int	kCrackSpacing = 100
	Spacing of cracks across the page to break up tall vertical lines. More...

const int	kLineFindGridSize = 50
	Grid size used by line finder. Not very critical. More...

const int	kMinThickLineWidth = 12

const int	kMaxLineResidue = 6

const double	kThickLengthMultiple = 0.75

const double	kMaxNonLineDensity = 0.25

const double	kMaxStaveHeight = 1.0

const double	kMinMusicPixelFraction = 0.75

const double	kStrokeWidthFractionTolerance = 0.125

const double	kStrokeWidthTolerance = 1.5

const double	kStrokeWidthFractionCJK = 0.25

const double	kStrokeWidthCJK = 2.0

const int	kCJKRadius = 2

const double	kCJKBrokenDistanceFraction = 0.25

const int	kCJKMaxComponents = 8

const double	kCJKAspectRatio = 1.25

const double	kCJKAspectRatioIncrease = 1.0625

const int	kMaxCJKSizeRatio = 5

const double	kBrokenCJKIterationFraction = 0.125

const double	kDiacriticXPadRatio = 7.0

const double	kDiacriticYPadRatio = 1.75

const double	kMinDiacriticSizeRatio = 1.0625

const double	kMaxDiacriticDistanceRatio = 1.25

const double	kMaxDiacriticGapToBaseCharHeight = 1.0

const int	kLineTrapLongest = 4

const int	kLineTrapShortest = 2

const int	kMostlyOneDirRatio = 3

const double	kLineResidueAspectRatio = 8.0

const int	kLineResiduePadRatio = 3

const double	kLineResidueSizeRatio = 1.75

const float	kSizeRatioToReject = 2.0

const double	kNeighbourSearchFactor = 2.5

const double	kNoiseOverlapGrowthFactor = 4.0

const double	kNoiseOverlapAreaFactor = 1.0 / 512

const int	kTabRadiusFactor = 5

const int	kMinVerticalSearch = 3

const int	kMaxVerticalSearch = 12

const int	kMaxRaggedSearch = 25

const int	kMinLinesInColumn = 10

const double	kMinFractionalLinesInColumn = 0.125

const double	kMaxGutterWidthAbsolute = 2.00

const int	kRaggedGutterMultiple = 5

const double	kLineFragmentAspectRatio = 10.0

const int	kMinEvaluatedTabs = 3

const double	kCosMaxSkewAngle = 0.866025

const int	kColumnWidthFactor = 20

const int	kMaxVerticalSpacing = 500

const int	kMaxBlobWidth = 500

const double	kSplitPartitionSize = 2.0

const double	kAllowTextHeight = 0.5

const double	kAllowTextWidth = 0.6

const double	kAllowTextArea = 0.8

const double	kAllowBlobHeight = 0.3

const double	kAllowBlobWidth = 0.4

const double	kAllowBlobArea = 0.05

const int	kMinBoxesInTextPartition = 10

const int	kMaxBoxesInDataPartition = 20

const double	kMaxGapInTextPartition = 4.0

const double	kMinMaxGapInTextPartition = 0.5

const double	kMaxBlobOverlapFactor = 4.0

const double	kMaxTableCellXheight = 2.0

const int	kMaxColumnHeaderDistance = 4

const double	kTableColumnThreshold = 3.0

const int	kRulingVerticalMargin = 3

const double	kMinOverlapWithTable = 0.6

const int	kSideSpaceMargin = 10

const double	kSmallTableProjectionThreshold = 0.35

const double	kLargeTableProjectionThreshold = 0.45

const int	kLargeTableRowCount = 6

const int	kMinRowsInTable = 3

const int	kAdjacentLeaderSearchPadding = 2

const double	kParagraphEndingPreviousLineRatio = 1.3

const double	kMaxParagraphEndingLeftSpaceMultiple = 3.0

const double	kMinParagraphEndingTextToWhitespaceRatio = 3.0

const double	kMaxXProjectionGapFactor = 2.0

const double	kStrokeWidthFractionalTolerance = 0.25

const double	kStrokeWidthConstantTolerance = 2.0

const double	kHorizontalSpacing = 0.30

const double	kVerticalSpacing = -0.2

const int	kCellSplitRowThreshold = 0

const int	kCellSplitColumnThreshold = 0

const int	kLinedTableMinVerticalLines = 3

const int	kLinedTableMinHorizontalLines = 3

const double	kRequiredColumns = 0.7

const double	kMarginFactor = 1.1

const double	kMaxRowSize = 2.5

const double	kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 }

const int	kGoodRowNumberOfColumnsSmallSize

const double	kGoodRowNumberOfColumnsLarge = 0.7

const double	kMinFilledArea = 0.35

const int	kGutterMultiple = 4

const int	kGutterToNeighbourRatio = 3

const int	kSimilarVectorDist = 10

const int	kSimilarRaggedDist = 50

const int	kMaxFillinMultiple = 11

const double	kMinGutterFraction = 0.5

const double	kLineCountReciprocal = 4.0

const double	kMinAlignedGutter = 0.25

const double	kMinRaggedGutter = 1.5

const char *	kAlignmentNames []

const int	kMaxLineLength = 1024

const float	kRotationRange = 0.02f

const int	kExposureFactor = 16

const int	kSaltnPepper = 5

const int	kMinRampSize = 1000

const int	kMinLigature = 0xfb00

const int	kMaxLigature = 0xfb17

const int	kDefaultResolution = 300

static const int	kDefaultOutputResolution = 300

static const char *	kWordJoinerUTF8 = "\u2060"

static const char *const	XHeightConsistencyEnumName []

static const char *const	LMPainPointsTypeName []

static const float	kScoreScaleFactor = 100.0f

static const float	kMinFinalCost = 0.001f

static const float	kMaxFinalCost = 100.0f

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

*   <UTF8 str> <left> <bottom> <right> <top> <page id>
*

and for word/line-level boxes:

*   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
*

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. E.g.,

* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
*

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, eg Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters

page_res	recognise blobs
selection_box	within this box

fp_eval_word_spacing() Evaluation function for fixed pitch word lists.

Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.

build_menu()

Construct the menu tree used by the command window

process_cmd_win_event()

Process a command returned from the command window (Just call the appropriate command handler)

word_blank_and_set_display() Word processor

Blank display of word then redisplay word according to current display mode settings

Public Function Prototypes

Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

◆ char32

using tesseract::char32 = typedef signed int

◆ CheckPointReader

typedef TessResultCallback2<bool, const GenericVector<char>&, LSTMTrainer*>* tesseract::CheckPointReader

◆ CheckPointWriter

typedef TessResultCallback3<bool, SerializeAmount, const LSTMTrainer*, GenericVector<char>*>* tesseract::CheckPointWriter

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

◆ DawgVector

using tesseract::DawgVector = typedef GenericVector<Dawg *>

◆ DictFunc

typedef int(Dict::* tesseract::DictFunc) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const

◆ FileReader

typedef bool(* tesseract::FileReader)(const STRING &filename, GenericVector< char > *data)

◆ FileWriter

typedef bool(* tesseract::FileWriter)(const GenericVector< char > &data, const STRING &filename)

◆ FillLatticeFunc

typedef void(Wordrec::* tesseract::FillLatticeFunc) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string, StringHash>

◆ NodeChildVector

using tesseract::NodeChildVector = typedef GenericVector<NodeChild>

◆ PainPointHeap

using tesseract::PainPointHeap = typedef GenericHeap<MatrixCoordPair>

◆ ParamsModelClassifyFunc

typedef float(Dict::* tesseract::ParamsModelClassifyFunc) (const char *lang, void *path)

◆ ParamsTrainingHypothesisList

using tesseract::ParamsTrainingHypothesisList = typedef GenericVector<ParamsTrainingHypothesis>

◆ PartSetVector

using tesseract::PartSetVector = typedef GenericVector<ColPartitionSet*>

◆ ProbabilityInContextFunc

typedef double(Dict::* tesseract::ProbabilityInContextFunc) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)

◆ RecodeHeap

using tesseract::RecodeHeap = typedef GenericHeap<RecodePair>

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

◆ SetOfModels

using tesseract::SetOfModels = typedef GenericVectorEqEq<const ParagraphModel *>

◆ ShapeQueue

using tesseract::ShapeQueue = typedef GenericHeap<ShapeQueueEntry>

◆ SuccessorList

using tesseract::SuccessorList = typedef GenericVector<int>

◆ SuccessorListsVector

using tesseract::SuccessorListsVector = typedef GenericVector<SuccessorList *>

◆ TestCallback

typedef TessResultCallback4<STRING, int, const double*, const TessdataManager&, int>* tesseract::TestCallback

◆ TruthCallback

typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *> tesseract::TruthCallback

◆ UnicharAmbigsVector

using tesseract::UnicharAmbigsVector = typedef GenericVector<AmbigSpec_LIST *>

◆ UnicharIdVector

using tesseract::UnicharIdVector = typedef GenericVector<UNICHAR_ID>

◆ WidthCallback

using tesseract::WidthCallback = typedef TessResultCallback1<bool, int>

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

◆ WordRecognizer

typedef void(Tesseract::* tesseract::WordRecognizer) (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Enumeration Type Documentation

◆ AmbigType

enum tesseract::AmbigType

Enumerator
NOT_AMBIG
REPLACE_AMBIG
DEFINITE_AMBIG
SIMILAR_AMBIG
CASE_AMBIG
AMBIG_TYPE_COUNT

◆ CachingStrategy

enum tesseract::CachingStrategy

Enumerator
CS_SEQUENTIAL
CS_ROUND_ROBIN

◆ CharSegmentationType

enum tesseract::CharSegmentationType

Enumerator
CST_FRAGMENT
CST_WHOLE
CST_IMPROPER
CST_NGRAM

◆ CMD_EVENTS

enum tesseract::CMD_EVENTS

Enumerator
ACTION_1_CMD_EVENT
RECOG_WERDS
RECOG_PSEUDO
ACTION_2_CMD_EVENT

◆ ColSegType

enum tesseract::ColSegType

Enumerator
COL_UNKNOWN
COL_TEXT
COL_TABLE
COL_MIXED
COL_COUNT

◆ ColumnSpanningType

enum tesseract::ColumnSpanningType

Enumerator
CST_NOISE
CST_FLOWING
CST_HEADING
CST_PULLOUT
CST_COUNT

◆ CountTypes

enum tesseract::CountTypes

Enumerator
CT_UNICHAR_TOP_OK
CT_UNICHAR_TOP1_ERR
CT_UNICHAR_TOP2_ERR
CT_UNICHAR_TOPN_ERR
CT_UNICHAR_TOPTOP_ERR
CT_OK_MULTI_UNICHAR
CT_OK_JOINED
CT_OK_BROKEN
CT_REJECT
CT_FONT_ATTR_ERR
CT_OK_MULTI_FONT
CT_NUM_RESULTS
CT_RANK
CT_REJECTED_JUNK
CT_ACCEPTED_JUNK
CT_SIZE

◆ DawgType

enum tesseract::DawgType

Enumerator
DAWG_TYPE_PUNCTUATION
DAWG_TYPE_WORD
DAWG_TYPE_NUMBER
DAWG_TYPE_PATTERN
DAWG_TYPE_COUNT

◆ ErrorTypes

enum tesseract::ErrorTypes

Enumerator
ET_RMS
ET_DELTA
ET_WORD_RECERR
ET_CHAR_ERROR
ET_SKIP_RATIO
ET_COUNT

◆ FactorNames

enum tesseract::FactorNames

Enumerator
FN_INCOLOR
FN_Y0
FN_Y1
FN_Y2
FN_Y3
FN_X0
FN_X1
FN_SHEAR
FN_NUM_FACTORS

◆ FlexDimensions

enum tesseract::FlexDimensions

Enumerator
FD_BATCH
FD_HEIGHT
FD_WIDTH
FD_DIMSIZE

◆ GraphemeNorm

enum tesseract::GraphemeNorm

strong

Enumerator
kNone
kNormalize

◆ GraphemeNormMode

enum tesseract::GraphemeNormMode

strong

Enumerator
kSingleString
kCombined
kGlyphSplit
kIndividualUnicodes

◆ kParamsTrainingFeatureType

enum tesseract::kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT
PTRAIN_DIGITS_MED
PTRAIN_DIGITS_LONG
PTRAIN_NUM_SHORT
PTRAIN_NUM_MED
PTRAIN_NUM_LONG
PTRAIN_DOC_SHORT
PTRAIN_DOC_MED
PTRAIN_DOC_LONG
PTRAIN_DICT_SHORT
PTRAIN_DICT_MED
PTRAIN_DICT_LONG
PTRAIN_FREQ_SHORT
PTRAIN_FREQ_MED
PTRAIN_FREQ_LONG
PTRAIN_SHAPE_COST_PER_CHAR
PTRAIN_NGRAM_COST_PER_CHAR
PTRAIN_NUM_BAD_PUNC
PTRAIN_NUM_BAD_CASE
PTRAIN_XHEIGHT_CONSISTENCY
PTRAIN_NUM_BAD_CHAR_TYPE
PTRAIN_NUM_BAD_SPACING
PTRAIN_NUM_BAD_FONT
PTRAIN_RATING_PER_CHAR
PTRAIN_NUM_FEATURE_TYPES

◆ LeftOrRight

enum tesseract::LeftOrRight

Enumerator
LR_LEFT
LR_RIGHT

◆ LineType

enum tesseract::LineType

Enumerator
LT_START
LT_BODY
LT_UNKNOWN
LT_MULTIPLE

◆ LMPainPointsType

enum tesseract::LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER
LM_PPTYPE_AMBIG
LM_PPTYPE_PATH
LM_PPTYPE_SHAPE
LM_PPTYPE_NUM

◆ LossType

enum tesseract::LossType

Enumerator
LT_NONE
LT_CTC
LT_SOFTMAX
LT_LOGISTIC

◆ NeighbourPartitionType

enum tesseract::NeighbourPartitionType

Enumerator
NPT_HTEXT
NPT_VTEXT
NPT_WEAK_HTEXT
NPT_WEAK_VTEXT
NPT_IMAGE
NPT_COUNT

◆ NetworkFlags

enum tesseract::NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR
NF_ADAM

◆ NetworkType

enum tesseract::NetworkType

Enumerator
NT_NONE
NT_INPUT
NT_CONVOLVE
NT_MAXPOOL
NT_PARALLEL
NT_REPLICATED
NT_PAR_RL_LSTM
NT_PAR_UD_LSTM
NT_PAR_2D_LSTM
NT_SERIES
NT_RECONFIG
NT_XREVERSED
NT_YREVERSED
NT_XYTRANSPOSE
NT_LSTM
NT_LSTM_SUMMARY
NT_LOGISTIC
NT_POSCLIP
NT_SYMCLIP
NT_TANH
NT_RELU
NT_LINEAR
NT_SOFTMAX
NT_SOFTMAX_NO_CTC
NT_LSTM_SOFTMAX
NT_LSTM_SOFTMAX_ENCODED
NT_TENSORFLOW
NT_COUNT

◆ NodeContinuation

enum tesseract::NodeContinuation

Enumerator
NC_ANYTHING
NC_ONLY_DUP
NC_NO_DUP
NC_COUNT

◆ NormalizationMode

enum tesseract::NormalizationMode

Enumerator
NM_BASELINE
NM_CHAR_ISOTROPIC
NM_CHAR_ANISOTROPIC

◆ OcrEngineMode

enum tesseract::OcrEngineMode

When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY
OEM_LSTM_ONLY
OEM_TESSERACT_LSTM_COMBINED
OEM_DEFAULT
OEM_COUNT

◆ OCRNorm

enum tesseract::OCRNorm

strong

Enumerator
kNone
kNormalize

◆ Orientation

enum tesseract::Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a Latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented as C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP
ORIENTATION_PAGE_RIGHT
ORIENTATION_PAGE_DOWN
ORIENTATION_PAGE_LEFT

◆ PageIteratorLevel

enum tesseract::PageIteratorLevel

enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK
RIL_PARA
RIL_TEXTLINE
RIL_WORD
RIL_SYMBOL

◆ PageSegMode

enum tesseract::PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY	Orientation and script detection only.
PSM_AUTO_OSD	Automatic page segmentation with orientation and script detection (OSD).
PSM_AUTO_ONLY	Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO	Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN	Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT	Assume a single uniform block of vertically aligned text.
PSM_SINGLE_BLOCK	Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE	Treat the image as a single text line.
PSM_SINGLE_WORD	Treat the image as a single word.
PSM_CIRCLE_WORD	Treat the image as a single word in a circle.
PSM_SINGLE_CHAR	Treat the image as a single character.
PSM_SPARSE_TEXT	Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD	Sparse text with orientation and script detection.
PSM_RAW_LINE	Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
PSM_COUNT	Number of enum entries.

◆ ParagraphJustification

enum tesseract::ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN
JUSTIFICATION_LEFT
JUSTIFICATION_CENTER
JUSTIFICATION_RIGHT

◆ PartitionFindResult

enum tesseract::PartitionFindResult

Enumerator
PFR_OK
PFR_SKEW
PFR_NOISE

◆ ScriptPos

enum tesseract::ScriptPos

Enumerator
SP_NORMAL
SP_SUBSCRIPT
SP_SUPERSCRIPT
SP_DROPCAP

◆ SerializeAmount

enum tesseract::SerializeAmount

Enumerator
LIGHT
NO_BEST_TRAINER
FULL

◆ SetParamConstraint

enum tesseract::SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE
SET_PARAM_CONSTRAINT_DEBUG_ONLY
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
SET_PARAM_CONSTRAINT_NON_INIT_ONLY

◆ SubTrainerResult

enum tesseract::SubTrainerResult

Enumerator
STR_NONE
STR_UPDATED
STR_REPLACED

◆ TabAlignment

enum tesseract::TabAlignment

Enumerator
TA_LEFT_ALIGNED
TA_LEFT_RAGGED
TA_CENTER_JUSTIFIED
TA_RIGHT_ALIGNED
TA_RIGHT_RAGGED
TA_SEPARATOR
TA_COUNT

◆ TessdataType

enum tesseract::TessdataType

Enumerator
TESSDATA_LANG_CONFIG
TESSDATA_UNICHARSET
TESSDATA_AMBIGS
TESSDATA_INTTEMP
TESSDATA_PFFMTABLE
TESSDATA_NORMPROTO
TESSDATA_PUNC_DAWG
TESSDATA_SYSTEM_DAWG
TESSDATA_NUMBER_DAWG
TESSDATA_FREQ_DAWG
TESSDATA_FIXED_LENGTH_DAWGS
TESSDATA_CUBE_UNICHARSET
TESSDATA_CUBE_SYSTEM_DAWG
TESSDATA_SHAPE_TABLE
TESSDATA_BIGRAM_DAWG
TESSDATA_UNAMBIG_DAWG
TESSDATA_PARAMS_MODEL
TESSDATA_LSTM
TESSDATA_LSTM_PUNC_DAWG
TESSDATA_LSTM_SYSTEM_DAWG
TESSDATA_LSTM_NUMBER_DAWG
TESSDATA_LSTM_UNICHARSET
TESSDATA_LSTM_RECODER
TESSDATA_VERSION
TESSDATA_NUM_ENTRIES

◆ TextlineOrder

enum tesseract::TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT
TEXTLINE_ORDER_RIGHT_TO_LEFT
TEXTLINE_ORDER_TOP_TO_BOTTOM

◆ TopNState

enum tesseract::TopNState

Enumerator
TN_TOP2
TN_TOPN
TN_ALSO_RAN
TN_COUNT

◆ Trainability

enum tesseract::Trainability

Enumerator
TRAINABLE
PERFECT
UNENCODABLE
HI_PRECISION_ERR
NOT_BOXED

◆ TrainingFlags

enum tesseract::TrainingFlags

Enumerator
TF_INT_MODE
TF_COMPRESS_UNICHARSET

◆ TrainingState

enum tesseract::TrainingState

Enumerator
TS_DISABLED
TS_ENABLED
TS_TEMP_DISABLE
TS_RE_ENABLE

◆ UnicodeNormMode

enum tesseract::UnicodeNormMode

strong

Enumerator
kNFD
kNFC
kNFKD
kNFKC

◆ ViramaScript

enum tesseract::ViramaScript : char32

strong

Enumerator
kNonVirama
kDevanagari
kBengali
kGurmukhi
kGujarati
kOriya
kTamil
kTelugu
kKannada
kMalayalam
kSinhala
kMyanmar
kKhmer
kJavanese

◆ WritingDirection

enum tesseract::WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT
WRITING_DIRECTION_RIGHT_TO_LEFT
WRITING_DIRECTION_TOP_TO_BOTTOM

◆ XHeightConsistencyEnum

enum tesseract::XHeightConsistencyEnum

Enumerator
XH_GOOD
XH_SUBNORMAL
XH_INCONSISTENT

Function Documentation

◆ AcceptableCJKMerge()

static bool tesseract::AcceptableCJKMerge	(	const TBOX &	bbox,
		const TBOX &	nbox,
		bool	debug,
		int	max_size,
		int	max_dist,
		int *	x_gap,
		int *	y_gap
	)

static

◆ AcceptableRowArgs()

static bool tesseract::AcceptableRowArgs	(	int	debug_level,
		int	min_num_rows,
		const char *	function_name,
		const GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end
	)

static

◆ AccumulateVector()

void tesseract::AccumulateVector	(	int	n,
		const double *	src,
		double *	dest
	)

inline

◆ add_space()

static void tesseract::add_space ( TESS_CHAR_IT * it )

static

◆ AddAllScriptsConverted()

static void tesseract::AddAllScriptsConverted	(	const UNICHARSET &	sid_set,
		const UNICHARSET &	osd_set,
		GenericVector< int > *	allowed_ids
	)

static

◆ addAvailableLanguages()

static void tesseract::addAvailableLanguages	(	const STRING &	datadir,
		const STRING &	base,
		GenericVector< STRING > *	langs
	)

static

◆ AddBaselineCoordsTohOCR()

static void tesseract::AddBaselineCoordsTohOCR	(	const PageIterator *	it,
		PageIteratorLevel	level,
		STRING *	hocr_str
	)

static

Fits a line to the baseline at the given level, and appends its coefficients to the hOCR string. NOTE: The hOCR spec is unclear on how to specify baseline coefficients for rotated textlines. For this reason, on textlines that are not upright, this method currently only inserts a 'textangle' property to indicate the rotation direction and does not add any baseline information to the hocr string.

◆ AddBoxTohOCR()

static void tesseract::AddBoxTohOCR	(	const ResultIterator *	it,
		PageIteratorLevel	level,
		STRING *	hocr_str
	)

static

◆ AddBoxToTSV()

static void tesseract::AddBoxToTSV	(	const PageIterator *	it,
		PageIteratorLevel	level,
		STRING *	hocr_str
	)

static

◆ AddIdTohOCR() [1/2]

static void tesseract::AddIdTohOCR	(	STRING *	hocr_str,
		const std::string	base,
		int	num1,
		int	num2
	)

static

◆ AddIdTohOCR() [2/2]

static void tesseract::AddIdTohOCR	(	STRING *	hocr_str,
		const std::string	base,
		int	num1,
		int	num2,
		int	num3
	)

static

◆ AddNearFeatures()

static void tesseract::AddNearFeatures	(	const IntFeatureMap &	feature_map,
		int	f,
		int	levels,
		GenericVector< int > *	good_features
	)

static

◆ AddStringsToUnicharset()

static void tesseract::AddStringsToUnicharset	(	const GenericVector< STRING > &	strings,
		int	norm_mode,
		UNICHARSET *	unicharset
	)

static

◆ AffineMatrix()

static void tesseract::AffineMatrix	(	int	writing_direction,
		int	line_x1,
		int	line_y1,
		int	line_x2,
		int	line_y2,
		double *	a,
		double *	b,
		double *	c,
		double *	d
	)

static

◆ AmbigSpec_zapper()

void tesseract::AmbigSpec_zapper ( ELIST_LINK * link )

◆ AsciiLikelyListItem()

bool tesseract::AsciiLikelyListItem ( const STRING & word )

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2	(	Pix *	pix,
		BLOCK_LIST *	blocks,
		TO_BLOCK_LIST *	port_blocks
	)

◆ AssignIds()

static void tesseract::AssignIds	(	const UnicityTable< FontInfo > &	all_fonts,
		UnicityTable< FontInfo > *	lang_fonts
	)

static

◆ AtLeast2LineCrossings()

static bool tesseract::AtLeast2LineCrossings ( BLOBNBOX_CLIST * blobs )

static

◆ AttemptBoxExpansion()

static TBOX tesseract::AttemptBoxExpansion	(	const TBOX &	box,
		const IntGrid &	noise_density,
		int	pad
	)

static

◆ AttemptToShrinkBox()

static void tesseract::AttemptToShrinkBox	(	const FCOORD &	rotation,
		const FCOORD &	rerotation,
		const TBOX &	im_box,
		Pix *	pix,
		TBOX *	slice
	)

static

◆ BestLabel()

static int tesseract::BestLabel	(	const GENERIC_2D_ARRAY< float > &	outputs,
		int	t
	)

static

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample	(	const TBLOB &	blob,
		bool	nonlinear_norm,
		INT_FX_RESULT_STRUCT *	fx_info,
		GenericVector< INT_FEATURE_STRUCT > *	bl_features
	)

◆ BOOL_VAR() [1/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_color_fit	,
		false	,
		"Show stroke widths"
	)

◆ BOOL_VAR() [2/13]

tesseract::BOOL_VAR	(	textord_tabfind_only_strokewidths	,
		false	,
		"Only run stroke widths"
	)

◆ BOOL_VAR() [3/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_initial_partitions	,
		false	,
		"Show partition bounds"
	)

◆ BOOL_VAR() [4/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_reject_blobs	,
		false	,
		"Show blobs rejected as noise"
	)

◆ BOOL_VAR() [5/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_initialtabs	,
		false	,
		"Show tab candidates"
	)

◆ BOOL_VAR() [6/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_columns	,
		false	,
		"Show column bounds"
	)

◆ BOOL_VAR() [7/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_finaltabs	,
		false	,
		"Show tab vectors"
	)

◆ BOOL_VAR() [8/13]

tesseract::BOOL_VAR	(	textord_tabfind_show_blocks	,
		false	,
		"Show final block bounds"
	)

◆ BOOL_VAR() [9/13]

tesseract::BOOL_VAR	(	textord_tabfind_find_tables	,
		true	,
		"run table detection"
	)

◆ BOOL_VAR() [10/13]

tesseract::BOOL_VAR	(	textord_show_tables	,
		false	,
		"Show table regions"
	)

◆ BOOL_VAR() [11/13]

tesseract::BOOL_VAR	(	textord_tablefind_show_mark	,
		false	,
		"Debug table marking steps in detail"
	)

◆ BOOL_VAR() [12/13]

tesseract::BOOL_VAR	(	textord_tablefind_show_stats	,
		false	,
		"Show page stats used in table finding"
	)

◆ BOOL_VAR() [13/13]

tesseract::BOOL_VAR	(	textord_tablefind_recognize_tables	,
		false	,
		"Enables the table recognizer for table layout and filtering."
	)

◆ BOOL_VAR_H()

tesseract::BOOL_VAR_H	(	textord_tabfind_find_tables	,
		false	,
		"run table detection"
	)

◆ BoolFlagExists()

static bool tesseract::BoolFlagExists	(	const char *	flag_name,
		bool *	value
	)

static

◆ BoundsWithinBox()

static TBOX tesseract::BoundsWithinBox	(	Pix *	pix,
		const TBOX &	box
	)

static

◆ BoxFromHLine()

static TBOX tesseract::BoxFromHLine ( const TabVector * hline )

static

◆ BoxMissMetric()

static double tesseract::BoxMissMetric	(	const TBOX &	box1,
		const TBOX &	box2
	)

static

Helper to compute the dispute resolution metric. Disputed blob resolution. The aim is to give the blob to the most appropriate boxfile box. Most of the time it is obvious, but if two boxfile boxes overlap significantly it is not. If a small boxfile box takes most of the blob, and a large boxfile box does too, then we want the small boxfile box to get it, but if the small box is much smaller than the blob, we don't want it to get it. Details of the disputed blob resolution: Given a box with area A, and a blob with area B, with overlap area C, then the miss metric is (A-C)(B-C)/(AB) and the box with minimum miss metric gets the blob.

◆ BuildFullyConnected()

static Network* tesseract::BuildFullyConnected	(	const StaticShape &	input_shape,
		NetworkType	type,
		const STRING &	name,
		int	depth
	)

static

◆ c_blob_comparator()

static int tesseract::c_blob_comparator	(	const void *	blob1p,
		const void *	blob2p
	)

static

◆ CairoARGB32ToPixFormat()

static Pix* tesseract::CairoARGB32ToPixFormat ( cairo_surface_t * surface )

static

◆ CalculateTabStops()

static void tesseract::CalculateTabStops	(	GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end,
		int	tolerance,
		GenericVector< Cluster > *	left_tabs,
		GenericVector< Cluster > *	right_tabs
	)

static

◆ CallWithUTF8()

static void tesseract::CallWithUTF8	(	TessCallback1< const char >	cb,
		const WERD_CHOICE *	wc
	)

static

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults	(	GenericVector< PARA >	row_owners,
		PARA_LIST *	paragraphs
	)

◆ ceil_log2()

static uint32_t tesseract::ceil_log2 ( uint32_t n )

inline static

◆ CharCoverageMapToBitmap()

static void tesseract::CharCoverageMapToBitmap	(	PangoCoverage *	coverage,
		std::vector< bool > *	unichar_bitmap
	)

static

◆ CheckSharedLibraryVersion()

static void tesseract::CheckSharedLibraryVersion ( )

inline static

◆ clear_any_old_text()

static void tesseract::clear_any_old_text ( BLOCK_LIST * block_list )

static

◆ ClearFeatureSpaceWindow()

void tesseract::ClearFeatureSpaceWindow	(	NORM_METHOD	norm_method,
		ScrollView *	window
	)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

◆ ClipBaseline()

static void tesseract::ClipBaseline	(	int	ppi,
		int	x1,
		int	y1,
		int	x2,
		int	y2,
		int *	line_x1,
		int *	line_y1,
		int *	line_x2,
		int *	line_y2
	)

static

◆ ClipCoord()

static void tesseract::ClipCoord	(	const ICOORD &	bleft,
		const ICOORD &	tright,
		ICOORD *	pos
	)

static

◆ ClipVector()

template<typename T >

void tesseract::ClipVector	(	int	n,
		T	lower,
		T	upper,
		T *	vec
	)

inline

◆ ClosestCluster()

static int tesseract::ClosestCluster	(	const GenericVector< Cluster > &	clusters,
		int	value
	)

static

◆ cmp_eq()

template<typename T >

bool tesseract::cmp_eq	(	T const &	t1,
		T const &	t2
	)

◆ CmpPairSecond()

static bool tesseract::CmpPairSecond	(	const std::pair< int, int > &	p1,
		const std::pair< int, int > &	p2
	)

static

◆ CodeInBinary()

void tesseract::CodeInBinary	(	int	n,
		int	nf,
		double *	vec
	)

inline

◆ CodepointToUtf16be()

static bool tesseract::CodepointToUtf16be	(	int	code,
		char	utf16[kMaxBytesPerCodepoint]
	)

static

◆ CollectFonts()

static void tesseract::CollectFonts	(	const UnicityTable< FontInfo > &	new_fonts,
		UnicityTable< FontInfo > *	all_fonts
	)

static

◆ CollectHorizVertBlobs()

static void tesseract::CollectHorizVertBlobs	(	BLOBNBOX_LIST *	input_blobs,
		int *	num_vertical_blobs,
		int *	num_horizontal_blobs,
		BLOBNBOX_CLIST *	vertical_blobs,
		BLOBNBOX_CLIST *	horizontal_blobs,
		BLOBNBOX_CLIST *	nondescript_blobs
	)

static

◆ CombineLangModel()

int tesseract::CombineLangModel	(	const UNICHARSET &	unicharset,
		const std::string &	script_dir,
		const std::string &	version_str,
		const std::string &	output_dir,
		const std::string &	lang,
		bool	pass_through_recoder,
		const GenericVector< STRING > &	words,
		const GenericVector< STRING > &	puncs,
		const GenericVector< STRING > &	numbers,
		bool	lang_is_rtl,
		FileReader	reader,
		FileWriter	writer
	)

◆ CompareFontInfo()

bool tesseract::CompareFontInfo	(	const FontInfo &	fi1,
		const FontInfo &	fi2
	)

◆ CompareFontSet()

bool tesseract::CompareFontSet	(	const FontSet &	fs1,
		const FontSet &	fs2
	)

◆ CompareSTRING()

static int tesseract::CompareSTRING	(	const void *	p1,
		const void *	p2
	)

static

◆ ComputeBlackWhite()

static void tesseract::ComputeBlackWhite	(	Pix *	pix,
		float *	black,
		float *	white
	)

static

◆ ComputeFeatures()

static int tesseract::ComputeFeatures	(	const FCOORD &	start_pt,
		const FCOORD &	end_pt,
		double	feature_length,
		GenericVector< INT_FEATURE_STRUCT > *	features
	)

static

◆ ComputeSearchBoxAndScaling()

static void tesseract::ComputeSearchBoxAndScaling	(	BlobNeighbourDir	direction,
		const TBOX &	part_box,
		int	min_padding,
		TBOX *	search_box,
		ICOORD *	dist_scaling
	)

static

◆ ConvertHypothesizedModelRunsToParagraphs()

static void tesseract::ConvertHypothesizedModelRunsToParagraphs	(	int	debug_level,
		const GenericVector< RowScratchRegisters > &	rows,
		GenericVector< PARA >	row_owners,
		ParagraphTheory *	theory
	)

static

◆ CopyVector()

void tesseract::CopyVector	(	int	n,
		const double *	src,
		double *	dest
	)

inline

◆ CorrectRepcharChoices()

static void tesseract::CorrectRepcharChoices	(	BLOB_CHOICE *	blob_choice,
		WERD_RES *	word_res
	)

static

◆ CountNeighbourGaps()

static void tesseract::CountNeighbourGaps	(	bool	debug,
		BLOBNBOX_CLIST *	neighbours,
		int *	pure_h_count,
		int *	pure_v_count
	)

static

◆ CountNeighbourTypes()

static void tesseract::CountNeighbourTypes	(	BLOBNBOX_CLIST *	neighbours,
		int *	pure_h_count,
		int *	pure_v_count
	)

static

◆ countof()

template<typename T , size_t N>

constexpr size_t tesseract::countof ( T const(&)[N] )

noexcept

◆ CountPixelsAdjacentToLine()

static int tesseract::CountPixelsAdjacentToLine	(	int	line_width,
		Box *	line_box,
		Pix *	nonline_pix
	)

static

◆ CrownCompatible()

bool tesseract::CrownCompatible	(	const GenericVector< RowScratchRegisters > *	rows,
		int	a,
		int	b,
		const ParagraphModel *	model
	)

◆ CutChunkFromParts()

static void tesseract::CutChunkFromParts	(	const TBOX &	box,
		const TBOX &	im_box,
		const FCOORD &	rotation,
		const FCOORD &	rerotation,
		Pix *	pix,
		ColPartition_LIST *	part_list
	)

static

◆ DebugDump()

static void tesseract::DebugDump	(	bool	should_print,
		const STRING &	phase,
		const ParagraphTheory &	theory,
		const GenericVector< RowScratchRegisters > &	rows
	)

static

◆ DecodeRadicalLine()

static bool tesseract::DecodeRadicalLine	(	STRING *	radical_data_line,
		RSMap *	radical_map
	)

static

◆ DecodeRadicalTable()

static bool tesseract::DecodeRadicalTable	(	STRING *	radical_data,
		RSMap *	radical_map
	)

static

◆ DegradeImage()

struct Pix * tesseract::DegradeImage	(	Pix *	input,
		int	exposure,
		TRand *	randomizer,
		float *	rotation
	)

◆ DeleteObject()

template<typename T >

void tesseract::DeleteObject ( T * object )

◆ DeletePartition()

static void tesseract::DeletePartition ( ColPartition * part )

static

◆ DeleteSmallImages()

static void tesseract::DeleteSmallImages ( ColPartitionGrid * part_grid )

static

◆ DeSerialize() [1/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		char *	data,
		size_t	n
	)

◆ DeSerialize() [2/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		float *	data,
		size_t	n
	)

◆ DeSerialize() [3/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		int8_t *	data,
		size_t	n
	)

◆ DeSerialize() [4/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		int16_t *	data,
		size_t	n
	)

◆ DeSerialize() [5/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		int32_t *	data,
		size_t	n
	)

◆ DeSerialize() [6/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		uint8_t *	data,
		size_t	n
	)

◆ DeSerialize() [7/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		uint16_t *	data,
		size_t	n
	)

◆ DeSerialize() [8/8]

bool tesseract::DeSerialize	(	FILE *	fp,
		uint32_t *	data,
		size_t	n
	)

◆ DetectParagraphs() [1/2]

void tesseract::DetectParagraphs	(	int	debug_level,
		GenericVector< RowInfo > *	row_infos,
		GenericVector< PARA >	row_owners,
		PARA_LIST *	paragraphs,
		GenericVector< ParagraphModel >	models
	)

◆ DetectParagraphs() [2/2]

void tesseract::DetectParagraphs	(	int	debug_level,
		bool	after_text_recognition,
		const MutableIterator *	block_start,
		GenericVector< ParagraphModel >	models
	)

◆ DiscardUnusedModels()

static void tesseract::DiscardUnusedModels	(	const GenericVector< RowScratchRegisters > &	rows,
		ParagraphTheory *	theory
	)

static

◆ DisplayBoxVector()

static void tesseract::DisplayBoxVector	(	const GenericVector< BLOBNBOX *> &	boxes,
		ScrollView *	win
	)

static

◆ dist2()

static long tesseract::dist2	(	int	x1,
		int	y1,
		int	x2,
		int	y2
	)

static

◆ DivideImageIntoParts()

static void tesseract::DivideImageIntoParts	(	const TBOX &	im_box,
		const FCOORD &	rotation,
		const FCOORD &	rerotation,
		Pix *	pix,
		ColPartitionGridSearch *	rectsearch,
		ColPartition_LIST *	part_list
	)

static

◆ DotProductAVX()

double tesseract::DotProductAVX	(	const double *	u,
		const double *	v,
		int	n
	)

◆ DotProductSSE()

double tesseract::DotProductSSE	(	const double *	u,
		const double *	v,
		int	n
	)

◆ double_VAR() [1/2]

tesseract::double_VAR	(	textord_tabvector_vertical_gap_fraction	,
		0.5	,
		"max fraction of mean blob width allowed for vertical gaps in vertical text"
	)

◆ double_VAR() [2/2]

tesseract::double_VAR	(	textord_tabvector_vertical_box_ratio	,
		0.5	,
		"Fraction of box matches required to declare a line vertical"
	)

◆ double_VAR_H() [1/2]

tesseract::double_VAR_H	(	textord_tabvector_vertical_gap_fraction	,
		0.5	,
		"Max fraction of mean blob width allowed for vertical gaps in vertical text"
	)

◆ double_VAR_H() [2/2]

tesseract::double_VAR_H	(	textord_tabvector_vertical_box_ratio	,
		0.5	,
		"Fraction of box matches required to declare a line vertical"
	)

◆ DoubleFlagExists()

static bool tesseract::DoubleFlagExists	(	const char *	flag_name,
		double *	value
	)

static

◆ DowngradeWeakestToCrowns()

static void tesseract::DowngradeWeakestToCrowns	(	int	debug_level,
		ParagraphTheory *	theory,
		GenericVector< RowScratchRegisters > *	rows
	)

static

◆ DrawDiacriticJoiner()

static void tesseract::DrawDiacriticJoiner	(	const BLOBNBOX *	blob,
		ScrollView *	window
	)

static

◆ EliminateWeakParts()

static void tesseract::EliminateWeakParts	(	const TBOX &	im_box,
		ColPartitionGrid *	part_grid,
		ColPartition_LIST *	big_parts,
		ColPartition_LIST *	part_list
	)

static

◆ EncodeAsUTF8() [1/2]

static std::string tesseract::EncodeAsUTF8 ( const char32 ch32 )

static

◆ EncodeAsUTF8() [2/2]

static std::string tesseract::EncodeAsUTF8 ( const char32 ch32 )

static

◆ Epsilon()

static int tesseract::Epsilon ( int space_pix )

static

◆ EvaluateWordSpan()

static void tesseract::EvaluateWordSpan	(	const PointerVector< WERD_RES > &	words,
		int	first_index,
		int	end_index,
		float *	rating,
		float *	certainty,
		bool *	bad,
		bool *	valid_permuter
	)

static

◆ ExpandImageBottom()

static int tesseract::ExpandImageBottom	(	const TBOX &	box,
		int	bottom_limit,
		ColPartitionGrid *	part_grid
	)

static

◆ ExpandImageDir()

static int tesseract::ExpandImageDir	(	BlobNeighbourDir	dir,
		const TBOX &	im_box,
		const TBOX &	limit_box,
		ColPartitionGrid *	part_grid,
		TBOX *	expanded_box
	)

static

◆ ExpandImageIntoParts()

static bool tesseract::ExpandImageIntoParts	(	const TBOX &	max_image_box,
		ColPartitionGridSearch *	rectsearch,
		ColPartitionGrid *	part_grid,
		ColPartition **	part_ptr
	)

static

◆ ExpandImageLeft()

static int tesseract::ExpandImageLeft	(	const TBOX &	box,
		int	left_limit,
		ColPartitionGrid *	part_grid
	)

static

◆ ExpandImageRight()

static int tesseract::ExpandImageRight	(	const TBOX &	box,
		int	right_limit,
		ColPartitionGrid *	part_grid
	)

static

◆ ExpandImageTop()

static int tesseract::ExpandImageTop	(	const TBOX &	box,
		int	top_limit,
		ColPartitionGrid *	part_grid
	)

static

◆ extract_result()

static void tesseract::extract_result	(	TESS_CHAR_IT *	out,
		PAGE_RES *	page_res
	)

static

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

◆ ExtractFeaturesFromRun()

static void tesseract::ExtractFeaturesFromRun	(	const EDGEPT *	startpt,
		const EDGEPT *	lastpt,
		const DENORM &	denorm,
		double	feature_length,
		bool	force_poly,
		GenericVector< INT_FEATURE_STRUCT > *	features
	)

static

◆ ExtractFontName()

void tesseract::ExtractFontName	(	const STRING &	filename,
		STRING *	fontname
	)

Public Code

◆ ExtractFontProperties()

static void tesseract::ExtractFontProperties	(	const std::string &	utf8_text,
		StringRenderer *	render,
		const std::string &	output_base
	)

static

◆ FilterFalsePositives()

static int tesseract::FilterFalsePositives	(	int	resolution,
		Pix *	nonline_pix,
		Pix *	intersection_pix,
		Pix *	line_pix
	)

static

◆ FilterMusic()

static Pix* tesseract::FilterMusic	(	int	resolution,
		Pix *	pix_closed,
		Pix *	pix_vline,
		Pix *	pix_hline,
		l_int32 *	v_empty,
		l_int32 *	h_empty
	)

static

◆ find_modal_font()

static void tesseract::find_modal_font	(	STATS *	fonts,
		int16_t *	font_out,
		int8_t *	font_count
	)

static

find_modal_font

Find the modal font and remove it from the stats.

◆ FindBestMatchingChoice()

static BLOB_CHOICE* tesseract::FindBestMatchingChoice	(	UNICHAR_ID	char_id,
		WERD_RES *	word_res
	)

static

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after,
		tesseract::ParagraphJustification	justification
	)

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after
	)

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo f )

◆ FontSetDeleteCallback()

void tesseract::FontSetDeleteCallback ( FontSet fs )

◆ FullwidthToHalfwidth()

char32 tesseract::FullwidthToHalfwidth ( const char32 ch )

◆ FuncInplace()

template<class Func >

void tesseract::FuncInplace	(	int	n,
		double *	inout
	)

inline

◆ FuncMultiply()

template<class Func >

void tesseract::FuncMultiply	(	const double *	u,
		const double *	v,
		int	n,
		double *	out
	)

inline

◆ GatherPoints()

static int tesseract::GatherPoints	(	const C_OUTLINE *	outline,
		double	feature_length,
		const DENORM &	denorm,
		const DENORM *	root_denorm,
		int	start_index,
		int	end_index,
		ICOORD *	pos,
		FCOORD *	pos_normed,
		LLSQ *	points,
		LLSQ *	dirs
	)

static

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion	(	int	width,
		int	height,
		TRand *	randomizer,
		Pix **	pix,
		GenericVector< TBOX > *	boxes
	)

◆ GeometricClassify()

static void tesseract::GeometricClassify	(	int	debug_level,
		GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end,
		ParagraphTheory *	theory
	)

static

◆ GeometricClassifyThreeTabStopTextBlock()

static void tesseract::GeometricClassifyThreeTabStopTextBlock	(	int	debug_level,
		GeometricClassifierState &	s,
		ParagraphTheory *	theory
	)

static

◆ GetBlockTextOrientation()

static tesseract::Orientation tesseract::GetBlockTextOrientation ( const PageIterator * it )

static

Gets the block orientation at the current iterator position.

◆ GetWordBaseline()

static void tesseract::GetWordBaseline	(	int	writing_direction,
		int	ppi,
		int	height,
		int	word_x1,
		int	word_y1,
		int	word_x2,
		int	word_y2,
		int	line_x1,
		int	line_y1,
		int	line_x2,
		int	line_y2,
		double *	x0,
		double *	y0,
		double *	length
	)

static

◆ GetXheightString()

std::string tesseract::GetXheightString	(	const std::string &	script_dir,
		const UNICHARSET &	unicharset
	)

◆ GridReducedPix()

static Pix* tesseract::GridReducedPix	(	const TBOX &	box,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

static

◆ HasBetterCaseVariant()

static bool tesseract::HasBetterCaseVariant	(	const UNICHARSET &	unicharset,
		const BLOB_CHOICE *	choice,
		BLOB_CHOICE_LIST *	choices
	)

static

Helper returns true if the given choice has a better case variant before it in the choice_list that is not distinguishable by size.

◆ HistogramRect()

void tesseract::HistogramRect	(	Pix *	src_pix,
		int	channel,
		int	left,
		int	top,
		int	width,
		int	height,
		int *	histogram
	)

◆ HistogramWeight()

static void tesseract::HistogramWeight	(	double	weight,
		STATS *	histogram
	)

static

◆ HOcrEscape()

STRING tesseract::HOcrEscape ( const char * text )

Escape a char string - replace the characters & < > " ' with their HTML codes.

◆ HScanForEdge()

static bool tesseract::HScanForEdge	(	uint32_t *	data,
		int	wpl,
		int	x_start,
		int	x_end,
		int	min_count,
		int	mid_width,
		int	max_count,
		int	y_end,
		int	y_step,
		int *	y_start
	)

static

◆ IncreaseInOverlap()

static int tesseract::IncreaseInOverlap	(	const ColPartition *	merge1,
		const ColPartition *	merge2,
		int	ok_overlap,
		ColPartition_CLIST *	parts
	)

static

◆ InitializeRowInfo()

static void tesseract::InitializeRowInfo	(	bool	after_recognition,
		const MutableIterator &	it,
		RowInfo *	info
	)

static

◆ InitializeTextAndBoxesPreRecognition()

static void tesseract::InitializeTextAndBoxesPreRecognition	(	const MutableIterator &	it,
		RowInfo *	info
	)

static

◆ INT_VAR() [1/2]

tesseract::INT_VAR	(	textord_tabfind_show_strokewidths	,
		0	,
		"Show stroke widths"
	)

◆ INT_VAR() [2/2]

tesseract::INT_VAR	(	textord_tabfind_show_partitions	,
		0	,
		"Show partition bounds, waiting if >1"
	)

◆ IntDotProductSSE()

int32_t tesseract::IntDotProductSSE	(	const int8_t *	u,
		const int8_t *	v,
		int	n
	)

◆ InternalParagraphModelByOutline()

static ParagraphModel tesseract::InternalParagraphModelByOutline	(	const GenericVector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		int	tolerance,
		bool *	consistent
	)

static

◆ IntersectArea()

static int tesseract::IntersectArea	(	const TBOX &	box,
		ColPartition_LIST *	part_list
	)

static

◆ InterwordSpace()

int tesseract::InterwordSpace	(	const GenericVector< RowScratchRegisters > &	rows,
		int	row_start,
		int	row_end
	)

◆ IntFlagExists()

static bool tesseract::IntFlagExists	(	const char *	flag_name,
		int32_t *	value
	)

static

◆ is_double_quote()

static bool tesseract::is_double_quote ( const char32 ch )

static

◆ is_hyphen_punc()

static bool tesseract::is_hyphen_punc ( const char32 ch )

static

◆ is_single_quote()

static bool tesseract::is_single_quote ( const char32 ch )

static

◆ IsCombiner()

static bool tesseract::IsCombiner ( int ch )

static

◆ IsDigitLike()

static bool tesseract::IsDigitLike ( int ch )

static

◆ IsInterchangeValid()

bool tesseract::IsInterchangeValid ( const char32 ch )

◆ IsInterchangeValid7BitAscii()

bool tesseract::IsInterchangeValid7BitAscii ( const char32 ch )

◆ IsLatinLetter()

static bool tesseract::IsLatinLetter ( int ch )

static

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType type )

inline

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent	(	char32	ch1,
		char32	ch2
	)

◆ IsOpeningPunct()

static bool tesseract::IsOpeningPunct ( int ch )

static

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType type )

inline

◆ IsStrInList()

static bool tesseract::IsStrInList	(	const STRING &	str,
		const GenericVector< STRING > &	str_list
	)

static

◆ IsTerminalPunct()

static bool tesseract::IsTerminalPunct ( int ch )

static

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType type )

inline

◆ IsThaiBeforeConsonantVowel()

static bool tesseract::IsThaiBeforeConsonantVowel ( char32 ch )

static

◆ IsThaiConsonant()

static bool tesseract::IsThaiConsonant ( char32 ch )

static

◆ IsThaiTonableVowel()

static bool tesseract::IsThaiTonableVowel ( char32 ch )

static

◆ IsThaiToneMark()

static bool tesseract::IsThaiToneMark ( char32 ch )

static

◆ IsUTF8Whitespace()

bool tesseract::IsUTF8Whitespace ( const char * text )

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32 ch )

◆ IsWhitespace()

bool tesseract::IsWhitespace ( const char32 ch )

◆ IsWhitespaceBox()

static bool tesseract::IsWhitespaceBox ( const BoxChar * boxchar )

static

◆ LeftoverSegments()

static void tesseract::LeftoverSegments	(	const GenericVector< RowScratchRegisters > &	rows,
		GenericVector< Interval > *	to_fix,
		int	row_start,
		int	row_end
	)

static

◆ LeftWordAttributes()

void tesseract::LeftWordAttributes	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	werd,
		const STRING &	utf8,
		bool *	is_list,
		bool *	starts_idea,
		bool *	ends_idea
	)

◆ LikelyListMark()

static bool tesseract::LikelyListMark ( const STRING & word )

static

◆ LikelyListMarkUnicode()

static bool tesseract::LikelyListMarkUnicode ( int ch )

static

◆ LikelyListNumeral()

static bool tesseract::LikelyListNumeral ( const STRING & word )

static

◆ LikelyParagraphStart()

static bool tesseract::LikelyParagraphStart	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after,
		tesseract::ParagraphJustification	j
	)

static

◆ List2ndNeighbours()

static void tesseract::List2ndNeighbours	(	const BLOBNBOX *	blob,
		BLOBNBOX_CLIST *	neighbours
	)

static

◆ List3rdNeighbours()

static void tesseract::List3rdNeighbours	(	const BLOBNBOX *	blob,
		BLOBNBOX_CLIST *	neighbours
	)

static

◆ ListFontFamilies()

static void tesseract::ListFontFamilies	(	PangoFontFamily ***	families,
		int *	n_families
	)

static

◆ ListNeighbours()

static void tesseract::ListNeighbours	(	const BLOBNBOX *	blob,
		BLOBNBOX_CLIST *	neighbours
	)

static

◆ LoadDataFromFile() [1/2]

bool tesseract::LoadDataFromFile	(	const char *	filename,
		GenericVector< char > *	data
	)

inline

◆ LoadDataFromFile() [2/2]

bool tesseract::LoadDataFromFile	(	const STRING &	filename,
		GenericVector< char > *	data
	)

inline

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings	(	const STRING &	filename,
		GenericVector< STRING > *	lines
	)

inline

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const STRING & file_prefix )

◆ LoadTrainingData()

MasterTrainer * tesseract::LoadTrainingData	(	int	argc,
		const char *const *	argv,
		bool	replication,
		ShapeTable **	shape_table,
		STRING *	file_prefix
	)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

- Loads font info from -F option.
- Loads xheights from -X option.
- Loads samples from .tr files in remaining command-line args.
- Deletes outliers and computes canonical samples.
- If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it.

Computes canonical and cloud features. If shape_table is not nullptr, but failed to load, makes a fake flat one, as shape clustering was not run.

◆ Logistic()

double tesseract::Logistic ( double x )

inline

◆ LogSumExp()

static double tesseract::LogSumExp	(	double	ln_x,
		double	ln_y
	)

static

◆ Main()

static int tesseract::Main	(	int	argc,
		char **	argv
	)

static

◆ make_tesseract_blob()

static TBLOB* tesseract::make_tesseract_blob	(	float	baseline,
		float	xheight,
		float	descender,
		float	ascender,
		bool	numeric_mode,
		Pix *	pix
	)

static

Return a TBLOB * from the whole pix. To be freed later with delete.

◆ MakeIndividualGlyphs()

static bool tesseract::MakeIndividualGlyphs	(	Pix *	pix,
		const std::vector< BoxChar *> &	vbox,
		const int	input_tiff_page
	)

static

◆ MarkAndDeleteImageParts()

static void tesseract::MarkAndDeleteImageParts	(	const FCOORD &	rerotate,
		ColPartitionGrid *	part_grid,
		ColPartition_LIST *	image_parts,
		Pix *	image_pix
	)

static

◆ MarkRowsWithModel()

static void tesseract::MarkRowsWithModel	(	GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end,
		const ParagraphModel *	model,
		bool	ltr,
		int	eop_threshold
	)

static

◆ MarkStrongEvidence()

static void tesseract::MarkStrongEvidence	(	GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end
	)

static

◆ MaximalImageBoundingBox()

static void tesseract::MaximalImageBoundingBox	(	ColPartitionGrid *	part_grid,
		TBOX *	im_box
	)

static

◆ MaxStrokeWidth()

static int tesseract::MaxStrokeWidth ( Pix * pix )

static

◆ MeanDirectionVector()

static FCOORD tesseract::MeanDirectionVector	(	const LLSQ &	point_diffs,
		const LLSQ &	dirs,
		const FCOORD &	start_pt,
		const FCOORD &	end_pt
	)

static

◆ MedianSpacing()

static int tesseract::MedianSpacing	(	int	page_height,
		ColPartition_IT	it
	)

static

◆ MedianXHeight()

static double tesseract::MedianXHeight ( BLOCK_LIST * block_list )

static

◆ MergeBoxCharsToWords()

static void tesseract::MergeBoxCharsToWords ( std::vector< BoxChar *> * boxchars )

static

◆ ModelStrongEvidence()

static void tesseract::ModelStrongEvidence	(	int	debug_level,
		GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end,
		bool	allow_flush_models,
		ParagraphTheory *	theory
	)

static

◆ MoveBlobsToBlock()

static TO_BLOCK* tesseract::MoveBlobsToBlock	(	bool	vertical_text,
		int	line_spacing,
		BLOCK *	block,
		ColPartition_LIST *	block_parts,
		ColPartition_LIST *	used_parts
	)

static

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate	(	int	n,
		const double *	u,
		const double *	v,
		double *	out
	)

inline

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace	(	int	n,
		const double *	src,
		double *	inout
	)

inline

◆ MutualUnusedHNeighbour()

static BLOBNBOX* tesseract::MutualUnusedHNeighbour	(	const BLOBNBOX *	blob,
		BlobNeighbourDir	dir
	)

static

◆ MutualUnusedVNeighbour()

static BLOBNBOX* tesseract::MutualUnusedVNeighbour	(	const BLOBNBOX *	blob,
		BlobNeighbourDir	dir
	)

static

◆ my_strnmove()

static char* tesseract::my_strnmove	(	char *	dest,
		const char *	src,
		size_t	n
	)

static

◆ NonLinearity()

static NetworkType tesseract::NonLinearity ( char func )

static

◆ NormalizeCleanAndSegmentUTF8()

bool tesseract::NormalizeCleanAndSegmentUTF8	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		GraphemeNormMode	g_mode,
		bool	report_errors,
		const char *	str8,
		std::vector< std::string > *	graphemes
	)

◆ NormalizeDirection()

static uint8_t tesseract::NormalizeDirection	(	uint8_t	dir,
		const FCOORD &	unnormed_pos,
		const DENORM &	denorm,
		const DENORM *	root_denorm
	)

static

◆ NormalizeUTF8String()

bool tesseract::NormalizeUTF8String	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		GraphemeNorm	grapheme_normalize,
		const char *	str8,
		std::string *	normalized
	)

◆ NormalizeUTF8ToUTF32()

static void tesseract::NormalizeUTF8ToUTF32	(	UnicodeNormMode	u_mode,
		OCRNorm	ocr_normalize,
		const char *	str8,
		std::vector< char32 > *	normed32
	)

static

◆ NumTouchingIntersections()

static int tesseract::NumTouchingIntersections	(	Box *	line_box,
		Pix *	intersection_pix
	)

static

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32 ch )

◆ OKMergeCandidate()

static bool tesseract::OKMergeCandidate	(	const ColPartition *	part,
		const ColPartition *	candidate,
		bool	debug
	)

static

◆ OtsuStats()

int tesseract::OtsuStats	(	const int *	histogram,
		int *	H_out,
		int *	omega0_out
	)

◆ OtsuThreshold()

int tesseract::OtsuThreshold	(	Pix *	src_pix,
		int	left,
		int	top,
		int	width,
		int	height,
		int **	thresholds,
		int **	hi_values
	)

◆ ParagraphModelByOutline()

static ParagraphModel tesseract::ParagraphModelByOutline	(	int	debug_level,
		const GenericVector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		int	tolerance
	)

static

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char * name )

◆ ParseCommandLineFlags()

void tesseract::ParseCommandLineFlags	(	const char *	usage,
		int *	argc,
		char ***	argv,
		const bool	remove_flags
	)

◆ prec()

static double tesseract::prec ( double x )

static

◆ PrepareDistortedPix()

Pix * tesseract::PrepareDistortedPix	(	const Pix *	pix,
		bool	perspective,
		bool	invert,
		bool	white_noise,
		bool	smooth_noise,
		bool	blur,
		int	box_reduction,
		TRand *	randomizer,
		GenericVector< TBOX > *	boxes
	)

◆ PrintBoxWidths()

static void tesseract::PrintBoxWidths ( BLOBNBOX * neighbour )

static

◆ PrintCommandLineFlags()

static void tesseract::PrintCommandLineFlags ( )

static

◆ PrintDetectorState()

static void tesseract::PrintDetectorState	(	const ParagraphTheory &	theory,
		const GenericVector< RowScratchRegisters > &	rows
	)

static

◆ PrintMatrixPaths()

static void tesseract::PrintMatrixPaths	(	int	col,
		int	dim,
		const MATRIX &	ratings,
		int	length,
		const BLOB_CHOICE **	blob_choices,
		const UNICHARSET &	unicharset,
		const char *	label,
		FILE *	output_file
	)

static

◆ PrintPath()

static void tesseract::PrintPath	(	int	length,
		const BLOB_CHOICE **	blob_choices,
		const UNICHARSET &	unicharset,
		const char *	label,
		FILE *	output_file
	)

static

◆ PrintRowRange()

static void tesseract::PrintRowRange	(	const GenericVector< RowScratchRegisters > &	rows,
		int	row_start,
		int	row_end
	)

static

◆ PrintScriptDirs()

static void tesseract::PrintScriptDirs ( const GenericVector< StrongScriptDirection > & dirs )

static

◆ PrintTable()

static void tesseract::PrintTable	(	const GenericVector< GenericVector< STRING > > &	rows,
		const STRING &	colsep
	)

static

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs	(	int	width,
		int	height,
		TRand *	randomizer,
		float **	im_coeffs,
		float **	box_coeffs
	)

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int pageseg_mode )

inline

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int pageseg_mode )

inline

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int pageseg_mode )

inline

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int pageseg_mode )

inline

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int pageseg_mode )

inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. Depend critically on the order of elements of PageSegMode. NOTE that arg is an int for compatibility with INT_PARAM.

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int pageseg_mode )

inline

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int pageseg_mode )

inline

◆ RadicalPreHash()

static int tesseract::RadicalPreHash ( const std::vector< int > & rs )

static

◆ RandBool()

static bool tesseract::RandBool	(	const double	prob,
		TRand *	rand
	)

static

◆ rating_to_cost()

static float tesseract::rating_to_cost ( float rating )

static

◆ read_info()

bool tesseract::read_info	(	TFile *	f,
		FontInfo *	fi
	)

◆ read_set()

bool tesseract::read_set	(	TFile *	f,
		FontSet *	fs
	)

◆ read_spacing_info()

bool tesseract::read_spacing_info	(	TFile *	f,
		FontInfo *	fi
	)

◆ read_t()

static bool tesseract::read_t	(	PAGE_RES_IT *	page_res_it,
		TBOX *	tbox
	)

static

◆ ReadFile()

STRING tesseract::ReadFile	(	const std::string &	filename,
		FileReader	reader
	)

◆ ReCachePagesFunc()

void* tesseract::ReCachePagesFunc ( void * data )

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses	(	GenericVector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		int	percentile
	)

◆ ReflectBlobList()

static void tesseract::ReflectBlobList ( BLOBNBOX_LIST * bblobs )

static

◆ ReleaseAllBlobsAndDeleteUnused()

static void tesseract::ReleaseAllBlobsAndDeleteUnused ( BLOBNBOX_LIST * blobs )

static

◆ RemoveBadBox()

static void tesseract::RemoveBadBox	(	BLOBNBOX *	box,
		ColPartition *	part,
		ColPartition_LIST *	part_list
	)

static

◆ RemoveEnclosingCircle()

static Pix* tesseract::RemoveEnclosingCircle ( Pix * pixs )

static

◆ RemoveUnusedLineSegments()

static void tesseract::RemoveUnusedLineSegments	(	bool	horizontal_lines,
		BLOBNBOX_LIST *	line_bblobs,
		Pix *	line_pix
	)

static

◆ RightWordAttributes()

void tesseract::RightWordAttributes	(	const UNICHARSET *	unicharset,
		const WERD_CHOICE *	werd,
		const STRING &	utf8,
		bool *	is_list,
		bool *	starts_idea,
		bool *	ends_idea
	)

◆ RotateAndExplodeBlobList()

static void tesseract::RotateAndExplodeBlobList	(	const FCOORD &	blob_rotation,
		BLOBNBOX_LIST *	bblobs,
		STATS *	widths,
		STATS *	heights
	)

static

◆ RowIsStranded()

static bool tesseract::RowIsStranded	(	const GenericVector< RowScratchRegisters > &	rows,
		int	row
	)

static

◆ RowsFitModel()

bool tesseract::RowsFitModel	(	const GenericVector< RowScratchRegisters > *	rows,
		int	start,
		int	end,
		const ParagraphModel *	model
	)

◆ RtlEmbed()

static STRING tesseract::RtlEmbed	(	const STRING &	word,
		bool	rtlify
	)

static

◆ SafeAtod()

static bool tesseract::SafeAtod	(	const char *	str,
		double *	val
	)

static

◆ SafeAtoi()

static bool tesseract::SafeAtoi	(	const char *	str,
		int *	val
	)

static

◆ SaveDataToFile()

bool tesseract::SaveDataToFile	(	const GenericVector< char > &	data,
		const STRING &	filename
	)

inline

◆ ScanForOverlappingText()

static bool tesseract::ScanForOverlappingText	(	ColPartitionGrid *	part_grid,
		TBOX *	box
	)

static

◆ ScanParentsForCaseMix()

static void tesseract::ScanParentsForCaseMix	(	const UNICHARSET &	unicharset,
		LanguageModelState *	parent_node
	)

static

Helper scans the collection of predecessors for competing siblings that have the same letter with the opposite case, setting competing_vse.

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos script_pos )

◆ SelectBestWords()

static int tesseract::SelectBestWords	(	double	rating_ratio,
		double	certainty_margin,
		bool	debug,
		PointerVector< WERD_RES > *	new_words,
		PointerVector< WERD_RES > *	best_words
	)

static

◆ SeparateSimpleLeaderLines()

static void tesseract::SeparateSimpleLeaderLines	(	GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end,
		ParagraphTheory *	theory
	)

static

◆ Serialize() [1/8]

bool tesseract::Serialize	(	FILE *	fp,
		const char *	data,
		size_t	n
	)

◆ Serialize() [2/8]

bool tesseract::Serialize	(	FILE *	fp,
		const float *	data,
		size_t	n
	)

◆ Serialize() [3/8]

bool tesseract::Serialize	(	FILE *	fp,
		const int8_t *	data,
		size_t	n
	)

◆ Serialize() [4/8]

bool tesseract::Serialize	(	FILE *	fp,
		const int16_t *	data,
		size_t	n
	)

◆ Serialize() [5/8]

bool tesseract::Serialize	(	FILE *	fp,
		const int32_t *	data,
		size_t	n
	)

◆ Serialize() [6/8]

bool tesseract::Serialize	(	FILE *	fp,
		const uint8_t *	data,
		size_t	n
	)

◆ Serialize() [7/8]

bool tesseract::Serialize	(	FILE *	fp,
		const uint16_t *	data,
		size_t	n
	)

◆ Serialize() [8/8]

bool tesseract::Serialize	(	FILE *	fp,
		const uint32_t *	data,
		size_t	n
	)

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth	(	Pix *	pix,
		BLOBNBOX *	blob
	)

◆ SetBoolFlagValue()

static void tesseract::SetBoolFlagValue	(	const char *	flag_name,
		const bool	new_val
	)

static

◆ SetDoubleFlagValue()

static void tesseract::SetDoubleFlagValue	(	const char *	flag_name,
		const double	new_val
	)

static

◆ SetIntFlagValue()

static void tesseract::SetIntFlagValue	(	const char *	flag_name,
		const int32_t	new_val
	)

static

◆ SetPropertiesForInputFile()

void tesseract::SetPropertiesForInputFile	(	const std::string &	script_dir,
		const std::string &	input_unicharset_file,
		const std::string &	output_unicharset_file,
		const std::string &	output_xheights_file
	)

◆ SetScriptProperties()

void tesseract::SetScriptProperties	(	const std::string &	script_dir,
		UNICHARSET *	unicharset
	)

◆ SetStringFlagValue()

static void tesseract::SetStringFlagValue	(	const char *	flag_name,
		const char *	new_val
	)

static

◆ SetupBasicProperties() [1/2]

void tesseract::SetupBasicProperties	(	bool	report_errors,
		UNICHARSET *	unicharset
	)

inline

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties	(	bool	report_errors,
		bool	decompose,
		UNICHARSET *	unicharset
	)

◆ ShouldIgnoreFontFamilyName()

static bool tesseract::ShouldIgnoreFontFamilyName ( const char * query )

static

◆ SkipChars() [1/2]

static const char* tesseract::SkipChars	(	const char *	str,
		const char *	toskip
	)

static

◆ SkipChars() [2/2]

static const char* tesseract::SkipChars	(	const char *	str,
		bool(*)(int)	skip
	)

static

◆ SkipOne()

static const char* tesseract::SkipOne	(	const char *	str,
		const char *	toskip
	)

static

◆ SkipWhitespace()

static void tesseract::SkipWhitespace ( char ** str )

static

◆ SoftmaxInPlace()

template<typename T >

void tesseract::SoftmaxInPlace	(	int	n,
		T *	inout
	)

inline

◆ sort_cmp()

template<typename T >

int tesseract::sort_cmp	(	const void *	t1,
		const void *	t2
	)

◆ sort_ptr_cmp()

template<typename T >

int tesseract::sort_ptr_cmp	(	const void *	t1,
		const void *	t2
	)

◆ sort_strings_by_dec_length()

static int tesseract::sort_strings_by_dec_length	(	const void *	v1,
		const void *	v2
	)

static

◆ SortByBoxBottom()

template<class BBC >

int tesseract::SortByBoxBottom	(	const void *	void1,
		const void *	void2
	)

◆ SortByBoxLeft()

template<class BBC >

int tesseract::SortByBoxLeft	(	const void *	void1,
		const void *	void2
	)

◆ SortByRating()

template<class BLOB_CHOICE >

int tesseract::SortByRating	(	const void *	void1,
		const void *	void2
	)

◆ SortByUnicharID()

template<class BLOB_CHOICE >

int tesseract::SortByUnicharID	(	const void *	void1,
		const void *	void2
	)

◆ SortCPByBottom()

static int tesseract::SortCPByBottom	(	const void *	p1,
		const void *	p2
	)

static

◆ SortCPByHeight()

static int tesseract::SortCPByHeight	(	const void *	p1,
		const void *	p2
	)

static

◆ SortCPByTopReverse()

static int tesseract::SortCPByTopReverse	(	const void *	p1,
		const void *	p2
	)

static

◆ SortRightToLeft()

template<class BBC >

int tesseract::SortRightToLeft	(	const void *	void1,
		const void *	void2
	)

◆ SpanUTF8NotWhitespace()

unsigned int tesseract::SpanUTF8NotWhitespace ( const char * text )

◆ SpanUTF8Whitespace()

unsigned int tesseract::SpanUTF8Whitespace ( const char * text )

◆ StringFlagExists()

static bool tesseract::StringFlagExists	(	const char *	flag_name,
		const char **	value
	)

static

◆ StringReplace()

static std::string tesseract::StringReplace	(	const std::string &	in,
		const std::string &	oldsub,
		const std::string &	newsub
	)

static

◆ StripJoiners()

static void tesseract::StripJoiners ( std::vector< char32 > * str32 )

static

◆ StrOf()

static STRING tesseract::StrOf ( int num )

static

◆ StrongEvidenceClassify()

static void tesseract::StrongEvidenceClassify	(	int	debug_level,
		GenericVector< RowScratchRegisters > *	rows,
		int	row_start,
		int	row_end,
		ParagraphTheory *	theory
	)

static

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel * model )

inline

◆ SubtractLinesAndResidue()

static void tesseract::SubtractLinesAndResidue	(	Pix *	line_pix,
		Pix *	non_line_pix,
		int	resolution,
		Pix *	src_pix
	)

static

◆ SumVectors()

void tesseract::SumVectors	(	int	n,
		const double *	v1,
		const double *	v2,
		const double *	v3,
		const double *	v4,
		const double *	v5,
		double *	sum
	)

inline

◆ Tanh()

double tesseract::Tanh ( double x )

inline

◆ TestCompatibleCandidates()

static bool tesseract::TestCompatibleCandidates	(	const ColPartition &	part,
		bool	debug,
		ColPartition_CLIST *	candidates
	)

static

◆ TestWeakIntersectedPart()

static bool tesseract::TestWeakIntersectedPart	(	const TBOX &	im_box,
		ColPartition_LIST *	part_list,
		ColPartition *	part
	)

static

◆ TextSupportsBreak()

static bool tesseract::TextSupportsBreak	(	const RowScratchRegisters &	before,
		const RowScratchRegisters &	after
	)

static

◆ TraceBlockOnReducedPix()

Pix * tesseract::TraceBlockOnReducedPix	(	BLOCK *	block,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

◆ TraceOutlineOnReducedPix()

Pix * tesseract::TraceOutlineOnReducedPix	(	C_OUTLINE *	outline,
		int	gridsize,
		ICOORD	bleft,
		int *	left,
		int *	bottom
	)

◆ TruncateBoxToMissNonText()

static void tesseract::TruncateBoxToMissNonText	(	int	x_middle,
		int	y_middle,
		bool	split_on_x,
		Pix *	nontext_map,
		TBOX *	bbox
	)

static

◆ UnicodeFor()

int tesseract::UnicodeFor	(	const UNICHARSET *	u,
		const WERD_CHOICE *	werd,
		int	pos
	)

◆ UniLikelyListItem()

static bool tesseract::UniLikelyListItem	(	const UNICHARSET *	u,
		const WERD_CHOICE *	werd
	)

static

◆ UpdateLeftMargin()

static bool tesseract::UpdateLeftMargin	(	const ColPartition &	part,
		int *	margin_left,
		int *	margin_right
	)

static

◆ UpdateRightMargin()

static bool tesseract::UpdateRightMargin	(	const ColPartition &	part,
		int *	margin_left,
		int *	margin_right
	)

static

◆ UpperQuartileCJKSize()

static int tesseract::UpperQuartileCJKSize	(	int	gridsize,
		BLOBNBOX_LIST *	blobs
	)

static

◆ ValidBodyLine()

bool tesseract::ValidBodyLine	(	const GenericVector< RowScratchRegisters > *	rows,
		int	row,
		const ParagraphModel *	model
	)

◆ ValidFirstLine()

bool tesseract::ValidFirstLine	(	const GenericVector< RowScratchRegisters > *	rows,
		int	row,
		const ParagraphModel *	model
	)

◆ VScanForEdge()

static bool tesseract::VScanForEdge	(	uint32_t *	data,
		int	wpl,
		int	y_start,
		int	y_end,
		int	min_count,
		int	mid_width,
		int	max_count,
		int	x_end,
		int	x_step,
		int *	x_start
	)

static

◆ WordGap()

static void tesseract::WordGap	(	const PointerVector< WERD_RES > &	words,
		int	index,
		int *	right,
		int *	next_left
	)

static

◆ WordsAcceptable()

static bool tesseract::WordsAcceptable ( const PointerVector< WERD_RES > & words )

static

◆ write_info()

bool tesseract::write_info	(	FILE *	f,
		const FontInfo &	fi
	)

◆ write_set()

bool tesseract::write_set	(	FILE *	f,
		const FontSet &	fs
	)

◆ write_spacing_info()

bool tesseract::write_spacing_info	(	FILE *	f,
		const FontInfo &	fi
	)

◆ WriteDawg()

static bool tesseract::WriteDawg	(	const GenericVector< STRING > &	words,
		const UNICHARSET &	unicharset,
		Trie::RTLReversePolicy	reverse_policy,
		TessdataType	file_type,
		TessdataManager *	traineddata
	)

static

◆ WriteDawgs()

static bool tesseract::WriteDawgs	(	const GenericVector< STRING > &	words,
		const GenericVector< STRING > &	puncs,
		const GenericVector< STRING > &	numbers,
		bool	lang_is_rtl,
		const UNICHARSET &	unicharset,
		TessdataManager *	traineddata
	)

static

◆ WriteFile()

bool tesseract::WriteFile	(	const std::string &	output_dir,
		const std::string &	lang,
		const std::string &	suffix,
		const GenericVector< char > &	data,
		FileWriter	writer
	)

◆ WriteRecoder()

bool tesseract::WriteRecoder	(	const UNICHARSET &	unicharset,
		bool	pass_through,
		const std::string &	output_dir,
		const std::string &	lang,
		FileWriter	writer,
		STRING *	radical_table_data,
		TessdataManager *	traineddata
	)

◆ WriteShapeTable()

void tesseract::WriteShapeTable	(	const STRING &	file_prefix,
		const ShapeTable &	shape_table
	)

◆ WriteUnicharset()

bool tesseract::WriteUnicharset	(	const UNICHARSET &	unicharset,
		const std::string &	output_dir,
		const std::string &	lang,
		FileWriter	writer,
		TessdataManager *	traineddata
	)

◆ YOutlierPieces()

static void tesseract::YOutlierPieces	(	WERD_RES *	word,
		int	rebuilt_blob_index,
		int	super_y_bottom,
		int	sub_y_top,
		ScriptPos *	leading_pos,
		int *	num_leading_outliers,
		ScriptPos *	trailing_pos,
		int *	num_trailing_outliers
	)

static

Given a recognized blob, see if a contiguous collection of sub-pieces (chopped blobs) starting at its left might qualify as being a subscript or superscript letter based only on y position. Also do this for the right side.

◆ ZeroVector()

template<typename T >

void tesseract::ZeroVector	(	int	n,
		T *	vec
	)

inline

Variable Documentation

◆ case_state_table

const int tesseract::case_state_table[6][4]

Initial value:

= {
    {
     
     
     0, 1, 5, 4},
    {
     0, 3, 2, 4},
    {
     0, -1, 2, -1},
    {
     0, 3, -1, 4},
    {
     0, -1, -1, 4},
    {
     5, -1, 2, -1},
}

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

◆ kAdamEpsilon

const double tesseract::kAdamEpsilon = 1e-8

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

◆ kAlignmentNames

const char* tesseract::kAlignmentNames[]

Initial value:

= {
  "Left Aligned",
  "Left Ragged",
  "Center",
  "Right Aligned",
  "Right Ragged",
  "Separator"
}

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

◆ kAmbigDelimiters

const char tesseract::kAmbigDelimiters[] = "\t "

static

◆ kAmbigNgramSeparator

const char tesseract::kAmbigNgramSeparator[] = { ' ', '\0' }

static

◆ kApostropheLikeUTF8

const char * tesseract::kApostropheLikeUTF8

Initial value:

= {
  "'",       
  "`",       
  "\u2018",  
  "\u2019",  
  "\u2032",  
  nullptr,      
}

◆ kApostropheSymbol

const char tesseract::kApostropheSymbol[] = "'"

static

◆ kBasicBufSize

const int tesseract::kBasicBufSize = 2048

static

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

◆ kBlobTypes

char tesseract::kBlobTypes[BRT_COUNT+1] = "NHSRIUVT"

static

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected textlength assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

◆ kCharWidth

const int tesseract::kCharWidth = 2

static

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)

◆ kDawgSuccessors

const bool tesseract::kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT]

static

Initial value:

= {
  { 0, 1, 1, 0 },  
  { 1, 0, 0, 0 },  
  { 1, 0, 0, 0 },  
  { 0, 0, 0, 0 },  
}

◆ kDefaultOutputResolution

const int tesseract::kDefaultOutputResolution = 300

static

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

◆ kDictMaxWildcards

const int tesseract::kDictMaxWildcards = 2

static

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

◆ kDictWildcard

const char tesseract::kDictWildcard[] = "\u2606"

static

◆ kDocDictMaxRepChars

const int tesseract::kDocDictMaxRepChars = 4

static

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

◆ kErrClip

const double tesseract::kErrClip = 1.0f

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }

◆ kGoodRowNumberOfColumnsSmallSize

const int tesseract::kGoodRowNumberOfColumnsSmallSize

Initial value:

= sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1

tesseract::kGoodRowNumberOfColumnsSmall

const double kGoodRowNumberOfColumnsSmall[]

Definition: tablerecog.cpp:56

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

◆ kHyphenLikeUTF8

const char * tesseract::kHyphenLikeUTF8[]

Initial value:

= {
  "-",       
  "\u05BE",  
  "\u2010",  
  "\u2011",  
  "\u2012",  
  "\u2013",  
  "\u2014",  
  "\u2015",  
  "\u2212",  
  "\uFE58",  
  "\uFE63",  
  "\uFF0D",  
  nullptr,      
}

The following are confusable internal word punctuation symbols which we normalize to the first variant when matching in dawgs.

◆ kHyphenSymbol

const char tesseract::kHyphenSymbol[] = "-"

static

◆ kIllegalMsg

const char tesseract::kIllegalMsg[]

static

Initial value:

= "Illegal ambiguity specification on line %d\n"

◆ kIllegalUnicharMsg

const char tesseract::kIllegalUnicharMsg[]

static

Initial value:

= "Illegal unichar %s in ambiguity specification\n"

◆ kImagePadding

const int tesseract::kImagePadding = 4

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

◆ kInputFile

const char* tesseract::kInputFile = "noname.tif"

Filename used for input image file, from which to derive a name to search for a possible UNLV zone file, if none is specified by SetInputName.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

◆ kLatinChs

const int tesseract::kLatinChs[]

Initial value:

= {
  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}

Latin chars corresponding, entry by entry, to the Unicode chars in kUniChs.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = sqrt(0.5)

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

◆ kLRM

const char * tesseract::kLRM = "\u200E"

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = 30 * ( 10 + 1)

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

◆ kMaxBytesPerCodepoint

const int tesseract::kMaxBytesPerCodepoint = 20

static

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine

Initial value:

= kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + UNICHAR_LEN

tesseract::kNumbersPerBlob

const int kNumbersPerBlob

Definition: baseapi.cpp:1815

tesseract::kBytesPer64BitNumber

const int kBytesPer64BitNumber

Definition: baseapi.cpp:1828

A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

◆ kMaxFinalCost

const float tesseract::kMaxFinalCost = 100.0f

static

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

◆ kMaxIntSize

const int tesseract::kMaxIntSize = 22

Max string length of an int.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

◆ kMaxMediumWordUnichars

const int tesseract::kMaxMediumWordUnichars = 6

static

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

◆ kMaxNumTessdataEntries

const int tesseract::kMaxNumTessdataEntries = 1000

static

TessdataType could be updated to contain more entries; however, we do not expect that number to be astronomically high. In order to automatically detect endianness, TessdataManager will flip the bits if actual_tessdata_num_entries_ is larger than kMaxNumTessdataEntries.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

◆ kMaxSmallWordUnichars

const int tesseract::kMaxSmallWordUnichars = 3

static

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

◆ kMinAbsoluteGarbageAlphanumFrac

const float tesseract::kMinAbsoluteGarbageAlphanumFrac = 0.5f

static

◆ kMinAbsoluteGarbageWordLength

const int tesseract::kMinAbsoluteGarbageWordLength = 10

static

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

◆ kMinColorDifference

const int tesseract::kMinColorDifference = 16

◆ kMinColumnWidth

const int tesseract::kMinColumnWidth = 2.0 / 3

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

◆ kMinFinalCost

const float tesseract::kMinFinalCost = 0.001f

static

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Denominator applied to the resolution to give the minimum length, in pixels, demanded of a line.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

◆ kMinProb

const float tesseract::kMinProb = exp(kMinCertainty)

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running tesseract.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

◆ kNodeContNames

const char* tesseract::kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"}

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

◆ kNullChar

const char* tesseract::kNullChar = "<nul>"

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers output for each box (the usual 4 and a page number).

◆ kNumCNParams

const int tesseract::kNumCNParams = 4

static

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

◆ kNumOffsetMaps

const int tesseract::kNumOffsetMaps = 2

static

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

◆ kOldVarsFile

const char* tesseract::kOldVarsFile = "failed_vars.txt"

Temp file used for storing current parameters before applying retry values.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

◆ kParamsTrainingFeatureTypeName

const char* const tesseract::kParamsTrainingFeatureTypeName[]

static

Initial value:

= {
    "PTRAIN_DIGITS_SHORT",             
    "PTRAIN_DIGITS_MED",               
    "PTRAIN_DIGITS_LONG",              
    "PTRAIN_NUM_SHORT",                
    "PTRAIN_NUM_MED",                  
    "PTRAIN_NUM_LONG",                 
    "PTRAIN_DOC_SHORT",                
    "PTRAIN_DOC_MED",                  
    "PTRAIN_DOC_LONG",                 
    "PTRAIN_DICT_SHORT",               
    "PTRAIN_DICT_MED",                 
    "PTRAIN_DICT_LONG",                
    "PTRAIN_FREQ_SHORT",               
    "PTRAIN_FREQ_MED",                 
    "PTRAIN_FREQ_LONG",                
    "PTRAIN_SHAPE_COST_PER_CHAR",      
    "PTRAIN_NGRAM_COST_PER_CHAR",      
    "PTRAIN_NUM_BAD_PUNC",             
    "PTRAIN_NUM_BAD_CASE",             
    "PTRAIN_XHEIGHT_CONSISTENCY",      
    "PTRAIN_NUM_BAD_CHAR_TYPE",        
    "PTRAIN_NUM_BAD_SPACING",          
    "PTRAIN_NUM_BAD_FONT",             
    "PTRAIN_RATING_PER_CHAR",          
}

◆ kPDF

const char * tesseract::kPDF = "\u202C"

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

◆ kPrime1

const int tesseract::kPrime1 = 17

◆ kPrime2

const int tesseract::kPrime2 = 13

◆ kQuestionSymbol

const char tesseract::kQuestionSymbol[] = "?"

static

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

◆ kRatingPad

const int tesseract::kRatingPad = 4

static

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

◆ kRLE

const char * tesseract::kRLE = "\u202A"

◆ kRLM

const char * tesseract::kRLM = "\u200F"

◆ kRMSFitScaling

const double tesseract::kRMSFitScaling = 8.0

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

◆ kRulingVerticalMargin

const int tesseract::kRulingVerticalMargin = 3

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

◆ kSampleRandomSize

const int tesseract::kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2

static

◆ kSampleScaleSize

const int tesseract::kSampleScaleSize = 3

static

◆ kSampleYShiftSize

const int tesseract::kSampleYShiftSize = 5

static

◆ kScaleFactor

const double tesseract::kScaleFactor = 256.0

◆ kScoreScaleFactor

const float tesseract::kScoreScaleFactor = 100.0f

static

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

◆ kSimCertaintyOffset

const float tesseract::kSimCertaintyOffset = -10.0

static

◆ kSimCertaintyScale

const float tesseract::kSimCertaintyScale = -10.0

static

◆ kSimilarityFloor

const float tesseract::kSimilarityFloor = 100.0

static

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

◆ kSlashSymbol

const char tesseract::kSlashSymbol[] = "/"

static

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

◆ kStateClip

const double tesseract::kStateClip = 100.0

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

◆ kTableSize

const int tesseract::kTableSize = 4096

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

◆ kTessdataFileSuffixes

const char* const tesseract::kTessdataFileSuffixes[]

static

Initial value:

= {
    kLangConfigFileSuffix,        
    kUnicharsetFileSuffix,        
    kAmbigsFileSuffix,            
    kBuiltInTemplatesFileSuffix,  
    kBuiltInCutoffsFileSuffix,    
    kNormProtoFileSuffix,         
    kPuncDawgFileSuffix,          
    kSystemDawgFileSuffix,        
    kNumberDawgFileSuffix,        
    kFreqDawgFileSuffix,          
    kFixedLengthDawgsFileSuffix,  
    kCubeUnicharsetFileSuffix,    
    kCubeSystemDawgFileSuffix,    
    kShapeTableFileSuffix,        
    kBigramDawgFileSuffix,        
    kUnambigDawgFileSuffix,       
    kParamsModelFileSuffix,       
    kLSTMModelFileSuffix,         
    kLSTMPuncDawgFileSuffix,      
    kLSTMSystemDawgFileSuffix,    
    kLSTMNumberDawgFileSuffix,    
    kLSTMUnicharsetFileSuffix,    
    kLSTMRecoderFileSuffix,       
    kVersionFileSuffix,           
}

kTessdataFileSuffixes[i] indicates the file suffix for tessdata of type i (from TessdataType enum).

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract couldn't recognize the input as anything.

◆ kTestChar

const int tesseract::kTestChar = -1

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Denominator applied to the resolution to give the maximum width, in pixels, allowed for a thin line.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

◆ kUniChs

const int tesseract::kUniChs[]

Initial value:

= {
  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}

Conversion table for non-Latin characters. Maps characters outside the Latin set into the Latin set. TODO(rays) incorporate this translation into unicharset.

◆ kUnigramAmbigsBufferSize

const int tesseract::kUnigramAmbigsBufferSize = 1000

static

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

◆ kUTF8LineSeparator

const char * tesseract::kUTF8LineSeparator = "\u2028"

◆ kUTF8ParagraphSeparator

const char * tesseract::kUTF8ParagraphSeparator = "\u2029"

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 500

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

◆ kWildcard

const char tesseract::kWildcard[] = "*"

static

◆ kWordJoinerUTF8

const char* tesseract::kWordJoinerUTF8 = "\u2060"

static

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

◆ LMPainPointsTypeName

const char* const tesseract::LMPainPointsTypeName[]

static

Initial value:

= {
    "LM_PPTYPE_BLAMER",
    "LM_PPTYPE_AMBIGS",
    "LM_PPTYPE_PATH",
    "LM_PPTYPE_SHAPE",
}

◆ LogisticTable

double tesseract::LogisticTable

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[]

Initial value:

= {
  kDoNotReverse,
  kReverseIfHasRTL,
  kForceReverse
}

◆ TanhTable

double tesseract::TanhTable

◆ tprintfMutex

CCUtilMutex tesseract::tprintfMutex

◆ XHeightConsistencyEnumName

const char* const tesseract::XHeightConsistencyEnumName[]

static

Initial value:

= {
    "XH_GOOD",
    "XH_SUBNORMAL",
    "XH_INCONSISTENT",
}

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Public Function Prototypes

Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

◆ char32

◆ CheckPointReader

◆ CheckPointWriter

◆ ColPartitionGridSearch

◆ ColSegmentGrid

◆ ColSegmentGridSearch

◆ DawgVector

◆ DictFunc

◆ FileReader

◆ FileWriter

◆ FillLatticeFunc

◆ IntKDPair

◆ LanguageModelFlagsType

◆ LigHash

◆ NodeChildVector

◆ PainPointHeap

◆ ParamsModelClassifyFunc

◆ ParamsTrainingHypothesisList

◆ PartSetVector

◆ ProbabilityInContextFunc

◆ RecodeHeap

◆ RecodePair

◆ RSCounts

◆ RSMap

◆ SetOfModels

◆ ShapeQueue

◆ SuccessorList

◆ SuccessorListsVector

◆ TestCallback

◆ TruthCallback

◆ UnicharAmbigsVector

◆ UnicharIdVector

◆ WidthCallback

◆ WordGrid

◆ WordRecognizer

◆ WordSearch

Enumeration Type Documentation

◆ AmbigType

◆ CachingStrategy

◆ CharSegmentationType

◆ CMD_EVENTS

◆ ColSegType

◆ ColumnSpanningType

◆ CountTypes

◆ DawgType

◆ ErrorTypes

◆ FactorNames

◆ FlexDimensions

◆ GraphemeNorm

◆ GraphemeNormMode

◆ kParamsTrainingFeatureType

◆ LeftOrRight

◆ LineType

◆ LMPainPointsType

◆ LossType

◆ NeighbourPartitionType

◆ NetworkFlags

◆ NetworkType

◆ NodeContinuation

◆ NormalizationMode

◆ OcrEngineMode

◆ OCRNorm

◆ Orientation

◆ PageIteratorLevel

◆ PageSegMode

◆ ParagraphJustification

◆ PartitionFindResult

◆ ScriptPos

◆ SerializeAmount

◆ SetParamConstraint