#include <resultiterator.h>

Inheritance diagram for tesseract::ResultIterator:

Collaboration diagram for tesseract::ResultIterator:

[legend]

Public Member Functions
virtual	~ResultIterator ()=default

virtual void	Begin ()

virtual bool	Next (PageIteratorLevel level)

virtual bool	IsAtBeginningOf (PageIteratorLevel level) const

virtual bool	IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const

int	BlanksBeforeWord () const

virtual char *	GetUTF8Text (PageIteratorLevel level) const

virtual std::vector< std::vector< std::pair< const char *, float > > > *	GetBestLSTMSymbolChoices () const

bool	ParagraphIsLtr () const

Public Member Functions inherited from tesseract::LTRResultIterator
 	LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

virtual	~LTRResultIterator ()

char *	GetUTF8Text (PageIteratorLevel level) const

void	SetLineSeparator (const char *new_line)

void	SetParagraphSeparator (const char *new_para)

float	Confidence (PageIteratorLevel level) const

void	RowAttributes (float *row_height, float *descenders, float *ascenders) const

const char *	WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const

const char *	WordRecognitionLanguage () const

StrongScriptDirection	WordDirection () const

bool	WordIsFromDictionary () const

int	BlanksBeforeWord () const

bool	WordIsNumeric () const

bool	HasBlamerInfo () const

const void *	GetParamsTrainingBundle () const

const char *	GetBlamerDebug () const

const char *	GetBlamerMisadaptionDebug () const

bool	HasTruthString () const

bool	EquivalentToTruth (const char *str) const

char *	WordTruthUTF8Text () const

char *	WordNormedUTF8Text () const

const char *	WordLattice (int *lattice_size) const

bool	SymbolIsSuperscript () const

bool	SymbolIsSubscript () const

bool	SymbolIsDropcap () const

Public Member Functions inherited from tesseract::PageIterator
 	PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

virtual	~PageIterator ()

	PageIterator (const PageIterator &src)

const PageIterator &	operator= (const PageIterator &src)

bool	PositionedAtSameWord (const PAGE_RES_IT *other) const

virtual void	RestartParagraph ()

bool	IsWithinFirstTextlineOfParagraph () const

virtual void	RestartRow ()

int	Cmp (const PageIterator &other) const

void	SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)

bool	BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const

bool	BoundingBox (PageIteratorLevel level, const int padding, int *left, int *top, int *right, int *bottom) const

bool	BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const

bool	Empty (PageIteratorLevel level) const

PolyBlockType	BlockType () const

Pta *	BlockPolygon () const

Pix *	GetBinaryImage (PageIteratorLevel level) const

Pix *	GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const

bool	Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const

void	Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const

void	ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const

bool	SetWordBlamerBundle (BlamerBundle *blamer_bundle)

Static Public Member Functions
static ResultIterator *	StartOfParagraph (const LTRResultIterator &resit)

static void	CalculateTextlineOrder (bool paragraph_is_ltr, const GenericVector< StrongScriptDirection > &word_dirs, GenericVectorEqEq< int > *reading_order)

Static Public Attributes
static const int	kMinorRunStart = -1

static const int	kMinorRunEnd = -2

static const int	kComplexWord = -3

Protected Member Functions
TESS_LOCAL	ResultIterator (const LTRResultIterator &resit)

Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void	BeginWord (int offset)

Private Member Functions
bool	CurrentParagraphIsLtr () const

void	CalculateTextlineOrder (bool paragraph_is_ltr, const LTRResultIterator &resit, GenericVectorEqEq< int > *indices) const

void	CalculateTextlineOrder (bool paragraph_is_ltr, const LTRResultIterator &resit, GenericVector< StrongScriptDirection > *ssd, GenericVectorEqEq< int > *indices) const

int	LTRWordIndex () const

void	CalculateBlobOrder (GenericVector< int > *blob_indices) const

void	MoveToLogicalStartOfTextline ()

void	MoveToLogicalStartOfWord ()

bool	IsAtFinalSymbolOfWord () const

bool	IsAtFirstSymbolOfWord () const

void	AppendSuffixMarks (STRING *text) const

void	AppendUTF8WordText (STRING *text) const

void	IterateAndAppendUTF8TextlineText (STRING *text)

void	AppendUTF8ParagraphText (STRING *text) const

bool	BidiDebug (int min_level) const

Private Attributes
bool	current_paragraph_is_ltr_

bool	at_beginning_of_minor_run_

bool	in_minor_direction_

bool	preserve_interword_spaces_

Additional Inherited Members
Protected Attributes inherited from tesseract::LTRResultIterator
const char *	line_separator_

const char *	paragraph_separator_

Protected Attributes inherited from tesseract::PageIterator
PAGE_RES *	page_res_

Tesseract *	tesseract_

PAGE_RES_IT *	it_

WERD *	word_

int	word_length_

int	blob_index_

C_BLOB_IT *	cblob_it_

bool	include_upper_dots_

bool	include_lower_dots_

int	scale_

int	scaled_yres_

int	rect_left_

int	rect_top_

int	rect_width_

int	rect_height_

Constructor & Destructor Documentation

◆ ~ResultIterator()

virtual tesseract::ResultIterator::~ResultIterator ( )

virtual default

ResultIterator is copy constructible! The default copy constructor works just fine for us.

◆ ResultIterator()

tesseract::ResultIterator::ResultIterator ( const LTRResultIterator & resit )

explicit protected

We presume the data associated with the given iterator will outlive us. NB: This is protected because it does something that is non-obvious: it resets to the beginning of the paragraph instead of staying wherever resit might have pointed.

Member Function Documentation

◆ AppendSuffixMarks()

void tesseract::ResultIterator::AppendSuffixMarks ( STRING * text ) const

private

Append any extra marks that should be appended to this word when printed. Mostly, these are Unicode BiDi control characters.

◆ AppendUTF8ParagraphText()

void tesseract::ResultIterator::AppendUTF8ParagraphText ( STRING * text ) const

private

Appends the text of the current paragraph in reading order to the given buffer. Each textline is terminated in a single newline character, and the paragraph gets an extra newline at the end.

◆ AppendUTF8WordText()

void tesseract::ResultIterator::AppendUTF8WordText ( STRING * text ) const

private

Appends the current word in reading order to the given buffer.

◆ Begin()

void tesseract::ResultIterator::Begin ( )

virtual

Moves the iterator to point to the start of the page to begin an iteration.

Reimplemented from tesseract::PageIterator.

◆ BidiDebug()

bool tesseract::ResultIterator::BidiDebug ( int min_level ) const

private

Returns whether the bidi_debug flag is set to at least min_level.

◆ BlanksBeforeWord()

int tesseract::ResultIterator::BlanksBeforeWord ( ) const

◆ CalculateBlobOrder()

void tesseract::ResultIterator::CalculateBlobOrder ( GenericVector< int > * blob_indices ) const

private

Given an iterator pointing at a word, returns the logical reading order of blob indices for the word.

◆ CalculateTextlineOrder() [1/3]

void tesseract::ResultIterator::CalculateTextlineOrder	(	bool	paragraph_is_ltr,
		const GenericVector< StrongScriptDirection > &	word_dirs,
		GenericVectorEqEq< int > *	reading_order
	)

static

Yields the reading order as a sequence of indices and (optional) meta-marks for a set of words (given left-to-right). The meta-marks are passed as negative values:
kMinorRunStart: Start of minor direction text.
kMinorRunEnd: End of minor direction text.
kComplexWord: The next indexed word contains both left-to-right and right-to-left characters and was treated as neutral.

For example, suppose we have five words in a text line, indexed [0,1,2,3,4] from the leftmost side of the text line. The following are all believable reading_orders:

Left-to-Right (in ltr paragraph): { 0, 1, 2, 3, 4 }
Left-to-Right (in rtl paragraph): { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
Right-to-Left (in rtl paragraph): { 4, 3, 2, 1, 0 }
Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph: { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }

◆ CalculateTextlineOrder() [2/3]

void tesseract::ResultIterator::CalculateTextlineOrder	(	bool	paragraph_is_ltr,
		const LTRResultIterator &	resit,
		GenericVectorEqEq< int > *	indices
	)		const

private

Returns word indices, measured so that the word at resit->RestartRow() has index 0, giving the reading order of words within a textline, given an iterator into the middle of the text line. In addition to non-negative word indices, the following negative values may be inserted:
kMinorRunStart: Start of minor direction text.
kMinorRunEnd: End of minor direction text.
kComplexWord: The previous word contains both left-to-right and right-to-left characters and was treated as neutral.

◆ CalculateTextlineOrder() [3/3]

void tesseract::ResultIterator::CalculateTextlineOrder	(	bool	paragraph_is_ltr,
		const LTRResultIterator &	resit,
		GenericVector< StrongScriptDirection > *	ssd,
		GenericVectorEqEq< int > *	indices
	)		const

private

Same as above, but the caller's ssd gets filled in if ssd != nullptr.

◆ CurrentParagraphIsLtr()

bool tesseract::ResultIterator::CurrentParagraphIsLtr ( ) const

private

Calculates the current paragraph's dominant writing direction. Typically, members should use current_paragraph_is_ltr_ instead.

◆ GetBestLSTMSymbolChoices()

std::vector< std::vector< std::pair< const char *, float > > > * tesseract::ResultIterator::GetBestLSTMSymbolChoices ( ) const

virtual

Returns the LSTM choices for every LSTM timestep for the current word.

◆ GetUTF8Text()

char * tesseract::ResultIterator::GetUTF8Text ( PageIteratorLevel level ) const

virtual

Returns the null-terminated UTF-8 encoded text string for the current object at the given level. Use delete [] to free after use.

◆ IsAtBeginningOf()

bool tesseract::ResultIterator::IsAtBeginningOf ( PageIteratorLevel level ) const

virtual

IsAtBeginningOf() returns whether we're at the logical beginning of the given level (as opposed to ResultIterator's left-to-right top-to-bottom order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf(). For a full description, see pageiterator.h.

Reimplemented from tesseract::PageIterator.

◆ IsAtFinalElement()

bool tesseract::ResultIterator::IsAtFinalElement	(	PageIteratorLevel	level,
		PageIteratorLevel	element
	)		const

virtual

Implement PageIterator's IsAtFinalElement correctly in a BiDi context. For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we point at the last word in a paragraph. See PageIterator for full comment.

NOTE! This is an exact copy of PageIterator::IsAtFinalElement with the change that the variable next is now a ResultIterator instead of a PageIterator.

Reimplemented from tesseract::PageIterator.

◆ IsAtFinalSymbolOfWord()

bool tesseract::ResultIterator::IsAtFinalSymbolOfWord ( ) const

private

Are we pointing at the final (reading order) symbol of the word?

◆ IsAtFirstSymbolOfWord()

bool tesseract::ResultIterator::IsAtFirstSymbolOfWord ( ) const

private

Are we pointing at the first (reading order) symbol of the word?

◆ IterateAndAppendUTF8TextlineText()

void tesseract::ResultIterator::IterateAndAppendUTF8TextlineText ( STRING * text )

private

Appends the text of the current text line, assuming this iterator is positioned at the beginning of the text line. This function updates the iterator to point to the first position past the text line. Each textline is terminated in a single newline character. If the textline ends a paragraph, it gets a second terminal newline.

◆ LTRWordIndex()

int tesseract::ResultIterator::LTRWordIndex ( ) const

private

What is the index of the current word in a strict left-to-right reading of the row?

◆ MoveToLogicalStartOfTextline()

void tesseract::ResultIterator::MoveToLogicalStartOfTextline ( )

private

Precondition: current_paragraph_is_ltr_ is set.

◆ MoveToLogicalStartOfWord()

void tesseract::ResultIterator::MoveToLogicalStartOfWord ( )

private

Precondition: current_paragraph_is_ltr_ and in_minor_direction_ are set.

◆ Next()

bool tesseract::ResultIterator::Next ( PageIteratorLevel level )

virtual

Moves to the start of the next object at the given level in the page hierarchy in the appropriate reading order and returns false if the end of the page was reached. NOTE that RIL_SYMBOL will skip non-text blocks, but all other PageIteratorLevel level values will visit each non-text block once. Think of non-text blocks as containing a single para, with a single line, with a single imaginary word. Calls to Next with different levels may be freely intermixed. This function iterates words in right-to-left scripts correctly, if the appropriate language has been loaded into Tesseract.

Reimplemented from tesseract::PageIterator.

◆ ParagraphIsLtr()

bool tesseract::ResultIterator::ParagraphIsLtr ( ) const

Return whether the current paragraph's dominant reading direction is left-to-right (as opposed to right-to-left).

◆ StartOfParagraph()

ResultIterator * tesseract::ResultIterator::StartOfParagraph ( const LTRResultIterator & resit )

static

Member Data Documentation

◆ at_beginning_of_minor_run_

bool tesseract::ResultIterator::at_beginning_of_minor_run_

private

Is the currently pointed-at character at the beginning of a minor-direction run?

◆ current_paragraph_is_ltr_

bool tesseract::ResultIterator::current_paragraph_is_ltr_

private

◆ in_minor_direction_

bool tesseract::ResultIterator::in_minor_direction_

private

Is the currently pointed-at character in a minor-direction sequence?

◆ kComplexWord

const int tesseract::ResultIterator::kComplexWord = -3

static

◆ kMinorRunEnd

const int tesseract::ResultIterator::kMinorRunEnd = -2

static

◆ kMinorRunStart

const int tesseract::ResultIterator::kMinorRunStart = -1

static

◆ preserve_interword_spaces_

bool tesseract::ResultIterator::preserve_interword_spaces_

private

Should detected inter-word spaces be preserved, or "compressed" to a single space character (default behavior).

The documentation for this class was generated from the following files:

/home/stephane/src/tesseract/src/ccmain/resultiterator.h
/home/stephane/src/tesseract/src/ccmain/resultiterator.cpp

Public Member Functions

Static Public Member Functions

Static Public Attributes

Protected Member Functions

Private Member Functions

Private Attributes

Additional Inherited Members

Constructor & Destructor Documentation

◆ ~ResultIterator()

◆ ResultIterator()

Member Function Documentation

◆ AppendSuffixMarks()

◆ AppendUTF8ParagraphText()

◆ AppendUTF8WordText()

◆ Begin()

◆ BidiDebug()

◆ BlanksBeforeWord()

◆ CalculateBlobOrder()

◆ CalculateTextlineOrder() [1/3]

◆ CalculateTextlineOrder() [2/3]

◆ CalculateTextlineOrder() [3/3]

◆ CurrentParagraphIsLtr()

◆ GetBestLSTMSymbolChoices()

◆ GetUTF8Text()

◆ IsAtBeginningOf()

◆ IsAtFinalElement()

◆ IsAtFinalSymbolOfWord()

◆ IsAtFirstSymbolOfWord()

◆ IterateAndAppendUTF8TextlineText()

◆ LTRWordIndex()

◆ MoveToLogicalStartOfTextline()

◆ MoveToLogicalStartOfWord()

◆ Next()

◆ ParagraphIsLtr()

◆ StartOfParagraph()

Member Data Documentation

◆ at_beginning_of_minor_run_

◆ current_paragraph_is_ltr_

◆ in_minor_direction_

◆ kComplexWord

◆ kMinorRunEnd

◆ kMinorRunStart

◆ preserve_interword_spaces_