#include <unicharset.h>

Collaboration diagram for UNICHARSET:

[legend]

Classes
struct	UNICHAR_PROPERTIES

struct	UNICHAR_SLOT

Public Types
enum	Direction { U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3, U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7, U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11, U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15, U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT }

Public Member Functions
	UNICHARSET ()

	~UNICHARSET ()

UNICHAR_ID	unichar_to_id (const char *const unichar_repr) const

UNICHAR_ID	unichar_to_id (const char *const unichar_repr, int length) const

int	step (const char *str) const

bool	encodable_string (const char *str, int *first_bad_position) const

bool	encode_string (const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const

const char *	id_to_unichar (UNICHAR_ID id) const

const char *	id_to_unichar_ext (UNICHAR_ID id) const

STRING	debug_str (UNICHAR_ID id) const

STRING	debug_str (const char *unichar_repr) const

void	unichar_insert (const char *const unichar_repr, OldUncleanUnichars old_style)

void	unichar_insert (const char *const unichar_repr)

void	unichar_insert_backwards_compatible (const char *const unichar_repr)

bool	contains_unichar_id (UNICHAR_ID unichar_id) const

bool	contains_unichar (const char *const unichar_repr) const

bool	contains_unichar (const char *const unichar_repr, int length) const

bool	eq (UNICHAR_ID unichar_id, const char *const unichar_repr) const

void	delete_pointers_in_unichars ()

void	clear ()

int	size () const

void	reserve (int unichars_number)

bool	save_to_file (const char *const filename) const

bool	save_to_file (FILE *file) const

bool	save_to_file (tesseract::TFile *file) const

bool	save_to_string (STRING *str) const

bool	load_from_inmemory_file (const char *const memory, int mem_size, bool skip_fragments)

bool	load_from_inmemory_file (const char *const memory, int mem_size)

bool	load_from_file (const char *const filename, bool skip_fragments)

bool	load_from_file (const char *const filename)

bool	load_from_file (FILE *file, bool skip_fragments)

bool	load_from_file (FILE *file)

bool	load_from_file (tesseract::TFile *file, bool skip_fragments)

void	post_load_setup ()

bool	major_right_to_left () const

void	set_black_and_whitelist (const char *blacklist, const char *whitelist, const char *unblacklist)

void	set_isalpha (UNICHAR_ID unichar_id, bool value)

void	set_islower (UNICHAR_ID unichar_id, bool value)

void	set_isupper (UNICHAR_ID unichar_id, bool value)

void	set_isdigit (UNICHAR_ID unichar_id, bool value)

void	set_ispunctuation (UNICHAR_ID unichar_id, bool value)

void	set_isngram (UNICHAR_ID unichar_id, bool value)

void	set_script (UNICHAR_ID unichar_id, const char *value)

void	set_other_case (UNICHAR_ID unichar_id, UNICHAR_ID other_case)

void	set_direction (UNICHAR_ID unichar_id, UNICHARSET::Direction value)

void	set_mirror (UNICHAR_ID unichar_id, UNICHAR_ID mirror)

void	set_normed (UNICHAR_ID unichar_id, const char *normed)

void	set_normed_ids (UNICHAR_ID unichar_id)

bool	get_isalpha (UNICHAR_ID unichar_id) const

bool	get_islower (UNICHAR_ID unichar_id) const

bool	get_isupper (UNICHAR_ID unichar_id) const

bool	get_isdigit (UNICHAR_ID unichar_id) const

bool	get_ispunctuation (UNICHAR_ID unichar_id) const

bool	get_isngram (UNICHAR_ID unichar_id) const

bool	get_isprivate (UNICHAR_ID unichar_id) const

bool	top_bottom_useful () const

void	set_ranges_empty ()

void	SetPropertiesFromOther (const UNICHARSET &src)

void	PartialSetPropertiesFromOther (int start_index, const UNICHARSET &src)

void	ExpandRangesFromOther (const UNICHARSET &src)

void	CopyFrom (const UNICHARSET &src)

void	AppendOtherUnicharset (const UNICHARSET &src)

bool	SizesDistinct (UNICHAR_ID id1, UNICHAR_ID id2) const

void	get_top_bottom (UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const

void	set_top_bottom (UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top)

void	get_width_stats (UNICHAR_ID unichar_id, float *width, float *width_sd) const

void	set_width_stats (UNICHAR_ID unichar_id, float width, float width_sd)

void	get_bearing_stats (UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const

void	set_bearing_stats (UNICHAR_ID unichar_id, float bearing, float bearing_sd)

void	get_advance_stats (UNICHAR_ID unichar_id, float *advance, float *advance_sd) const

void	set_advance_stats (UNICHAR_ID unichar_id, float advance, float advance_sd)

bool	PropertiesIncomplete (UNICHAR_ID unichar_id) const

bool	IsSpaceDelimited (UNICHAR_ID unichar_id) const

int	get_script (UNICHAR_ID unichar_id) const

unsigned int	get_properties (UNICHAR_ID unichar_id) const

char	get_chartype (UNICHAR_ID unichar_id) const

UNICHAR_ID	get_other_case (UNICHAR_ID unichar_id) const

Direction	get_direction (UNICHAR_ID unichar_id) const

UNICHAR_ID	get_mirror (UNICHAR_ID unichar_id) const

UNICHAR_ID	to_lower (UNICHAR_ID unichar_id) const

UNICHAR_ID	to_upper (UNICHAR_ID unichar_id) const

bool	has_special_codes () const

bool	AnyRepeatedUnicodes () const

const CHAR_FRAGMENT *	get_fragment (UNICHAR_ID unichar_id) const

bool	get_isalpha (const char *const unichar_repr) const

bool	get_islower (const char *const unichar_repr) const

bool	get_isupper (const char *const unichar_repr) const

bool	get_isdigit (const char *const unichar_repr) const

bool	get_ispunctuation (const char *const unichar_repr) const

unsigned int	get_properties (const char *const unichar_repr) const

char	get_chartype (const char *const unichar_repr) const

int	get_script (const char *const unichar_repr) const

const CHAR_FRAGMENT *	get_fragment (const char *const unichar_repr) const

bool	get_isalpha (const char *const unichar_repr, int length) const

bool	get_islower (const char *const unichar_repr, int length) const

bool	get_isupper (const char *const unichar_repr, int length) const

bool	get_isdigit (const char *const unichar_repr, int length) const

bool	get_ispunctuation (const char *const unichar_repr, int length) const

const char *	get_normed_unichar (UNICHAR_ID unichar_id) const

const GenericVector< UNICHAR_ID > &	normed_ids (UNICHAR_ID unichar_id) const

int	get_script (const char *const unichar_repr, int length) const

int	get_script_table_size () const

const char *	get_script_from_script_id (int id) const

int	get_script_id_from_name (const char *script_name) const

bool	is_null_script (const char *script) const

int	add_script (const char *script)

bool	get_enabled (UNICHAR_ID unichar_id) const

int	null_sid () const

int	common_sid () const

int	latin_sid () const

int	cyrillic_sid () const

int	greek_sid () const

int	han_sid () const

int	hiragana_sid () const

int	katakana_sid () const

int	thai_sid () const

int	hangul_sid () const

int	default_sid () const

bool	script_has_upper_lower () const

bool	script_has_xheight () const

Static Public Member Functions
static STRING	debug_utf8_str (const char *str)

static std::string	CleanupString (const char *utf8_str)

static std::string	CleanupString (const char *utf8_str, size_t length)

Static Public Attributes
static const char *	kCustomLigatures [][2]

static const char *	kSpecialUnicharCodes [SPECIAL_UNICHAR_CODES_COUNT]

Private Member Functions
void	encode_string (const char *str, int str_index, int str_length, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *best_total_length, GenericVector< UNICHAR_ID > *best_encoding, GenericVector< char > *best_lengths) const

bool	GetStrProperties (const char *utf8_str, UNICHAR_PROPERTIES *props) const

bool	load_via_fgets (TessResultCallback2< char *, char *, int > *fgets_cb, bool skip_fragments)

Private Attributes
UNICHAR_SLOT *	unichars

UNICHARMAP	ids

int	size_used

int	size_reserved

char **	script_table

int	script_table_size_used

int	script_table_size_reserved

bool	top_bottom_set_

bool	script_has_upper_lower_

bool	script_has_xheight_

bool	old_style_included_

int	null_sid_

int	common_sid_

int	latin_sid_

int	cyrillic_sid_

int	greek_sid_

int	han_sid_

int	hiragana_sid_

int	katakana_sid_

int	thai_sid_

int	hangul_sid_

int	default_sid_

Static Private Attributes
static const char *	kCleanupMaps [][2]

static const char *	null_script = "NULL"

Member Enumeration Documentation

◆ Direction

enum UNICHARSET::Direction

Enumerator
U_LEFT_TO_RIGHT
U_RIGHT_TO_LEFT
U_EUROPEAN_NUMBER
U_EUROPEAN_NUMBER_SEPARATOR
U_EUROPEAN_NUMBER_TERMINATOR
U_ARABIC_NUMBER
U_COMMON_NUMBER_SEPARATOR
U_BLOCK_SEPARATOR
U_SEGMENT_SEPARATOR
U_WHITE_SPACE_NEUTRAL
U_OTHER_NEUTRAL
U_LEFT_TO_RIGHT_EMBEDDING
U_LEFT_TO_RIGHT_OVERRIDE
U_RIGHT_TO_LEFT_ARABIC
U_RIGHT_TO_LEFT_EMBEDDING
U_RIGHT_TO_LEFT_OVERRIDE
U_POP_DIRECTIONAL_FORMAT
U_DIR_NON_SPACING_MARK
U_BOUNDARY_NEUTRAL
U_CHAR_DIRECTION_COUNT

Constructor & Destructor Documentation

◆ UNICHARSET()

UNICHARSET::UNICHARSET ( )

◆ ~UNICHARSET()

UNICHARSET::~UNICHARSET ( )

Member Function Documentation

◆ add_script()

int UNICHARSET::add_script ( const char * script )

◆ AnyRepeatedUnicodes()

bool UNICHARSET::AnyRepeatedUnicodes ( ) const

◆ AppendOtherUnicharset()

void UNICHARSET::AppendOtherUnicharset ( const UNICHARSET & src )

◆ CleanupString() [1/2]

static std::string UNICHARSET::CleanupString ( const char * utf8_str )

inline static

◆ CleanupString() [2/2]

std::string UNICHARSET::CleanupString	(	const char *	utf8_str,
		size_t	length
	)

static

◆ clear()

void UNICHARSET::clear ( )

inline

◆ common_sid()

int UNICHARSET::common_sid ( ) const

inline

◆ contains_unichar() [1/2]

bool UNICHARSET::contains_unichar ( const char *const unichar_repr ) const

◆ contains_unichar() [2/2]

bool UNICHARSET::contains_unichar	(	const char *const	unichar_repr,
		int	length
	)		const

◆ contains_unichar_id()

bool UNICHARSET::contains_unichar_id ( UNICHAR_ID unichar_id ) const

inline

◆ CopyFrom()

void UNICHARSET::CopyFrom ( const UNICHARSET & src )

◆ cyrillic_sid()

int UNICHARSET::cyrillic_sid ( ) const

inline

◆ debug_str() [1/2]

STRING UNICHARSET::debug_str ( UNICHAR_ID id ) const

◆ debug_str() [2/2]

STRING UNICHARSET::debug_str ( const char * unichar_repr ) const

inline

◆ debug_utf8_str()

STRING UNICHARSET::debug_utf8_str ( const char * str )

static

◆ default_sid()

int UNICHARSET::default_sid ( ) const

inline

◆ delete_pointers_in_unichars()

void UNICHARSET::delete_pointers_in_unichars ( )

inline

◆ encodable_string()

bool UNICHARSET::encodable_string	(	const char *	str,
		int *	first_bad_position
	)		const

◆ encode_string() [1/2]

bool UNICHARSET::encode_string	(	const char *	str,
		bool	give_up_on_failure,
		GenericVector< UNICHAR_ID > *	encoding,
		GenericVector< char > *	lengths,
		int *	encoded_length
	)		const

◆ encode_string() [2/2]

void UNICHARSET::encode_string	(	const char *	str,
		int	str_index,
		int	str_length,
		GenericVector< UNICHAR_ID > *	encoding,
		GenericVector< char > *	lengths,
		int *	best_total_length,
		GenericVector< UNICHAR_ID > *	best_encoding,
		GenericVector< char > *	best_lengths
	)		const

private

◆ eq()

bool UNICHARSET::eq	(	UNICHAR_ID	unichar_id,
		const char *const	unichar_repr
	)		const

◆ ExpandRangesFromOther()

void UNICHARSET::ExpandRangesFromOther ( const UNICHARSET & src )

◆ get_advance_stats()

void UNICHARSET::get_advance_stats	(	UNICHAR_ID	unichar_id,
		float *	advance,
		float *	advance_sd
	)		const

inline

◆ get_bearing_stats()

void UNICHARSET::get_bearing_stats	(	UNICHAR_ID	unichar_id,
		float *	bearing,
		float *	bearing_sd
	)		const

inline

◆ get_chartype() [1/2]

char UNICHARSET::get_chartype ( UNICHAR_ID unichar_id ) const

◆ get_chartype() [2/2]

char UNICHARSET::get_chartype ( const char *const unichar_repr ) const

inline

◆ get_direction()

Direction UNICHARSET::get_direction ( UNICHAR_ID unichar_id ) const

inline

◆ get_enabled()

bool UNICHARSET::get_enabled ( UNICHAR_ID unichar_id ) const

inline

◆ get_fragment() [1/2]

const CHAR_FRAGMENT* UNICHARSET::get_fragment ( UNICHAR_ID unichar_id ) const

inline

◆ get_fragment() [2/2]

const CHAR_FRAGMENT* UNICHARSET::get_fragment ( const char *const unichar_repr ) const

inline

◆ get_isalpha() [1/3]

bool UNICHARSET::get_isalpha ( UNICHAR_ID unichar_id ) const

inline

◆ get_isalpha() [2/3]

bool UNICHARSET::get_isalpha ( const char *const unichar_repr ) const

inline

◆ get_isalpha() [3/3]

bool UNICHARSET::get_isalpha	(	const char *const	unichar_repr,
		int	length
	)		const

inline

◆ get_isdigit() [1/3]

bool UNICHARSET::get_isdigit ( UNICHAR_ID unichar_id ) const

inline

◆ get_isdigit() [2/3]

bool UNICHARSET::get_isdigit ( const char *const unichar_repr ) const

inline

◆ get_isdigit() [3/3]

bool UNICHARSET::get_isdigit	(	const char *const	unichar_repr,
		int	length
	)		const

inline

◆ get_islower() [1/3]

bool UNICHARSET::get_islower ( UNICHAR_ID unichar_id ) const

inline

◆ get_islower() [2/3]

bool UNICHARSET::get_islower ( const char *const unichar_repr ) const

inline

◆ get_islower() [3/3]

bool UNICHARSET::get_islower	(	const char *const	unichar_repr,
		int	length
	)		const

inline

◆ get_isngram()

bool UNICHARSET::get_isngram ( UNICHAR_ID unichar_id ) const

inline

◆ get_isprivate()

bool UNICHARSET::get_isprivate ( UNICHAR_ID unichar_id ) const

◆ get_ispunctuation() [1/3]

bool UNICHARSET::get_ispunctuation ( UNICHAR_ID unichar_id ) const

inline

◆ get_ispunctuation() [2/3]

bool UNICHARSET::get_ispunctuation ( const char *const unichar_repr ) const

inline

◆ get_ispunctuation() [3/3]

bool UNICHARSET::get_ispunctuation	(	const char *const	unichar_repr,
		int	length
	)		const

inline

◆ get_isupper() [1/3]

bool UNICHARSET::get_isupper ( UNICHAR_ID unichar_id ) const

inline

◆ get_isupper() [2/3]

bool UNICHARSET::get_isupper ( const char *const unichar_repr ) const

inline

◆ get_isupper() [3/3]

bool UNICHARSET::get_isupper	(	const char *const	unichar_repr,
		int	length
	)		const

inline

◆ get_mirror()

UNICHAR_ID UNICHARSET::get_mirror ( UNICHAR_ID unichar_id ) const

inline

◆ get_normed_unichar()

const char* UNICHARSET::get_normed_unichar ( UNICHAR_ID unichar_id ) const

inline

◆ get_other_case()

UNICHAR_ID UNICHARSET::get_other_case ( UNICHAR_ID unichar_id ) const

inline

◆ get_properties() [1/2]

unsigned int UNICHARSET::get_properties ( UNICHAR_ID unichar_id ) const

◆ get_properties() [2/2]

unsigned int UNICHARSET::get_properties ( const char *const unichar_repr ) const

inline

◆ get_script() [1/3]

int UNICHARSET::get_script ( UNICHAR_ID unichar_id ) const

inline

◆ get_script() [2/3]

int UNICHARSET::get_script ( const char *const unichar_repr ) const

inline

◆ get_script() [3/3]

int UNICHARSET::get_script	(	const char *const	unichar_repr,
		int	length
	)		const

inline

◆ get_script_from_script_id()

const char* UNICHARSET::get_script_from_script_id ( int id ) const

inline

◆ get_script_id_from_name()

int UNICHARSET::get_script_id_from_name ( const char * script_name ) const

◆ get_script_table_size()

int UNICHARSET::get_script_table_size ( ) const

inline

◆ get_top_bottom()

void UNICHARSET::get_top_bottom	(	UNICHAR_ID	unichar_id,
		int *	min_bottom,
		int *	max_bottom,
		int *	min_top,
		int *	max_top
	)		const

inline

◆ get_width_stats()

void UNICHARSET::get_width_stats	(	UNICHAR_ID	unichar_id,
		float *	width,
		float *	width_sd
	)		const

inline

◆ GetStrProperties()

bool UNICHARSET::GetStrProperties	(	const char *	utf8_str,
		UNICHAR_PROPERTIES *	props
	)		const

private

◆ greek_sid()

int UNICHARSET::greek_sid ( ) const

inline

◆ han_sid()

int UNICHARSET::han_sid ( ) const

inline

◆ hangul_sid()

int UNICHARSET::hangul_sid ( ) const

inline

◆ has_special_codes()

bool UNICHARSET::has_special_codes ( ) const

inline

◆ hiragana_sid()

int UNICHARSET::hiragana_sid ( ) const

inline

◆ id_to_unichar()

const char * UNICHARSET::id_to_unichar ( UNICHAR_ID id ) const

◆ id_to_unichar_ext()

const char * UNICHARSET::id_to_unichar_ext ( UNICHAR_ID id ) const

◆ is_null_script()

bool UNICHARSET::is_null_script ( const char * script ) const

inline

◆ IsSpaceDelimited()

bool UNICHARSET::IsSpaceDelimited ( UNICHAR_ID unichar_id ) const

inline

◆ katakana_sid()

int UNICHARSET::katakana_sid ( ) const

inline

◆ latin_sid()

int UNICHARSET::latin_sid ( ) const

inline

◆ load_from_file() [1/5]

bool UNICHARSET::load_from_file	(	const char *const	filename,
		bool	skip_fragments
	)

inline

◆ load_from_file() [2/5]

bool UNICHARSET::load_from_file ( const char *const filename )

inline

◆ load_from_file() [3/5]

bool UNICHARSET::load_from_file	(	FILE *	file,
		bool	skip_fragments
	)

◆ load_from_file() [4/5]

bool UNICHARSET::load_from_file ( FILE * file )

inline

◆ load_from_file() [5/5]

bool UNICHARSET::load_from_file	(	tesseract::TFile *	file,
		bool	skip_fragments
	)

◆ load_from_inmemory_file() [1/2]

bool UNICHARSET::load_from_inmemory_file	(	const char *const	memory,
		int	mem_size,
		bool	skip_fragments
	)

◆ load_from_inmemory_file() [2/2]

bool UNICHARSET::load_from_inmemory_file	(	const char *const	memory,
		int	mem_size
	)

inline

◆ load_via_fgets()

bool UNICHARSET::load_via_fgets	(	TessResultCallback2< char *, char *, int > *	fgets_cb,
		bool	skip_fragments
	)

private

◆ major_right_to_left()

bool UNICHARSET::major_right_to_left ( ) const

◆ normed_ids()

const GenericVector<UNICHAR_ID>& UNICHARSET::normed_ids ( UNICHAR_ID unichar_id ) const

inline

◆ null_sid()

int UNICHARSET::null_sid ( ) const

inline

◆ PartialSetPropertiesFromOther()

void UNICHARSET::PartialSetPropertiesFromOther	(	int	start_index,
		const UNICHARSET &	src
	)

◆ post_load_setup()

void UNICHARSET::post_load_setup ( )

◆ PropertiesIncomplete()

bool UNICHARSET::PropertiesIncomplete ( UNICHAR_ID unichar_id ) const

inline

◆ reserve()

void UNICHARSET::reserve ( int unichars_number )

◆ save_to_file() [1/3]

bool UNICHARSET::save_to_file ( const char *const filename ) const

inline

◆ save_to_file() [2/3]

bool UNICHARSET::save_to_file ( FILE * file ) const

inline

◆ save_to_file() [3/3]

bool UNICHARSET::save_to_file ( tesseract::TFile * file ) const

inline

◆ save_to_string()

bool UNICHARSET::save_to_string ( STRING * str ) const

◆ script_has_upper_lower()

bool UNICHARSET::script_has_upper_lower ( ) const

inline

◆ script_has_xheight()

bool UNICHARSET::script_has_xheight ( ) const

inline

◆ set_advance_stats()

void UNICHARSET::set_advance_stats	(	UNICHAR_ID	unichar_id,
		float	advance,
		float	advance_sd
	)

inline

◆ set_bearing_stats()

void UNICHARSET::set_bearing_stats	(	UNICHAR_ID	unichar_id,
		float	bearing,
		float	bearing_sd
	)

inline

◆ set_black_and_whitelist()

void UNICHARSET::set_black_and_whitelist	(	const char *	blacklist,
		const char *	whitelist,
		const char *	unblacklist
	)

◆ set_direction()

void UNICHARSET::set_direction	(	UNICHAR_ID	unichar_id,
		UNICHARSET::Direction	value
	)

inline

◆ set_isalpha()

void UNICHARSET::set_isalpha	(	UNICHAR_ID	unichar_id,
		bool	value
	)

inline

◆ set_isdigit()

void UNICHARSET::set_isdigit	(	UNICHAR_ID	unichar_id,
		bool	value
	)

inline

◆ set_islower()

void UNICHARSET::set_islower	(	UNICHAR_ID	unichar_id,
		bool	value
	)

inline

◆ set_isngram()

void UNICHARSET::set_isngram	(	UNICHAR_ID	unichar_id,
		bool	value
	)

inline

◆ set_ispunctuation()

void UNICHARSET::set_ispunctuation	(	UNICHAR_ID	unichar_id,
		bool	value
	)

inline

◆ set_isupper()

void UNICHARSET::set_isupper	(	UNICHAR_ID	unichar_id,
		bool	value
	)

inline

◆ set_mirror()

void UNICHARSET::set_mirror	(	UNICHAR_ID	unichar_id,
		UNICHAR_ID	mirror
	)

inline

◆ set_normed()

void UNICHARSET::set_normed	(	UNICHAR_ID	unichar_id,
		const char *	normed
	)

inline

◆ set_normed_ids()

void UNICHARSET::set_normed_ids ( UNICHAR_ID unichar_id )

◆ set_other_case()

void UNICHARSET::set_other_case	(	UNICHAR_ID	unichar_id,
		UNICHAR_ID	other_case
	)

inline

◆ set_ranges_empty()

void UNICHARSET::set_ranges_empty ( )

◆ set_script()

void UNICHARSET::set_script	(	UNICHAR_ID	unichar_id,
		const char *	value
	)

inline

◆ set_top_bottom()

void UNICHARSET::set_top_bottom	(	UNICHAR_ID	unichar_id,
		int	min_bottom,
		int	max_bottom,
		int	min_top,
		int	max_top
	)

inline

◆ set_width_stats()

void UNICHARSET::set_width_stats	(	UNICHAR_ID	unichar_id,
		float	width,
		float	width_sd
	)

inline

◆ SetPropertiesFromOther()

void UNICHARSET::SetPropertiesFromOther ( const UNICHARSET & src )

inline

◆ size()

int UNICHARSET::size ( ) const

inline

◆ SizesDistinct()

bool UNICHARSET::SizesDistinct	(	UNICHAR_ID	id1,
		UNICHAR_ID	id2
	)		const

◆ step()

int UNICHARSET::step ( const char * str ) const

◆ thai_sid()

int UNICHARSET::thai_sid ( ) const

inline

◆ to_lower()

UNICHAR_ID UNICHARSET::to_lower ( UNICHAR_ID unichar_id ) const

inline

◆ to_upper()

UNICHAR_ID UNICHARSET::to_upper ( UNICHAR_ID unichar_id ) const

inline

◆ top_bottom_useful()

bool UNICHARSET::top_bottom_useful ( ) const

inline

◆ unichar_insert() [1/2]

void UNICHARSET::unichar_insert	(	const char *const	unichar_repr,
		OldUncleanUnichars	old_style
	)

◆ unichar_insert() [2/2]

void UNICHARSET::unichar_insert ( const char *const unichar_repr )

inline

◆ unichar_insert_backwards_compatible()

void UNICHARSET::unichar_insert_backwards_compatible ( const char *const unichar_repr )

inline

◆ unichar_to_id() [1/2]

UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const unichar_repr ) const

◆ unichar_to_id() [2/2]

UNICHAR_ID UNICHARSET::unichar_to_id	(	const char *const	unichar_repr,
		int	length
	)		const

Member Data Documentation

◆ common_sid_

int UNICHARSET::common_sid_

private

◆ cyrillic_sid_

int UNICHARSET::cyrillic_sid_

private

◆ default_sid_

int UNICHARSET::default_sid_

private

◆ greek_sid_

int UNICHARSET::greek_sid_

private

◆ han_sid_

int UNICHARSET::han_sid_

private

◆ hangul_sid_

int UNICHARSET::hangul_sid_

private

◆ hiragana_sid_

int UNICHARSET::hiragana_sid_

private

◆ ids

UNICHARMAP UNICHARSET::ids

private

◆ katakana_sid_

int UNICHARSET::katakana_sid_

private

◆ kCleanupMaps

const char * UNICHARSET::kCleanupMaps

static private

Initial value:

= {
    {"\u0640", ""},    
    {"\ufb01", "fi"},  
    {"\ufb02", "fl"},  
    {nullptr, nullptr}}

◆ kCustomLigatures

const char * UNICHARSET::kCustomLigatures

static

Initial value:

= {
  {"ct", "\uE003"},  
  {"ſh", "\uE006"},  
  {"ſi", "\uE007"},  
  {"ſl", "\uE008"},  
  {"ſſ", "\uE009"},  
  {nullptr, nullptr}
}

◆ kSpecialUnicharCodes

const char * UNICHARSET::kSpecialUnicharCodes

static

Initial value:

= {
    " ",
    "Joined",
    "|Broken|0|1"
}

◆ latin_sid_

int UNICHARSET::latin_sid_

private

◆ null_script

const char * UNICHARSET::null_script = "NULL"

static private

◆ null_sid_

int UNICHARSET::null_sid_

private

◆ old_style_included_

bool UNICHARSET::old_style_included_

private

◆ script_has_upper_lower_

bool UNICHARSET::script_has_upper_lower_

private

◆ script_has_xheight_

bool UNICHARSET::script_has_xheight_

private

◆ script_table

char** UNICHARSET::script_table

private

◆ script_table_size_reserved

int UNICHARSET::script_table_size_reserved

private

◆ script_table_size_used

int UNICHARSET::script_table_size_used

private

◆ size_reserved

int UNICHARSET::size_reserved

private

◆ size_used

int UNICHARSET::size_used

private

◆ thai_sid_

int UNICHARSET::thai_sid_

private

◆ top_bottom_set_

bool UNICHARSET::top_bottom_set_

private

◆ unichars

UNICHAR_SLOT* UNICHARSET::unichars

private

The documentation for this class was generated from the following files:

/home/stephane/src/tesseract/src/ccutil/unicharset.h
/home/stephane/src/tesseract/src/ccutil/unicharset.cpp

Classes

Public Types

Public Member Functions

Static Public Member Functions

Static Public Attributes

Private Member Functions

Private Attributes

Static Private Attributes

Member Enumeration Documentation

◆ Direction

Constructor & Destructor Documentation

◆ UNICHARSET()

◆ ~UNICHARSET()

Member Function Documentation

◆ add_script()

◆ AnyRepeatedUnicodes()

◆ AppendOtherUnicharset()

◆ CleanupString() [1/2]

◆ CleanupString() [2/2]

◆ clear()

◆ common_sid()

◆ contains_unichar() [1/2]

◆ contains_unichar() [2/2]

◆ contains_unichar_id()

◆ CopyFrom()

◆ cyrillic_sid()

◆ debug_str() [1/2]

◆ debug_str() [2/2]

◆ debug_utf8_str()

◆ default_sid()

◆ delete_pointers_in_unichars()

◆ encodable_string()

◆ encode_string() [1/2]

◆ encode_string() [2/2]

◆ eq()

◆ ExpandRangesFromOther()

◆ get_advance_stats()

◆ get_bearing_stats()

◆ get_chartype() [1/2]

◆ get_chartype() [2/2]

◆ get_direction()

◆ get_enabled()

◆ get_fragment() [1/2]

◆ get_fragment() [2/2]

◆ get_isalpha() [1/3]

◆ get_isalpha() [2/3]

◆ get_isalpha() [3/3]

◆ get_isdigit() [1/3]

◆ get_isdigit() [2/3]

◆ get_isdigit() [3/3]

◆ get_islower() [1/3]

◆ get_islower() [2/3]

◆ get_islower() [3/3]

◆ get_isngram()

◆ get_isprivate()

◆ get_ispunctuation() [1/3]

◆ get_ispunctuation() [2/3]

◆ get_ispunctuation() [3/3]

◆ get_isupper() [1/3]

◆ get_isupper() [2/3]

◆ get_isupper() [3/3]

◆ get_mirror()

◆ get_normed_unichar()

◆ get_other_case()

◆ get_properties() [1/2]

◆ get_properties() [2/2]

◆ get_script() [1/3]

◆ get_script() [2/3]

◆ get_script() [3/3]

◆ get_script_from_script_id()

◆ get_script_id_from_name()

◆ get_script_table_size()

◆ get_top_bottom()

◆ get_width_stats()

◆ GetStrProperties()

◆ greek_sid()

◆ han_sid()

◆ hangul_sid()

◆ has_special_codes()

◆ hiragana_sid()