29 #include "genericvector.h" 32 #include "unicharset.h" 41 enum BlobChoiceClassifier {
42 BCC_STATIC_CLASSIFIER,
43 BCC_ADAPTED_CLASSIFIER,
44 BCC_SPECKLE_CLASSIFIER,
73 BlobChoiceClassifier c);
97 int score1 = 0, score2 = 0;
101 if (
fonts_[f].score > score1) {
106 }
else if (
fonts_[f].score > score2) {
181 tprintf(
"r%.2f c%.2f x[%g,%g]: %d %s",
184 (unicharset ==
nullptr) ?
"" :
189 tprintf(
" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
239 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
276 static const char *permuter_name(uint8_t permuter);
279 : unicharset_(unicharset) { this->init(8); }
281 : unicharset_(unicharset) { this->init(reserved); }
283 const char *src_lengths,
286 uint8_t src_permuter,
288 : unicharset_(&unicharset) {
289 this->init(src_string, src_lengths, src_rating,
290 src_certainty, src_permuter);
294 :
ELIST_LINK(word), unicharset_(word.unicharset_) {
295 this->init(word.
length());
307 return adjust_factor_;
310 adjust_factor_ = factor;
316 assert(index < length_);
317 return unichar_ids_[index];
320 return state_[index];
323 if (index < 0 || index >= length_)
325 return script_pos_[index];
334 return certainties_[index];
337 return min_x_height_;
340 return max_x_height_;
343 min_x_height_ = min_height;
344 max_x_height_ = max_height;
349 const char *permuter_name()
const;
353 BLOB_CHOICE_LIST* blob_choices(
int index,
MATRIX* ratings)
const;
360 assert(index < length_);
364 return dangerous_ambig_found_;
367 dangerous_ambig_found_ = value;
382 ASSERT_HOST(reserved_ >= len);
390 reserved_, unichar_ids_);
392 reserved_, script_pos_);
396 reserved_, certainties_);
399 unichar_ids_ =
new UNICHAR_ID[1];
402 certainties_ =
new float[1];
409 inline void init(
int reserved) {
410 reserved_ = reserved;
412 unichar_ids_ =
new UNICHAR_ID[reserved];
414 state_ =
new int[reserved];
415 certainties_ =
new float[reserved];
417 unichar_ids_ =
nullptr;
418 script_pos_ =
nullptr;
420 certainties_ =
nullptr;
423 adjust_factor_ = 1.0f;
426 min_x_height_ = 0.0f;
427 max_x_height_ = FLT_MAX;
429 unichars_in_script_order_ =
false;
430 dangerous_ambig_found_ =
false;
438 void init(
const char *src_string,
const char *src_lengths,
439 float src_rating,
float src_certainty,
440 uint8_t src_permuter);
455 assert(reserved_ > length_);
458 rating, certainty, length_-1);
461 void append_unichar_id(UNICHAR_ID
unichar_id,
int blob_count,
465 float rating,
float certainty,
int index) {
466 assert(index < length_);
468 state_[index] = blob_count;
478 void set_blob_choice(
int index,
int blob_count,
481 bool contains_unichar_id(UNICHAR_ID unichar_id)
const;
482 void remove_unichar_ids(
int index,
int num);
485 this->remove_unichar_ids(index, 1);
487 bool has_rtl_unichar_id()
const;
488 void reverse_and_mirror_unichar_ids();
493 void punct_stripped(
int *start_core,
int *end_core)
const;
498 void GetNonSuperscriptSpan(
int *start,
int *end)
const;
502 WERD_CHOICE shallow_copy(
int start,
int end)
const;
504 void string_and_lengths(
STRING *word_str,
STRING *word_lengths_str)
const;
507 for (
int i = 0; i < length_; ++i) {
508 word_str += unicharset_->debug_str(unichar_ids_[i]);
515 for (
int i = 0; i < length_; ++i) {
516 if (!unicharset_->IsSpaceDelimited(unichar_ids_[i]))
return true;
522 for (
int i = 0; i < length_; ++i) {
523 if (unichar_ids_[i] != UNICHAR_SPACE)
return false;
532 return unichars_in_script_order_ = in_script_order;
536 return unichars_in_script_order_;
542 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
543 return unichar_string_;
549 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
550 return unichar_lengths_;
558 void SetScriptPositions(
bool small_caps,
TWERD* word,
int debug = 0);
566 const TBOX& blob_box,
567 UNICHAR_ID unichar_id);
572 int GetTopScriptID()
const;
575 void UpdateStateForSplit(
int blob_position);
578 int TotalOfStates()
const;
581 void print(
const char *msg)
const;
583 void print_state(
const char *msg)
const;
587 void DisplaySegmentation(
TWERD* word);
648 bool EqualIgnoringCaseAndTerminalPunct(
const WERD_CHOICE &word1,
652 void print_ratings_list(
654 BLOB_CHOICE_LIST *ratings,
BlobChoiceClassifier classifier() const
Definition: ratngs.h:133
float rating_
Definition: ratngs.h:620
void set_xgap_after(int16_t gap)
Definition: ratngs.h:164
float yshift_
Definition: ratngs.h:230
bool unichars_in_script_order() const
Definition: ratngs.h:535
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:95
float rating_
Definition: ratngs.h:214
UNICHAR_ID * unichar_ids_
Definition: ratngs.h:611
STRING unichar_lengths_
Definition: ratngs.h:639
const MATRIX_COORD & matrix_cell()
Definition: ratngs.h:115
int length_
Definition: ratngs.h:616
float yshift() const
Definition: ratngs.h:130
float certainty() const
Definition: ratngs.h:83
void set_rating(float newrat)
Definition: ratngs.h:148
int col
Definition: matrix.h:633
float min_xheight() const
Definition: ratngs.h:124
int16_t xgap_before_
Definition: ratngs.h:223
Definition: unicharset.h:146
const UNICHAR_ID * unichar_ids() const
Definition: ratngs.h:312
static T * double_the_size_memcpy(int current_size, T *data)
Definition: genericvector.h:207
bool IsClassified() const
Definition: ratngs.h:139
BlobChoiceClassifier classifier_
Definition: ratngs.h:231
const UNICHARSET * unicharset() const
Definition: ratngs.h:300
bool IsAllSpaces() const
Definition: ratngs.h:521
GenericVector< tesseract::ScoredFont > fonts_
Definition: ratngs.h:205
float * certainties_
Definition: ratngs.h:614
int script_id() const
Definition: ratngs.h:112
void set_dangerous_ambig_found_(bool value)
Definition: ratngs.h:366
tesseract::ScriptPos * script_pos_
Definition: ratngs.h:612
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
Definition: ratngs.h:170
float min_x_height() const
Definition: ratngs.h:336
Definition: baseapi.cpp:94
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:452
bool IsAdapted() const
Definition: ratngs.h:136
STRING unichar_string_
Definition: ratngs.h:638
float certainty(int index) const
Definition: ratngs.h:333
int script_id_
Definition: ratngs.h:219
UNICHAR_ID unichar_id_
Definition: ratngs.h:203
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
const STRING & unichar_lengths() const
Definition: ratngs.h:548
float rating() const
Definition: ratngs.h:80
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:200
WERD_CHOICE(const UNICHARSET *unicharset)
Definition: ratngs.h:278
uint8_t permuter() const
Definition: ratngs.h:346
float adjust_factor_
Definition: ratngs.h:618
int length() const
Definition: ratngs.h:303
const char * string() const
Definition: strngs.cpp:196
ScriptPos
Definition: ratngs.h:262
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:167
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
Definition: ratngs.h:280
bool dangerous_ambig_found() const
Definition: ratngs.h:363
bool ContainsAnyNonSpaceDelimited() const
Definition: ratngs.h:514
float certainty_
Definition: ratngs.h:218
void set_certainty(float new_val)
Definition: ratngs.h:372
int16_t fontinfo_id_
Definition: ratngs.h:206
void remove_last_unichar_id()
Definition: ratngs.h:483
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
void set_xgap_before(int16_t gap)
Definition: ratngs.h:161
float certainty_
Definition: ratngs.h:622
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:145
const UNICHARSET * unicharset_
Definition: ratngs.h:595
void print_full() const
Definition: ratngs.h:187
BLOB_CHOICE()
Definition: ratngs.h:52
void remove_unichar_id(int index)
Definition: ratngs.h:484
WERD_CHOICE(const WERD_CHOICE &word)
Definition: ratngs.h:293
const GenericVector< tesseract::ScoredFont > & fonts() const
Definition: ratngs.h:92
int row
Definition: matrix.h:634
void set_x_heights(float min_height, float max_height)
Definition: ratngs.h:342
void print(const UNICHARSET *unicharset) const
Definition: ratngs.h:180
const STRING debug_string() const
Definition: ratngs.h:505
int16_t fontinfo_id() const
Definition: ratngs.h:86
int size() const
Definition: genericvector.h:71
int16_t xgap_after() const
Definition: ratngs.h:121
float min_x_height_
Definition: ratngs.h:624
void set_script(int newscript_id)
Definition: ratngs.h:154
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:342
int16_t xgap_after_
Definition: ratngs.h:224
float max_xheight() const
Definition: ratngs.h:127
int * state_
Definition: ratngs.h:613
float max_xheight_
Definition: ratngs.h:227
void set_rating(float new_val)
Definition: ratngs.h:369
void init(int reserved)
Definition: ratngs.h:409
int16_t fontinfo_id2_
Definition: ratngs.h:207
void set_matrix_cell(int col, int row)
Definition: ratngs.h:157
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
Definition: ratngs.h:464
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
Definition: ratngs.h:387
bool unichars_in_script_order_
Definition: ratngs.h:632
int16_t fontinfo_id2() const
Definition: ratngs.h:89
BLOB_CHOICE & operator=(const BLOB_CHOICE &other)
Definition: ratngs.cpp:133
void set_certainty(float newrat)
Definition: ratngs.h:151
void print() const
Definition: ratngs.h:580
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:443
bool dangerous_ambig_found_
Definition: ratngs.h:634
void set_length(int len)
Definition: ratngs.h:381
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
Definition: ratngs.h:282
int16_t xgap_before() const
Definition: ratngs.h:118
float adjust_factor() const
Definition: ratngs.h:306
float certainty() const
Definition: ratngs.h:330
int state(int index) const
Definition: ratngs.h:319
float max_x_height() const
Definition: ratngs.h:339
void set_adjust_factor(float factor)
Definition: ratngs.h:309
void set_permuter(uint8_t perm)
Definition: ratngs.h:375
float max_x_height_
Definition: ratngs.h:625
MATRIX_COORD matrix_cell_
Definition: ratngs.h:222
static const float kBadRating
Definition: ratngs.h:275
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:359
float rating() const
Definition: ratngs.h:327
float min_xheight_
Definition: ratngs.h:226
uint8_t permuter_
Definition: ratngs.h:626
static int SortByRating(const void *p1, const void *p2)
Definition: ratngs.h:193
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
Definition: ratngs.cpp:152
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:531
int reserved_
Definition: ratngs.h:615