tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
sorthelper.h
1 // File: sorthelper.h
3 // Description: Generic sort and maxfinding class.
4 // Author: Ray Smith
5 // Created: Thu May 20 17:48:21 PDT 2010
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_CCUTIL_SORTHELPER_H_
21 #define TESSERACT_CCUTIL_SORTHELPER_H_
22 
23 #include <cstdlib>
24 #include "genericvector.h"
25 
26 // Generic class to provide functions based on a <value,count> pair.
27 // T is the value type.
28 // The class keeps a count of each value and can return the most frequent
29 // value or a sorted array of the values with counts.
30 // Note that this class uses linear search for adding. It is better
31 // to use the STATS class to get the mode of a large number of values
32 // in a small space. SortHelper is better to get the mode of a small number
33 // of values from a large space.
34 // T must have a copy constructor.
35 template <typename T>
36 class SortHelper {
37  public:
38  // Simple pair class to hold the values and counts.
39  template<typename PairT> struct SortPair {
40  PairT value;
41  int count;
42  };
43  // qsort function to sort by decreasing count.
44  static int SortPairsByCount(const void* v1, const void* v2) {
45  const SortPair<T>* p1 = static_cast<const SortPair<T>*>(v1);
46  const SortPair<T>* p2 = static_cast<const SortPair<T>*>(v2);
47  return p2->count - p1->count;
48  }
49  // qsort function to sort by decreasing value.
50  static int SortPairsByValue(const void* v1, const void* v2) {
51  const SortPair<T>* p1 = static_cast<const SortPair<T>*>(v1);
52  const SortPair<T>* p2 = static_cast<const SortPair<T>*>(v2);
53  if (p2->value - p1->value < 0) return -1;
54  if (p2->value - p1->value > 0) return 1;
55  return 0;
56  }
57 
58  // Constructor takes a hint of the array size, but it need not be accurate.
59  explicit SortHelper(int sizehint) {
60  counts_.reserve(sizehint);
61  }
62 
63  // Add a value that may be a duplicate of an existing value.
64  // Uses a linear search.
65  void Add(T value, int count) {
66  // Linear search for value.
67  for (int i = 0; i < counts_.size(); ++i) {
68  if (counts_[i].value == value) {
69  counts_[i].count += count;
70  return;
71  }
72  }
73  SortPair<T> new_pair = {value, count};
74  counts_.push_back(SortPair<T>(new_pair));
75  }
76 
77  // Returns the frequency of the most frequent value.
78  // If max_value is not nullptr, returns the most frequent value.
79  // If the array is empty, returns -INT32_MAX and max_value is unchanged.
80  int MaxCount(T* max_value) const {
81  int best_count = -INT32_MAX;
82  for (int i = 0; i < counts_.size(); ++i) {
83  if (counts_[i].count > best_count) {
84  best_count = counts_[i].count;
85  if (max_value != nullptr)
86  *max_value = counts_[i].value;
87  }
88  }
89  return best_count;
90  }
91 
92  // Returns the data array sorted by decreasing frequency.
95  return counts_;
96  }
97  // Returns the data array sorted by decreasing value.
100  return counts_;
101  }
102 
103  private:
105 };
106 
107 
108 #endif // TESSERACT_CCUTIL_SORTHELPER_H_.
const GenericVector< SortPair< T > > & SortByValue()
Definition: sorthelper.h:98
int count
Definition: sorthelper.h:41
const GenericVector< SortPair< T > > & SortByCount()
Definition: sorthelper.h:93
SortHelper(int sizehint)
Definition: sorthelper.h:59
static int SortPairsByValue(const void *v1, const void *v2)
Definition: sorthelper.h:50
void Add(T value, int count)
Definition: sorthelper.h:65
GenericVector< SortPair< T > > counts_
Definition: sorthelper.h:104
Definition: baseapi.h:37
static int SortPairsByCount(const void *v1, const void *v2)
Definition: sorthelper.h:44
int MaxCount(T *max_value) const
Definition: sorthelper.h:80
Definition: sorthelper.h:39
Definition: sorthelper.h:36
PairT value
Definition: sorthelper.h:40