tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
genericheap.h
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: genericheap.h
5 // Description: Template heap class.
6 // Author: Ray Smith, based on Dan Johnson's original code.
7 // Created: Wed Mar 14 08:13:00 PDT 2012
8 //
9 // (C) Copyright 2012, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_CCUTIL_GENERICHEAP_H_
23 #define TESSERACT_CCUTIL_GENERICHEAP_H_
24 
25 #include "errcode.h"
26 #include "genericvector.h"
27 
28 namespace tesseract {
29 
30 // GenericHeap requires 1 template argument:
31 // Pair will normally be either KDPairInc<Key, Data> or KDPairDec<Key, Data>
32 // for some arbitrary Key and scalar, smart pointer, or non-ownership pointer
33 // Data type, according to whether a MIN heap or a MAX heap is desired,
34 // respectively. Using KDPtrPairInc<Key, Data> or KDPtrPairDec<Key, Data>,
35 // GenericHeap can also handle simple Data pointers and own them.
36 // If no additional data is required, Pair can also be a scalar, since
37 // GenericHeap doesn't look inside it except for operator<.
38 //
39 // The heap is stored as a packed binary tree in an array hosted by a
40 // GenericVector<Pair>, with the invariant that the children of each node are
41 // both NOT Pair::operator< the parent node. KDPairInc defines Pair::operator<
42 // to use Key::operator< to generate a MIN heap and KDPairDec defines
43 // Pair::operator< to use Key::operator> to generate a MAX heap by reversing
44 // all the comparisons.
45 // See http://en.wikipedia.org/wiki/Heap_(data_structure) for more detail on
46 // the basic heap implementation.
47 //
48 // Insertion and removal are both O(log n) and, unlike the STL heap, an
49 // explicit Reshuffle function allows a node to be repositioned in time O(log n)
50 // after changing its value.
51 //
52 // Accessing the element for revaluation is a more complex matter, since the
53 // index and pointer can be changed arbitrarily by heap operations.
54 // Revaluation can be done by making the Data type in the Pair derived from or
55 // contain a DoublePtr as its first data element, making it possible to convert
56 // the pointer to a Pair using KDPairInc::RecastDataPointer.
57 template <typename Pair>
58 class GenericHeap {
59  public:
60  GenericHeap() = default;
61  // The initial size is only a GenericVector::reserve. It is not enforced as
62  // the size limit of the heap. Caller must implement their own enforcement.
63  explicit GenericHeap(int initial_size) {
64  heap_.reserve(initial_size);
65  }
66 
67  // Simple accessors.
68  bool empty() const {
69  return heap_.empty();
70  }
71  int size() const {
72  return heap_.size();
73  }
74  int size_reserved() const {
75  return heap_.size_reserved();
76  }
77  void clear() {
78  // Clear truncates to 0 to keep the number reserved in tact.
79  heap_.truncate(0);
80  }
81  // Provides access to the underlying vector.
82  // Caution! any changes that modify the keys will invalidate the heap!
84  return &heap_;
85  }
86  // Provides read-only access to an element of the underlying vector.
87  const Pair& get(int index) const {
88  return heap_[index];
89  }
90 
91  // Add entry to the heap, keeping the smallest item at the top, by operator<.
92  // Note that *entry is used as the source of operator=, but it is non-const
93  // to allow for a smart pointer to be contained within.
94  // Time = O(log n).
95  void Push(Pair* entry) {
96  int hole_index = heap_.size();
97  // Make a hole in the end of heap_ and sift it up to be the correct
98  // location for the new *entry. To avoid needing a default constructor
99  // for primitive types, and to allow for use of DoublePtr in the Pair
100  // somewhere, we have to incur a double copy here.
101  heap_.push_back(*entry);
102  *entry = heap_.back();
103  hole_index = SiftUp(hole_index, *entry);
104  heap_[hole_index] = *entry;
105  }
106 
107  // Get the value of the top (smallest, defined by operator< ) element.
108  const Pair& PeekTop() const {
109  return heap_[0];
110  }
111  // Get the value of the worst (largest, defined by operator< ) element.
112  const Pair& PeekWorst() const { return heap_[IndexOfWorst()]; }
113 
114  // Removes the top element of the heap. If entry is not nullptr, the element
115  // is copied into *entry, otherwise it is discarded.
116  // Returns false if the heap was already empty.
117  // Time = O(log n).
118  bool Pop(Pair* entry) {
119  int new_size = heap_.size() - 1;
120  if (new_size < 0)
121  return false; // Already empty.
122  if (entry != nullptr)
123  *entry = heap_[0];
124  if (new_size > 0) {
125  // Sift the hole at the start of the heap_ downwards to match the last
126  // element.
127  Pair hole_pair = heap_[new_size];
128  heap_.truncate(new_size);
129  int hole_index = SiftDown(0, hole_pair);
130  heap_[hole_index] = hole_pair;
131  } else {
132  heap_.truncate(new_size);
133  }
134  return true;
135  }
136 
137  // Removes the MAXIMUM element of the heap. (MIN from a MAX heap.) If entry is
138  // not nullptr, the element is copied into *entry, otherwise it is discarded.
139  // Time = O(n). Returns false if the heap was already empty.
140  bool PopWorst(Pair* entry) {
141  int worst_index = IndexOfWorst();
142  if (worst_index < 0) return false; // It cannot be empty!
143  // Extract the worst element from the heap, leaving a hole at worst_index.
144  if (entry != nullptr)
145  *entry = heap_[worst_index];
146  int heap_size = heap_.size() - 1;
147  if (heap_size > 0) {
148  // Sift the hole upwards to match the last element of the heap_
149  Pair hole_pair = heap_[heap_size];
150  int hole_index = SiftUp(worst_index, hole_pair);
151  heap_[hole_index] = hole_pair;
152  }
153  heap_.truncate(heap_size);
154  return true;
155  }
156 
157  // Returns the index of the worst element. Time = O(n/2).
158  int IndexOfWorst() const {
159  int heap_size = heap_.size();
160  if (heap_size == 0) return -1; // It cannot be empty!
161 
162  // Find the maximum element. Its index is guaranteed to be greater than
163  // the index of the parent of the last element, since by the heap invariant
164  // the parent must be less than or equal to the children.
165  int worst_index = heap_size - 1;
166  int end_parent = ParentNode(worst_index);
167  for (int i = worst_index - 1; i > end_parent; --i) {
168  if (heap_[worst_index] < heap_[i]) worst_index = i;
169  }
170  return worst_index;
171  }
172 
173  // The pointed-to Pair has changed its key value, so the location of pair
174  // is reshuffled to maintain the heap invariant.
175  // Must be a valid pointer to an element of the heap_!
176  // Caution! Since GenericHeap is based on GenericVector, reallocs may occur
177  // whenever the vector is extended and elements may get shuffled by any
178  // Push or Pop operation. Therefore use this function only if Data in Pair is
179  // of type DoublePtr, derived (first) from DoublePtr, or has a DoublePtr as
180  // its first element. Reshuffles the heap to maintain the invariant.
181  // Time = O(log n).
182  void Reshuffle(Pair* pair) {
183  int index = pair - &heap_[0];
184  Pair hole_pair = heap_[index];
185  index = SiftDown(index, hole_pair);
186  index = SiftUp(index, hole_pair);
187  heap_[index] = hole_pair;
188  }
189 
190  private:
191  // A hole in the heap exists at hole_index, and we want to fill it with the
192  // given pair. SiftUp sifts the hole upward to the correct position and
193  // returns the destination index without actually putting pair there.
194  int SiftUp(int hole_index, const Pair& pair) {
195  int parent;
196  while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) {
197  heap_[hole_index] = heap_[parent];
198  hole_index = parent;
199  }
200  return hole_index;
201  }
202 
203  // A hole in the heap exists at hole_index, and we want to fill it with the
204  // given pair. SiftDown sifts the hole downward to the correct position and
205  // returns the destination index without actually putting pair there.
206  int SiftDown(int hole_index, const Pair& pair) {
207  int heap_size = heap_.size();
208  int child;
209  while ((child = LeftChild(hole_index)) < heap_size) {
210  if (child + 1 < heap_size && heap_[child + 1] < heap_[child])
211  ++child;
212  if (heap_[child] < pair) {
213  heap_[hole_index] = heap_[child];
214  hole_index = child;
215  } else {
216  break;
217  }
218  }
219  return hole_index;
220  }
221 
222  // Functions to navigate the tree. Unlike the original implementation, we
223  // store the root at index 0.
224  int ParentNode(int index) const {
225  return (index + 1) / 2 - 1;
226  }
227  int LeftChild(int index) const {
228  return index * 2 + 1;
229  }
230 
231  private:
233 };
234 
235 } // namespace tesseract
236 
237 #endif // TESSERACT_CCUTIL_GENERICHEAP_H_
Definition: genericheap.h:58
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
void Reshuffle(Pair *pair)
Definition: genericheap.h:182
int SiftUp(int hole_index, const Pair &pair)
Definition: genericheap.h:194
int LeftChild(int index) const
Definition: genericheap.h:227
const Pair & PeekWorst() const
Definition: genericheap.h:112
int push_back(T object)
Definition: genericvector.h:799
int size_reserved() const
Definition: genericheap.h:74
void truncate(int size)
Definition: genericvector.h:136
Definition: baseapi.cpp:94
void clear()
Definition: genericheap.h:77
const Pair & PeekTop() const
Definition: genericheap.h:108
void Push(Pair *entry)
Definition: genericheap.h:95
int SiftDown(int hole_index, const Pair &pair)
Definition: genericheap.h:206
int IndexOfWorst() const
Definition: genericheap.h:158
int ParentNode(int index) const
Definition: genericheap.h:224
GenericHeap(int initial_size)
Definition: genericheap.h:63
T & back() const
Definition: genericvector.h:730
bool empty() const
Definition: genericheap.h:68
GenericVector< Pair > heap_
Definition: genericheap.h:232
int size() const
Definition: genericvector.h:71
int size() const
Definition: genericheap.h:71
bool Pop(Pair *entry)
Definition: genericheap.h:118
void reserve(int size)
Definition: genericvector.h:684
int size_reserved() const
Definition: genericvector.h:81
GenericVector< Pair > * heap()
Definition: genericheap.h:83
bool empty() const
Definition: genericvector.h:90