tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
bitvector.h
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: bitvector.h
5 // Description: Class replacement for BITVECTOR.
6 // Author: Ray Smith
7 // Created: Mon Jan 10 17:44:01 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_CCUTIL_BITVECTOR_H_
23 #define TESSERACT_CCUTIL_BITVECTOR_H_
24 
25 #include <cassert>
26 #include <cstdio>
27 #include "host.h"
28 
29 namespace tesseract {
30 
31 // Trivial class to encapsulate a fixed-length array of bits, with
32 // Serialize/DeSerialize. Replaces the old macros.
33 class BitVector {
34  public:
35  // Fast lookup table to get the first least significant set bit in a byte.
36  // For zero, the table has 255, but since it is a special case, most code
37  // that uses this table will check for zero before looking up lsb_index_.
38  static const uint8_t lsb_index_[256];
39  // Fast lookup table to get the residual bits after zeroing the least
40  // significant set bit in a byte.
41  static const uint8_t lsb_eroded_[256];
42  // Fast lookup table to give the number of set bits in a byte.
43  static const int hamming_table_[256];
44 
45  BitVector();
46  // Initializes the array to length * false.
47  explicit BitVector(int length);
48  BitVector(const BitVector& src);
49  BitVector& operator=(const BitVector& src);
50  ~BitVector();
51 
52  // Initializes the array to length * false.
53  void Init(int length);
54 
55  // Returns the number of bits that are accessible in the vector.
56  int size() const {
57  return bit_size_;
58  }
59 
60  // Writes to the given file. Returns false in case of error.
61  bool Serialize(FILE* fp) const;
62  // Reads from the given file. Returns false in case of error.
63  // If swap is true, assumes a big/little-endian swap is needed.
64  bool DeSerialize(bool swap, FILE* fp);
65 
66  void SetAllFalse();
67  void SetAllTrue();
68 
69  // Accessors to set/reset/get bits.
70  // The range of index is [0, size()-1].
71  // There is debug-only bounds checking.
72  void SetBit(int index) {
73  array_[WordIndex(index)] |= BitMask(index);
74  }
75  void ResetBit(int index) {
76  array_[WordIndex(index)] &= ~BitMask(index);
77  }
78  void SetValue(int index, bool value) {
79  if (value)
80  SetBit(index);
81  else
82  ResetBit(index);
83  }
84  bool At(int index) const {
85  return (array_[WordIndex(index)] & BitMask(index)) != 0;
86  }
87  bool operator[](int index) const {
88  return (array_[WordIndex(index)] & BitMask(index)) != 0;
89  }
90 
91  // Returns the index of the next set bit after the given index.
92  // Useful for quickly iterating through the set bits in a sparse vector.
93  int NextSetBit(int prev_bit) const;
94 
95  // Returns the number of set bits in the vector.
96  int NumSetBits() const;
97 
98  // Logical in-place operations on whole bit vectors. Tries to do something
99  // sensible if they aren't the same size, but they should be really.
100  void operator|=(const BitVector& other);
101  void operator&=(const BitVector& other);
102  void operator^=(const BitVector& other);
103  // Set subtraction *this = v1 - v2.
104  void SetSubtract(const BitVector& v1, const BitVector& v2);
105 
106  private:
107  // Allocates memory for a vector of the given length.
108  void Alloc(int length);
109 
110  // Computes the index to array_ for the given index, with debug range
111  // checking.
112  int WordIndex(int index) const {
113  assert(0 <= index && index < bit_size_);
114  return index / kBitFactor;
115  }
116  // Returns a mask to select the appropriate bit for the given index.
117  uint32_t BitMask(int index) const {
118  return 1 << (index & (kBitFactor - 1));
119  }
120  // Returns the number of array elements needed to represent the current
121  // bit_size_.
122  int WordLength() const {
123  return (bit_size_ + kBitFactor - 1) / kBitFactor;
124  }
125  // Returns the number of bytes consumed by the array_.
126  int ByteLength() const {
127  return WordLength() * sizeof(*array_);
128  }
129 
130  // Number of bits in this BitVector.
131  int32_t bit_size_;
132  // Array of words used to pack the bits.
133  // Bits are stored little-endian by uint32_t word, ie by word first and then
134  // starting with the least significant bit in each word.
135  uint32_t* array_;
136  // Number of bits in an array_ element.
137  static const int kBitFactor = sizeof(uint32_t) * 8;
138 };
139 
140 } // namespace tesseract.
141 
142 #endif // TESSERACT_CCUTIL_BITVECTOR_H_
static const int hamming_table_[256]
Definition: bitvector.h:43
bool DeSerialize(bool swap, FILE *fp)
Definition: bitvector.cpp:147
void operator|=(const BitVector &other)
Definition: bitvector.cpp:225
void Alloc(int length)
Definition: bitvector.cpp:254
void SetValue(int index, bool value)
Definition: bitvector.h:78
uint32_t BitMask(int index) const
Definition: bitvector.h:117
static const uint8_t lsb_eroded_[256]
Definition: bitvector.h:41
void SetSubtract(const BitVector &v1, const BitVector &v2)
Definition: bitvector.cpp:243
void operator^=(const BitVector &other)
Definition: bitvector.cpp:237
void SetAllFalse()
Definition: bitvector.cpp:163
int32_t bit_size_
Definition: bitvector.h:131
int WordLength() const
Definition: bitvector.h:122
Definition: baseapi.cpp:94
int ByteLength() const
Definition: bitvector.h:126
bool Serialize(FILE *fp) const
Definition: bitvector.cpp:139
int WordIndex(int index) const
Definition: bitvector.h:112
void Init(int length)
Definition: bitvector.cpp:133
static const uint8_t lsb_index_[256]
Definition: bitvector.h:38
uint32_t * array_
Definition: bitvector.h:135
bool operator[](int index) const
Definition: bitvector.h:87
Definition: bitvector.h:33
void SetAllTrue()
Definition: bitvector.cpp:166
bool At(int index) const
Definition: bitvector.h:84
int size() const
Definition: bitvector.h:56
BitVector & operator=(const BitVector &src)
Definition: bitvector.cpp:122
int NumSetBits() const
Definition: bitvector.cpp:210
static const int kBitFactor
Definition: bitvector.h:137
void SetBit(int index)
Definition: bitvector.h:72
void ResetBit(int index)
Definition: bitvector.h:75
int NextSetBit(int prev_bit) const
Definition: bitvector.cpp:172
BitVector()
Definition: bitvector.cpp:110
~BitVector()
Definition: bitvector.cpp:128
void operator &=(const BitVector &other)