tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
stridemap.h
1 // File: stridemap.h
3 // Description: Indexing into a 4-d tensor held in a 2-d Array.
4 // Author: Ray Smith
5 // Created: Fri Sep 20 16:00:31 PST 2016
6 //
7 // (C) Copyright 2016, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 #ifndef TESSERACT_LSTM_STRIDEMAP_H_
19 #define TESSERACT_LSTM_STRIDEMAP_H_
20 
21 #include <cstring>
22 #include <vector>
23 #include "tprintf.h"
24 
25 namespace tesseract {
26 
27 // Enum describing the dimensions of the 'Tensor' in a NetworkIO.
28 // A NetworkIO is analogous to a TF Tensor, except that the number of dimensions
29 // is fixed (4), and they always have the same meaning. The underlying
30 // representation is a 2-D array, for which the product batch*height*width
31 // is always dim1 and depth is always dim2. FlexDimensions is used only for
32 // batch, height, width with the StrideMap, and therefore represents the runtime
33 // shape. The build-time shape is defined by StaticShape.
// The flexible (non-depth) dimensions of a NetworkIO. Deliberately an
// unscoped enum: the enumerators are used directly as array indices and
// FD_DIMSIZE is used as an array bound.
enum FlexDimensions {
  FD_BATCH,    // Index of multiple images.
  FD_HEIGHT,   // y-coordinate in image.
  FD_WIDTH,    // x-coordinate in image.
  FD_DIMSIZE,  // Number of flexible non-depth dimensions.
};
40 
41 // Encapsulation of information relating to the mapping from [batch][y][x] to
42 // the first index into the 2-d array underlying a NetworkIO.
43 class StrideMap {
44  public:
45  // Class holding the non-depth indices.
46  class Index {
47  public:
48  explicit Index(const StrideMap& stride_map) : stride_map_(&stride_map) {
49  InitToFirst();
50  }
51  Index(const StrideMap& stride_map, int batch, int y, int x)
52  : stride_map_(&stride_map) {
53  indices_[FD_BATCH] = batch;
54  indices_[FD_HEIGHT] = y;
55  indices_[FD_WIDTH] = x;
57  }
58  // Accesses the index to the underlying array.
59  int t() const { return t_; }
60  int index(FlexDimensions dimension) const { return indices_[dimension]; }
61  // Initializes the indices to the first valid location.
62  void InitToFirst() {
63  memset(indices_, 0, sizeof(indices_));
64  t_ = 0;
65  }
66  // Initializes the indices to the last valid location.
68  // Returns true if *this is a valid index.
69  bool IsValid() const;
70  // Returns true if the index of the given dimension is the last.
71  bool IsLast(FlexDimensions dimension) const;
72  // Given that the dimensions up to and including dim-1 are valid, returns
73  // the maximum index for dimension dim.
74  int MaxIndexOfDim(FlexDimensions dim) const;
75  // Adds the given offset to the given dimension. Returns true if the result
76  // makes a valid index.
77  bool AddOffset(int offset, FlexDimensions dimension);
78  // Increments the index in some encapsulated way that guarantees to remain
79  // valid until it returns false, meaning that the iteration is complete.
80  bool Increment();
81  // Decrements the index in some encapsulated way that guarantees to remain
82  // valid until it returns false, meaning that the iteration (that started
83  // with InitToLast()) is complete.
84  bool Decrement();
85 
86  private:
87  // Initializes the indices to the last valid location in the given batch
88  // index.
89  void InitToLastOfBatch(int batch);
90  // Computes and sets t_ from the current indices_.
91  void SetTFromIndices();
92 
93  // Map into which *this is an index.
95  // Index to the first dimension of the underlying array.
96  int t_;
97  // Indices into the individual dimensions.
99  };
100 
102  memset(shape_, 0, sizeof(shape_));
103  memset(t_increments_, 0, sizeof(t_increments_));
104  }
105  // Default copy constructor and operator= are OK to use here!
106 
107  // Sets up the stride for the given array of height, width pairs.
108  void SetStride(const std::vector<std::pair<int, int>>& h_w_pairs);
109  // Scales width and height dimensions by the given factors.
110  void ScaleXY(int x_factor, int y_factor);
111  // Reduces width to 1, across the batch, whatever the input size.
112  void ReduceWidthTo1();
113  // Transposes the width and height dimensions.
114  void TransposeXY();
115  // Returns the size of the given dimension.
116  int Size(FlexDimensions dimension) const { return shape_[dimension]; }
117  // Returns the total width required.
118  int Width() const { return t_increments_[FD_BATCH] * shape_[FD_BATCH]; }
119 
120  private:
121  // Computes t_increments_ from shape_.
122  void ComputeTIncrements();
123 
124  // The size of each non-depth dimension.
126  // Precomputed 't' increments for each dimension. This is the value of
127  // the given dimension in the packed 3-d array that the shape_ represents.
129  // Vector of size shape_[FD_BATCH] holds the height of each image in a batch.
130  std::vector<int> heights_;
131  // Vector of size shape_[FD_BATCH] holds the width of each image in a batch.
132  std::vector<int> widths_;
133 };
134 
135 } // namespace tesseract
136 
137 #endif // TESSERACT_LSTM_STRIDEMAP_H_
int index(FlexDimensions dimension) const
Definition: stridemap.h:60
void ComputeTIncrements()
Definition: stridemap.cpp:168
void InitToLastOfBatch(int batch)
Definition: stridemap.cpp:110
std::vector< int > heights_
Definition: stridemap.h:130
Index(const StrideMap &stride_map, int batch, int y, int x)
Definition: stridemap.h:51
int indices_[FD_DIMSIZE]
Definition: stridemap.h:98
const StrideMap * stride_map_
Definition: stridemap.h:94
int Size(FlexDimensions dimension) const
Definition: stridemap.h:116
FlexDimensions
Definition: stridemap.h:34
FD_HEIGHT
Definition: stridemap.h:36
FD_BATCH
Definition: stridemap.h:35
void SetTFromIndices()
Definition: stridemap.cpp:119
Definition: baseapi.cpp:94
std::vector< int > widths_
Definition: stridemap.h:132
void ScaleXY(int x_factor, int y_factor)
Definition: stridemap.cpp:145
int Width() const
Definition: stridemap.h:118
FD_WIDTH
Definition: stridemap.h:37
void SetStride(const std::vector< std::pair< int, int >> &h_w_pairs)
Definition: stridemap.cpp:127
int MaxIndexOfDim(FlexDimensions dim) const
Definition: stridemap.cpp:44
bool IsValid() const
Definition: stridemap.cpp:25
StrideMap()
Definition: stridemap.h:101
void InitToLast()
Definition: stridemap.h:67
int t_increments_[FD_DIMSIZE]
Definition: stridemap.h:128
FD_DIMSIZE
Definition: stridemap.h:38
void InitToFirst()
Definition: stridemap.h:62
Index(const StrideMap &stride_map)
Definition: stridemap.h:48
void TransposeXY()
Definition: stridemap.cpp:161
int t_
Definition: stridemap.h:96
bool Increment()
Definition: stridemap.cpp:71
int t() const
Definition: stridemap.h:59
StrideMap
Definition: stridemap.h:43
bool IsLast(FlexDimensions dimension) const
Definition: stridemap.cpp:38
int shape_[FD_DIMSIZE]
Definition: stridemap.h:125
void ReduceWidthTo1()
Definition: stridemap.cpp:154
bool AddOffset(int offset, FlexDimensions dimension)
Definition: stridemap.cpp:63
bool Decrement()
Definition: stridemap.cpp:88
StrideMap::Index
Definition: stridemap.h:46