TensorRT  7.2.1.6
NVIDIA TensorRT
Looking for a C++ dev who knows TensorRT?
I'm looking for work. Hire me!
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Pages
BertInference Struct Reference
Collaboration diagram for BertInference:

Public Member Functions

 BertInference (const std::string &enginePath, const int maxBatchSize, const int seqLength, const bool enableGraph=false)
 
void allocateBindings (const int maxBatchSize)
 
void prepare (int profIdx, int batchSize)
 
void run (const void *const *inputBuffers, int warmUps, int iterations)
 
void run (const void *inputIds, const void *segmentIds, const void *inputMask, int warmUps, int iterations)
 
void run (int profIdx, int batchSize, const void *inputIds, const void *segmentIds, const void *inputMask, int warmUps, int iterations)
 
void reportTiming (int batchIndex, int batchSize)
 
 ~BertInference ()
 

Public Attributes

const int mSeqLength
 
const bool mEnableGraph
 
TrtUniquePtr< ICudaEngine > mEngine {nullptr}
 
TrtUniquePtr< IExecutionContext > mContext {nullptr}
 
std::vector< void * > mBindings
 
bool mEnableVariableLen
 
std::vector< int > mCuSeqlens
 
cudaStream_t mStream {NULL}
 
std::vector< void * > mDeviceBuffers
 
std::vector< float > mHostOutput
 
std::vector< size_t > mInputSizes
 
size_t mOutputSize
 
std::vector< int > mOutputDims
 
std::vector< std::vector< float > > mTimes
 
cudaGraphExec_t mExecGraph
 

Static Public Attributes

static const int kBERT_INPUT_NUM = 3
 

Constructor & Destructor Documentation

◆ BertInference()

BertInference::BertInference ( const std::string &  enginePath,
const int  maxBatchSize,
const int  seqLength,
const bool  enableGraph = false 
)
inline

◆ ~BertInference()

BertInference::~BertInference ( )
inline

Member Function Documentation

◆ allocateBindings()

void BertInference::allocateBindings ( const int  maxBatchSize)
inline

◆ prepare()

void BertInference::prepare ( int  profIdx,
int  batchSize 
)
inline

◆ run() [1/3]

void BertInference::run ( const void *const *  inputBuffers,
int  warmUps,
int  iterations 
)
inline
Here is the caller graph for this function:

◆ run() [2/3]

void BertInference::run ( const void *  inputIds,
const void *  segmentIds,
const void *  inputMask,
int  warmUps,
int  iterations 
)
inline
Here is the caller graph for this function:

◆ run() [3/3]

void BertInference::run ( int  profIdx,
int  batchSize,
const void *  inputIds,
const void *  segmentIds,
const void *  inputMask,
int  warmUps,
int  iterations 
)
inline
Here is the caller graph for this function:

◆ reportTiming()

void BertInference::reportTiming ( int  batchIndex,
int  batchSize 
)
inline

Member Data Documentation

◆ kBERT_INPUT_NUM

const int BertInference::kBERT_INPUT_NUM = 3
static

◆ mSeqLength

const int BertInference::mSeqLength

◆ mEnableGraph

const bool BertInference::mEnableGraph

◆ mEngine

TrtUniquePtr<ICudaEngine> BertInference::mEngine {nullptr}

◆ mContext

TrtUniquePtr<IExecutionContext> BertInference::mContext {nullptr}

◆ mBindings

std::vector<void*> BertInference::mBindings

◆ mEnableVariableLen

bool BertInference::mEnableVariableLen

◆ mCuSeqlens

std::vector<int> BertInference::mCuSeqlens

◆ mStream

cudaStream_t BertInference::mStream {NULL}

◆ mDeviceBuffers

std::vector<void*> BertInference::mDeviceBuffers

◆ mHostOutput

std::vector<float> BertInference::mHostOutput

◆ mInputSizes

std::vector<size_t> BertInference::mInputSizes

◆ mOutputSize

size_t BertInference::mOutputSize

◆ mOutputDims

std::vector<int> BertInference::mOutputDims

◆ mTimes

std::vector<std::vector<float> > BertInference::mTimes

◆ mExecGraph

cudaGraphExec_t BertInference::mExecGraph

The documentation for this struct was generated from the following file: