| Return type | Member function |
| --- | --- |
| | BertInference (const std::string &enginePath, const int maxBatchSize, const int seqLength, const bool enableGraph=false) |
| void | allocateBindings (const int maxBatchSize) |
| void | prepare (int profIdx, int batchSize) |
| void | run (const void *const *inputBuffers, int warmUps, int iterations) |
| void | run (const void *inputIds, const void *segmentIds, const void *inputMask, int warmUps, int iterations) |
| void | run (int profIdx, int batchSize, const void *inputIds, const void *segmentIds, const void *inputMask, int warmUps, int iterations) |
| void | reportTiming (int batchIndex, int batchSize) |
| | ~BertInference () |
|
◆ BertInference()
BertInference::BertInference (const std::string &enginePath, const int maxBatchSize, const int seqLength, const bool enableGraph = false) [inline]
◆ ~BertInference()
BertInference::~BertInference () [inline]
◆ allocateBindings()
void BertInference::allocateBindings (const int maxBatchSize) [inline]
◆ prepare()
void BertInference::prepare (int profIdx, int batchSize) [inline]
◆ run() [1/3]
void BertInference::run (const void *const *inputBuffers, int warmUps, int iterations) [inline]
◆ run() [2/3]
void BertInference::run (const void *inputIds, const void *segmentIds, const void *inputMask, int warmUps, int iterations) [inline]
◆ run() [3/3]
void BertInference::run (int profIdx, int batchSize, const void *inputIds, const void *segmentIds, const void *inputMask, int warmUps, int iterations) [inline]
◆ reportTiming()
void BertInference::reportTiming (int batchIndex, int batchSize) [inline]
◆ kBERT_INPUT_NUM
const int BertInference::kBERT_INPUT_NUM = 3 [static]
◆ mSeqLength
const int BertInference::mSeqLength
◆ mEnableGraph
const bool BertInference::mEnableGraph
◆ mEngine
TrtUniquePtr<ICudaEngine> BertInference::mEngine {nullptr}
◆ mContext
◆ mBindings
std::vector<void*> BertInference::mBindings
◆ mEnableVariableLen
bool BertInference::mEnableVariableLen
◆ mCuSeqlens
std::vector<int> BertInference::mCuSeqlens
◆ mStream
cudaStream_t BertInference::mStream {NULL}
◆ mDeviceBuffers
std::vector<void*> BertInference::mDeviceBuffers
◆ mHostOutput
std::vector<float> BertInference::mHostOutput
◆ mInputSizes
std::vector<size_t> BertInference::mInputSizes
◆ mOutputSize
size_t BertInference::mOutputSize
◆ mOutputDims
std::vector<int> BertInference::mOutputDims
◆ mTimes
std::vector<std::vector<float> > BertInference::mTimes
◆ mExecGraph
cudaGraphExec_t BertInference::mExecGraph
The documentation for this struct was generated from the following file: