TensorRT 7.2.1.6
NVIDIA TensorRT
Looking for a C++ dev who knows TensorRT?
I'm looking for work. Hire me!
bert::Fused_multihead_attention_params_v2 Struct Reference
Collaboration diagram for bert::Fused_multihead_attention_params_v2:

Public Member Functions

void clear ()
 

Public Attributes

void * qkv_ptr
 
void * packed_mask_ptr
 
void * o_ptr
 
int64_t qkv_stride_in_bytes
 
int64_t packed_mask_stride_in_bytes
 
int64_t o_stride_in_bytes
 
int b
 
int h
 
int s
 
int d
 
uint32_t scale_bmm1
 
uint32_t scale_softmax
 
uint32_t scale_bmm2
 
bool enable_i2f_trick
 
int * cu_seqlens
 
bool interleaved = false
 
bool ignore_b1opt = false
 
bool force_unroll = false
 
bool use_int8_scale_max = false
 

Member Function Documentation

◆ clear()

void bert::Fused_multihead_attention_params_v2::clear ( )
inline

Member Data Documentation

◆ qkv_ptr

void* bert::Fused_multihead_attention_params_v2::qkv_ptr

◆ packed_mask_ptr

void* bert::Fused_multihead_attention_params_v2::packed_mask_ptr

◆ o_ptr

void* bert::Fused_multihead_attention_params_v2::o_ptr

◆ qkv_stride_in_bytes

int64_t bert::Fused_multihead_attention_params_v2::qkv_stride_in_bytes

◆ packed_mask_stride_in_bytes

int64_t bert::Fused_multihead_attention_params_v2::packed_mask_stride_in_bytes

◆ o_stride_in_bytes

int64_t bert::Fused_multihead_attention_params_v2::o_stride_in_bytes

◆ b

int bert::Fused_multihead_attention_params_v2::b

◆ h

int bert::Fused_multihead_attention_params_v2::h

◆ s

int bert::Fused_multihead_attention_params_v2::s

◆ d

int bert::Fused_multihead_attention_params_v2::d

◆ scale_bmm1

uint32_t bert::Fused_multihead_attention_params_v2::scale_bmm1

◆ scale_softmax

uint32_t bert::Fused_multihead_attention_params_v2::scale_softmax

◆ scale_bmm2

uint32_t bert::Fused_multihead_attention_params_v2::scale_bmm2

◆ enable_i2f_trick

bool bert::Fused_multihead_attention_params_v2::enable_i2f_trick

◆ cu_seqlens

int* bert::Fused_multihead_attention_params_v2::cu_seqlens

◆ interleaved

bool bert::Fused_multihead_attention_params_v2::interleaved = false

◆ ignore_b1opt

bool bert::Fused_multihead_attention_params_v2::ignore_b1opt = false

◆ force_unroll

bool bert::Fused_multihead_attention_params_v2::force_unroll = false

◆ use_int8_scale_max

bool bert::Fused_multihead_attention_params_v2::use_int8_scale_max = false

The documentation for this struct was generated from the following file: