FusedMultiHeadAttentionXMMAKernelV2(const FusedMultiHeadAttentionKernelMetaInfoV2 *pMetaStart, unsigned int nMetaCount, Data_type type, unsigned int sm) | bert::FusedMultiHeadAttentionXMMAKernelV2 | inline |
hashID(unsigned int s, bool interleaved, bool unroll) const | bert::FusedMultiHeadAttentionXMMAKernelV2 | inline |
hashID(const KernelMeta &kernelMeta) const | bert::FusedMultiHeadAttentionXMMAKernelV2 | inline virtual |
TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 >::hashID(unsigned int s, unsigned int d) const | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | inline |
isValid(int s) const | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | inline |
KernelMeta typedef | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | |
KernelParam typedef | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | |
loadXMMAKernels() | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | inline |
mDataType | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mDriver | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mFunctions | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mKernelMeta | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mKernelMetaCount | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mModules | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mSM | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
mValidSequences | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | protected |
run(Fused_multihead_attention_params_v2 &params, cudaStream_t ss) const | bert::FusedMultiHeadAttentionXMMAKernelV2 | inline virtual |
TFusedMultiHeadAttentionXMMAKernel(const FusedMultiHeadAttentionKernelMetaInfoV2 *pMetaStart, unsigned int nMetaCount, Data_type type, unsigned int sm) | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | inline |
~TFusedMultiHeadAttentionXMMAKernel()=default | bert::TFusedMultiHeadAttentionXMMAKernel< FusedMultiHeadAttentionKernelMetaInfoV2, Fused_multihead_attention_params_v2 > | virtual |