Classes
    class BertConfig
Functions
    def set_tensor_name(tensor, prefix, name)
    def set_output_name(layer, prefix, name, out_idx=0)
    def set_output_range(layer, maxval, out_idx=0)
    def attention_layer_opt(prefix, config, init_dict, network, input_tensor, mask_idx, cu_seqlens, max_seqlen)
    def skipln(prefix, config, init_dict, network, input_tensor, skip)
    def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, mask_idx, cu_seqlens, max_seqlen)
    def bert_model(config, init_dict, network, input_tensor, mask_idx, cu_seqlens, max_seqlen)
    def squad_output(prefix, config, init_dict, network, input_tensor)
    def load_tf_weights(inputbase, config)
    def onnx_to_trt_name(onnx_name)
    def load_onnx_weights_and_quant(path, config)
    def emb_layernorm(builder, network, config, weights_dict, builder_config, max_sequence_length, max_batch_size)
    def build_engine(batch_size, workspace_size, sequence_length, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num)
    def main()
Variables
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    handle = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL)
    plg_registry = trt.get_plugin_registry()
    emln_plg_creator2 = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic", "2", "")
    mha_plg_creator2 = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "2", "")
    skln_plg_creator2 = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "2", "")
    mha_plg_creator3 = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "3", "")
    skln_plg_creator3 = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "3", "")
    string WQ = "self_query_kernel"
    string BQ = "self_query_bias"
    string WK = "self_key_kernel"
    string BK = "self_key_bias"
    string WV = "self_value_kernel"
    string BV = "self_value_bias"
    string WQKV = "self_qkv_kernel"
    string BQKV = "self_qkv_bias"
    string W_AOUT = "attention_output_dense_kernel"
    string B_AOUT = "attention_output_dense_bias"
    string AOUT_LN_BETA = "attention_output_layernorm_beta"
    string AOUT_LN_GAMMA = "attention_output_layernorm_gamma"
    string W_MID = "intermediate_dense_kernel"
    string B_MID = "intermediate_dense_bias"
    string W_LOUT = "output_dense_kernel"
    string B_LOUT = "output_dense_bias"
    string LOUT_LN_BETA = "output_layernorm_beta"
    string LOUT_LN_GAMMA = "output_layernorm_gamma"
    string SQD_W = "squad_output_weights"
    string SQD_B = "squad_output_bias"
def builder_varseqlen.set_tensor_name(tensor, prefix, name)

def builder_varseqlen.set_output_name(layer, prefix, name, out_idx=0)

def builder_varseqlen.set_output_range(layer, maxval, out_idx=0)

def builder_varseqlen.attention_layer_opt(prefix, config, init_dict, network, input_tensor, mask_idx, cu_seqlens, max_seqlen)
Add the attention layer
def builder_varseqlen.skipln(prefix, config, init_dict, network, input_tensor, skip)
Add the skip layer
def builder_varseqlen.transformer_layer_opt(prefix, config, init_dict, network, input_tensor, mask_idx, cu_seqlens, max_seqlen)
Add the transformer layer
def builder_varseqlen.bert_model(config, init_dict, network, input_tensor, mask_idx, cu_seqlens, max_seqlen)
Create the bert model
def builder_varseqlen.squad_output(prefix, config, init_dict, network, input_tensor)
Create the squad output
def builder_varseqlen.load_tf_weights(inputbase, config)
Load the weights from the tensorflow checkpoint
def builder_varseqlen.onnx_to_trt_name(onnx_name)
Convert variable names in the ONNX checkpoint to the naming convention used in the TF version, as expected by the builder.
def builder_varseqlen.load_onnx_weights_and_quant(path, config)
Load the weights from the onnx checkpoint
def builder_varseqlen.emb_layernorm(builder, network, config, weights_dict, builder_config, max_sequence_length, max_batch_size)
def builder_varseqlen.build_engine(batch_size, workspace_size, sequence_length, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num)
def builder_varseqlen.main()
builder_varseqlen.TRT_LOGGER = trt.Logger(trt.Logger.INFO)
builder_varseqlen.handle = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL)
builder_varseqlen.plg_registry = trt.get_plugin_registry()
builder_varseqlen.emln_plg_creator2 = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic", "2", "")
builder_varseqlen.mha_plg_creator2 = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "2", "")
builder_varseqlen.skln_plg_creator2 = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "2", "")
builder_varseqlen.mha_plg_creator3 = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "3", "")
builder_varseqlen.skln_plg_creator3 = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "3", "")
string builder_varseqlen.WQ = "self_query_kernel"
string builder_varseqlen.BQ = "self_query_bias"
string builder_varseqlen.WK = "self_key_kernel"
string builder_varseqlen.BK = "self_key_bias"
string builder_varseqlen.WV = "self_value_kernel"
string builder_varseqlen.BV = "self_value_bias"
string builder_varseqlen.WQKV = "self_qkv_kernel"
string builder_varseqlen.BQKV = "self_qkv_bias"
string builder_varseqlen.W_AOUT = "attention_output_dense_kernel"
string builder_varseqlen.B_AOUT = "attention_output_dense_bias"
string builder_varseqlen.AOUT_LN_BETA = "attention_output_layernorm_beta"
string builder_varseqlen.AOUT_LN_GAMMA = "attention_output_layernorm_gamma"
string builder_varseqlen.W_MID = "intermediate_dense_kernel"
string builder_varseqlen.B_MID = "intermediate_dense_bias"
string builder_varseqlen.W_LOUT = "output_dense_kernel"
string builder_varseqlen.B_LOUT = "output_dense_bias"
string builder_varseqlen.LOUT_LN_BETA = "output_layernorm_beta"
string builder_varseqlen.LOUT_LN_GAMMA = "output_layernorm_gamma"
string builder_varseqlen.SQD_W = "squad_output_weights"
string builder_varseqlen.SQD_B = "squad_output_bias"