Classes

class BertConfig
Functions

def set_tensor_name (tensor, prefix, name)
def set_output_name (layer, prefix, name, out_idx=0)
def set_output_range (layer, maxval, out_idx=0)
def get_mha_dtype (config)
def attention_layer_opt (prefix, config, init_dict, network, input_tensor, imask)
def skipln (prefix, config, init_dict, network, input_tensor, skip, bias=None)
def custom_fc (config, network, input_tensor, out_dims, W)
def transformer_layer_opt (prefix, config, init_dict, network, input_tensor, imask)
def bert_model (config, init_dict, network, input_tensor, input_mask)
def squad_output (prefix, config, init_dict, network, input_tensor)
def load_tf_weights (inputbase, config)
def onnx_to_trt_name (onnx_name)
def load_onnx_weights_and_quant (path, config)
def emb_layernorm (builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes)
def build_engine (batch_sizes, workspace_size, sequence_lengths, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num)
def generate_calibration_cache (sequence_lengths, workspace_size, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num)
def main ()
Variables

TRT_LOGGER = trt.Logger(trt.Logger.INFO)
handle = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL)
plg_registry = trt.get_plugin_registry()
emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic", "1", "")
qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "1", "")
skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "1", "")
fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic", "1", "")
string WQ = "self_query_kernel"
string BQ = "self_query_bias"
string WK = "self_key_kernel"
string BK = "self_key_bias"
string WV = "self_value_kernel"
string BV = "self_value_bias"
string WQKV = "self_qkv_kernel"
string BQKV = "self_qkv_bias"
string W_AOUT = "attention_output_dense_kernel"
string B_AOUT = "attention_output_dense_bias"
string AOUT_LN_BETA = "attention_output_layernorm_beta"
string AOUT_LN_GAMMA = "attention_output_layernorm_gamma"
string W_MID = "intermediate_dense_kernel"
string B_MID = "intermediate_dense_bias"
string W_LOUT = "output_dense_kernel"
string B_LOUT = "output_dense_bias"
string LOUT_LN_BETA = "output_layernorm_beta"
string LOUT_LN_GAMMA = "output_layernorm_gamma"
string SQD_W = "squad_output_weights"
string SQD_B = "squad_output_bias"
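The string constants above act as per-layer keys into the weight dictionary; the builder combines them with a layer prefix when looking up tensors. A minimal sketch of that lookup pattern (the contents of `init_dict` and the `"l0_"` prefix format are assumptions for illustration, not taken from this page):

    import numpy as np
    import tensorrt as trt

    WQKV = "self_qkv_kernel"   # same values as the module constants above
    BQKV = "self_qkv_bias"

    # Hypothetical weight dict keyed by "<layer prefix><constant>".
    H = 768
    init_dict = {
        "l0_" + WQKV: trt.Weights(np.zeros((3 * H, H), np.float32)),
        "l0_" + BQKV: trt.Weights(np.zeros(3 * H, np.float32)),
    }

    prefix = "l0_"                    # layer 0
    Wqkv = init_dict[prefix + WQKV]   # fused QKV kernel for this layer
    Bqkv = init_dict[prefix + BQKV]   # fused QKV bias for this layer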
def builder.set_tensor_name (tensor, prefix, name)
def builder.set_output_name (layer, prefix, name, out_idx=0)
def builder.set_output_range (layer, maxval, out_idx=0)
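The three naming/range helpers above are thin wrappers. A plausible sketch of their bodies, inferred only from the signatures (treat the details as assumptions):

    def set_tensor_name(tensor, prefix, name):
        # Give a network tensor a deterministic, prefix-qualified name.
        tensor.name = prefix + name

    def set_output_name(layer, prefix, name, out_idx=0):
        # Name one of a layer's outputs via set_tensor_name.
        set_tensor_name(layer.get_output(out_idx), prefix, name)

    def set_output_range(layer, maxval, out_idx=0):
        # Set a symmetric INT8 dynamic range [-maxval, maxval] on an output.
        layer.get_output(out_idx).set_dynamic_range(-maxval, maxval)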
def builder.get_mha_dtype (config)
def builder.attention_layer_opt (prefix, config, init_dict, network, input_tensor, imask)
Add the attention layer
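The attention layer is built around the CustomQKVToContextPluginDynamic creator registered above. A minimal sketch of instantiating such a plugin; the field names follow the demo plugins' conventions and should be treated as assumptions:

    import numpy as np
    import tensorrt as trt

    def add_qkv_to_context(network, qkv2_plg_creator, fused_qkv, imask,
                           hidden_size=768, num_heads=12):
        # Assumed plugin fields: data type, head geometry, and mask flag.
        fields = trt.PluginFieldCollection([
            trt.PluginField("type_id", np.array([0], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("has_mask", np.array([1], np.int32), trt.PluginFieldType.INT32),
        ])
        plugin = qkv2_plg_creator.create_plugin("qkv2ctx", fields)
        # The plugin consumes the fused QKV tensor plus the input mask.
        return network.add_plugin_v2([fused_qkv, imask], plugin)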
def builder.skipln (prefix, config, init_dict, network, input_tensor, skip, bias=None)
Add the skip layer
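skipln likewise wraps the CustomSkipLayerNormPluginDynamic creator, fusing the residual add with layer normalization. A hedged sketch under the same assumptions about field names:

    import numpy as np
    import tensorrt as trt

    def add_skipln(network, skln_plg_creator, input_tensor, skip, beta, gamma):
        # beta/gamma are the layernorm parameters as float32 numpy arrays.
        fields = trt.PluginFieldCollection([
            trt.PluginField("ld", np.array([beta.size], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("type_id", np.array([0], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("beta", beta, trt.PluginFieldType.FLOAT32),
            trt.PluginField("gamma", gamma, trt.PluginFieldType.FLOAT32),
        ])
        plugin = skln_plg_creator.create_plugin("skipln", fields)
        # Two inputs: the branch output and the residual to add back in.
        return network.add_plugin_v2([input_tensor, skip], plugin)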
def builder.custom_fc (config, network, input_tensor, out_dims, W)
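custom_fc routes a fully connected layer through CustomFCPluginDynamic rather than a builtin layer. A sketch with the same caveat that the plugin field names are assumptions:

    import numpy as np
    import tensorrt as trt

    def custom_fc_sketch(network, fc_plg_creator, input_tensor, out_dims, W):
        # W is the kernel as a float32 numpy array.
        fields = trt.PluginFieldCollection([
            trt.PluginField("out_dims", np.array([out_dims], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("type_id", np.array([0], np.int32), trt.PluginFieldType.INT32),
            trt.PluginField("W", W, trt.PluginFieldType.FLOAT32),
        ])
        plugin = fc_plg_creator.create_plugin("fcplugin", fields)
        return network.add_plugin_v2([input_tensor], plugin)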
def builder.transformer_layer_opt (prefix, config, init_dict, network, input_tensor, imask)
Add the transformer layer
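A transformer layer chains the pieces above: attention, a skip-layernorm, the feed-forward block, and a second skip-layernorm. A structural sketch, assuming the module's own helpers and a `config.hidden_size` attribute are in scope; the weight-name prefixes here are illustrative:

    def transformer_layer_sketch(prefix, config, init_dict, network, input_tensor, imask):
        # Self-attention followed by its residual layernorm.
        attn = attention_layer_opt(prefix + "attention_", config, init_dict,
                                   network, input_tensor, imask)
        attn_ln = skipln(prefix + "attention_output_layernorm_", config, init_dict,
                         network, attn.get_output(0), input_tensor)
        # Feed-forward block: expand to 4x hidden, project back down.
        # (The GELU activation between the two FCs is omitted for brevity.)
        mid = custom_fc(config, network, attn_ln.get_output(0),
                        4 * config.hidden_size, init_dict[prefix + W_MID])
        out = custom_fc(config, network, mid.get_output(0),
                        config.hidden_size, init_dict[prefix + W_LOUT])
        # Second residual layernorm closes the layer.
        return skipln(prefix + "output_layernorm_", config, init_dict,
                      network, out.get_output(0), attn_ln.get_output(0))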
def builder.bert_model (config, init_dict, network, input_tensor, input_mask)
Create the BERT model
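bert_model stacks the transformer layers, feeding each layer's output into the next. A minimal sketch; the "lN_" prefix format and the `config.num_hidden_layers` attribute are assumptions:

    def bert_model_sketch(config, init_dict, network, input_tensor, input_mask):
        prev = input_tensor
        for layer in range(config.num_hidden_layers):
            prefix = "l{}_".format(layer)   # per-layer weight-name prefix
            out = transformer_layer_opt(prefix, config, init_dict,
                                        network, prev, input_mask)
            prev = out.get_output(0)        # chain into the next layer
        return prev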
def builder.squad_output (prefix, config, init_dict, network, input_tensor)
Create the SQuAD output
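The SQuAD head is a projection from the hidden size down to two logits per token (start and end position). A sketch using the module's custom_fc and weight constants; bias handling and output reshaping are omitted:

    def squad_output_sketch(prefix, config, init_dict, network, input_tensor):
        W = init_dict[prefix + SQD_W]   # squad_output_weights
        dense = custom_fc(config, network, input_tensor, 2, W)
        set_output_name(dense, prefix, "squad_logits")
        return dense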
def builder.load_tf_weights (inputbase, config)
Load the weights from the TensorFlow checkpoint
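A hedged sketch of the checkpoint-reading pattern, using the standard TensorFlow checkpoint reader; the optimizer-state filtering is illustrative:

    import numpy as np
    import tensorflow as tf

    def load_tf_weights_sketch(inputbase):
        reader = tf.train.load_checkpoint(inputbase)
        weights = {}
        for name in reader.get_variable_to_shape_map():
            if "adam" in name.lower():   # skip optimizer state
                continue
            weights[name] = np.ascontiguousarray(reader.get_tensor(name))
        return weights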
def builder.onnx_to_trt_name (onnx_name)
Convert variable names in the ONNX checkpoint to the TF-style naming convention expected by the builder
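The mapping is essentially string rewriting from ONNX-exported names to the TF-style keys listed under Variables above. An illustrative, deliberately incomplete sketch (the real rewrite table is larger):

    def onnx_to_trt_name_sketch(onnx_name):
        # Illustrative rewrites only; not the module's full mapping.
        name = onnx_name.lower()
        name = name.replace("bert.encoder.layer.", "l")   # "bert.encoder.layer.0." -> "l0."
        name = name.replace(".", "_")
        name = name.replace("weight", "kernel")
        return name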
def builder.load_onnx_weights_and_quant (path, config)
Load the weights from the ONNX checkpoint
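A minimal sketch of pulling initializers out of an ONNX file with the onnx package, assuming the module's onnx_to_trt_name is in scope; quantization-scale extraction is omitted:

    import onnx
    from onnx import numpy_helper

    def load_onnx_weights_sketch(path):
        model = onnx.load(path)
        weights = {}
        for init in model.graph.initializer:
            # Convert each initializer to numpy and rename to the TF-style key.
            weights[onnx_to_trt_name(init.name)] = numpy_helper.to_array(init)
        return weights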
def builder.emb_layernorm (builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes)
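Besides creating the embedding layernorm plugin, this step registers the dynamic input shapes; the usual TensorRT pattern is one optimization profile per (batch, sequence) combination. A sketch of that pattern; the input names and the (sequence, batch) shape ordering are assumptions:

    import tensorrt as trt

    def add_profiles_sketch(builder, builder_config, batch_sizes, sequence_lengths):
        for b in batch_sizes:
            for s in sequence_lengths:
                profile = builder.create_optimization_profile()
                # min == opt == max pins each profile to one shape.
                for name in ("input_ids", "segment_ids", "input_mask"):
                    profile.set_shape(name, (s, b), (s, b), (s, b))
                builder_config.add_optimization_profile(profile)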
def builder.build_engine (batch_sizes, workspace_size, sequence_lengths, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num)
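The overall build follows the standard TensorRT explicit-batch recipe. A condensed sketch using the TRT 7/8-era Python API this demo targets, and the module's TRT_LOGGER:

    import tensorrt as trt

    def build_engine_sketch(workspace_size, use_fp16=True):
        builder = trt.Builder(TRT_LOGGER)
        flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        network = builder.create_network(flags)
        config = builder.create_builder_config()
        config.max_workspace_size = workspace_size * (1 << 30)   # GiB -> bytes
        if use_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
        # ... populate network via emb_layernorm / bert_model / squad_output ...
        return builder.build_engine(network, config)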
def builder.generate_calibration_cache (sequence_lengths, workspace_size, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num)
The BERT demo needs a separate engine-building path to generate the calibration cache, because the SkipLayerNorm (SLN) and multi-head attention (MHA) plugins must be configured in FP32 mode while the cache is generated, and in INT8 mode when the actual engine is built. The cache is produced by examining a sample of the training data and can be reused across different configurations.
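In code, the two paths differ mainly in builder-config flags: the calibration pass keeps the plugins in FP32 and attaches an INT8 calibrator so TensorRT can record dynamic ranges into the cache file, while the engine build enables the reduced-precision flags. A sketch of the config side; the calibrator object is assumed to be defined elsewhere in the demo:

    import tensorrt as trt

    def configure_for_calibration(config, calibrator):
        # INT8 flag makes TensorRT run calibration and emit the cache;
        # FP16 is left off so the SLN/MHA plugins execute in FP32 here.
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calibrator

    def configure_for_int8_engine(config, cache_calibrator):
        # The actual engine build reuses the cache through the same
        # calibrator interface, with INT8 (and optionally FP16) enabled.
        config.set_flag(trt.BuilderFlag.INT8)
        config.set_flag(trt.BuilderFlag.FP16)
        config.int8_calibrator = cache_calibrator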
def builder.main ()