TensorRT 7.2.1.6
NVIDIA TensorRT
inference Namespace Reference

Classes

class  MeasureTime
 

Functions

def parse_args ()
 
def question_features (tokens, question)
 
def inference (features, tokens)
 
def print_single_query (eval_time_elapsed, prediction, nbest_json)
 
def parse_args (parser)
 
def checkpoint_from_distributed (state_dict)
 
def unwrap_distributed (state_dict)
 
def load_and_setup_model (model_name, parser, checkpoint, fp16_run, cpu_run, forward_is_infer=False)
 
def pad_sequences (batch)
 
def prepare_input_sequence (texts, cpu_run=False)
 
def main ()
 

Variables

 TRT_LOGGER = trt.Logger(trt.Logger.INFO)
 
 args = parse_args()
 
 paragraph_text = None
 
 squad_examples = None
 
 output_prediction_file = None
 
 f = open(args.passage_file, 'r')
 
 question_text = None
 
 tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=True)
 
int doc_stride = 128
 
 max_seq_length = args.sequence_length
 
 handle = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL)
 
int selected_profile = -1
 
 num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles
 
 profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)
 
 active_optimization_profile
 
int binding_idx_offset = selected_profile * num_binding_per_profile
 
tuple input_shape = (max_seq_length, args.batch_size)
 
 input_nbytes = trt.volume(input_shape) * trt.int32.itemsize
 
 stream = cuda.Stream()
 
list d_inputs = [cuda.mem_alloc(input_nbytes) for binding in range(3)]
 
 h_output = cuda.pagelocked_empty(tuple(context.get_binding_shape(binding_idx_offset + 3)), dtype=np.float32)
 
 d_output = cuda.mem_alloc(h_output.nbytes)
 
 all_predictions = collections.OrderedDict()
 
 features = question_features(example.doc_tokens, example.question_text)
 
 eval_time_elapsed
 
 prediction
 
 nbest_json
 
 doc_tokens = dp.convert_doc_tokens(paragraph_text)
 
list EXIT_CMDS = ["exit", "quit"]
 

Function Documentation

◆ parse_args() [1/2]

def inference.parse_args()
Parse command-line arguments.
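A minimal, hypothetical reconstruction of this parser, based only on the options referenced elsewhere on this page (args.passage_file, args.vocab_file, args.sequence_length, args.batch_size); flag names and defaults are assumptions:

    import argparse

    def parse_args():
        # Hypothetical flags; the real script may name or default them differently.
        parser = argparse.ArgumentParser(description='BERT QA inference with TensorRT')
        parser.add_argument('-e', '--engine', help='Path to a serialized TensorRT engine')
        parser.add_argument('-v', '--vocab-file', help='BERT vocabulary file')
        parser.add_argument('-pf', '--passage-file', help='Text file containing the context passage')
        parser.add_argument('-s', '--sequence-length', type=int, default=384)
        parser.add_argument('-b', '--batch-size', type=int, default=1)
        return parser.parse_args()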

◆ question_features()

def inference.question_features(tokens, question)
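No description is generated for this function; from the variables documented below, it converts a tokenized passage and a question string into BERT input features. A usage sketch, where dp is the sample's data_processing helper and paragraph_text is the context passage (see Variable Documentation):

    # Usage sketch; dp and paragraph_text are the module-level objects documented below.
    doc_tokens = dp.convert_doc_tokens(paragraph_text)
    features = question_features(doc_tokens, "What does TensorRT accelerate?")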

◆ inference()

def inference.inference(features, tokens)
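The execution step this function presumably performs is sketched below, using the buffers, stream, and binding offset documented under Variable Documentation; the feature field names (input_ids, segment_ids, input_mask) are assumptions:

    import numpy as np
    import pycuda.driver as cuda

    def run_trt_inference(context, feature):
        # Copy the three int32 input tensors into the preallocated device buffers.
        cuda.memcpy_htod_async(d_inputs[0], np.ascontiguousarray(feature.input_ids.ravel()), stream)
        cuda.memcpy_htod_async(d_inputs[1], np.ascontiguousarray(feature.segment_ids.ravel()), stream)
        cuda.memcpy_htod_async(d_inputs[2], np.ascontiguousarray(feature.input_mask.ravel()), stream)

        # Enqueue the engine; bindings belonging to profiles before the selected one
        # are left as null pointers, hence the binding_idx_offset padding.
        bindings = [0] * binding_idx_offset + [int(d) for d in d_inputs] + [int(d_output)]
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

        # Copy the logits back into the pagelocked host buffer and wait for the stream.
        cuda.memcpy_dtoh_async(h_output, d_output, stream)
        stream.synchronize()
        return h_output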

◆ print_single_query()

def inference.print_single_query(eval_time_elapsed, prediction, nbest_json)

◆ parse_args() [2/2]

def inference.parse_args(parser)
Parse command-line arguments.

◆ checkpoint_from_distributed()

def inference.checkpoint_from_distributed(state_dict)
Checks whether the checkpoint was generated by DistributedDataParallel. DDP wraps the
model in an additional "module." prefix, which must be unwrapped for single-GPU inference.
:param state_dict: model's state dict
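A sketch of the check the docstring describes (the exact implementation may differ):

    def checkpoint_from_distributed(state_dict):
        # A DistributedDataParallel checkpoint prefixes every parameter name with "module.".
        return any(key.startswith('module.') for key in state_dict)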

◆ unwrap_distributed()

def inference.unwrap_distributed(state_dict)
Unwraps a model checkpoint saved from DistributedDataParallel. DDP wraps the model in an
additional "module." prefix, which must be removed for single-GPU inference.
:param state_dict: model's state dict
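A sketch of the unwrapping described above (the exact implementation may differ):

    def unwrap_distributed(state_dict):
        # Strip the leading "module." that DistributedDataParallel adds to every
        # parameter name so the weights load into a plain, single-GPU model.
        return {key.replace('module.', '', 1): value for key, value in state_dict.items()}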

◆ load_and_setup_model()

def inference.load_and_setup_model(model_name, parser, checkpoint, fp16_run, cpu_run, forward_is_infer=False)
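No description is generated for this function. A hedged sketch of a typical load-and-setup routine consistent with the signature; get_model() is a hypothetical factory, and the checkpoint layout is an assumption:

    import torch

    def load_and_setup_model(model_name, parser, checkpoint, fp16_run, cpu_run,
                             forward_is_infer=False):
        # Hypothetical factory that builds Tacotron 2 or WaveGlow from parsed arguments.
        model = get_model(model_name, parser, cpu_run, forward_is_infer)

        if checkpoint is not None:
            device = 'cpu' if cpu_run else 'cuda'
            # Assumes the checkpoint stores the weights under a 'state_dict' key.
            state_dict = torch.load(checkpoint, map_location=device)['state_dict']
            if checkpoint_from_distributed(state_dict):
                state_dict = unwrap_distributed(state_dict)
            model.load_state_dict(state_dict)

        if fp16_run:
            model.half()
        model.eval()
        return model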

◆ pad_sequences()

def inference.pad_sequences(batch)
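A sketch of padding a batch of encoded text sequences to a common length, which is what this helper appears to do on the text-to-speech path; the exact return layout is an assumption:

    import torch

    def pad_sequences(batch):
        # Sort the encoded texts by length (longest first) and right-pad with zeros
        # so the batch can be stacked into one LongTensor.
        lengths = torch.LongTensor([len(seq) for seq in batch])
        lengths, ids_sorted = torch.sort(lengths, descending=True)
        padded = torch.zeros(len(batch), int(lengths[0]), dtype=torch.long)
        for row, idx in enumerate(ids_sorted):
            seq = batch[int(idx)]
            padded[row, :len(seq)] = torch.as_tensor(seq, dtype=torch.long)
        return padded, lengths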

◆ prepare_input_sequence()

def inference.prepare_input_sequence(texts, cpu_run=False)
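A sketch of turning raw strings into padded ID tensors for Tacotron 2 inference; the text_to_sequence import and cleaner name are assumptions borrowed from the public Tacotron 2 sample:

    import torch
    from tacotron2.text import text_to_sequence  # assumed text frontend

    def prepare_input_sequence(texts, cpu_run=False):
        # Encode each string into character/phoneme IDs, then pad into one batch.
        encoded = [torch.as_tensor(text_to_sequence(t, ['english_cleaners']), dtype=torch.long)
                   for t in texts]
        text_padded, input_lengths = pad_sequences(encoded)  # see pad_sequences() above
        if not cpu_run:
            text_padded = text_padded.cuda()
            input_lengths = input_lengths.cuda()
        return text_padded, input_lengths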

◆ main()

def inference.main()
Launches text-to-speech inference. Inference is executed on a single GPU or the CPU.
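A condensed, hypothetical sketch of the pipeline this describes, assuming main() glues together the helpers above; argument names, model names, return signatures, and the WaveGlow sigma are assumptions:

    import argparse
    import torch

    def main():
        parser = argparse.ArgumentParser(description='Tacotron 2 / WaveGlow inference')
        parser = parse_args(parser)            # parse_args() [2/2] above adds the options
        args, _ = parser.parse_known_args()

        tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
                                          args.fp16, args.cpu, forward_is_infer=True)
        waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow,
                                        args.fp16, args.cpu, forward_is_infer=True)

        texts = ["TensorRT accelerates deep learning inference."]
        sequences, lengths = prepare_input_sequence(texts, cpu_run=args.cpu)

        with torch.no_grad():
            # infer() signatures follow the public Tacotron 2 / WaveGlow samples.
            mel, mel_lengths, alignments = tacotron2.infer(sequences, lengths)
            audio = waveglow.infer(mel, sigma=0.6)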

Variable Documentation

◆ TRT_LOGGER

inference.TRT_LOGGER = trt.Logger(trt.Logger.INFO)
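The logger is typically handed to a TensorRT runtime when deserializing the prebuilt BERT engine; the engine path argument name below is an assumption:

    import tensorrt as trt

    TRT_LOGGER = trt.Logger(trt.Logger.INFO)

    # Deserialize a prebuilt engine and create an execution context.
    with open(args.engine, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()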

◆ args

inference.args = parse_args()

◆ paragraph_text

string inference.paragraph_text = None

◆ squad_examples

inference.squad_examples = None

◆ output_prediction_file

inference.output_prediction_file = None

◆ f

inference.f = open(args.passage_file, 'r')

◆ question_text

string inference.question_text = None

◆ tokenizer

inference.tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=True)

◆ doc_stride

int inference.doc_stride = 128

◆ max_seq_length

inference.max_seq_length = args.sequence_length

◆ handle

inference.handle = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL)
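Loading libnvinfer_plugin.so with RTLD_GLOBAL makes the BERT plugins visible to the process; registering them with TensorRT's plugin registry usually follows. A sketch:

    import ctypes
    import tensorrt as trt

    # Link the plugin library into the process, then register all shipped plugins
    # with TensorRT's global registry so the engine's plugin layers can be found.
    handle = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL)
    trt.init_libnvinfer_plugins(TRT_LOGGER, "")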

◆ selected_profile

inference.selected_profile = -1

◆ num_binding_per_profile

inference.num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles

◆ profile_shape

inference.profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)

◆ active_optimization_profile

inference.active_optimization_profile

◆ binding_idx_offset

int inference.binding_idx_offset = selected_profile * num_binding_per_profile
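Taken together, selected_profile, num_binding_per_profile, profile_shape, active_optimization_profile, and binding_idx_offset implement the usual TensorRT profile selection: find an optimization profile whose shape range covers (max_seq_length, batch_size), activate it, and offset every binding index by its position. A sketch, assuming the (sequence, batch) input layout documented below:

    # Pick the first optimization profile whose min/max shapes cover the request.
    selected_profile = -1
    num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles
    for idx in range(engine.num_optimization_profiles):
        profile_shape = engine.get_profile_shape(profile_index=idx,
                                                 binding=idx * num_binding_per_profile)
        # profile_shape is (min, opt, max); dimension 0 is sequence length, 1 is batch.
        if profile_shape[0][0] <= max_seq_length <= profile_shape[2][0] and \
           profile_shape[0][1] <= args.batch_size <= profile_shape[2][1]:
            selected_profile = idx
            break

    context.active_optimization_profile = selected_profile
    # Bindings of profile N start at N * num_binding_per_profile.
    binding_idx_offset = selected_profile * num_binding_per_profile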

◆ input_shape

tuple inference.input_shape = (max_seq_length, args.batch_size)

◆ input_nbytes

inference.input_nbytes = trt.volume(input_shape) * trt.int32.itemsize

◆ stream

inference.stream = cuda.Stream()

◆ d_inputs

list inference.d_inputs = [cuda.mem_alloc(input_nbytes) for binding in range(3)]

◆ h_output

inference.h_output = cuda.pagelocked_empty(tuple(context.get_binding_shape(binding_idx_offset + 3)), dtype=np.float32)

◆ d_output

inference.d_output = cuda.mem_alloc(h_output.nbytes)
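The buffer variables above set up one query's worth of I/O: three int32 input tensors of shape (max_seq_length, batch_size), a CUDA stream, and a pagelocked host buffer sized from the engine's output binding. A sketch of how they fit together:

    import numpy as np
    import pycuda.driver as cuda
    import pycuda.autoinit  # creates a CUDA context
    import tensorrt as trt

    input_shape = (max_seq_length, args.batch_size)
    input_nbytes = trt.volume(input_shape) * trt.int32.itemsize

    stream = cuda.Stream()

    # One device buffer per input binding (input_ids, segment_ids, input_mask).
    d_inputs = [cuda.mem_alloc(input_nbytes) for _ in range(3)]

    # Fix the dynamic input shapes, then size the output from binding 3 of the
    # selected profile and mirror it on the device.
    for i in range(3):
        context.set_binding_shape(binding_idx_offset + i, input_shape)
    h_output = cuda.pagelocked_empty(tuple(context.get_binding_shape(binding_idx_offset + 3)),
                                     dtype=np.float32)
    d_output = cuda.mem_alloc(h_output.nbytes)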

◆ all_predictions

inference.all_predictions = collections.OrderedDict()

◆ features

inference.features = question_features(example.doc_tokens, example.question_text)

◆ eval_time_elapsed

inference.eval_time_elapsed

◆ prediction

inference.prediction

◆ nbest_json

inference.nbest_json

◆ doc_tokens

inference.doc_tokens = dp.convert_doc_tokens(paragraph_text)

◆ EXIT_CMDS

list inference.EXIT_CMDS = ["exit", "quit"]