Functions
def parse_args(parser)
def reduce_tensor(tensor, num_gpus)
def init_distributed(args, world_size, rank, group_name)
def save_checkpoint(model, optimizer, epoch, config, amp_run, output_dir, model_name, local_rank, world_size)
def get_last_checkpoint_filename(output_dir, model_name)
def load_checkpoint(model, optimizer, epoch, config, amp_run, filepath, local_rank)
def evaluating(model)
def validate(model, criterion, valset, epoch, batch_iter, batch_size, world_size, collate_fn, distributed_run, rank, batch_to_gpu)
def adjust_learning_rate(iteration, epoch, optimizer, learning_rate, anneal_steps, anneal_factor, rank)
def main()
def train.parse_args(parser)
Parse command-line arguments.
def train.reduce_tensor(tensor, num_gpus)
def train.init_distributed(args, world_size, rank, group_name)
def train.save_checkpoint(model, optimizer, epoch, config, amp_run, output_dir, model_name, local_rank, world_size)
def train.get_last_checkpoint_filename(output_dir, model_name)
def train.load_checkpoint(model, optimizer, epoch, config, amp_run, filepath, local_rank)
def train.evaluating(model)
Temporarily switch to evaluation mode.
def train.validate(model, criterion, valset, epoch, batch_iter, batch_size, world_size, collate_fn, distributed_run, rank, batch_to_gpu)
Handles all the validation scoring and printing.
def train.adjust_learning_rate(iteration, epoch, optimizer, learning_rate, anneal_steps, anneal_factor, rank)
def train.main()