def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, test_imp=False):
    """
    Evaluate the model on the val set (descriptive questions only).

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ClevrerValMeter): meter instance to record and calculate
            the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): unused here; kept for signature parity with the
            training counterpart.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()
    for cur_iter, sampled_batch in enumerate(val_loader):
        # Pre-extracted video features plus the tokenized descriptive
        # question, its attention mask and the ground-truth answer index.
        video_ft = sampled_batch['res_ft']
        des_q = sampled_batch['question_dict']['question']
        attn_masks = sampled_batch['question_dict']['attention_mask']
        des_ans = sampled_batch['question_dict']['ans']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            video_ft = video_ft.cuda(non_blocking=True)
            des_q = des_q.cuda(non_blocking=True)
            attn_masks = attn_masks.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
        val_meter.data_toc()
        # Explicitly declare reduction to mean.
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        pred_des_ans = model(video_ft, des_q, attn_masks)
        loss_des_val = des_loss_fun(pred_des_ans, des_ans)
        # Compute the top-1 / top-5 error rates (in percent) for this batch.
        # NOTE(review): unlike the training loops in this file there is no
        # cross-GPU all_reduce here, so these are per-device numbers.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans, (1, 5))
        top1_err, top5_err = [(1.0 - x / pred_des_ans.size(0)) * 100.0
                              for x in num_topks_correct]
        # Multiple-choice stats are not evaluated in this routine; the meter
        # API still expects the slots, so pass None placeholders.
        loss_mc_val = None
        mc_opt_err, mc_q_err = None, None
        mb_size_mc = None
        # Copy the stats from GPU to CPU (sync point).
        loss_des_val, top1_err, top5_err = (loss_des_val.item(),
                                            top1_err.item(), top5_err.item())
        val_meter.iter_toc()
        # Meter signature: top1_err, top5_err, mc_opt_err, mc_q_err,
        # loss_des, loss_mc, mb_size_des, mb_size_mc.
        val_meter.update_stats(top1_err, top5_err, mc_opt_err, mc_q_err,
                               loss_des_val, loss_mc_val, des_ans.size(0),
                               mb_size_mc)
        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()
    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
def train_epoch(train_dloader, model, optimizer, cur_epoch, cfg):
    """
    Run one training epoch over `train_dloader`, showing the running loss
    in a tqdm progress bar.

    Args:
        train_dloader (loader): training data loader.
        model (model): the model to train (set to train mode here).
        optimizer (optim): optimizer updating the model's parameters.
        cur_epoch (int): index of the current epoch.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    model.train()
    progress = tqdm(train_dloader, ncols=80)
    num_batches = len(train_dloader)

    def _move_batch_to_gpu(batch_inputs, batch_labels, extras):
        # Transfer one batch (inputs, labels and any extra tensors) onto
        # the current GPU; lists and the extras dict are updated in place.
        if isinstance(batch_inputs, (list, )):
            for idx in range(len(batch_inputs)):
                batch_inputs[idx] = batch_inputs[idx].cuda(non_blocking=True)
        else:
            batch_inputs = batch_inputs.cuda(non_blocking=True)
        batch_labels = batch_labels.cuda()
        for name, item in extras.items():
            if isinstance(item, (list, )):
                for idx in range(len(item)):
                    item[idx] = item[idx].cuda(non_blocking=True)
            else:
                extras[name] = item.cuda(non_blocking=True)
        return batch_inputs, batch_labels

    for batch_idx, (inputs, labels, _, extra_data) in enumerate(progress):
        if cfg.NUM_GPUS:
            inputs, labels = _move_batch_to_gpu(inputs, labels, extra_data)
        # Learning rate follows the schedule evaluated at the fractional epoch.
        lr = optim.get_epoch_lr(cur_epoch + float(batch_idx) / num_batches, cfg)
        optim.set_lr(optimizer, lr)
        # Forward pass (detection models additionally consume the boxes).
        if cfg.DETECTION.ENABLE:
            preds = model(inputs, extra_data["boxes"])
        else:
            preds = model(inputs)
        # Reduction is declared explicitly so the loss is a scalar mean.
        loss = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")(
            preds, labels)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        progress.set_description("Train_loss: %.4f" % loss.cpu().item())
def train_epoch(self, train_loader, model, optimizer, train_meter, cur_epoch,
                cfg, writer=None):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to
            writer Tensorboard log.
    Returns:
        float: samples-per-second throughput for the whole job
            (total samples across workers / slowest worker's elapsed time).
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    start = time.time()
    # Global batch size across all shards; dSize is the epoch sample count.
    btch = cfg.TRAIN.BATCH_SIZE * self.cfg.NUM_SHARDS
    rankE = os.environ.get("RANK", None)
    worldE = os.environ.get("WORLD_SIZE", None)
    dSize = data_size * btch
    self.logger.info(
        "Train Epoch {} dLen {} Batch {} dSize {} localRank {} rank {} {} world {} {}"
        .format(cur_epoch, data_size, btch, dSize, du.get_local_rank(),
                du.get_rank(), rankE, du.get_world_size(), worldE))
    tot = 0  # samples processed by this worker
    first = True  # log tensor shapes only for the first batch
    # Per-worker prediction/label buffers (filled only for binary classifiers)
    # used for epoch-level metrics after an all_gather.
    predsAll = []
    labelsAll = []
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        tot += len(labels)
        if isinstance(inputs, (list, )):
            # Multi-pathway input (e.g. SlowFast slow/fast streams).
            if first:
                self.logger.info(
                    "rank {} LEN {} {} shape Slow {} Fast {} {} tot {}".
                    format(du.get_rank(), len(labels), len(inputs),
                           inputs[0].shape, inputs[1].shape, labels[0].shape,
                           tot))
                first = False
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            if first:
                self.logger.info(
                    "rank {} LEN {} shape {} {} tot {}".format(
                        du.get_rank(), len(labels), inputs.shape,
                        labels[0].shape, tot))
                first = False
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate at the fractional-epoch position.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass.
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(
            cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            # Detection path: only the loss is tracked (no top-k errors).
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
            ite = data_size * cur_epoch + cur_iter
            if du.is_master_proc():
                self.logger.log_row(name='TrainLoss', iter=ite, loss=loss,
                                    description="train loss")
                self.logger.log_row(name='TrainLr', iter=ite, lr=lr,
                                    description="train learn rate")
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Multi-label: top-k errors are undefined; only reduce loss.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Binary classifier - save preds / labels for metrics
                if cfg.MODEL.NUM_CLASSES == 2:
                    # Keep only the positive-class score column.
                    predsAll.extend(preds.detach().cpu().numpy()[:, -1])
                    labelsAll.extend(labels.detach().cpu().numpy())
                # Compute the errors; cap k at the number of classes.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, min(5, cfg.MODEL.NUM_CLASSES)))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(top1_err, top5_err, loss, lr,
                                     inputs[0].size(0) * cfg.NUM_GPUS)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        # NOTE(review): iteration stats/plot placement reconstructed from a
        # collapsed source — confirm log_iter_stats is meant to run on the
        # detection path as well.
        stats = train_meter.log_iter_stats(cur_epoch, cur_iter, predsAll,
                                           labelsAll)
        ite = dSize * cur_epoch + btch * (cur_iter + 1)
        self.plotStats(stats, ite, 'TrainIter')
        train_meter.iter_tic()
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)
    # Log epoch stats. Binary-classifier preds/labels are gathered from all
    # workers so epoch metrics are computed over the full dataset.
    gathered = du.all_gather([
        torch.tensor(predsAll).to(torch.device("cuda")),
        torch.tensor(labelsAll).to(torch.device("cuda"))
    ])
    stats = train_meter.log_epoch_stats(cur_epoch,
                                        gathered[0].detach().cpu().numpy(),
                                        gathered[1].detach().cpu().numpy())
    ite = (cur_epoch + 1) * dSize
    self.plotStats(stats, ite, 'TrainEpoch')
    train_meter.reset()
    end = time.time()
    el = end - start
    # Throughput: total samples (summed over workers) divided by the
    # slowest worker's elapsed time (MAX-reduced).
    totAll = du.all_reduce([torch.tensor(tot).cuda()], average=False)
    tSum = totAll[0].item()
    elT = torch.tensor(el).cuda()
    elMax = du.all_reduce([elT], op=dist.ReduceOp.MAX,
                          average=False)[0].item()
    jobRate = tSum / elMax
    self.logger.info(
        "totSampCnt {} workerSampCnt {} eTimeMax {} eTimeWorker {} SampPerSecJob {:.1f} SampPerSecWorker {:.1f}"
        .format(tSum, tot, elMax, el, jobRate, tot / el))
    return jobRate
question = sample_batched['question_dict']['question'] ans = sample_batched['question_dict']['ans'] break print("Model") vocab_len = dataset.get_vocab_len() ans_vocab_len = dataset.get_ans_vocab_len() vocab = dataset.get_vocab() name = cfg.MODEL.MODEL_NAME model = MODEL_REGISTRY.get(name)(cfg, vocab_len, ans_vocab_len, vocab) print("Embedding layer: ") print(model.embed_layer.weight) print("Pass through model") print("Question = {}".format(question)) if is_des: pred_des_ans = model(question, True) print("Model output = {}".format(pred_des_ans)) des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean") loss = des_loss_fun(pred_des_ans, ans) else: pred_mc_ans = model(question, False) print("Model output = {}".format(pred_mc_ans)) mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean") loss = mc_loss_fun(pred_mc_ans, ans) print("Loss = {}".format(loss)) loss.backward() print("Embed Grad 0:5:") print(model.embed_layer.weight.grad[0:5])
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg,
                test_imp=False):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): when True, print detailed per-batch diagnostics and
            stop after 4 iterations (implementation smoke test).
    """
    test_counter = 0
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        # Samples 2 batches: one descriptive ('des') and one multiple-choice
        # ('mc'). There are many more descriptive questions, so some batches
        # contain only the descriptive part (has_mc is False).
        des_batch = sampled_batch['des']
        des_q = des_batch['question_dict']['question']
        des_ans = des_batch['question_dict']['ans']
        des_len = des_batch['question_dict']['len']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            des_len = des_len.cuda(non_blocking=True)
        has_mc = sampled_batch['has_mc'][0]
        if has_mc:
            mc_batch = sampled_batch['mc']
            mc_q = mc_batch['question_dict']['question']
            mc_ans = mc_batch['question_dict']['ans']
            mc_len = mc_batch['question_dict']['len']
            if cfg.NUM_GPUS:
                mc_q = mc_q.cuda(non_blocking=True)
                mc_ans = mc_ans.cuda()
                mc_len = mc_len.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        # The two question types are trained with SEPARATE optimizer steps:
        # first the descriptive batch, then (if present) the multiple-choice
        # batch. Statement order matters here.
        # -- Descriptive pass (cross-entropy over answer vocabulary).
        pred_des_ans = model(des_q, True)
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        loss = des_loss_fun(pred_des_ans, des_ans)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Save for stats
        loss_des_val = loss
        # -- Multiple-choice pass (per-option BCE with logits).
        loss_mc_val = None
        if has_mc:
            pred_mc_ans = model(mc_q, False)
            mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean")
            loss = mc_loss_fun(pred_mc_ans, mc_ans)  # Multiply by 4
            # check Nan Loss.
            misc.check_nan_losses(loss)
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Save for stats
            loss_mc_val = loss
        # NOTE: a non-separated variant (single backward over
        # loss_des + loss_mc for the same batch) previously lived here as
        # commented-out code; it was removed as dead code.
        top1_err, top5_err = None, None
        # Compute the descriptive-question errors.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans,
                                                  (1, 5))
        top1_err, top5_err = [(1.0 - x / pred_des_ans.size(0)) * 100.0
                              for x in num_topks_correct]
        if has_mc:
            # Binarize option predictions at 0.5 and count disagreements.
            diff_mc_ans = torch.abs(
                mc_ans - (torch.sigmoid(pred_mc_ans) >= 0.5).float())
            # mc_opt_err: % of wrong options (4 options per question,
            # presumably — confirm against the dataset definition).
            # mc_q_err: % of questions with at least one wrong option.
            mc_opt_err = 100 * torch.true_divide(diff_mc_ans.sum(),
                                                 (4 * mc_q.size()[0]))
            mc_q_err = 100 * torch.true_divide(
                (diff_mc_ans.sum(dim=1, keepdim=True) != 0).float().sum(),
                mc_q.size()[0])
            # Copy the stats from GPU to CPU (sync point).
            loss_des_val, loss_mc_val, top1_err, top5_err, mc_opt_err, mc_q_err = (
                loss_des_val.item(), loss_mc_val.item(), top1_err.item(),
                top5_err.item(), mc_opt_err.item(), mc_q_err.item())
            mb_size_mc = mc_q.size()[0]
        else:
            mc_opt_err, mc_q_err = None, None
            mb_size_mc = None
            loss_des_val, top1_err, top5_err = (loss_des_val.item(),
                                                top1_err.item(),
                                                top5_err.item())
        # Meter signature: top1_err, top5_err, mc_opt_err, mc_q_err,
        # loss_des, loss_mc, lr, mb_size_des, mb_size_mc.
        train_meter.update_stats(top1_err, top5_err, mc_opt_err, mc_q_err,
                                 loss_des_val, loss_mc_val, lr,
                                 des_q.size()[0], mb_size_mc)
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        # For testing implementation
        if test_imp:
            print(" --- Descriptive questions results --- ")
            print("Des_ans")
            print(des_ans)
            print("Argmax => prediction")
            print(torch.argmax(pred_des_ans, dim=1, keepdim=False))
            print("Top1_err and Top5err")
            print(top1_err, top5_err)
            print("Loss_des_val = {}".format(loss_des_val))
            if has_mc:
                print(" --- Multiple Choice questions results --- ")
                print("mc_opt_err = {} \nmc_q_err = {}".format(
                    mc_opt_err, mc_q_err))
                print("Loss_mc_val = {}".format(loss_mc_val))
            test_counter += 1
            if test_counter == 4:
                break
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Put the model in train mode and start the iteration timer.
    model.train()
    train_meter.iter_tic()
    num_batches = len(train_loader)
    for batch_idx, (inputs, labels, _, meta) in enumerate(train_loader):
        # Move the batch onto the GPU. Multi-pathway inputs arrive as a
        # list of tensors; metadata values may themselves be lists.
        if isinstance(inputs, (list, )):
            for j in range(len(inputs)):
                inputs[j] = inputs[j].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for meta_key, meta_val in meta.items():
            if isinstance(meta_val, (list, )):
                for j in range(len(meta_val)):
                    meta_val[j] = meta_val[j].cuda(non_blocking=True)
            else:
                meta[meta_key] = meta_val.cuda(non_blocking=True)
        # Evaluate the LR schedule at the fractional epoch and apply it.
        lr = optim.get_epoch_lr(cur_epoch + float(batch_idx) / num_batches,
                                cfg)
        optim.set_lr(optimizer, lr)
        # Forward pass; detection models also consume the proposal boxes.
        preds = (model(inputs, meta["boxes"])
                 if cfg.DETECTION.ENABLE else model(inputs))
        # Mean-reduced loss, NaN-checked before the update.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        loss = loss_fun(preds, labels)
        misc.check_nan_losses(loss)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            # Detection path tracks only the (optionally all-reduced) loss.
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            train_meter.update_stats(None, None, None, loss, lr)
        else:
            # Classification path additionally tracks top-1/top-5 errors.
            num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
            top1_err, top5_err = ((1.0 - x / preds.size(0)) * 100.0
                                  for x in num_topks_correct)
            if cfg.NUM_GPUS > 1:
                loss, top1_err, top5_err = du.all_reduce(
                    [loss, top1_err, top5_err])
            # Move the stats to the CPU (this is a sync point).
            loss, top1_err, top5_err = (loss.item(), top1_err.item(),
                                        top5_err.item())
            train_meter.iter_toc()
            train_meter.update_stats(top1_err, top5_err, loss, lr,
                                     inputs[0].size(0) * cfg.NUM_GPUS)
        train_meter.log_iter_stats(cur_epoch, batch_idx)
        train_meter.iter_tic()
    # Epoch-level bookkeeping.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, test_imp=False):
    """
    Evaluate the model on the val set (descriptive questions on raw frames).
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ClevrerValMeter): meter instance to record and calculate
            the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): when True, print detailed diagnostics and stop
            after the first batch (implementation smoke test).
    """
    test_counter = 0
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()
    for cur_iter, sampled_batch in enumerate(val_loader):
        # Raw video frames plus tokenized descriptive question and answer.
        frames = sampled_batch['frames']
        des_q = sampled_batch['question_dict']['question']
        des_ans = sampled_batch['question_dict']['ans']
        # des_len = sampled_batch['question_dict']['len']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(frames, (list,)):
                for i in range(len(frames)):
                    frames[i] = frames[i].cuda(non_blocking=True)
            else:
                frames = frames.cuda(non_blocking=True)
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            # des_len = des_len.cuda(non_blocking=True)
        val_meter.data_toc()
        # Explicitly declare reduction to mean.
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        pred_des_ans = model(frames, des_q, True)
        loss_des_val = des_loss_fun(pred_des_ans, des_ans)
        # Compute per-device top-1/top-5 error rates (in percent).
        # NOTE(review): no all_reduce is performed in this eval loop.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans,
                                                  (1, 5))
        top1_err, top5_err = [
            (1.0 - x / pred_des_ans.size(0)) * 100.0
            for x in num_topks_correct
        ]
        # Multiple-choice slots are unused in this routine.
        loss_mc_val = None
        mc_opt_err, mc_q_err = None, None
        mb_size_mc = None
        # Copy the stats from GPU to CPU (sync point).
        loss_des_val, top1_err, top5_err = (
            loss_des_val.item(), top1_err.item(), top5_err.item()
        )
        val_meter.iter_toc()
        # Meter signature: top1_err, top5_err, mc_opt_err, mc_q_err,
        # loss_des, loss_mc, mb_size_des, mb_size_mc.
        val_meter.update_stats(
            top1_err, top5_err, mc_opt_err, mc_q_err, loss_des_val,
            loss_mc_val, des_q.size()[0], mb_size_mc
        )
        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()
        # For testing implementation
        if test_imp:
            print(" --- Descriptive questions results --- ")
            print("Des_ans")
            print(des_ans)
            print("Argmax => prediction")
            print(torch.argmax(pred_des_ans, dim=1, keepdim=False))
            print("Top1_err and Top5err")
            print(top1_err, top5_err)
            print("Loss_des_val = {}".format(loss_des_val))
            test_counter += 1
            if test_counter == 1:
                break
    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
def train_epoch(train_loader, model, optimizer, scaler, train_meter,
                cur_epoch, cfg, writer=None):
    """
    Perform the video training for one epoch with mixed precision (AMP)
    and optional MixUp/CutMix augmentation.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        scaler (GradScaler): torch.cuda.amp gradient scaler driving the
            scaled backward / unscale / step sequence.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to
            writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    if cfg.MIXUP.ENABLE:
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            # MixUp mixes samples of the first pathway and soft-labels.
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples
        # Forward pass and loss under autocast for mixed precision.
        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            if cfg.DETECTION.ENABLE:
                preds = model(inputs, meta["boxes"])
            else:
                preds = model(inputs)
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass with the scaled loss.
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        # Unscales the gradients of optimizer's assigned params in-place
        # so clipping operates on true gradient magnitudes.
        scaler.unscale_(optimizer)
        # Clip gradients if necessary
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(model.parameters(),
                                            cfg.SOLVER.CLIP_GRAD_VAL)
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           cfg.SOLVER.CLIP_GRAD_L2NORM)
        # Update the parameters.
        scaler.step(optimizer)
        scaler.update()
        if cfg.MIXUP.ENABLE:
            # Reconstruct hard labels from the mixed soft labels (top-2
            # label weights) so top-k errors stay meaningful: fold the
            # second mixed class's score into the first and zero it out.
            _top_max_k_vals, top_max_k_inds = torch.topk(labels, 2, dim=1,
                                                         largest=True,
                                                         sorted=True)
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader, model, optimizer, train_meter, cur_epoch, cfg, writer=None
):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to
            writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    # Check if the correct params are set to requires_grad = True
    assert_requires_grad_correctness(model, du.is_master_proc(), cfg)
    train_meter.iter_tic()
    data_size = len(train_loader)
    np.set_printoptions(suppress=True)
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device (unconditionally —
        # this variant has no CPU fallback).
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        if cfg.MODEL.HEAD_ACT == "softmax" and cfg.TRAIN.DATASET == "custom":
            # Cross-entropy expects integer class indices, so cast the
            # labels to long (and re-transfer to GPU after the CPU cast).
            labels = labels.type(torch.LongTensor)
            labels = labels.cuda()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        # NOTE: this fork's set_lr takes cfg as a third argument.
        optim.set_lr(optimizer, lr, cfg)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"], is_train=True)
        else:
            # Perform the forward pass.
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # Dead debugging snippet kept verbatim (no-op string statement).
        """
        if cur_iter % 70 == 0:
            softmax = torch.nn.Softmax(dim=1)
            probabilities = softmax(preds)
            loss_prob = loss_fun(probabilities, labels)
            preds_numpy = probabilities.cpu().detach().numpy()
            preds_numpy = np.round(preds_numpy, 4)
            labels_numpy = labels.cpu().detach().numpy()
            print("--------------------------")
            for label, pred in zip (labels_numpy, preds_numpy):
                print(str(label) + "---->", end= "")
                print(pred[label])
        """
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        # Todo: adjust accordingly
        if cfg.DETECTION.ENABLE:  # and not (cfg.MODEL.HEAD_ACT == "softmax"):
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {"Train/loss": loss, "Train/lr": lr},
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(preds, labels,
                                                          (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0
                    for x in num_topks_correct
                ]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(
                top1_err, top5_err, loss, lr, inputs[0].size(0) * cfg.NUM_GPUS
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader, student_model, teacher_model, optimizer, train_meter,
    cur_epoch, cfg, writer=None
):
    """
    Perform one epoch of knowledge-distillation training: the frozen
    teacher provides predictions and intermediate feature maps, and the
    student is trained with a weighted sum of (a) cross-entropy on labels,
    (b) MSE between student/teacher feature maps, and (c) KL divergence
    between temperature-softened feature distributions.
    Args:
        train_loader (loader): video training loader.
        student_model (model): the video model to train.
        teacher_model (model): frozen teacher; kept in eval mode and run
            under no_grad.
        optimizer (optim): the optimizer to perform optimization on the
            student model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to
            writer Tensorboard log.
    """
    # Teacher never updates; only the student trains.
    teacher_model.eval()
    student_model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    # NOTE: this loader yields a 5-tuple (extra trailing element ignored).
    for cur_iter, (inputs, labels, _, meta, _) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        # Both models return (predictions, feature maps). inputs.copy() is
        # passed because multi-pathway models may mutate the input list.
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            student_preds, student_features = student_model(inputs.copy(),
                                                            meta["boxes"])
            with torch.no_grad():
                teacher_preds, teacher_features = teacher_model(
                    inputs.copy(), meta["boxes"])
        else:
            # Perform the forward pass.
            student_preds, student_features = student_model(inputs.copy())
            with torch.no_grad():
                teacher_preds, teacher_features = teacher_model(inputs.copy())
        # Explicitly declare reduction for each loss term.
        # L2 loss for featuremap difference
        loss_mse_func = losses.get_loss_func('mse')(reduction="mean")
        # Cross entropy loss for prediction
        loss_pred_func = losses.get_loss_func('cross_entropy')(
            reduction="mean")
        # kl-divergence loss
        loss_kl_func = losses.get_loss_func('kl_divergence')(
            reduction="batchmean")
        T = cfg.KD.TEMPERATURE
        alpha = cfg.KD.ALPHA
        # Hard-label term weighted by (1 - alpha); distillation terms are
        # weighted by alpha * T^2 (standard KD temperature scaling).
        loss_pred = loss_pred_func(student_preds, labels) * (1. - alpha)
        loss_mse = []
        loss_kl = []
        for s_features, t_features in zip(student_features, teacher_features):
            # Each stage yields a pair of feature maps (presumably one per
            # pathway — confirm against the model definition).
            for i in range(2):
                # mse loss
                loss_mse.append(
                    loss_mse_func(s_features[i], t_features[i]) *
                    (alpha * T * T))
                # kl divergence loss: flatten (B, C, T, H, W) to
                # (B*T*H*W, C) before softening with temperature T.
                b, c, t, h, w = s_features[i].shape
                s_feature = s_features[i].permute(
                    0, 2, 3, 4, 1).contiguous().view(b * t * h * w, c)
                t_feature = t_features[i].permute(
                    0, 2, 3, 4, 1).contiguous().view(b * t * h * w, c)
                loss_kl.append(
                    loss_kl_func(F.log_softmax(s_feature / T, dim=0),
                                 F.softmax(t_feature / T, dim=0)) *
                    (alpha * T * T))
        # TOTAL LOSS = sum of all losses
        loss = loss_pred + sum(loss_mse) + sum(loss_kl)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {"Train/loss": loss,
                     "Train/lr": lr,
                     "Train/mse": sum(loss_mse),
                     "Train/loss_kl": sum(loss_kl),
                     "Train/loss_pred": loss_pred},
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors on the student's predictions.
                num_topks_correct = metrics.topks_correct(student_preds,
                                                          labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / student_preds.size(0)) * 100.0
                    for x in num_topks_correct
                ]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                        "Train/mse": sum(loss_mse),
                        "Train/loss_kl": sum(loss_kl),
                        "Train/loss_pred": loss_pred,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader,
    model,
    optimizer,
    scaler,
    train_meter,
    cur_epoch,
    cfg,
    writer=None,
):
    """
    Perform the video training for one epoch (mixed-precision capable).

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        scaler (torch.cuda.amp.GradScaler): gradient scaler for mixed-precision
            training; used even when AMP is disabled (it is then a no-op).
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    if cfg.MIXUP.ENABLE:
        # Mixup/CutMix augmentation: produces soft (mixed) labels, which are
        # un-mixed again further below before computing top-k errors.
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )

    # For MoCo, skip parameter updates during the first iterations of epoch 0
    # until the negative queue has been filled once.
    iters_noupdate = 0
    if cfg.MODEL.MODEL_NAME == "ContrastiveModel" and cfg.CONTRASTIVE.TYPE == "moco":
        assert cfg.CONTRASTIVE.QUEUE_LEN % (cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS) == 0
        iters_noupdate = (
            cfg.CONTRASTIVE.QUEUE_LEN // cfg.TRAIN.BATCH_SIZE // cfg.NUM_SHARDS
        )
    if cfg.MODEL.FROZEN_BN:
        misc.frozen_bn_stats(model)
    # Explicitly declare reduction to mean.
    loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")

    profiler.log_tic("loop_time")
    # NOTE(review): the loop variable `time` shadows the stdlib module name.
    for cur_iter, (inputs, labels, index, time, meta) in enumerate(train_loader):
        # Normalize single-pathway input to a list of pathway tensors.
        if not isinstance(inputs, list):
            inputs = [inputs]
        # Transfer the data to the current GPU device.
        # NOTE(review): `.to("cuda:0")` (replacing the commented-out
        # `.cuda(non_blocking=True)`) pins everything to GPU 0 — confirm this
        # is intentional for multi-GPU runs.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    if isinstance(inputs[i], (list, )):
                        for j in range(len(inputs[i])):
                            inputs[i][j] = inputs[i][j].to("cuda:0")
                    else:
                        inputs[i] = inputs[i].to("cuda:0")
            else:
                inputs = inputs.to("cuda:0")
            labels = labels.to("cuda:0")
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
            index = index.to("cuda:0")
            time = time.to("cuda:0")
        batch_size = (
            inputs[0][0].size(0) if isinstance(inputs[0], list) else inputs[0].size(0)
        )
        # Update the learning rate (fractional-epoch schedule).
        epoch_exact = cur_epoch + float(cur_iter) / data_size
        lr = optim.get_epoch_lr(epoch_exact, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples

        # Forward + loss under autocast; backward/step happen via the scaler.
        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            perform_backward = True
            optimizer.zero_grad()
            if cfg.MODEL.MODEL_NAME == "ContrastiveModel":
                # Contrastive models own part of their forward/backward logic;
                # they may return a partial loss and opt out of the backward here.
                (
                    model,
                    preds,
                    partial_loss,
                    perform_backward,
                ) = contrastive_forward(
                    model, cfg, inputs, index, time, epoch_exact, scaler
                )
            elif cfg.DETECTION.ENABLE:
                # Compute the predictions with RoI boxes.
                preds = model(inputs, meta["boxes"])
            else:
                profiler.log_tic("model_time")
                preds = model(inputs)
                profiler.log_toc("model_time", shape=inputs[0].shape)
            if cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel":
                # SSL target is always class 0 (instance-discrimination style).
                labels = torch.zeros(
                    preds.size(0), dtype=labels.dtype, device=labels.device
                )
            if cfg.MODEL.MODEL_NAME == "ContrastiveModel" and partial_loss:
                loss = partial_loss
            else:
                # Compute the loss.
                loss = loss_fun(preds, labels)

        # check Nan Loss.
        misc.check_nan_losses(loss)
        if perform_backward:
            scaler.scale(loss).backward()
        # Unscales the gradients of optimizer's assigned params in-place
        # so clipping operates on true gradient magnitudes.
        scaler.unscale_(optimizer)
        # Clip gradients if necessary
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), cfg.SOLVER.CLIP_GRAD_VAL
            )
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(
                model.parameters(), cfg.SOLVER.CLIP_GRAD_L2NORM
            )
        model = cancel_swav_gradients(model, cfg, epoch_exact)
        if cur_iter < iters_noupdate and cur_epoch == 0:  # for e.g. MoCo
            logger.info(
                "Not updating parameters {}/{}".format(cur_iter, iters_noupdate)
            )
        else:
            # Update the parameters.
            scaler.step(optimizer)
            scaler.update()

        if cfg.MIXUP.ENABLE:
            # Un-mix: fold the second label's mass into the first and score
            # against the dominant original label.
            _top_max_k_vals, top_max_k_inds = torch.topk(
                labels, 2, dim=1, largest=True, sorted=True
            )
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss.detach(), top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                batch_size * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        torch.cuda.synchronize()
        train_meter.iter_toc()  # do measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        torch.cuda.synchronize()
        train_meter.iter_tic()
        profiler.log_toc("loop_time", shape=inputs[0].shape)
        profiler.log_tic("loop_time")
        profiler.report(25)
        # Free the batch before the next iteration's allocation.
        del inputs

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg):
    """
    Perform the video training for one epoch.

    Supports several label layouts selected by ``cfg.DATA.LABELS_TYPE``:
    'regression' (size + counts + 5-way class logits), 'length', 'stend',
    'mask', or plain classification.

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    # Per-epoch accumulators, only filled for 'regression'/'length' labels.
    regr_list = []
    num_list = []
    top_list = []
    for cur_iter, (inputs, labels, _) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        if isinstance(labels, (list, )):
            for i in range(len(labels)):
                labels[i] = labels[i].cuda(non_blocking=True)
            labels = torch.stack(labels)
        else:
            labels = labels.cuda(non_blocking=True)
        if cfg.MODEL.LOSS_FUNC == 'mse':
            # MSE requires float targets.
            labels = labels.float()

        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        # Perform the forward pass.
        preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        top1_err = None
        # Compute the errors, depending on the label layout.
        if cfg.DATA.LABELS_TYPE == 'regression':
            # Columns: [0] size target, [1:ln] count targets,
            # [ln:] 5-way classification targets.
            ln = (labels.size(1) - 1) // 2 + 1
            pr = preds[:, ln:].reshape(-1, 5)
            lb = labels[:, ln:].reshape(-1)
            num_topks_correct = metrics.topks_correct(pr, lb, (1, ))
            top1_err = (1.0 - num_topks_correct[0] / len(lb)) * 100.0
            regr = ((preds[:, 0] - labels[:, 0])**2).mean()
            numbers = ((preds[:, 1:ln] - labels[:, 1:ln])**2).mean()
            if cfg.NUM_GPUS > 1:
                regr, numbers = du.all_reduce([regr, numbers])
            regr_list.append(regr.item())
            num_list.append(numbers.item())
        elif cfg.DATA.LABELS_TYPE == 'length':
            regr = ((preds[:, 0] - labels[:, 0])**2).mean()
            numbers = ((preds[:, 1:] - labels[:, 1:])**2).mean()
            if cfg.NUM_GPUS > 1:
                regr, numbers = du.all_reduce([regr, numbers])
            regr_list.append(regr.item())
            num_list.append(numbers.item())
            num_topks_correct = metrics.topks_correct(preds, labels, (1, ))
            # Top-1 is not meaningful for pure regression; keep a zero tensor
            # so the meter/all_reduce path below stays uniform.
            top1_err = num_topks_correct[0] * 0.0
        elif cfg.DATA.LABELS_TYPE == 'stend':
            # Start/end labels: report the loss itself as the "error".
            top1_err = loss.clone()
        else:
            num_topks_correct = metrics.topks_correct(preds, labels, (1, ))
            # BUGFIX: the denominator variable was computed as `preds_ix` but
            # then an undefined name `preds_size` was used (NameError).
            # NOTE(review): dividing by a dimension size rather than the batch
            # size looks unusual — confirm the intended normalization.
            preds_ix = (preds.size(2) * preds.size(0)
                        if cfg.DATA.LABELS_TYPE == 'mask' else preds.size(1))
            top1_err = (1.0 - num_topks_correct[0] / preds_ix) * 100.0

        # Gather all the predictions across all the devices.
        if cfg.NUM_GPUS > 1:
            loss, top1_err = du.all_reduce([loss, top1_err])
        # Copy the stats from GPU to CPU (sync point).
        loss, top1_err = (loss.item(), top1_err.item())
        top_list.append(top1_err)

        train_meter.iter_toc()
        # Update and log stats.
        train_meter.update_stats(top1_err, loss, lr,
                                 inputs[0].size(0) * cfg.NUM_GPUS)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    if cfg.DATA.LABELS_TYPE == 'regression' or cfg.DATA.LABELS_TYPE == 'length':
        print('---------------------')
        print(
            f'LOSS VALUES!!: SIZE_LOSS:{np.mean(regr_list)} NUM_LOSS:{np.mean(num_list)} CLASS_LOSS:{np.mean(top_list)}'
        )
        print('---------------------')

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg):
    """
    Evaluate the model on the val set.

    Mirrors the error computation of ``train_epoch`` for each
    ``cfg.DATA.LABELS_TYPE`` layout.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    # Per-epoch accumulators, only filled for 'regression'/'length' labels.
    regr_list = []
    num_list = []
    top_list = []
    for cur_iter, (inputs, labels, _) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        if isinstance(labels, (list, )):
            for i in range(len(labels)):
                labels[i] = labels[i].cuda(non_blocking=True)
            labels = torch.stack(labels)
        else:
            labels = labels.cuda(non_blocking=True)

        preds = model(inputs)

        if cfg.DATA.LABELS_TYPE == 'regression':
            # BUGFIX: `ln` was never defined in this function (NameError at
            # runtime); compute it exactly as train_epoch does.
            # Columns: [0] size target, [1:ln] count targets,
            # [ln:] 5-way classification targets.
            ln = (labels.size(1) - 1) // 2 + 1
            pr = preds[:, ln:].reshape(-1, 5)
            lb = labels[:, ln:].reshape(-1)
            num_topks_correct = metrics.topks_correct(pr, lb, (1, ))
            top1_err = (1.0 - num_topks_correct[0] / len(lb)) * 100.0
            regr = ((preds[:, 0] - labels[:, 0])**2).mean()
            numbers = ((preds[:, 1:ln] - labels[:, 1:ln])**2).mean()
            if cfg.NUM_GPUS > 1:
                regr, numbers = du.all_reduce([regr, numbers])
            regr_list.append(regr.item())
            num_list.append(numbers.item())
        elif cfg.DATA.LABELS_TYPE == 'length':
            regr = ((preds[:, 0] - labels[:, 0])**2).mean()
            numbers = ((preds[:, 1:] - labels[:, 1:])**2).mean()
            if cfg.NUM_GPUS > 1:
                regr, numbers = du.all_reduce([regr, numbers])
            regr_list.append(regr.item())
            num_list.append(numbers.item())
            num_topks_correct = metrics.topks_correct(preds, labels, (1, ))
            # Top-1 is not meaningful for pure regression; keep a zero tensor
            # so the meter/all_reduce path below stays uniform.
            top1_err = num_topks_correct[0] * 0.0
        elif cfg.DATA.LABELS_TYPE == 'stend':
            # Start/end labels: report the loss itself as the "error".
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            loss = loss_fun(preds, labels)
            top1_err = loss.clone()
        else:
            num_topks_correct = metrics.topks_correct(preds, labels, (1, ))
            # BUGFIX: the denominator variable was computed as `preds_ix` but
            # then an undefined name `preds_size` was used (NameError).
            # NOTE(review): dividing by a dimension size rather than the batch
            # size looks unusual — confirm the intended normalization.
            preds_ix = (preds.size(2) * preds.size(0)
                        if cfg.DATA.LABELS_TYPE == 'mask' else preds.size(1))
            top1_err = (1.0 - num_topks_correct[0] / preds_ix) * 100.0

        # Combine the errors across the GPUs.
        if cfg.NUM_GPUS > 1:
            top1_err = du.all_reduce([top1_err])[0]
        # Copy the errors from GPU to CPU (sync point).
        top1_err = top1_err.item()
        top_list.append(top1_err)

        val_meter.iter_toc()
        # Update and log stats.
        val_meter.update_stats(top1_err, inputs[0].size(0) * cfg.NUM_GPUS)
        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    if cfg.DATA.LABELS_TYPE == 'regression' or cfg.DATA.LABELS_TYPE == 'length':
        print('---------------------')
        print(
            f'VALIDATE LOSS!!: SIZE_LOSS:{np.mean(regr_list):.5} NUM_LOSS:{np.mean(num_list):.5} CLASS_LOSS:{np.mean(top_list):.5}'
        )
        print('---------------------')

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
    def __init__(self, cfg):
        """
        Build a contrastive SSL model wrapper around a backbone.

        The branch taken below depends on ``cfg.CONTRASTIVE.TYPE``:
        'mem' (memory-bank NCE), 'self', 'moco'/'byol' (momentum encoder +
        queue), 'swav' (prototypes + optional queue), or 'simclr'.

        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
        """
        super(ContrastiveModel, self).__init__()
        # Construct the model.
        self.backbone = _MODEL_TYPES[cfg.MODEL.ARCH](cfg)
        self.type = cfg.CONTRASTIVE.TYPE
        self.T = cfg.CONTRASTIVE.T                # softmax temperature
        self.dim = cfg.CONTRASTIVE.DIM            # embedding dimension
        self.length = cfg.CONTRASTIVE.LENGTH      # memory/bank length
        self.k = cfg.CONTRASTIVE.QUEUE_LEN        # negative-queue length
        self.mmt = cfg.CONTRASTIVE.MOMENTUM       # momentum-encoder momentum
        self.momentum_annealing = cfg.CONTRASTIVE.MOMENTUM_ANNEALING
        self.duration = 1
        self.cfg = cfg
        self.num_gpus = cfg.NUM_GPUS
        self.l2_norm = Normalize()
        self.knn_num_imgs = 0
        self.knn_on = cfg.CONTRASTIVE.KNN_ON
        # Filled lazily when the kNN memory is initialized.
        self.train_labels = np.zeros((0, ), dtype=np.int32)
        self.num_pos = 2
        self.num_crops = (self.cfg.DATA.TRAIN_CROP_NUM_TEMPORAL *
                          self.cfg.DATA.TRAIN_CROP_NUM_SPATIAL)
        self.nce_loss_fun = losses.get_loss_func("contrastive_loss")(
            reduction="mean")
        assert self.cfg.MODEL.LOSS_FUNC == "contrastive_loss"
        self.softmax = nn.Softmax(dim=1).cuda()

        if self.type == "mem":
            # Memory-bank NCE variant.
            self.mem_type = cfg.CONTRASTIVE.MEM_TYPE
            if self.mem_type == "1d":
                self.memory = Memory1D(self.length, self.duration, self.dim,
                                       cfg)
            else:
                self.memory = Memory(self.length, self.duration, self.dim, cfg)
            self.examplar_type = "video"
            self.interp = cfg.CONTRASTIVE.INTERP_MEMORY
        elif self.type == "self":
            pass
        elif self.type == "moco" or self.type == "byol":
            # MoCo components: frozen momentum ("history") encoder plus a
            # ring-buffer queue of negatives (registered as buffers so they
            # are checkpointed but not optimized).
            self.backbone_hist = _MODEL_TYPES[cfg.MODEL.ARCH](cfg)
            for p in self.backbone_hist.parameters():
                p.requires_grad = False
            self.register_buffer("ptr", torch.tensor([0]))
            self.ptr.requires_grad = False
            # Uniform init in [-stdv, stdv].
            stdv = 1.0 / math.sqrt(self.dim / 3)
            self.register_buffer(
                "queue_x",
                torch.rand(self.k, self.dim).mul_(2 * stdv).add_(-stdv),
            )
            self.register_buffer("iter", torch.zeros([1], dtype=torch.long))
            # Batch shuffling (to avoid BN cheating) is unnecessary when BN is
            # synced across all devices, and is never used for BYOL.
            self._batch_shuffle_on = (False if
                                      ("sync" in cfg.BN.NORM_TYPE
                                       and cfg.BN.NUM_SYNC_DEVICES
                                       == cfg.NUM_GPUS) or self.type == "byol"
                                      else True)
        elif self.type == "swav":
            self.swav_use_public_code = True
            if self.swav_use_public_code:
                self.swav_prototypes = nn.Linear(
                    self.dim, 1000, bias=False)  # for orig implementation
            else:
                self.swav_prototypes = nn.Parameter(
                    torch.randn((self.dim, 1000), dtype=torch.float))
            self.swav_eps_sinkhorn = 0.05
            self.swav_use_the_queue = False
            # optionally starts a queue (sharded across workers)
            if self.cfg.CONTRASTIVE.SWAV_QEUE_LEN > 0:
                self.register_buffer(
                    "queue_swav",
                    torch.zeros(
                        2,  # = args.crops_for_assign
                        self.cfg.CONTRASTIVE.SWAV_QEUE_LEN //
                        du.get_world_size(),
                        self.dim,
                    ),
                )
        elif self.type == "simclr":
            self._simclr_precompute_pos_neg_mask_multi()
            self.simclr_dist_on = cfg.CONTRASTIVE.SIMCLR_DIST_ON

        # Optional kNN evaluation memory over the training set.
        # (A 1D variant was tried and did not work — see git history.)
        if self.knn_on:
            self.knn_mem = Memory(self.length, 1, self.dim, cfg)
def train_epoch(train_loader,
                model,
                optimizer,
                train_meter,
                cur_epoch,
                cfg,
                writer=None):
    """
    Perform the video training for one epoch (CLEVRER-style QA).

    Each batch carries video frames plus two question types: descriptive
    (single-answer classification) and multiple-choice (4 binary options).

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        frames = sampled_batch['frames']
        des_q = sampled_batch['question_dict']['des_q']
        des_ans = sampled_batch['question_dict']['des_ans']
        mc_q = sampled_batch['question_dict']['mc_q']
        mc_ans = sampled_batch['question_dict']['mc_ans']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            frames = frames.cuda(non_blocking=True)
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            mc_q = mc_q.cuda(non_blocking=True)
            mc_ans = mc_ans.cuda()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()
        # Two forward passes: the boolean flag selects the descriptive (True)
        # vs multiple-choice (False) answer head.
        pred_des_ans = model(frames, des_q, True)
        pred_mc_ans = model(frames, mc_q, False)
        # Explicitly declare reduction to mean.
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean")
        # Compute the loss: cross-entropy for descriptive answers plus
        # per-option binary cross-entropy for multiple choice.
        loss = des_loss_fun(pred_des_ans, des_ans)
        loss += mc_loss_fun(pred_mc_ans, mc_ans)
        # check Nan Loss.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        top1_err, top5_err = None, None
        # Compute the errors for the descriptive head.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans,
                                                  (1, 5))
        top1_err, top5_err = [(1.0 - x / pred_des_ans.size(0)) * 100.0
                              for x in num_topks_correct]
        # Multiple choice: threshold each option's sigmoid at 0.5 and compare.
        diff_mc_ans = torch.abs(
            mc_ans - (torch.sigmoid(pred_mc_ans) >= 0.5).float())
        # Errors: per-option error rate (4 options per question) and
        # whole-question error rate (any option wrong).
        mc_opt_err = 100 * torch.true_divide(diff_mc_ans.sum(),
                                             (4 * des_q.size()[0]))
        mc_q_err = 100 * torch.true_divide(
            (diff_mc_ans.sum(dim=1, keepdim=True) != 0).float().sum(),
            des_q.size()[0])
        # Gather all the predictions across all the devices.
        if cfg.NUM_GPUS > 1:
            loss, top1_err, top5_err, mc_opt_err, mc_q_err = du.all_reduce(
                [loss, top1_err, top5_err, mc_opt_err, mc_q_err])
            # Copy the stats from GPU to CPU (sync point).
            loss, top1_err, top5_err, mc_opt_err, mc_q_err = (loss.item(),
                                                              top1_err.item(),
                                                              top5_err.item(),
                                                              mc_opt_err.item(),
                                                              mc_q_err.item())
        # Update and log stats.
        train_meter.update_stats(
            top1_err,
            top5_err,
            mc_opt_err,
            mc_q_err,
            loss,
            lr,
            frames.size()[0] * max(
                cfg.NUM_GPUS, 1
            ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
        )
        # write to tensorboard format if available.
        if writer is not None:
            writer.add_scalars(
                {
                    "Train/loss": loss,
                    "Train/lr": lr,
                    "Train/Top1_err": top1_err,
                    "Train/Top5_err": top5_err,
                    "Train/mc_opt_err": mc_opt_err,
                    "Train/mc_q_err": mc_q_err,
                },
                global_step=data_size * cur_epoch + cur_iter,
            )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader,
                model,
                optimizer,
                train_meter,
                cur_epoch,
                cfg,
                writer=None):
    """
    Perform the video training for one epoch.

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()

        # ------------------------- save model test ------------------------
        # NOTE(review): debug/export hack — checkpoints every 100 iterations
        # and passes `cur_iter` where an epoch number is expected (see the
        # `# cur_epoch` marker). Looks like leftover instrumentation for model
        # export; confirm before shipping.
        if cur_iter % 100 == 1:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_iter,
                               cfg)  # cur_epoch
            print("----------------------- save done ")
            # exit(0)
        # ------------------------------------------------------------------

        if cfg.DETECTION.ENABLE:
            # inputs [1,3,8,224,224] per pathway; reshape to [8,3,224,224]
            # (drop batch dim, move time to the batch axis) and wrap boxes to
            # the rank the detection head expects.
            # NOTE(review): this assumes batch size 1 (squeeze(0)) — confirm.
            inputs0 = inputs[0].squeeze(0).permute(1, 0, 2, 3)
            inputs1 = inputs[1].squeeze(0).permute(1, 0, 2, 3)
            meta["boxes"] = meta["boxes"].unsqueeze(0).unsqueeze(0)
            inputs = [inputs0, inputs1]
            preds = model(inputs, meta["boxes"])
            # A commented-out TorchScript export path used to live here: it
            # loaded 'checkpoints/checkpoint_epoch_00007.pyth', filtered the
            # state dict against the model (multi-GPU training prefixes keys
            # with 'module.'), ran torch.jit.trace on (inputs, boxes), and
            # saved "weights/sf_pytorch.pt". Recover it from git history if
            # the export flow is needed again.
        else:
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader,
                model,
                optimizer,
                train_meter,
                cur_epoch,
                cfg,
                writer=None,
                wandb_log=False):
    """
    Perform the audio training for one epoch.

    Handles two label formats: a dict with 'verb'/'noun' targets
    (EPIC-Kitchens style, two output heads) and a plain tensor target with an
    auxiliary embedding loss on the model's linear-layer output.

    Args:
        train_loader (loader): audio training loader.
        model (model): the audio model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
        wandb_log (bool): if True, log to Weights & Biases instead of the
            Tensorboard writer.
    """
    # Enable train mode.
    model.train()
    if cfg.BN.FREEZE:
        # DDP wraps the model, so the freeze helper lives on .module there.
        model.module.freeze_fn(
            'bn_statistics') if cfg.NUM_GPUS > 1 else model.freeze_fn(
                'bn_statistics')
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            if isinstance(labels, (dict, )):
                labels = {k: v.cuda() for k, v in labels.items()}
            else:
                labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()

        # BUGFIX: the model was forward-passed TWICE (`model(inputs)[0]` and
        # then `model(inputs)[1]`), doubling compute and — with dropout/BN in
        # train mode — producing a linear_layer_output that did not correspond
        # to the same forward pass as `preds`. Run one forward and unpack.
        outputs = model(inputs)
        # Output of the last layer (the main predictions).
        preds = outputs[0]
        # Intermediate linear-layer output, used for the embedding loss.
        linear_layer_output = outputs[1]

        if isinstance(labels, (dict, )):
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss: average of the verb and noun head losses.
            loss_verb = loss_fun(preds[0], labels['verb'])
            loss_noun = loss_fun(preds[1], labels['noun'])
            loss = 0.5 * (loss_verb + loss_noun)
            # check Nan Loss.
            misc.check_nan_losses(loss)
        else:
            # Single-head path (e.g. VGG-style labels, no verb/noun split).
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Embedding loss function.
            emb_loss_fun = losses.get_loss_func(
                cfg.MODEL.EMB_LOSS_FUNC)(reduction="mean")
            # Compute the loss for the main model.
            loss = loss_fun(preds, labels)
            # Compute the loss for the embeddings.
            # NOTE(review): `word_embedding` is not defined anywhere in this
            # function — this branch raises NameError if taken. It must be
            # supplied (e.g. from the batch or a module-level table); confirm
            # and wire it in.
            emb_loss = emb_loss_fun(linear_layer_output, word_embedding)
            # Use embeddings to fine tune the model's objective.
            loss = loss + emb_loss
            # check Nan Loss.
            misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        if isinstance(labels, (dict, )):
            # Compute the verb accuracies.
            verb_top1_acc, verb_top5_acc = metrics.topk_accuracies(
                preds[0], labels['verb'], (1, 5))
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss_verb, verb_top1_acc, verb_top5_acc = du.all_reduce(
                    [loss_verb, verb_top1_acc, verb_top5_acc])
            # Copy the stats from GPU to CPU (sync point).
            loss_verb, verb_top1_acc, verb_top5_acc = (
                loss_verb.item(),
                verb_top1_acc.item(),
                verb_top5_acc.item(),
            )
            # Compute the noun accuracies.
            noun_top1_acc, noun_top5_acc = metrics.topk_accuracies(
                preds[1], labels['noun'], (1, 5))
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss_noun, noun_top1_acc, noun_top5_acc = du.all_reduce(
                    [loss_noun, noun_top1_acc, noun_top5_acc])
            # Copy the stats from GPU to CPU (sync point).
            loss_noun, noun_top1_acc, noun_top5_acc = (
                loss_noun.item(),
                noun_top1_acc.item(),
                noun_top5_acc.item(),
            )
            # Compute the action accuracies (verb AND noun both correct).
            action_top1_acc, action_top5_acc = metrics.multitask_topk_accuracies(
                (preds[0], preds[1]), (labels['verb'], labels['noun']), (1, 5))
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss, action_top1_acc, action_top5_acc = du.all_reduce(
                    [loss, action_top1_acc, action_top5_acc])
            # Copy the stats from GPU to CPU (sync point).
            loss, action_top1_acc, action_top5_acc = (
                loss.item(),
                action_top1_acc.item(),
                action_top5_acc.item(),
            )
            # Update and log stats.
            train_meter.update_stats(
                (verb_top1_acc, noun_top1_acc, action_top1_acc),
                (verb_top5_acc, noun_top5_acc, action_top5_acc),
                (loss_verb, loss_noun, loss),
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None and not wandb_log:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_acc": action_top1_acc,
                        "Train/Top5_acc": action_top5_acc,
                        "Train/verb/loss": loss_verb,
                        "Train/noun/loss": loss_noun,
                        "Train/verb/Top1_acc": verb_top1_acc,
                        "Train/verb/Top5_acc": verb_top5_acc,
                        "Train/noun/Top1_acc": noun_top1_acc,
                        "Train/noun/Top5_acc": noun_top5_acc,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
            if wandb_log:
                wandb.log(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_acc": action_top1_acc,
                        "Train/Top5_acc": action_top5_acc,
                        "Train/verb/loss": loss_verb,
                        "Train/noun/loss": loss_noun,
                        "Train/verb/Top1_acc": verb_top1_acc,
                        "Train/verb/Top5_acc": verb_top5_acc,
                        "Train/noun/Top1_acc": noun_top1_acc,
                        "Train/noun/Top5_acc": noun_top5_acc,
                        "train_step": data_size * cur_epoch + cur_iter,
                    }, )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None and not wandb_log:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
            if wandb_log:
                wandb.log(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                        "train_step": data_size * cur_epoch + cur_iter,
                    }, )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None, wandb_log=False):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
        wandb_log (bool): if True, scalars go to Weights & Biases; the
            Tensorboard writer is then skipped for per-iter scalars.
    Returns:
        tuple: (is_best_epoch, top1) as reported by
            ``val_meter.log_epoch_stats`` — top1 is the epoch top-1 accuracy
            when the meter reports accuracies, otherwise the top-1 error.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()
    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            # Dict labels => multi-task (verb/noun) targets.
            if isinstance(labels, (dict, )):
                labels = {k: v.cuda() for k, v in labels.items()}
            else:
                labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        val_meter.data_toc()
        preds = model(inputs)
        if isinstance(labels, (dict, )):
            # Multi-task path: preds[0] are verb logits, preds[1] noun logits.
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss_verb = loss_fun(preds[0], labels['verb'])
            loss_noun = loss_fun(preds[1], labels['noun'])
            loss = 0.5 * (loss_verb + loss_noun)
            # Compute the verb accuracies.
            verb_top1_acc, verb_top5_acc = metrics.topk_accuracies(
                preds[0], labels['verb'], (1, 5))
            # Combine the errors across the GPUs.
            if cfg.NUM_GPUS > 1:
                loss_verb, verb_top1_acc, verb_top5_acc = du.all_reduce(
                    [loss_verb, verb_top1_acc, verb_top5_acc])
            # Copy the errors from GPU to CPU (sync point).
            loss_verb, verb_top1_acc, verb_top5_acc = (
                loss_verb.item(),
                verb_top1_acc.item(),
                verb_top5_acc.item(),
            )
            # Compute the noun accuracies.
            noun_top1_acc, noun_top5_acc = metrics.topk_accuracies(
                preds[1], labels['noun'], (1, 5))
            # Combine the errors across the GPUs.
            if cfg.NUM_GPUS > 1:
                loss_noun, noun_top1_acc, noun_top5_acc = du.all_reduce(
                    [loss_noun, noun_top1_acc, noun_top5_acc])
            # Copy the errors from GPU to CPU (sync point).
            loss_noun, noun_top1_acc, noun_top5_acc = (
                loss_noun.item(),
                noun_top1_acc.item(),
                noun_top5_acc.item(),
            )
            # Compute the action accuracies (joint verb+noun correctness).
            action_top1_acc, action_top5_acc = metrics.multitask_topk_accuracies(
                (preds[0], preds[1]), (labels['verb'], labels['noun']), (1, 5))
            # Combine the errors across the GPUs.
            if cfg.NUM_GPUS > 1:
                loss, action_top1_acc, action_top5_acc = du.all_reduce(
                    [loss, action_top1_acc, action_top5_acc])
            # Copy the errors from GPU to CPU (sync point).
            loss, action_top1_acc, action_top5_acc = (
                loss.item(),
                action_top1_acc.item(),
                action_top5_acc.item(),
            )
            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                (verb_top1_acc, noun_top1_acc, action_top1_acc),
                (verb_top5_acc, noun_top5_acc, action_top5_acc),
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None and not wandb_log:
                writer.add_scalars(
                    {
                        "Val/loss": loss,
                        "Val/Top1_acc": action_top1_acc,
                        "Val/Top5_acc": action_top5_acc,
                        "Val/verb/loss": loss_verb,
                        "Val/verb/Top1_acc": verb_top1_acc,
                        "Val/verb/Top5_acc": verb_top5_acc,
                        "Val/noun/loss": loss_noun,
                        "Val/noun/Top1_acc": noun_top1_acc,
                        "Val/noun/Top5_acc": noun_top5_acc,
                    },
                    global_step=len(val_loader) * cur_epoch + cur_iter,
                )
            if wandb_log:
                wandb.log(
                    {
                        "Val/loss": loss,
                        "Val/Top1_acc": action_top1_acc,
                        "Val/Top5_acc": action_top5_acc,
                        "Val/verb/loss": loss_verb,
                        "Val/verb/Top1_acc": verb_top1_acc,
                        "Val/verb/Top5_acc": verb_top5_acc,
                        "Val/noun/loss": loss_noun,
                        "Val/noun/Top1_acc": noun_top1_acc,
                        "Val/noun/Top5_acc": noun_top5_acc,
                        "val_step": len(val_loader) * cur_epoch + cur_iter,
                    },
                )
            # Keep raw predictions for the epoch-level confusion-matrix plot.
            val_meter.update_predictions((preds[0], preds[1]),
                                         (labels['verb'], labels['noun']))
        else:
            # Single-task path.
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss = loss_fun(preds, labels)
            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the errors from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err,
                    top5_err,
                    inputs[0].size(0) * max(
                        cfg.NUM_GPUS, 1
                    ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
                )
                # write to tensorboard format if available.
                if writer is not None and not wandb_log:
                    writer.add_scalars(
                        {
                            "Val/loss": loss,
                            "Val/Top1_err": top1_err,
                            "Val/Top5_err": top5_err,
                        },
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )
                if wandb_log:
                    wandb.log(
                        {
                            "Val/loss": loss,
                            "Val/Top1_err": top1_err,
                            "Val/Top5_err": top5_err,
                            "val_step": len(val_loader) * cur_epoch + cur_iter,
                        },
                    )
            val_meter.update_predictions(preds, labels)
        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()
    # Log epoch stats.
    is_best_epoch, top1_dict = val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        all_preds = [pred.clone().detach() for pred in val_meter.all_preds]
        all_labels = [label.clone().detach() for label in val_meter.all_labels]
        if cfg.NUM_GPUS:
            all_preds = [pred.cpu() for pred in all_preds]
            all_labels = [label.cpu() for label in all_labels]
        writer.plot_eval(preds=all_preds,
                         labels=all_labels,
                         global_step=cur_epoch)
    # Epoch-level scalars; the key set tells us which task type ran.
    if writer is not None and not wandb_log:
        if "top1_acc" in top1_dict.keys():
            writer.add_scalars(
                {
                    "Val/epoch/Top1_acc": top1_dict["top1_acc"],
                    "Val/epoch/verb/Top1_acc": top1_dict["verb_top1_acc"],
                    "Val/epoch/noun/Top1_acc": top1_dict["noun_top1_acc"],
                },
                global_step=cur_epoch,
            )
        else:
            writer.add_scalars(
                {"Val/epoch/Top1_err": top1_dict["top1_err"]},
                global_step=cur_epoch,
            )
    if wandb_log:
        if "top1_acc" in top1_dict.keys():
            wandb.log(
                {
                    "Val/epoch/Top1_acc": top1_dict["top1_acc"],
                    "Val/epoch/verb/Top1_acc": top1_dict["verb_top1_acc"],
                    "Val/epoch/noun/Top1_acc": top1_dict["noun_top1_acc"],
                    "epoch": cur_epoch,
                },
            )
        else:
            wandb.log({
                "Val/epoch/Top1_err": top1_dict["top1_err"],
                "epoch": cur_epoch
            })
    top1 = top1_dict["top1_acc"] if "top1_acc" in top1_dict.keys(
    ) else top1_dict["top1_err"]
    val_meter.reset()
    return is_best_epoch, top1
def train_epoch(train_loader, model, optimizer, scheduler, train_meter, cur_epoch, cfg, test_imp=False):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        scheduler: learning-rate scheduler, stepped once per iteration.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): accepted for interface compatibility; not used here.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        video_ft = sampled_batch['res_ft']
        question = sampled_batch['question_dict']['question']
        answer = sampled_batch['question_dict']['ans']
        # Move the batch to the current GPU when one is configured.
        if cfg.NUM_GPUS:
            video_ft = video_ft.cuda(non_blocking=True)
            question = question.cuda(non_blocking=True)
            answer = answer.cuda()
        train_meter.data_toc()
        # Forward pass on the descriptive-question head.
        model.zero_grad()
        logits = model(video_ft, question, True)
        criterion = losses.get_loss_func('cross_entropy')(reduction="mean")
        loss = criterion(logits, answer)
        # Abort early if the loss diverged.
        misc.check_nan_losses(loss)
        # Backward pass, then parameter and LR updates.
        loss.backward()
        optimizer.step()
        scheduler.step()
        # Mini-batch top-1 / top-5 error rates (percent).
        correct_counts = metrics.topks_correct(logits, answer, (1, 5))
        batch_errs = [(1.0 - c / logits.size(0)) * 100.0 for c in correct_counts]
        top1_err = batch_errs[0].item()
        top5_err = batch_errs[1].item()
        # Multiple-choice slots are unused in this descriptive-only loop,
        # hence the None placeholders.
        train_meter.update_stats(top1_err, top5_err, None, None, loss.item(),
                                 None, scheduler.get_last_lr(),
                                 answer.size(0), None)
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg, cnt):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        cnt (int): running global-iteration counter; incremented per iteration
            and returned so the caller can carry it across epochs.
    Returns:
        int: the updated global-iteration counter.
    """
    # Enable train mode.
    model.train()
    if cfg.BN.FREEZE:
        # Keep batch-norm running statistics fixed during training.
        model.freeze_fn('bn_statistics')
    train_meter.iter_tic()
    data_size = len(train_loader)
    #for cur_iter, (inputs, bboxs, masks, labels, _, meta) in enumerate(train_loader):
    for cur_iter, output_dict in enumerate(train_loader):
        # Unpack the loader's dict; bbox fields exist only in USE_BBOX mode.
        if cfg.EPICKITCHENS.USE_BBOX:
            inputs = output_dict['inputs']
            bboxs = output_dict['bboxs']
            masks = output_dict['masks']
            labels = output_dict['label']
            # output_dict['index']
            meta = output_dict['metadata']
        else:
            inputs = output_dict['inputs']
            labels = output_dict['label']
            meta = output_dict['metadata']
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        # Dict labels => multi-task (verb/noun) targets.
        if isinstance(labels, (dict,)):
            labels = {k: v.cuda() for k, v in labels.items()}
        else:
            labels = labels.cuda()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass.
            if cfg.EPICKITCHENS.USE_BBOX:
                if isinstance(bboxs, (list,)):
                    for i in range(len(bboxs)):
                        bboxs[i] = bboxs[i].cuda(non_blocking=True)
                        masks[i] = masks[i].cuda(non_blocking=True)
                else:
                    bboxs = bboxs.cuda(non_blocking=True)
                    masks = masks.cuda(non_blocking=True)
                preds = model(inputs, bboxes=bboxs, masks=masks)
            else:
                preds = model(inputs)
        if isinstance(labels, (dict,)):
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss_verb = loss_fun(preds[0], labels['verb'])
            loss_noun = loss_fun(preds[1], labels['noun'])
            # Equal-weight sum of the two task losses.
            loss = 0.5 * (loss_verb + loss_noun)
            # check Nan Loss.
            misc.check_nan_losses(loss)
        else:
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss = loss_fun(preds, labels)
            # check Nan Loss.
            misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
        else:
            if isinstance(labels, (dict,)):
                # Compute the verb accuracies.
                verb_top1_acc, verb_top5_acc = metrics.topk_accuracies(preds[0], labels['verb'], (1, 5))
                # predicted_answer_softmax = torch.nn.Softmax(dim=1)(preds[0])
                # predicted_answer_max = torch.max(predicted_answer_softmax.data, 1).indices
                # print(cnt, predicted_answer_max, labels['verb'])
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss_verb, verb_top1_acc, verb_top5_acc = du.all_reduce(
                        [loss_verb, verb_top1_acc, verb_top5_acc]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss_verb, verb_top1_acc, verb_top5_acc = (
                    loss_verb.item(),
                    verb_top1_acc.item(),
                    verb_top5_acc.item(),
                )
                # Compute the noun accuracies.
                noun_top1_acc, noun_top5_acc = metrics.topk_accuracies(preds[1], labels['noun'], (1, 5))
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss_noun, noun_top1_acc, noun_top5_acc = du.all_reduce(
                        [loss_noun, noun_top1_acc, noun_top5_acc]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss_noun, noun_top1_acc, noun_top5_acc = (
                    loss_noun.item(),
                    noun_top1_acc.item(),
                    noun_top5_acc.item(),
                )
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss = du.all_reduce(
                        [loss]
                    )
                # du.all_reduce returns a list; unwrap before .item().
                if isinstance(loss, (list,)):
                    loss = loss[0]
                # Copy the stats from GPU to CPU (sync point).
                loss = loss.item()
                train_meter.iter_toc()
                # Update and log stats.
                train_meter.update_stats(
                    (verb_top1_acc, noun_top1_acc),
                    (verb_top5_acc, noun_top5_acc),
                    (loss_verb, loss_noun, loss),
                    lr, inputs[0].size(0) * cfg.NUM_GPUS
                )
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct
                ]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
                train_meter.iter_toc()
                # Update and log stats.
                train_meter.update_stats(
                    top1_err, top5_err, loss, lr, inputs[0].size(0) * cfg.NUM_GPUS
                )
        train_meter.log_iter_stats(cur_epoch, cur_iter, cnt)
        train_meter.iter_tic()
        cnt += 1
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
    return cnt
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, writer, nep, cfg):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        writer: Tensorboard-style writer, or None to disable summaries.
        nep: Neptune experiment handle, or None to disable Neptune logging.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    global_iters = data_size * cur_epoch
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        # NOTE(review): `i` below is the loop variable left over from the
        # list branch above; if `inputs` is not a list, `i` is unbound here
        # and this line raises NameError — confirm inputs is always a list.
        if len(inputs[i].shape) > 5:
            # Fold an extra clip dimension into the batch and repeat labels
            # accordingly.
            labels = torch.repeat_interleave(labels, inputs[i].size(1), 0)
        for i in range(len(inputs)):
            if len(inputs[i].shape) > 5:
                inputs[i] = inputs[i].view((-1, ) + inputs[i].shape[2:])
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size,
                                global_iters, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass; `preds` here is a dict of outputs.
            if 'masks' in meta:
                preds = model((inputs, meta['masks']))
            else:
                preds = model(inputs)
        # NOTE: a large commented-out block that dumped intermediate
        # activations (bu_errors / mix_layer / H_inh / hidden) as image grids
        # via tv.utils.save_image was removed here; recover it from VCS if
        # that debugging visualization is needed again.
        out_keys = preds.keys()
        # Accumulate each enabled objective into a single total loss.
        total_loss = 0
        if cfg.PREDICTIVE.ENABLE:
            errors = preds['pred_errors']
            if 'frame_errors' in preds:
                frame_errors = preds['frame_errors']
            if 'IoU' in preds:
                iou = preds['IoU']
            if 'Acc' in preds:
                acc = preds['Acc']
            pred_loss = errors.mean()
            total_loss += pred_loss
        if cfg.PREDICTIVE.CPC:
            cpc_loss = preds['cpc_loss']
            total_loss += cpc_loss
        if 'cbp_penalty' in preds:
            penalty = preds['cbp_penalty']
            total_loss += penalty
        if cfg.SUPERVISED:
            preds = preds['logits']
            if cfg.MODEL.LOSS_FUNC != '':
                # Explicitly declare reduction to mean.
                loss_fun = losses.get_loss_func(
                    cfg.MODEL.LOSS_FUNC)(reduction="mean")
                # Compute the loss.
                loss = loss_fun(preds, labels)
                total_loss += loss
        # check Nan Loss.
        misc.check_nan_losses(total_loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        total_loss.backward()
        # Periodically plot gradient magnitudes for debugging.
        if writer is not None and global_iters % cfg.SUMMARY_PERIOD == 0:
            n_p = model.module.named_parameters() if hasattr(
                model, 'module') else model.named_parameters()
            fig = viz_helpers.plot_grad_flow_v2(n_p)
            writer.add_figure('grad_flow/grad_flow', fig, global_iters)
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(lr, inputs[0].size(0) * cfg.NUM_GPUS,
                                     loss=loss)
        else:
            if cfg.SUPERVISED:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                if cfg.PREDICTIVE.ENABLE:
                    pred_loss = du.all_reduce([pred_loss])
                    pred_loss = pred_loss[0]
                    if 'frame_errors' in out_keys:
                        frame_errors = du.all_reduce([frame_errors])[0]
                    if 'IoU' in preds:
                        iou = du.all_reduce([iou])[0]
                    if 'Acc' in preds:
                        acc = du.all_reduce([acc])[0]
                if cfg.PREDICTIVE.CPC:
                    cpc_loss = du.all_reduce([cpc_loss])
                    cpc_loss = cpc_loss[0]
                if cfg.SUPERVISED:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                if 'cbp_penalty' in out_keys:
                    penalty = du.all_reduce([penalty])[0]
            # Collect scalar logs keyed by metric name.
            loss_logs = {}
            if cfg.PREDICTIVE.ENABLE:
                pred_loss = pred_loss.item()
                loss_logs['loss_pred'] = pred_loss
                if 'frame_errors' in out_keys:
                    frame_errors = frame_errors.item()
                    loss_logs['frame_errors'] = frame_errors
                if 'IoU' in preds:
                    loss_logs['IoU'] = iou.item()
                if 'Acc' in preds:
                    loss_logs['Acc'] = acc.item()
            if cfg.PREDICTIVE.CPC:
                cpc_loss = cpc_loss.item()
                loss_logs['loss_cpc'] = cpc_loss
            if cfg.SUPERVISED:
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
                loss_logs['loss_class'] = loss
                loss_logs['top5_err'] = top5_err
                loss_logs['top1_err'] = top1_err
            if 'cbp_penalty' in out_keys:
                loss_logs['cbp_penalty'] = penalty.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(lr, inputs[0].size(0) * cfg.NUM_GPUS,
                                     **loss_logs)
            if writer is not None and global_iters % cfg.LOG_PERIOD == 0:
                for k, v in loss_logs.items():
                    # NOTE(review): str.strip('loss_') strips any of the
                    # characters {l,o,s,_} from BOTH ends, not the prefix
                    # — e.g. 'loss_class' becomes 'cla'. Presumably
                    # removeprefix was intended; confirm before changing tags.
                    writer.add_scalar('loss/' + k.strip('loss_'),
                                      train_meter.stats[k].get_win_median(),
                                      global_iters)
            if nep is not None and global_iters % cfg.LOG_PERIOD == 0:
                for k, v in loss_logs.items():
                    nep.log_metric(k.strip('loss_'),
                                   train_meter.stats[k].get_win_median())
                nep.log_metric('global_iters', global_iters)
            # Periodically save a grid of input frames next to predicted
            # frames, only on the master process.
            if global_iters % cfg.SUMMARY_PERIOD == 0 and du.get_rank(
            ) == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
                with torch.no_grad():
                    # Keep at most 3 examples to limit the image size.
                    inputs[0] = inputs[0][:min(3, len(inputs[0]))]
                    if 'masks' in meta:
                        frames = model(
                            (inputs, meta['masks'][:min(3, len(inputs[0]))]),
                            extra=['frames'])['frames']
                    else:
                        frames = model(inputs, extra=['frames'])['frames']
                    n_rows = inputs[0].size(2) - 1
                    inputs = inputs[0].transpose(1, 2)[:, -n_rows:]
                    frames = frames.transpose(1, 2)[:, -n_rows:]
                    # Un-normalize with the dataset mean/std for display.
                    inputs = inputs * inputs.new(
                        cfg.DATA.STD)[None, None, :, None, None] + inputs.new(
                            cfg.DATA.MEAN)[None, None, :, None, None]
                    frames = frames * frames.new(
                        cfg.DATA.STD)[None, None, :, None, None] + frames.new(
                            cfg.DATA.MEAN)[None, None, :, None, None]
                    images = torch.cat([inputs, frames],
                                       1).reshape((-1, ) + inputs.shape[2:])
                    # grid = tv.utils.make_grid(images, nrow=8, normalize=True)
                    # writer.add_image('predictions', images, global_iters)
                    tv.utils.save_image(
                        images,
                        os.path.join(cfg.OUTPUT_DIR,
                                     'preds_%d.jpg' % global_iters),
                        nrow=n_rows,
                        normalize=True)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        global_iters += 1
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader, model, optimizer, train_meter, cur_epoch, cfg, test_imp=False
):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): if True, print one batch's predictions/labels and
            stop after the first iteration (implementation smoke test).
    """
    test_counter = 0
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        frames = sampled_batch['frames']
        des_q = sampled_batch['question_dict']['question']
        des_ans = sampled_batch['question_dict']['ans']
        # des_len = sampled_batch['question_dict']['len']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(frames, (list,)):
                for i in range(len(frames)):
                    frames[i] = frames[i].cuda(non_blocking=True)
            else:
                frames = frames.cuda(non_blocking=True)
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            # des_len = des_len.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        #Separated batches
        #Des: forward pass on the descriptive-question head.
        pred_des_ans = model(frames, des_q, True)
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        loss = des_loss_fun(pred_des_ans, des_ans)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        #Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #Save for stats
        loss_des_val = loss
        top1_err, top5_err = None, None
        # Compute the errors.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans, (1, 5))
        top1_err, top5_err = [
            (1.0 - x / pred_des_ans.size(0)) * 100.0 for x in num_topks_correct
        ]
        # Multiple-choice stats are not produced by this loop; pass Nones.
        mc_opt_err, mc_q_err = None, None
        mb_size_mc = None
        # Copy the stats from GPU to CPU.
        loss_des_val, top1_err, top5_err = (
            loss_des_val.item(), top1_err.item(), top5_err.item()
        )
        #top1_err, top5_err, mc_opt_err, mc_q_err, loss_des, loss_mc, lr, mb_size
        # Update and log stats.
        train_meter.update_stats(
            top1_err, top5_err, mc_opt_err, mc_q_err, loss_des_val, None, lr,
            des_q.size()[0], mb_size_mc
        )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        #For testing implementation
        if test_imp:
            print(" --- Descriptive questions results --- ")
            # print("Des_q")
            # print(des_q)
            print("Des_ans")
            print(des_ans)
            #print("Des_ans_pred")
            #print(pred_des_ans)
            print("Argmax => prediction")
            print(torch.argmax(pred_des_ans, dim=1, keepdim=False))
            print("Top1_err and Top5err")
            print(top1_err, top5_err)
            print("Loss_des_val = {}".format(loss_des_val))
            test_counter += 1
            # Stop after a single batch in smoke-test mode.
            if test_counter == 1:
                break
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, nep, cfg): """ Evaluate the model on the val set. Args: val_loader (loader): data loader to provide validation data. model (model): model to evaluate the performance. val_meter (ValMeter): meter instance to record and calculate the metrics. cur_epoch (int): number of the current epoch of training. cfg (CfgNode): configs. Details can be found in slowfast/config/defaults.py """ # Evaluation mode enabled. The running stats would not be updated. model.eval() val_meter.iter_tic() for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader): # Transferthe data to the current GPU device. if isinstance(inputs, (list, )): for i in range(len(inputs)): inputs[i] = inputs[i].cuda(non_blocking=True) else: inputs = inputs.cuda(non_blocking=True) labels = labels.cuda() for key, val in meta.items(): if isinstance(val, (list, )): for i in range(len(val)): val[i] = val[i].cuda(non_blocking=True) else: meta[key] = val.cuda(non_blocking=True) if cfg.DETECTION.ENABLE: # Compute the predictions. preds = model(inputs, meta["boxes"]) preds = preds.cpu() ori_boxes = meta["ori_boxes"].cpu() metadata = meta["metadata"].cpu() if cfg.NUM_GPUS > 1: preds = torch.cat(du.all_gather_unaligned(preds), dim=0) ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes), dim=0) metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0) val_meter.iter_toc() # Update and log stats. val_meter.update_stats(preds.cpu(), ori_boxes.cpu(), metadata.cpu()) else: preds = model(inputs) aux_loss_keys = [] if cfg.PREDICTIVE.ENABLE: aux_loss_keys.append('pred_errors') errors = preds['pred_errors'] pred_loss = errors.mean() if 'frame_errors' in preds: aux_loss_keys.append('frame_errors') frame_errors = preds['frame_errors'] if cfg.PREDICTIVE.CPC: aux_loss_keys.append('cpc_loss') cpc_loss = preds['cpc_loss'] if cfg.SUPERVISED: preds = preds['logits'] # Explicitly declare reduction to mean. 
if cfg.MODEL.LOSS_FUNC != '' and cfg.SUPERVISED: loss_fun = losses.get_loss_func( cfg.MODEL.LOSS_FUNC)(reduction="mean") # Compute the loss. loss = loss_fun(preds, labels) # total_loss = total_loss + loss # Compute the errors. num_topks_correct = metrics.topks_correct( preds, labels, (1, 5)) # Combine the errors across the GPUs. top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct] # Gather all the predictions across all the devices. if cfg.NUM_GPUS > 1: if cfg.PREDICTIVE.ENABLE: pred_loss = du.all_reduce([pred_loss])[0] if cfg.PREDICTIVE.CPC: cpc_loss = du.all_reduce([cpc_loss])[0] if cfg.SUPERVISED: loss, top1_err, top5_err = du.all_reduce( [loss, top1_err, top5_err]) # # Copy the stats from GPU to CPU (sync point). # loss, top1_err, top5_err = ( # loss.item(), # top1_err.item(), # top5_err.item(), # ) # if cfg.NUM_GPUS > 1: # top1_err, top5_err = du.all_reduce([top1_err, top5_err]) # Copy the errors from GPU to CPU (sync point). loss_logs = {} if 'loss_pred' in aux_loss_keys: loss_logs['loss_pred'] = pred_loss.item() if 'frame_errors' in aux_loss_keys: loss_logs['frame_errors'] = frame_errors.item() if 'loss_cpc' in aux_loss_keys: loss_logs['loss_cpc'] = cpc_loss.item() if cfg.SUPERVISED: loss_logs['loss_class'] = loss.item() loss_logs['top1_err'] = top1_err.item() loss_logs['top5_err'] = top5_err.item() val_meter.iter_toc() # Update and log stats. val_meter.update_stats(inputs[0].size(0) * cfg.NUM_GPUS, **loss_logs) val_meter.log_iter_stats(cur_epoch, cur_iter) val_meter.iter_tic() # neptune update if nep is not None: for k, v in loss_logs.items(): nep.log_metric('val_' + k.strip('loss_'), val_meter.stats[k].get_global_avg()) # Log epoch stats. val_meter.log_epoch_stats(cur_epoch) val_meter.reset()
def train_epoch(train_loader,
                model,
                optimizer,
                train_meter,
                cur_epoch,
                cfg,
                writer=None):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader; yields
            (inputs, labels, _, meta, boxes, b_indices) tuples (boxes and
            b_indices are unused here but are part of the loader's output).
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    # The loss function depends only on the config; construct it once instead
    # of rebuilding it on every iteration.
    # Explicitly declare reduction to mean.
    loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")

    for cur_iter, (inputs, labels, _, meta, boxes,
                   b_indices) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        # Update the learning rate (fractional epoch for per-iter schedules).
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            preds = model(inputs, meta["boxes"])
        else:
            preds = model(inputs)

        # Compute the loss.
        loss = loss_fun(preds, labels)

        # check Nan Loss.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()

            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])

                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )

            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )

        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, test_imp=False):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data. Each
            batch is a dict with a 'des' (descriptive) sub-batch, a
            'has_mc' flag, and, when has_mc, an 'mc' (multiple choice)
            sub-batch.
        model (model): model to evaluate the performance.
        val_meter (ClevrerValMeter): meter instance to record and calculate
            the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): if True, print per-batch predictions for
            implementation debugging and stop after 4 batches.
    """
    test_counter = 0
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    # Loss functions are batch-independent; build them once outside the loop
    # instead of on every iteration.
    # Explicitly declare reduction to mean.
    des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
    mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean")

    for cur_iter, sampled_batch in enumerate(val_loader):
        # Samples 2 batches. One for des and one for mc
        # There are much more des, then some batches are only des
        des_batch = sampled_batch['des']
        des_q = des_batch['question_dict']['question']
        des_ans = des_batch['question_dict']['ans']
        des_len = des_batch['question_dict']['len']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            des_len = des_len.cuda(non_blocking=True)

        has_mc = sampled_batch['has_mc'][0]
        if has_mc:
            mc_batch = sampled_batch['mc']
            mc_q = mc_batch['question_dict']['question']
            mc_ans = mc_batch['question_dict']['ans']
            mc_len = mc_batch['question_dict']['len']
            if cfg.NUM_GPUS:
                mc_q = mc_q.cuda(non_blocking=True)
                mc_ans = mc_ans.cuda()
                mc_len = mc_len.cuda(non_blocking=True)

        val_meter.data_toc()

        # Second model argument selects descriptive (True) vs multiple
        # choice (False) mode.
        pred_des_ans = model(des_q, True)
        loss_des_val = des_loss_fun(pred_des_ans, des_ans)
        loss_mc_val = None
        if has_mc:
            pred_mc_ans = model(mc_q, False)
            loss_mc_val = mc_loss_fun(pred_mc_ans, mc_ans)

        # Compute the errors.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans,
                                                  (1, 5))
        # Combine the errors across the GPUs.
        top1_err, top5_err = [(1.0 - x / pred_des_ans.size(0)) * 100.0
                              for x in num_topks_correct]

        if has_mc:
            # Per-option mismatch mask between thresholded predictions and
            # ground truth.
            diff_mc_ans = torch.abs(
                mc_ans - (torch.sigmoid(pred_mc_ans) >= 0.5).float())
            # Errors. The per-option rate assumes 4 options per multiple
            # choice question — TODO confirm against the dataset.
            mc_opt_err = 100 * torch.true_divide(diff_mc_ans.sum(),
                                                 (4 * mc_q.size()[0]))
            # A question counts as wrong if any of its options is wrong.
            mc_q_err = 100 * torch.true_divide(
                (diff_mc_ans.sum(dim=1, keepdim=True) != 0).float().sum(),
                mc_q.size()[0])
            # Copy the stats from GPU to CPU (sync point).
            loss_des_val, loss_mc_val, top1_err, top5_err, mc_opt_err, mc_q_err = (
                loss_des_val.item(), loss_mc_val.item(), top1_err.item(),
                top5_err.item(), mc_opt_err.item(), mc_q_err.item())
            mb_size_mc = mc_q.size()[0]
        else:
            mc_opt_err, mc_q_err = None, None
            mb_size_mc = None
            loss_des_val, top1_err, top5_err = (loss_des_val.item(),
                                                top1_err.item(),
                                                top5_err.item())

        val_meter.iter_toc()
        # top1_err, top5_err, mc_opt_err, mc_q_err, loss_des, loss_mc,
        # mb_size_des, mb_size_mc
        # Update and log stats.
        val_meter.update_stats(top1_err, top5_err, mc_opt_err, mc_q_err,
                               loss_des_val, loss_mc_val, des_q.size()[0],
                               mb_size_mc)
        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

        # For testing implementation
        if test_imp:
            print(" --- Descriptive questions results --- ")
            print("Des_ans")
            print(des_ans)
            print("Argmax => prediction")
            print(torch.argmax(pred_des_ans, dim=1, keepdim=False))
            print("Top1_err and Top5err")
            print(top1_err, top5_err)
            print("Loss_des_val = {}".format(loss_des_val))
            if has_mc:
                print(" --- Multiple Choice questions results --- ")
                print("mc_opt_err = {} \nmc_q_err = {}".format(
                    mc_opt_err, mc_q_err))
                print("Loss_mc_val = {}".format(loss_mc_val))
            test_counter += 1
            if test_counter == 4:
                break

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()