def train(self, loader, model, criterion, optimizer, epoch, metrics, args, validate=False):
    """Run one epoch of training (or validation when ``validate=True``).

    Args:
        loader: iterable yielding ``(input, target, meta)`` batches.
        model: network to train/evaluate.
        criterion: loss module returning ``(scores, loss, score_target)``.
        optimizer: optimizer stepped every ``args.accum_grad`` iterations.
        epoch: current epoch index (used for LR decay and logging).
        metrics: iterable of metric *classes*; fresh instances are created
            per epoch so state never leaks between epochs.
        args: namespace with ``lr, lr_decay_rate, val_size, train_size,
            synchronous, cpu, accum_grad, print_freq, name``.
        validate: when True, run in eval mode without parameter updates.

    Returns:
        dict mapping ``'train_*'`` or ``'val_*'`` metric names to values,
        including the epoch-average ``loss``.
    """
    timer = Timer()
    data_time = AverageMeter()
    losses = AverageMeter()
    # Instantiate fresh metric objects for this epoch.
    metrics = [m() for m in metrics]
    if validate:
        # switch to evaluate mode
        model.eval()
        criterion.eval()
        iter_size = args.val_size
        setting = 'Validate Epoch'
    else:
        # switch to train mode
        adjust_learning_rate(args.lr, args.lr_decay_rate, optimizer, epoch)
        model.train()
        criterion.train()
        optimizer.zero_grad()
        iter_size = args.train_size
        setting = 'Train Epoch'
    for i, (input, target, meta) in enumerate(part(loader, iter_size)):
        if args.synchronous:
            assert meta['id'][0] == meta['id'][1], "dataset not synced"
        data_time.update(timer.thetime() - timer.end)
        if not args.cpu:
            target = target.cuda(non_blocking=True)
        # BUGFIX: disable autograd bookkeeping during validation — the
        # original built the full graph even though backward() is never
        # called there, wasting memory (the later explicit `del` hints at
        # this problem).
        with torch.set_grad_enabled(not validate):
            output = model(input, meta)
            if type(output) != tuple:
                output = (output,)
            scores, loss, score_target = criterion(*(output + (target, meta)))
        losses.update(loss.item())
        with torch.no_grad():
            for m in metrics:
                m.update(scores, score_target)
        if not validate:
            loss.backward()
            # Gradient accumulation: only step every args.accum_grad batches.
            if i % args.accum_grad == args.accum_grad-1:
                print('updating parameters')
                optimizer.step()
                optimizer.zero_grad()
        timer.tic()
        if i % args.print_freq == 0:
            print('[{name}] {setting}: [{0}][{1}/{2}({3})]\t'
                  'Time {timer.val:.3f} ({timer.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  '{metrics}'.format(
                      epoch, i, int(len(loader)*iter_size), len(loader),
                      name=args.name, setting=setting, timer=timer,
                      data_time=data_time, loss=losses,
                      metrics=' \t'.join(str(m) for m in metrics)))
        del loss, output, target  # make sure we don't hold on to the graph
    metrics = dict(m.compute() for m in metrics)
    metrics.update({'loss': losses.avg})
    metrics = dict(('val_'+k, v) if validate else ('train_'+k, v)
                   for k, v in metrics.items())
    return metrics
def __init__(self):
    """Set up a running-average meter and empty prediction/target buffers."""
    self.am = AverageMeter()
    self.predictions, self.targets = [], []
def stabilize_all(self, loader, model, epoch, args):
    """Stabilize up to ``self.num_videos`` videos and score the results.

    For each video: runs ``self.stabilize_video``, denormalizes both the
    original and processed clips, writes original/processed/side-by-side
    mp4 files to ``args.cache``, and accumulates trajectory-based
    stability losses.

    Args:
        loader: iterable yielding ``(inputs, target, meta)``.
        model: stabilization model passed through to ``stabilize_video``.
        epoch: unused here; kept for interface consistency.
        args: namespace with ``cpu`` and ``cache``.

    Returns:
        dict of average content / motion / original / output losses.
    """
    timer = Timer()
    content_losses = AverageMeter()
    motion_losses = AverageMeter()
    original_losses = AverageMeter()
    output_losses = AverageMeter()
    # ImageNet channel statistics used to undo input normalization
    # (hoisted out of the loop; shaped for channels-last video tensors —
    # assumes (T, H, W, C) layout, TODO confirm against loader).
    imagenet_std = torch.Tensor([0.229, 0.224, 0.225])[None, None, None, :]
    imagenet_mean = torch.Tensor([0.485, 0.456, 0.406])[None, None, None, :]
    for i, (inputs, target, meta) in enumerate(loader):
        if i >= self.num_videos:
            break
        if not args.cpu:
            inputs = inputs.cuda()
            # BUGFIX: `async` is a reserved word since Python 3.7 (the
            # original `target.cuda(async=True)` is a SyntaxError); the
            # torch keyword is `non_blocking`, as used elsewhere in file.
            target = target.cuda(non_blocking=True)
        original = inputs.detach().clone()
        with torch.enable_grad():
            output, content_loss, motion_loss = self.stabilize_video(
                inputs, model, args)
        content_losses.update(content_loss)
        motion_losses.update(motion_loss)
        # prepare videos: drop batch dim, denormalize as x * std + mean
        original = original[0]
        output = output[0]
        original *= imagenet_std.to(original.device)
        original += imagenet_mean.to(original.device)
        output *= imagenet_std.to(output.device)
        output += imagenet_mean.to(output.device)
        # save video
        name = '{}_{}'.format(meta[0]['id'], meta[0]['time'])
        ffmpeg_video_writer(original.cpu(),
                            '{}/{}_original.mp4'.format(args.cache, name))
        ffmpeg_video_writer(output.cpu(),
                            '{}/{}_processed.mp4'.format(args.cache, name))
        combined = torch.cat((original.cpu(), output.cpu()), 2)
        ffmpeg_video_writer(combined,
                            '{}/{}_combined.mp4'.format(args.cache, name))
        # calculate stability losses
        print('calculating stability losses')
        try:
            # this can fail when there are no feature matches found
            original_trajectory = video_trajectory(original.cpu().numpy())
            original_losses.update(trajectory_loss(original_trajectory))
            output_trajectory = video_trajectory(output.cpu().numpy())
            output_losses.update(trajectory_loss(output_trajectory))
        except Exception as e:
            # best-effort: keep processing remaining videos
            print(e)
        timer.tic()
        print(
            'Stabilization: [{0}/{1}]\t'
            'Time {timer.val:.3f} ({timer.avg:.3f}) Original Loss {2} \t Output Loss {3}'
            .format(i, self.num_videos, original_losses.avg,
                    output_losses.avg, timer=timer))
    scores = {'stabilization_task_content_loss': content_losses.avg,
              'stabilization_task_motion_loss': motion_losses.avg,
              'stabilization_task_original_loss': original_losses.avg,
              'stabilization_task_output_loss': output_losses.avg}
    return scores
def train(loader, model, optimizer, epoch, args):
    """Train the model for one epoch with cross-entropy loss.

    Args:
        loader: iterable yielding ``(input, meta)`` where each meta entry
            carries a ``"labels"`` list (presumably one label per clip —
            TODO confirm against the dataset).
        model: network returning ``(metric_feat, output)`` logits.
        optimizer: optimizer stepped every ``args.accum_grad`` iterations.
        epoch: current epoch index (used for LR decay and logging).
        args: namespace with ``debug, accum_grad, print_freq,
            lr_decay_rate, best_score``.
    """
    timer = Timer()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    ce_loss_meter = AverageMeter()
    cur_lr = adjust_learning_rate(args.lr_decay_rate, optimizer, epoch)
    model.train()
    optimizer.zero_grad()
    ce_loss_criterion = nn.CrossEntropyLoss()
    for i, (input, meta) in tqdm(enumerate(loader), desc="Train Epoch"):
        if args.debug and i >= debug_short_train_num:
            break
        data_time.update(timer.thetime() - timer.end)
        _batch_size = len(meta)
        # Flatten per-sample label lists into a single tensor.
        # (Idiom fix: the original looped `for _ in range(_batch_size)` and
        # indexed with `_`; result is identical.)
        target = torch.from_numpy(
            np.array([label for sample in meta for label in sample["labels"]]))
        # Fold the 3 clips per sample into the batch dimension.
        input = input.view(
            _batch_size * 3,
            input.shape[2],
            input.shape[3],
            input.shape[4],
            input.shape[5],
        )
        metric_feat, output = model(input)
        ce_loss = ce_loss_criterion(output.cuda(), target.long().cuda())
        loss = ce_loss
        loss.backward()
        loss_meter.update(loss.item())
        ce_loss_meter.update(ce_loss.item())
        # Gradient accumulation: step only every args.accum_grad batches.
        if i % args.accum_grad == args.accum_grad - 1:
            optimizer.step()
            optimizer.zero_grad()
        if i % args.print_freq == 0 and i > 0:
            logger.info("[{0}][{1}/{2}]\t"
                        "Dataload_Time={data_time.avg:.3f}\t"
                        "Loss={loss.avg:.4f}\t"
                        "CELoss={ce_loss.avg:.4f}\t"
                        "LR={cur_lr:.7f}\t"
                        "bestAP={ap:.3f}".format(
                            epoch, i, len(loader),
                            data_time=data_time,
                            loss=loss_meter,
                            ce_loss=ce_loss_meter,
                            ap=args.best_score,
                            cur_lr=cur_lr,
                        ))
            # Reset meters so each log line reports a fresh window average.
            loss_meter.reset()
            ce_loss_meter.reset()