# Imports used throughout this file. Project-specific helpers (parser,
# get_mean_size, initialise_model, KINETICS, BaseTransform, evaluate) are
# assumed to be defined or imported elsewhere in the repo.
import os
import pickle
import socket
import time

import numpy as np
import torch
import torch.utils.data
from torch.optim.lr_scheduler import MultiStepLR
from torchvision import transforms


# --- evaluation / testing entry point ---
def main():
    args = parser.parse_args()
    hostname = socket.gethostname()
    # Pick the dataset root based on the machine we are running on.
    if hostname in ['mars', 'sun']:
        root = '/mnt/mars-delta/'
    else:
        raise RuntimeError('PLEASE SPECIFY root FOR ' + hostname)

    # The experiment name encodes the key hyper-parameters.
    exp_name = '{}-{}-{}-sl{:02d}-g{:d}-fs{:d}-{}-{:06d}'.format(
        args.dataset, args.arch, args.input, args.seq_len, args.gap,
        args.frame_step, args.batch_size, int(args.lr * 1000000))
    args.exp_name = exp_name
    root += args.dataset + '/'
    args.root = root
    args.model_save_dir = root + 'cache/' + exp_name
    args.global_models_dir = os.path.expanduser(args.global_models_dir)
    subset = 'val'
    args.subset = subset

    input_size, means, stds = get_mean_size(args.arch)
    print('means ', means)
    print('stds ', stds)
    normalize = transforms.Normalize(mean=means, std=stds)
    # Resize then centre-crop, as in standard ImageNet-style evaluation.
    val_transform = transforms.Compose([
        transforms.Resize(int(input_size * 1.1)),
        transforms.CenterCrop(int(input_size)),
        transforms.ToTensor(),
        normalize,
    ])
    val_dataset = KINETICS(args.root, args.input, val_transform,
                           netname=args.arch, subsets=['val'],
                           exp_name=exp_name,
                           scale_size=int(input_size * 1.1),
                           input_size=int(input_size),
                           frame_step=2, seq_len=args.seq_len, gap=args.gap)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    args.num_classes = val_dataset.num_classes

    log_fid = open(args.model_save_dir + '/test_log.txt', 'w')
    log_fid.write(args.exp_name + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_fid.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')

    # Evaluate every checkpoint listed in --test_iterations, caching the
    # per-frame scores so repeated runs only redo the final evaluation.
    for test_iteration in [int(itr) for itr in args.test_iterations.split(',')]:
        save_filename = '{:s}/output_{:s}_{:06d}.pkl'.format(
            args.model_save_dir, subset, test_iteration)
        if not os.path.isfile(save_filename):
            print('Models will be cached in ', args.model_save_dir)
            model, criterion = initialise_model(args)
            model_file_name = '{:s}/model_{:06d}.pth'.format(
                args.model_save_dir, test_iteration)
            print('Loading model from ', model_file_name)
            log_fid.write('Loading model from ' + model_file_name + '\n')
            model.load_state_dict(torch.load(model_file_name))
            print('Done loading model')
            log_fid.write(str(model))

            batch_time = AverageMeter()
            losses = AverageMeter()
            top1 = AverageMeter()
            top3 = AverageMeter()

            # switch to evaluate mode
            model.eval()
            torch.cuda.synchronize()
            end = time.perf_counter()
            allscores = dict()
            print('Starting to Iterate')
            with torch.no_grad():
                for i, (batch, targets, video_num, frame_nums) in enumerate(val_loader):
                    targets = targets.cuda(non_blocking=True)
                    input_var = batch.cuda(non_blocking=True)

                    # compute output
                    output = model(input_var)
                    loss = criterion(output, targets)

                    # measure accuracy and record loss
                    prec1, prec3 = accuracy(output, targets, topk=(1, 3))
                    losses.update(loss.item(), batch.size(0))
                    top1.update(prec1[0], batch.size(0))
                    top3.update(prec3[0], batch.size(0))

                    # measure elapsed time
                    torch.cuda.synchronize()
                    batch_time.update(time.perf_counter() - end)
                    end = time.perf_counter()

                    if i % args.print_freq == 0:
                        line = ('Test: [{0}/{1}] '
                                'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                                'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                                'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                                'Prec@3 {top3.val:.3f} ({top3.avg:.3f})').format(
                                    i, len(val_loader), batch_time=batch_time,
                                    loss=losses, top1=top1, top3=top3)
                        print(line)
                        log_fid.write(line + '\n')

                    # Accumulate per-frame scores per video for video-level
                    # evaluation later; storage assumes at most 100 scored
                    # clips per video.
                    res_data = output.cpu().numpy()
                    for k in range(res_data.shape[0]):
                        videoname = val_dataset.video_list[int(video_num[k])]
                        frame_num = int(frame_nums[k])
                        if videoname not in allscores:
                            allscores[videoname] = dict()
                            allscores[videoname]['scores'] = np.zeros(
                                (100, val_dataset.num_classes), dtype=np.float32)
                            allscores[videoname]['fids'] = np.zeros(100, dtype=np.int16)
                            allscores[videoname]['count'] = 0
                        count = allscores[videoname]['count']
                        allscores[videoname]['scores'][count, :] = res_data[k, :]
                        allscores[videoname]['fids'][count] = frame_num
                        allscores[videoname]['count'] += 1

            line = ' * Prec@1 {top1.avg:.3f} Prec@3 {top3.avg:.3f}'.format(
                top1=top1, top3=top3)
            print(line)
            log_fid.write(line + '\n')
            print('Done FRAME LEVEL evaluation')

            # Trim the pre-allocated score buffers to the number of clips
            # actually seen and sort them by frame id.
            for videoname in allscores:
                count = allscores[videoname]['count']
                scores = allscores[videoname]['scores'][:count]
                fids = allscores[videoname]['fids'][:count]
                sortedfidsinds = np.argsort(fids)
                allscores[videoname]['scores'] = scores[sortedfidsinds]
                allscores[videoname]['fids'] = fids[sortedfidsinds]

            with open(save_filename, 'wb') as f:
                pickle.dump(allscores, f)
        else:
            with open(save_filename, 'rb') as f:
                allscores = pickle.load(f)

        evaluate(allscores, val_dataset.annot_file, save_filename, subset)
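
# ----------------------------------------------------------------------------
# Both main() entry points in this file lean on AverageMeter and accuracy
# helpers that are assumed to be defined elsewhere in the repo. In case they
# are missing, here is a minimal sketch in the style of the official PyTorch
# ImageNet example; the names and behaviour match how they are called above
# and below, but the repo's own versions may differ.
# ----------------------------------------------------------------------------
class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for each k, as a percentage of the batch."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the top-k predictions per sample, shape (maxk, batch)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
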
# --- training entry point (presumably a separate script in the repo) ---
def main():
    val_step = 25000
    val_steps = [5000, ]
    train_step = 500
    args = parser.parse_args()
    hostname = socket.gethostname()
    args.stepvalues = [int(val) for val in args.stepvalues.split(',')]
    exp_name = '{}-{}-{}-sl{:02d}-g{:d}-fs{:d}-{}-{:06d}'.format(
        args.dataset, args.arch, args.input, args.seq_len, args.gap,
        args.frame_step, args.batch_size, int(args.lr * 1000000))
    args.exp_name = exp_name
    args.root += args.dataset + '/'
    model_save_dir = args.root + 'cache/' + exp_name
    os.makedirs(model_save_dir, exist_ok=True)
    args.model_save_dir = model_save_dir
    args.global_models_dir = os.path.expanduser(args.global_models_dir)

    if args.visdom:
        import visdom
        ports = {'mars': 8097, 'sun': 8096}
        viz = visdom.Visdom(port=ports[hostname], env=exp_name)
        # initialise visdom loss and accuracy plots
        loss_plot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 2)).cpu(),
                             opts=dict(xlabel='Iteration',
                                       ylabel='Losses',
                                       title='Train & Val Losses',
                                       legend=['Train-Loss', 'Val-Loss']))
        eval_plot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 4)).cpu(),
                             opts=dict(xlabel='Iteration',
                                       ylabel='Accuracy',
                                       title='Train & Val Accuracies',
                                       legend=['trainTop3', 'valTop3',
                                               'trainTop1', 'valTop1']))

    ## load dataloading configs
    input_size, means, stds = get_mean_size(args.arch)
    normalize = transforms.Normalize(mean=means, std=stds)
    # Data loading transforms based on model type
    transform = transforms.Compose([transforms.ToTensor(), normalize])
    val_transform = transforms.Compose([
        transforms.Resize(int(input_size * 1.1)),
        transforms.CenterCrop(int(input_size)),
        transforms.ToTensor(),
        normalize,
    ])
    if args.arch.find('vgg') > -1:
        transform = BaseTransform(size=input_size, mean=means)
        val_transform = transform
        print('\n\ntransforms are going to be VGG type\n\n')

    train_dataset = KINETICS(args.root, args.input, transform,
                             netname=args.arch, subsets=['train'],
                             scale_size=int(input_size * 1.1),
                             input_size=int(input_size),
                             exp_name=exp_name,
                             frame_step=args.frame_step,
                             seq_len=args.seq_len, gap=args.gap)
    args.num_classes = train_dataset.num_classes
    print('Models will be cached in ', args.model_save_dir)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # Validation uses a larger frame step so the validation pass stays cheap.
    val_dataset = KINETICS(args.root, args.input, val_transform,
                           netname=args.arch, subsets=['val'],
                           exp_name=exp_name,
                           scale_size=int(input_size * 1.1),
                           input_size=int(input_size),
                           frame_step=args.frame_step * 6,
                           seq_len=args.seq_len, gap=args.gap)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    model, criterion = initialise_model(args)

    # Biases get twice the base learning rate and no weight decay; all other
    # parameters use the base learning rate and weight decay.
    params = []
    for name, param in model.named_parameters():
        if name.find('bias') > -1:
            params += [{
                'params': [param],
                'lr': args.lr * 2,
                'weight_decay': 0
            }]
        else:
            params += [{
                'params': [param],
                'lr': args.lr,
                'weight_decay': args.weight_decay
            }]
    optimizer = torch.optim.SGD(params, args.lr, momentum=args.momentum)

    if args.resume:
        # latest.pth stores the last iteration plus the filenames of the
        # model and optimizer snapshots written by save_checkpoint.
        latest_file_name = '{:s}/latest.pth'.format(args.model_save_dir)
        latest_dict = torch.load(latest_file_name)
        args.start_iteration = latest_dict['iteration'] + 1
        model.load_state_dict(torch.load(latest_dict['model_file_name']))
        optimizer.load_state_dict(
            torch.load(latest_dict['optimizer_file_name']))
        log_fid = open(args.model_save_dir + '/training.log', 'a')
    else:
        log_fid = open(args.model_save_dir + '/training.log', 'w')
        log_fid.write(args.exp_name + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_fid.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
    log_fid.write(str(model))

    best_top1 = 0.0
    val_loss = 0.0
    val_top1 = 0.0
    val_top3 = 0.0
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top3 = AverageMeter()

    iteration = args.start_iteration
    approx_epochs = np.ceil(
        float(args.max_iterations - iteration) / len(train_loader))
    print('Approx epochs to run: {}, start iteration {}, max iterations {}, '
          '# of batches per epoch {}'.format(approx_epochs, iteration,
                                             args.max_iterations,
                                             len(train_loader)))
    epoch = -1
    scheduler = MultiStepLR(optimizer, milestones=args.stepvalues,
                            gamma=args.gamma)
    model.train()
    torch.cuda.synchronize()
    start = time.perf_counter()
    while iteration < args.max_iterations:
        epoch += 1
        for i, (batch, targets, __, __) in enumerate(train_loader):
            # skip the last couple of (possibly ragged) batches of each epoch
            if i >= len(train_loader) - 2:
                continue
            if iteration > args.max_iterations:
                break
            iteration += 1

            targets = targets.cuda(non_blocking=True)
            input_var = batch.cuda(non_blocking=True)
            torch.cuda.synchronize()
            data_time.update(time.perf_counter() - start)

            # compute output
            output = model(input_var)
            loss = criterion(output, targets)

            # measure accuracy and record loss
            prec1, prec3 = accuracy(output, targets, topk=(1, 3))
            losses.update(loss.item(), batch.size(0))
            top1.update(prec1[0], batch.size(0))
            top3.update(prec3[0], batch.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # measure elapsed time
            torch.cuda.synchronize()
            batch_time.update(time.perf_counter() - start)
            start = time.perf_counter()

            if iteration % args.print_freq == 0:
                line = ('Epoch: [{0}][{1}/{2}] '
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                        'Prec@3 {top3.val:.3f} ({top3.avg:.3f})').format(
                            epoch, iteration, len(train_loader),
                            batch_time=batch_time, data_time=data_time,
                            loss=losses, top1=top1, top3=top3)
                print(line)
                log_fid.write(line + '\n')

            avgtop1 = top1.avg
            avgtop3 = top3.avg
            avgloss = losses.avg

            if (iteration % val_step == 0 or iteration in val_steps) and iteration > 0:
                # evaluate on validation set
                val_top1, val_top3, val_loss = validate(args, val_loader,
                                                        model, criterion)
                line = ('\n\nValidation @ {:d}: Top1 {:.2f} Top3 {:.2f} '
                        'Loss {:.3f}\n\n'.format(iteration, val_top1,
                                                 val_top3, val_loss))
                print(line)
                log_fid.write(line)

                # remember best prec@1 and save checkpoint
                is_best = val_top1 > best_top1
                best_top1 = max(val_top1, best_top1)
                torch.cuda.synchronize()
                line = ('\nBest Top1 so far {:.3f}, current Top1 {:.3f}, '
                        'time taken for validation {:0.3f}\n\n'.format(
                            best_top1, val_top1,
                            time.perf_counter() - start))
                log_fid.write(line + '\n')
                print(line)

                save_checkpoint(
                    {
                        'epoch': epoch,
                        'iteration': iteration,
                        'arch': args.arch,
                        'val_top1': val_top1,
                        'val_top3': val_top3,
                        'val_loss': val_loss,
                        'train_top1': avgtop1,
                        'train_top3': avgtop3,
                        'train_loss': avgloss,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args.model_save_dir)

                if args.visdom:
                    viz.line(X=torch.ones((1, 2)).cpu() * iteration,
                             Y=torch.Tensor([avgloss, val_loss]).unsqueeze(0).cpu(),
                             win=loss_plot,
                             update='append')
                    viz.line(X=torch.ones((1, 4)).cpu() * iteration,
                             Y=torch.Tensor([avgtop3, val_top3, avgtop1,
                                             val_top1]).unsqueeze(0).cpu(),
                             win=eval_plot,
                             update='append')
                model.train()

            if iteration % train_step == 0 and iteration > 0:
                if args.visdom:
                    viz.line(X=torch.ones((1, 2)).cpu() * iteration,
                             Y=torch.Tensor([avgloss, val_loss]).unsqueeze(0).cpu(),
                             win=loss_plot,
                             update='append')
                    viz.line(X=torch.ones((1, 4)).cpu() * iteration,
                             Y=torch.Tensor([avgtop3, val_top3, avgtop1,
                                             val_top1]).unsqueeze(0).cpu(),
                             win=eval_plot,
                             update='append')
                # restart the running averages so each train_step window is
                # reported independently
                top1.reset()
                top3.reset()
                losses.reset()
                print('RESET::=> ', args.exp_name)
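
# ----------------------------------------------------------------------------
# validate and save_checkpoint are assumed to be defined elsewhere in the
# repo; the sketches below are hypothetical implementations written only to
# match the call sites above: validate returns (top1, top3, loss) averages,
# and save_checkpoint writes per-iteration snapshots plus the latest.pth
# index that the --resume branch reads back. The filenames follow the
# model_{:06d}.pth pattern the test script loads.
# ----------------------------------------------------------------------------
import shutil


def validate(args, val_loader, model, criterion):
    """Runs one pass over the validation loader and returns average metrics."""
    losses, top1, top3 = AverageMeter(), AverageMeter(), AverageMeter()
    model.eval()
    with torch.no_grad():
        for batch, targets, __, __ in val_loader:
            targets = targets.cuda(non_blocking=True)
            output = model(batch.cuda(non_blocking=True))
            loss = criterion(output, targets)
            prec1, prec3 = accuracy(output, targets, topk=(1, 3))
            losses.update(loss.item(), batch.size(0))
            top1.update(prec1[0], batch.size(0))
            top3.update(prec3[0], batch.size(0))
    return top1.avg, top3.avg, losses.avg


def save_checkpoint(state, is_best, save_dir):
    """Writes model/optimizer snapshots and the latest.pth resume index."""
    iteration = state['iteration']
    model_file_name = '{:s}/model_{:06d}.pth'.format(save_dir, iteration)
    optimizer_file_name = '{:s}/optimizer_{:06d}.pth'.format(save_dir, iteration)
    torch.save(state['state_dict'], model_file_name)
    torch.save(state['optimizer'], optimizer_file_name)
    torch.save({'iteration': iteration,
                'model_file_name': model_file_name,
                'optimizer_file_name': optimizer_file_name},
               '{:s}/latest.pth'.format(save_dir))
    if is_best:
        # keep a separate copy of the best-scoring model so far
        shutil.copyfile(model_file_name, '{:s}/model_best.pth'.format(save_dir))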