def get_transform(mode, args):
    """Build the randomized clip-augmentation controller and print it.

    Two pipelines are composed: ``null_transform`` (random sized crop,
    random horizontal flip, tensor conversion) and ``base_transform`` (the
    same steps plus randomly applied colour jitter, random grayscale and
    randomly applied Gaussian blur). Both pipelines are handed to a
    ``TwoClipTransform`` and a ``OneClipTransform``, which an
    ``A.TransformController`` combines with weights ``[0.5, 0.5]``.

    Args:
        mode: Not read by this function.
        args: Namespace providing ``seq_len`` and ``img_dim``.

    Returns:
        The ``A.TransformController`` instance (also printed to stdout).
    """
    # Doubled sequence length: for both rgb and flow.
    total_len = args.seq_len * 2
    per_frame = dict(consistent=False, seq_len=total_len)

    weak_steps = [
        A.RandomSizedCrop(size=args.img_dim, bottom_area=0.2, **per_frame),
        A.RandomHorizontalFlip(**per_frame),
        A.ToTensor(),
    ]
    null_transform = transforms.Compose(weak_steps)

    strong_crop = A.RandomSizedCrop(
        size=args.img_dim, bottom_area=0.2, **per_frame)
    jitter = transforms.RandomApply(
        [A.ColorJitter(0.4, 0.4, 0.4, 0.1, p=1.0, **per_frame)], p=0.8)
    gray = A.RandomGray(p=0.2, seq_len=total_len)
    blur = transforms.RandomApply(
        [A.GaussianBlur([.1, 2.], seq_len=total_len)], p=0.5)
    strong_flip = A.RandomHorizontalFlip(**per_frame)
    base_transform = transforms.Compose(
        [strong_crop, jitter, gray, blur, strong_flip, A.ToTensor()])

    # oneclip: temporally take one clip, random augment twice
    # twoclip: temporally take two clips, random augment for each
    # merge oneclip & twoclip transforms with 50%/50% probability
    two_clip = A.TwoClipTransform(
        base_transform, null_transform, seq_len=total_len, p=0.3)
    one_clip = A.OneClipTransform(
        base_transform, null_transform, seq_len=total_len)
    transform = A.TransformController(
        [two_clip, one_clip], weights=[0.5, 0.5])

    print(transform)
    return transform
def get_transform(mode, args):
    """Assemble the clip-augmentation controller used for training samples.

    A light pipeline (``null_transform``: crop, flip, to-tensor) and a
    heavier one (``base_transform``: crop, randomly applied colour jitter,
    random gray, randomly applied Gaussian blur, flip, to-tensor) are built
    and passed to both ``A.TwoClipTransform`` (with ``p=0.3``) and
    ``A.OneClipTransform``. ``A.TransformController`` receives the two with
    weights ``[0.5, 0.5]``.

    Args:
        mode: Not read by this function.
        args: Namespace providing ``seq_len`` and ``img_dim``.

    Returns:
        The configured ``A.TransformController``.
    """
    seq_len = args.seq_len * 2  # for both rgb and flow

    def _crop():
        # Fresh crop op with the settings shared by both pipelines.
        return A.RandomSizedCrop(size=args.img_dim,
                                 consistent=False,
                                 seq_len=seq_len,
                                 bottom_area=0.2)

    def _flip():
        # Fresh flip op with the settings shared by both pipelines.
        return A.RandomHorizontalFlip(consistent=False, seq_len=seq_len)

    null_transform = transforms.Compose([_crop(), _flip(), A.ToTensor()])

    base_ops = [_crop()]
    base_ops.append(
        transforms.RandomApply([
            A.ColorJitter(0.4, 0.4, 0.4, 0.1,
                          p=1.0,
                          consistent=False,
                          seq_len=seq_len)
        ], p=0.8))
    base_ops.append(A.RandomGray(p=0.2, seq_len=seq_len))
    base_ops.append(
        transforms.RandomApply(
            [A.GaussianBlur([.1, 2.], seq_len=seq_len)], p=0.5))
    base_ops.append(_flip())
    base_ops.append(A.ToTensor())
    base_transform = transforms.Compose(base_ops)

    clip_transforms = [
        A.TwoClipTransform(base_transform,
                           null_transform,
                           seq_len=seq_len,
                           p=0.3),
        A.OneClipTransform(base_transform, null_transform, seq_len=seq_len),
    ]
    return A.TransformController(clip_transforms, weights=[0.5, 0.5])
def get_data(args,
             mode='train',
             return_label=False,
             hierarchical_label=False,
             action_level_gt=False,
             num_workers=0,
             path_dataset='',
             path_data_info=''):
    """Create the ``DataLoader`` for the dataset named by ``args.dataset``.

    Args:
        args: Namespace providing ``dataset``, ``img_dim``, ``seq_len``,
            ``num_seq``, ``batch_size`` and (for hollywood2 / finegym) ``ds``.
        mode: ``'train'`` selects the random augmentation pipeline and a
            random sampler; any other value selects center crop and a
            sequential sampler. ``'test'`` additionally disables
            ``drop_last``.
        return_label: Forwarded to the datasets that accept it.
        hierarchical_label: Forwarded to Hollywood2 / FineGym.
        action_level_gt: Forwarded to FineGym.
        num_workers: Forwarded to the ``DataLoader``.
        path_dataset: Forwarded to the dataset constructor.
        path_data_info: Forwarded to the dataset constructor.

    Returns:
        A ``data.DataLoader`` over the selected dataset.

    Raises:
        Exception: If the requested label options are not available for the
            selected dataset.
        AssertionError: If a dataset-specific constraint is violated.
        ValueError: If ``args.dataset`` is not one of the supported names.
    """
    dataset_name = args.dataset

    # Reject label options that the selected dataset cannot provide.
    if hierarchical_label and dataset_name not in ['finegym', 'hollywood2']:
        raise Exception(
            'Hierarchical information is only implemented in finegym and hollywood2 datasets'
        )
    if return_label and not action_level_gt and dataset_name != 'finegym':
        raise Exception(
            'subaction only subactions available in finegym dataset')

    training = mode == 'train'
    if training:
        steps = [
            augmentation.RandomSizedCrop(size=args.img_dim,
                                         consistent=True,
                                         p=1.0),
            augmentation.RandomHorizontalFlip(consistent=True),
            augmentation.RandomGray(consistent=False, p=0.5),
            augmentation.ColorJitter(brightness=0.5,
                                     contrast=0.5,
                                     saturation=0.5,
                                     hue=0.25,
                                     p=1.0),
            augmentation.ToTensor(),
            augmentation.Normalize(),
        ]
    else:
        steps = [
            augmentation.CenterCrop(size=args.img_dim, consistent=True),
            augmentation.ToTensor(),
            augmentation.Normalize(),
        ]
    transform = transforms.Compose(steps)

    # Keyword arguments every dataset constructor below receives unchanged.
    paths = dict(path_dataset=path_dataset, path_data_info=path_data_info)

    if dataset_name == 'kinetics':
        dataset = Kinetics600(mode=mode,
                              transform=transform,
                              seq_len=args.seq_len,
                              num_seq=args.num_seq,
                              downsample=5,
                              return_label=return_label,
                              return_idx=False,
                              **paths)
    elif dataset_name == 'hollywood2':
        if return_label:
            assert action_level_gt, 'hollywood2 does not have subaction labels'
        dataset = Hollywood2(mode=mode,
                             transform=transform,
                             seq_len=args.seq_len,
                             num_seq=args.num_seq,
                             downsample=args.ds,
                             return_label=return_label,
                             hierarchical_label=hierarchical_label,
                             **paths)
    elif dataset_name == 'finegym':
        if hierarchical_label:
            assert not action_level_gt, 'finegym does not have hierarchical information at the action level'
        approx_fps = int(25 / args.ds)  # approx
        dataset = FineGym(mode=mode,
                          transform=transform,
                          seq_len=args.seq_len,
                          num_seq=args.num_seq,
                          fps=approx_fps,
                          return_label=return_label,
                          hierarchical_label=hierarchical_label,
                          action_level_gt=action_level_gt,
                          return_idx=False,
                          **paths)
    elif dataset_name == 'movienet':
        assert not return_label, 'Not yet implemented (actions not available online)'
        assert args.seq_len == 3, 'We only have 3 frames per subclip/scene, but always 3'
        dataset = MovieNet(mode=mode,
                           transform=transform,
                           num_seq=args.num_seq,
                           **paths)
    else:
        raise ValueError('dataset not supported')

    if training:
        sampler = data.RandomSampler(dataset)
    else:
        sampler = data.SequentialSampler(dataset)

    # test always same examples independently of batch size
    keep_partial_batch = mode == 'test'
    return data.DataLoader(dataset,
                           batch_size=args.batch_size,
                           sampler=sampler,
                           shuffle=False,
                           num_workers=num_workers,
                           pin_memory=True,
                           drop_last=not keep_partial_batch)
def test_10crop(dataset, model, criterion, transforms_cuda, device, epoch,
                args):
    """Evaluate ``model`` with center-, five- or ten-crop testing, then exit.

    For every (flip, crop) combination the dataset transform is replaced, the
    whole dataset is run through the model one sample at a time, and the
    softmax probabilities are accumulated per video name in ``prob_dict``.
    Accuracies are computed by ``summarize_probability`` and written to
    ``args.logger``. The process terminates through ``sys.exit(0)``.

    Args:
        dataset: Dataset whose ``transform``, ``return_path`` and
            ``return_label`` attributes are overwritten here.
        model: Network called as ``model(input_seq)``; the first element of
            its return value is used as logits.
        criterion: Unused.
        transforms_cuda: Callable applied to each batch after it has been
            moved to ``device``.
        device: Device the input batches are moved to.
        epoch: Epoch number, only used in the log messages.
        args: Namespace providing ``center_crop``, ``five_crop``,
            ``ten_crop``, ``logger``, ``seq_len``, ``num_seq``, ``img_dim``
            and ``workers``.

    Raises:
        ValueError: If none of the three crop flags is set.
    """
    prob_dict = {}
    model.eval()

    # aug_list: 1,2,3,4,5 = topleft, topright, bottomleft, bottomright, center
    # flip_list: 0,1 = raw, flip
    aug_list = flip_list = title = None
    # Independent ifs (not elif): when several flags are set the last wins.
    if args.center_crop:
        print('Test using center crop')
        args.logger.log('Test using center_crop\n')
        aug_list = [5]
        flip_list = [0]
        title = 'center'
    if args.five_crop:
        print('Test using 5 crop')
        args.logger.log('Test using 5_crop\n')
        aug_list = [5, 1, 2, 3, 4]
        flip_list = [0]
        title = 'five'
    if args.ten_crop:
        print('Test using 10 crop')
        args.logger.log('Test using 10_crop\n')
        aug_list = [5, 1, 2, 3, 4]
        flip_list = [0, 1]
        title = 'ten'
    if title is None:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError(
            'one of args.center_crop, args.five_crop or args.ten_crop '
            'must be set')

    def tr(x):
        """Apply ``transforms_cuda`` and split the frames into test samples.

        The result is viewed as (3, num_test_sample, num_seq, seq_len,
        img_dim, img_dim) and permuted so num_test_sample comes first.
        """
        batch_size = x.size(0)
        assert batch_size == 1
        num_test_sample = x.size(2) // (args.seq_len * args.num_seq)
        return transforms_cuda(x).view(
            3, num_test_sample, args.num_seq, args.seq_len, args.img_dim,
            args.img_dim).permute(1, 2, 0, 3, 4, 5)

    with torch.no_grad():
        # for loop through 10 types of augmentations, then average the
        # probability
        for flip_idx in flip_list:
            flip_command = 'left' if flip_idx == 0 else 'right'
            for aug_idx in aug_list:
                print('Aug type: %d; flip: %d' % (aug_idx, flip_idx))
                transform = transforms.Compose([
                    A.RandomHorizontalFlip(command=flip_command),
                    A.FiveCrop(size=(224, 224), where=aug_idx),
                    A.Scale(size=(args.img_dim, args.img_dim)),
                    A.ColorJitter(0.2, 0.2, 0.2, 0.1, p=0.3,
                                  consistent=True),
                    A.ToTensor()
                ])
                dataset.transform = transform
                dataset.return_path = True
                dataset.return_label = True

                # `data.Sequential` does not exist; the sequential sampler
                # is `data.SequentialSampler`.
                test_sampler = data.SequentialSampler(dataset)
                data_loader = data.DataLoader(dataset,
                                              batch_size=1,
                                              sampler=test_sampler,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

                for input_seq, target in tqdm(data_loader,
                                              total=len(data_loader)):
                    input_seq = tr(input_seq.to(device, non_blocking=True))
                    logit, _ = model(input_seq)

                    # average probability along the temporal window
                    probs = F.softmax(logit, dim=-1)
                    prob_mean = probs.mean(0, keepdim=True)
                    # NOTE(review): the previous code appended an undefined
                    # `prob_last`; it is taken here to be the probability of
                    # the last temporal window -- confirm against
                    # summarize_probability.
                    prob_last = probs[-1:]

                    # assumes the dataset yields (label, video name) as the
                    # target when return_label / return_path are set --
                    # TODO confirm
                    _, vname = target
                    vname = vname[0]
                    entry = prob_dict.setdefault(
                        vname, {'mean_prob': [], 'last_prob': []})
                    entry['mean_prob'].append(prob_mean)
                    entry['last_prob'].append(prob_last)

                # show intermediate result after the first (center) crop
                if (title == 'ten') and (flip_idx == 0) and (aug_idx == 5):
                    print('center-crop result:')
                    acc_1 = summarize_probability(
                        prob_dict, data_loader.dataset.encode_action,
                        'center')
                    args.logger.log('center-crop:')
                    args.logger.log(
                        'test Epoch: [{0}]\t'
                        'Mean: Acc@1: {acc[0].avg:.4f} Acc@5: {acc[1].avg:.4f}'
                        .format(epoch, acc=acc_1))

            # show intermediate result after the five un-flipped crops
            if (title == 'ten') and (flip_idx == 0):
                print('five-crop result:')
                acc_5 = summarize_probability(
                    prob_dict, data_loader.dataset.encode_action, 'five')
                args.logger.log('five-crop:')
                args.logger.log(
                    'test Epoch: [{0}]\t'
                    'Mean: Acc@1: {acc[0].avg:.4f} Acc@5: {acc[1].avg:.4f}'.
                    format(epoch, acc=acc_5))

    # show final result
    print('%s-crop result:' % title)
    acc_final = summarize_probability(prob_dict,
                                      data_loader.dataset.encode_action,
                                      'ten')
    args.logger.log('%s-crop:' % title)
    args.logger.log(
        'test Epoch: [{0}]\t'
        'Mean: Acc@1: {acc[0].avg:.4f} Acc@5: {acc[1].avg:.4f}'.format(
            epoch, acc=acc_final))
    sys.exit(0)
def main(args):
    """Train a MemDPC model, optionally resuming from a checkpoint.

    Seeds the RNGs, builds the model, optimizer, data loaders and a
    ``LambdaLR`` scheduler, restores state from ``args.resume`` when given,
    then alternates ``train_one_epoch`` / ``validate`` and saves a checkpoint
    after every epoch. The process terminates through ``sys.exit(0)``.

    Args:
        args: Namespace providing ``seed``, ``gpu``, ``batch_size``,
            ``model``, ``img_dim``, ``num_seq``, ``seq_len``, ``net``,
            ``pred_step``, ``mem_size``, ``lr``, ``wd``, ``dataset``,
            ``resume``, ``start_epoch`` and ``epochs``. Several attributes
            are written back (``batch_size``, ``iteration``,
            ``start_epoch``, ``img_path``, ``model_path``, ``logger``,
            ``writer_val``, ``writer_train``).

    Raises:
        NotImplementedError: If ``args.model`` is not ``'memdpc'``.
    """
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda')
    # The batch size is multiplied by the number of comma-separated GPU ids.
    num_gpu = len(str(args.gpu).split(','))
    args.batch_size = num_gpu * args.batch_size

    ### model ###
    if args.model == 'memdpc':
        model = MemDPC_BD(sample_size=args.img_dim,
                          num_seq=args.num_seq,
                          seq_len=args.seq_len,
                          network=args.net,
                          pred_step=args.pred_step,
                          mem_size=args.mem_size)
    else:
        raise NotImplementedError('wrong model!')

    model.to(device)
    model = nn.DataParallel(model)
    # Checkpoints are saved/loaded on the unwrapped module.
    model_without_dp = model.module

    ### optimizer ###
    params = model.parameters()
    optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.wd)
    criterion = nn.CrossEntropyLoss()

    ### data ###
    transform = transforms.Compose([
        A.RandomSizedCrop(size=224, consistent=True,
                          p=1.0),  # crop from 256 to 224
        A.Scale(size=(args.img_dim, args.img_dim)),
        A.RandomHorizontalFlip(consistent=True),
        A.RandomGray(consistent=False, p=0.25),
        A.ColorJitter(0.5, 0.5, 0.5, 0.25, consistent=False, p=1.0),
        A.ToTensor(),
        A.Normalize()
    ])

    train_loader = get_data(transform, 'train')
    val_loader = get_data(transform, 'val')

    if 'ucf' in args.dataset:
        lr_milestones_eps = [300, 400]
    elif 'k400' in args.dataset:
        lr_milestones_eps = [120, 160]
    else:
        lr_milestones_eps = [1000]  # NEVER
    # Milestones are converted from epochs to iterations.
    lr_milestones = [len(train_loader) * m for m in lr_milestones_eps]
    print('=> Use lr_scheduler: %s eps == %s iters' %
          (str(lr_milestones_eps), str(lr_milestones)))

    def lr_lambda(ep):
        return MultiStepLR_Restart_Multiplier(ep,
                                              gamma=0.1,
                                              step=lr_milestones,
                                              repeat=1)

    lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                               lr_lambda=lr_lambda)

    best_acc = 0
    args.iteration = 1

    ### restart training ###
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading resumed checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            args.start_epoch = checkpoint['epoch']
            args.iteration = checkpoint['iteration']
            best_acc = checkpoint['best_acc']
            model_without_dp.load_state_dict(checkpoint['state_dict'])
            try:
                optimizer.load_state_dict(checkpoint['optimizer'])
            except Exception:
                # Best effort: training continues with a fresh optimizer
                # state. `Exception` (not a bare except) so Ctrl-C and
                # SystemExit are no longer swallowed here.
                print('[WARNING] Not loading optimizer states')
            print("=> loaded resumed checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("[Warning] no checkpoint found at '{}'".format(
                args.resume))
            sys.exit(0)

    # logging tools
    args.img_path, args.model_path = set_path(args)
    args.logger = Logger(path=args.img_path)
    args.logger.log('args=\n\t\t' + '\n\t\t'.join(
        ['%s:%s' % (str(k), str(v)) for k, v in vars(args).items()]))

    args.writer_val = SummaryWriter(logdir=os.path.join(args.img_path, 'val'))
    args.writer_train = SummaryWriter(
        logdir=os.path.join(args.img_path, 'train'))

    torch.backends.cudnn.benchmark = True

    ### main loop ###
    for epoch in range(args.start_epoch, args.epochs):
        # Re-seed per epoch with the epoch number.
        np.random.seed(epoch)
        random.seed(epoch)

        train_loss, train_acc = train_one_epoch(train_loader, model,
                                                criterion, optimizer,
                                                lr_scheduler, device, epoch,
                                                args)
        val_loss, val_acc = validate(val_loader, model, criterion, device,
                                     epoch, args)

        # save check_point
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_dict = {
            'epoch': epoch,
            'state_dict': model_without_dp.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
            'iteration': args.iteration
        }
        save_checkpoint(save_dict,
                        is_best,
                        filename=os.path.join(args.model_path,
                                              'epoch%s.pth.tar' % str(epoch)),
                        keep_all=False)

    print('Training from ep %d to ep %d finished' %
          (args.start_epoch, args.epochs))
    sys.exit(0)
def main(args):
    """Train, fine-tune or test the ``LC`` classifier on UCF101 / HMDB51.

    Builds the model, parameter groups, optimizer and scheduler. With
    ``args.test`` set, a checkpoint (or random weights) is evaluated through
    ``test`` and the process exits. Otherwise weights are optionally restored
    from ``args.resume`` or ``args.pretrain`` and the train / validate /
    checkpoint loop runs. The process terminates through ``sys.exit``.

    Args:
        args: Namespace providing ``seed``, ``gpu``, ``batch_size``,
            ``dataset``, ``model``, ``img_dim``, ``num_seq``, ``seq_len``,
            ``net``, ``dropout``, ``train_what``, ``lr``, ``wd``,
            ``schedule``, ``test``, ``resume``, ``pretrain``,
            ``start_epoch`` and ``epochs``. Several attributes are written
            back (e.g. ``batch_size``, ``num_class``, ``iteration``,
            ``old_lr``, ``logger``, ``img_path``, ``model_path``, writers).

    Raises:
        ValueError: If ``args.model`` is not ``'lc'`` or ``args.dataset`` has
            no learning-rate schedule defined here.
    """
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda')
    # The batch size is multiplied by the number of comma-separated GPU ids.
    num_gpu = len(str(args.gpu).split(','))
    args.batch_size = num_gpu * args.batch_size

    if args.dataset == 'ucf101':
        args.num_class = 101
    elif args.dataset == 'hmdb51':
        args.num_class = 51

    ### classifier model ###
    if args.model == 'lc':
        model = LC(sample_size=args.img_dim,
                   num_seq=args.num_seq,
                   seq_len=args.seq_len,
                   network=args.net,
                   num_class=args.num_class,
                   dropout=args.dropout,
                   train_what=args.train_what)
    else:
        raise ValueError('wrong model!')

    model.to(device)
    model = nn.DataParallel(model)
    # Checkpoints are saved/loaded on the unwrapped module.
    model_without_dp = model.module
    criterion = nn.CrossEntropyLoss()

    ### optimizer ###
    params = None
    if args.train_what == 'ft':
        print('=> finetune backbone with smaller lr')
        params = []
        for name, param in model.module.named_parameters():
            if ('resnet' in name) or ('rnn' in name):
                params.append({'params': param, 'lr': args.lr / 10})
            else:
                params.append({'params': param})
    elif args.train_what == 'last':
        print('=> train only last layer')
        params = []
        for name, param in model.named_parameters():
            if ('bone' in name) or ('agg' in name) or ('mb' in name) or (
                    'network_pred' in name):
                param.requires_grad = False
            else:
                params.append({'params': param})
    else:
        pass  # train all layers

    print('\n===========Check Grad============')
    for name, param in model.named_parameters():
        print(name, param.requires_grad)
    print('=================================\n')

    if params is None:
        params = model.parameters()
    optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.wd)

    ### scheduler ###
    # Default decay epochs per dataset, used when args.schedule is empty.
    default_steps = {'hmdb51': [150, 250], 'ucf101': [300, 400]}
    if args.dataset not in default_steps:
        # Previously this surfaced as a NameError on `lr_lambda` below.
        raise ValueError(
            "no lr schedule defined for dataset '{}'".format(args.dataset))
    step = args.schedule
    if not step:
        step = default_steps[args.dataset]

    def lr_lambda(ep):
        return MultiStepLR_Restart_Multiplier(ep,
                                              gamma=0.1,
                                              step=step,
                                              repeat=1)

    lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                               lr_lambda=lr_lambda)
    print('=> Using scheduler at {} epochs'.format(step))

    args.old_lr = None
    best_acc = 0
    args.iteration = 1

    ### if in test mode ###
    if args.test:
        if os.path.isfile(args.test):
            print("=> loading test checkpoint '{}'".format(args.test))
            checkpoint = torch.load(args.test,
                                    map_location=torch.device('cpu'))
            try:
                model_without_dp.load_state_dict(checkpoint['state_dict'])
            except Exception:
                # Fall back to the project's tolerant loader. `Exception`
                # (not a bare except) so Ctrl-C / SystemExit propagate.
                print(
                    '=> [Warning]: weight structure is not equal to test model; Load anyway =='
                )
                model_without_dp = neq_load_customized(
                    model_without_dp, checkpoint['state_dict'])
            epoch = checkpoint['epoch']
            print("=> loaded testing checkpoint '{}' (epoch {})".format(
                args.test, checkpoint['epoch']))
        elif args.test == 'random':
            epoch = 0
            print("=> loaded random weights")
        else:
            print("=> no checkpoint found at '{}'".format(args.test))
            sys.exit(0)

        args.logger = Logger(path=os.path.dirname(args.test))
        _, test_dataset = get_data(None, 'test')
        test_loss, test_acc = test(test_dataset, model, criterion, device,
                                   epoch, args)
        sys.exit()

    ### restart training ###
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading resumed checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            args.start_epoch = checkpoint['epoch']
            args.iteration = checkpoint['iteration']
            best_acc = checkpoint['best_acc']
            model_without_dp.load_state_dict(checkpoint['state_dict'])
            try:
                optimizer.load_state_dict(checkpoint['optimizer'])
            except Exception:
                # Best effort: continue with a fresh optimizer state.
                print('[WARNING] Not loading optimizer states')
            print("=> loaded resumed checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            sys.exit(0)

    # Pretrained weights are only considered when not resuming.
    if (not args.resume) and args.pretrain:
        if args.pretrain == 'random':
            print('=> using random weights')
        elif os.path.isfile(args.pretrain):
            print("=> loading pretrained checkpoint '{}'".format(
                args.pretrain))
            checkpoint = torch.load(args.pretrain,
                                    map_location=torch.device('cpu'))
            model_without_dp = neq_load_customized(model_without_dp,
                                                   checkpoint['state_dict'])
            print("=> loaded pretrained checkpoint '{}' (epoch {})".format(
                args.pretrain, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrain))
            sys.exit(0)

    ### data ###
    transform = transforms.Compose([
        A.RandomSizedCrop(consistent=True, size=224, p=1.0),
        A.Scale(size=(args.img_dim, args.img_dim)),
        A.RandomHorizontalFlip(consistent=True),
        A.ColorJitter(brightness=0.5,
                      contrast=0.5,
                      saturation=0.5,
                      hue=0.25,
                      p=0.3,
                      consistent=True),
        A.ToTensor(),
        A.Normalize()
    ])
    val_transform = transforms.Compose([
        A.RandomSizedCrop(consistent=True, size=224, p=0.3),
        A.Scale(size=(args.img_dim, args.img_dim)),
        A.RandomHorizontalFlip(consistent=True),
        A.ColorJitter(brightness=0.2,
                      contrast=0.2,
                      saturation=0.2,
                      hue=0.1,
                      p=0.3,
                      consistent=True),
        A.ToTensor(),
        A.Normalize()
    ])

    train_loader, _ = get_data(transform, 'train')
    val_loader, _ = get_data(val_transform, 'val')

    # setup tools
    args.img_path, args.model_path = set_path(args)
    args.writer_val = SummaryWriter(logdir=os.path.join(args.img_path, 'val'))
    args.writer_train = SummaryWriter(
        logdir=os.path.join(args.img_path, 'train'))
    torch.backends.cudnn.benchmark = True

    ### main loop ###
    for epoch in range(args.start_epoch, args.epochs):
        train_loss, train_acc = train_one_epoch(train_loader, model,
                                                criterion, optimizer, device,
                                                epoch, args)
        val_loss, val_acc = validate(val_loader, model, criterion, device,
                                     epoch, args)
        lr_scheduler.step(epoch)

        # save check_point
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_dict = {
            'epoch': epoch,
            'backbone': args.net,
            'state_dict': model_without_dp.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
            'iteration': args.iteration
        }
        save_checkpoint(save_dict,
                        is_best,
                        filename=os.path.join(args.model_path,
                                              'epoch%s.pth.tar' % str(epoch)),
                        keep_all=False)

    print('Training from ep %d to ep %d finished' %
          (args.start_epoch, args.epochs))
    sys.exit(0)
def test(dataset, model, criterion, device, epoch, args):
    """Evaluate ``model`` with center-, five- or ten-crop testing, then exit.

    For every (flip, crop) combination the dataset transform is replaced, the
    dataset is run through the model one sample at a time, and the averaged
    softmax probabilities are collected per video name in ``prob_dict``.
    Accuracies come from ``summarize_probability`` and are written to
    ``args.logger``. The process terminates through ``sys.exit(0)``.

    Args:
        dataset: Dataset whose ``transform``, ``return_path`` and
            ``return_label`` attributes are overwritten here.
        model: Network called as ``model(input_seq)``; the first element of
            its return value is used as the class scores.
        criterion: Unused.
        device: Device the input batches are moved to.
        epoch: Epoch number, only used in the log messages.
        args: Namespace providing ``center_crop``, ``five_crop``,
            ``ten_crop``, ``logger`` and ``img_dim``.

    Raises:
        ValueError: If none of the three crop flags is set.
    """
    # 10-crop then average the probability
    prob_dict = {}
    model.eval()

    # aug_list: 1,2,3,4,5 = top-left, top-right, bottom-left, bottom-right, center
    # flip_list: 0,1 = original, horizontal-flip
    aug_list = flip_list = title = None
    # Independent ifs (not elif): when several flags are set the last wins.
    if args.center_crop:
        print('Test using center crop')
        args.logger.log('Test using center_crop\n')
        aug_list = [5]
        flip_list = [0]
        title = 'center'
    if args.five_crop:
        print('Test using 5 crop')
        args.logger.log('Test using 5_crop\n')
        aug_list = [5, 1, 2, 3, 4]
        flip_list = [0]
        title = 'five'
    if args.ten_crop:
        print('Test using 10 crop')
        args.logger.log('Test using 10_crop\n')
        aug_list = [5, 1, 2, 3, 4]
        flip_list = [0, 1]
        title = 'ten'
    if title is None:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError(
            'one of args.center_crop, args.five_crop or args.ten_crop '
            'must be set')

    with torch.no_grad():
        for flip_idx in flip_list:
            flip_command = 'left' if flip_idx == 0 else 'right'
            for aug_idx in aug_list:
                print('Aug type: %d; flip: %d' % (aug_idx, flip_idx))
                transform = transforms.Compose([
                    A.RandomHorizontalFlip(command=flip_command),
                    A.FiveCrop(size=(224, 224), where=aug_idx),
                    A.Scale(size=(args.img_dim, args.img_dim)),
                    A.ColorJitter(brightness=0.2,
                                  contrast=0.2,
                                  saturation=0.2,
                                  hue=0.1,
                                  p=0.3,
                                  consistent=True),
                    A.ToTensor(),
                ])
                dataset.transform = transform
                dataset.return_path = True
                dataset.return_label = True

                # Results are keyed by video name, so the random visiting
                # order does not affect the accumulated probabilities.
                data_sampler = data.RandomSampler(dataset)
                data_loader = data.DataLoader(dataset,
                                              batch_size=1,
                                              sampler=data_sampler,
                                              shuffle=False,
                                              num_workers=16,
                                              pin_memory=True)

                for input_seq, target in tqdm(data_loader,
                                              total=len(data_loader)):
                    input_seq = input_seq.to(device)
                    # The label itself is not needed here, only the name.
                    _, vname = target
                    # squeeze the '1' batch dim
                    input_seq = input_seq.squeeze(0)
                    output, _ = model(input_seq)

                    # Softmax over dim 2, then mean over dim 1 and dim 0.
                    prob_mean = nn.functional.softmax(output, 2).mean(1).mean(
                        0, keepdim=True)

                    vname = vname[0]
                    prob_dict.setdefault(vname, []).append(prob_mean)

                # show intermediate result
                if (title == 'ten') and (flip_idx == 0) and (aug_idx == 5):
                    print('center-crop result:')
                    acc_1 = summarize_probability(
                        prob_dict, data_loader.dataset.encode_action,
                        'center')
                    args.logger.log('center-crop:')
                    args.logger.log(
                        'test Epoch: [{0}]\t'
                        'Mean: Acc@1: {acc[0].avg:.4f} Acc@5: {acc[1].avg:.4f}'
                        .format(epoch, acc=acc_1))

            # show intermediate result
            if (title == 'ten') and (flip_idx == 0):
                print('five-crop result:')
                acc_5 = summarize_probability(
                    prob_dict, data_loader.dataset.encode_action, 'five')
                args.logger.log('five-crop:')
                args.logger.log(
                    'test Epoch: [{0}]\t'
                    'Mean: Acc@1: {acc[0].avg:.4f} Acc@5: {acc[1].avg:.4f}'.
                    format(epoch, acc=acc_5))

    # show final result
    print('%s-crop result:' % title)
    acc_final = summarize_probability(prob_dict,
                                      data_loader.dataset.encode_action,
                                      'ten')
    args.logger.log('%s-crop:' % title)
    args.logger.log(
        'test Epoch: [{0}]\t'
        'Mean: Acc@1: {acc[0].avg:.4f} Acc@5: {acc[1].avg:.4f}'.format(
            epoch, acc=acc_final))
    sys.exit(0)