def main():
    """Smoke-test the Oxford-Flowers training DataLoader.

    Loads the official split ids and labels from the annotation .mat file,
    builds the TSN-style augmentation pipeline, then iterates the training
    loader once printing each batch's tensor shape.
    """
    args = parser.parse_args()
    splitanno = sio.loadmat('./dataanno/anno.mat')
    # labelmap = sio.loadmat('./dataanno/setid.mat')
    trainid = splitanno['trnid'][0].tolist()
    valid = splitanno['valid'][0].tolist()    # validation split ids (unused here)
    testid = splitanno['tstid'][0].tolist()   # test split ids (unused here)
    labellist = splitanno['labels'][0].tolist()
    # ImageNet statistics; BNInception expects rolled-channel, 0-255 input,
    # hence the roll/div switches on Stack/ToTorchFormatTensor below.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = FlowerDataSet(
        indexlist=trainid,
        labellist=labellist,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.Compose([
                GroupMultiScaleCrop(224, [1, .875, .75, .66]),
                GroupRandomHorizontalFlip(is_flow=False)
            ]),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    for i, (input, target) in enumerate(train_loader):
        # BUG FIX: `print input.size()` was Python 2 statement syntax and is
        # a SyntaxError under Python 3, which the rest of this file targets.
        print(input.size())
def forward(self, input):
    """Compute video-level class scores from a batch of stacked frames.

    The 2D backbone scores every frame, then frame scores are max-pooled
    in fixed chunks of 25 (assumes 25 sampled frames per video — TODO
    confirm against the data sampler) before the optional classifier and
    softmax.

    Cleanups vs. the original: removed the unused local `b`, removed the
    dead commented-out MaxPool2d variant, and replaced the quadratic
    torch.cat-in-a-loop chunk pooling with a single torch.stack (same
    result, one allocation).
    """
    global args
    args = parser.parse_args()  # kept: refreshes the module-level args as before
    seg = 25  # frames pooled per video; hard-coded to match the sampler
    sample_len = (3 if self.modality == "RGB" else 1) * self.new_length
    if self.modality == 'RGBDiff':
        sample_len = 3 * self.new_length
        input = self._get_diff(input)
    # Fold all frames into the batch dimension for the 2D backbone.
    base_out = self.base_model(
        input.view((-1, sample_len) + input.size()[-2:]))
    # Element-wise max over each chunk of `seg` consecutive frame scores;
    # the final chunk may be shorter, exactly as in the original loop.
    base_out = torch.stack([
        base_out[i:i + seg].max(0).values
        for i in range(0, len(base_out), seg)
    ], 0)
    if self.dropout > 0:
        base_out = self.new_fc(base_out)
    base_out = self.softmax(base_out)
    return base_out.squeeze(1)
def main():
    """Entry point: build the Moments-in-Time spatial-stream pipeline and run it."""
    global arg
    arg = parser.parse_args()
    print(arg)

    # Dataset metadata; only the category count is consumed here.
    categories, train_list, val_list, root_path, prefix = return_moments()
    num_class = len(categories)
    assert num_class == arg.num_classes

    # Hard-coded data locations for this machine.
    loader_factory = spatial_dataloader(
        BATCH_SIZE=arg.batch_size,
        num_workers=8,
        path='/media/lili/fce9875a-a5c8-4c35-8f60-db60be29ea5d/Moments_in_Time_Raw/',
        train_list='./img_list/new_moments_train_list.txt',
        test_list='./img_list/new_moments_validation_list.txt')
    train_loader, val_loader, test_video = loader_factory.run()

    # Wire everything into the trainer and start.
    trainer = Spatial_CNN(nb_epochs=arg.epochs,
                          lr=arg.lr,
                          batch_size=arg.batch_size,
                          resume=arg.resume,
                          start_epoch=arg.start_epoch,
                          evaluate=arg.evaluate,
                          train_loader=train_loader,
                          test_loader=val_loader,
                          test_video=test_video)
    trainer.run()
def main():
    """Parse CLI args, build the UCF101 spatial-stream data pipeline and train."""
    global arg
    arg = parser.parse_args()
    print(arg)
    # Prepare DataLoader
    data_loader = spatial_dataloader.spatial_dataloader(
        BATCH_SIZE=arg.batch_size,   # batch size
        num_workers=8,               # 8 worker subprocesses for data loading
        path=opt.spatial_train_data_root,
        ucf_list=opt.ucf_list,
        ucf_split=opt.ucf_split,
    )
    train_loader, test_loader, test_video = data_loader.run()
    # test_loader: 71877 samples; evaluation DataLoader with batch_size 25
    # train_loader: 9537 samples; training DataLoader
    # test_video: dict of 3783 entries mapping clip name to label,
    #             e.g. {'Unxxxx_g04_c02': 96}
    # i.e. the train/test splits plus the per-video label map
    # Model
    model = Spatial_CNN(
        nb_epochs=arg.epochs,
        lr=arg.lr,
        batch_size=arg.batch_size,
        resume=arg.resume,
        start_epoch=arg.start_epoch,
        evaluate=arg.evaluate,
        train_loader=train_loader,
        test_loader=test_loader,
        test_video=test_video
    )
    # Training
    model.run()
def main():
    """Build a TRN model from CLI arguments, then either evaluate or train."""
    global args
    args = parser.parse_args()
    check_rootfolders()
    # Frames per snippet: 1 for RGB, 5 otherwise, unless overridden.
    if not args.new_length:  # None -> derive from modality
        data_length = 1 if args.modality == 'RGB' else 5
    else:
        data_length = args.new_length
    trnmodel = TRN(
        dataset=args.dataset,
        num_segments=args.num_segments,
        modality=args.modality,
        new_length=data_length,
        # lr = args.lr,
        loss_type=args.loss_type,            # default="nll"
        weight_decay=args.weight_decay,      # default=5e-4
        lr_steps=args.lr_steps,              # default=[50, 100]
        momentum=args.momentum,              # default=0.9
        gpus=args.gpus,
        clip_gradient=args.clip_gradient,
        base_model=args.arch,                # e.g. "resnet50"
        dropout=args.dropout,                # e.g. 0.7
        img_feature_dim=args.img_feature_dim,  # default 256
        partial_bn=not args.no_partialbn,    # default=False
        consensus_type=args.consensus_type,  # 'TRN' or multi-scale 'MTRN'
        batch_size=args.batch_size,          # default=64
        workers=args.workers,                # default=2
        resume=args.resume,                  # path to a pretrained checkpoint
        epochs=args.epochs,
        start_epoch=args.start_epoch,
        # ifprintmodel= args.print_model in [1, 'True'],  # default=1
        print_freq=1,
        eval_freq=1)
    # ---------- evaluation mode ------------------------------------------
    # NOTE: 'evalutate' spelling below is kept as-is (runtime output).
    print('evalutate=', args.evaluate)
    if str(args.evaluate).lower() == 'true' or args.evaluate == '1':
        # Run inference over the pickled test set and dump raw logits.
        logits = trnmodel(args.test_pickle)
        logits = np.array(logits)
        print('output size: ', logits.shape)
        with np.printoptions(threshold=np.inf):
            print(logits)
    else:
        trnmodel.do_training(ifprint=args.print_training_in_terminal)
def main():
    """Evaluate a VPN mapper on the sequential OVM dataset.

    Builds the validation pipeline, restores weights from --weights if
    given, and runs the project's eval() routine writing results under
    the visualization web path.
    """
    global args, web_path, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    network_config = Foo(
        encoder=args.encoder,
        decoder=args.decoder,
        fc_dim=args.fc_dim,
        num_views=args.n_views,
        num_class=94,  # fixed label-set size for this dataset
        transform_type=args.transform_type,
        output_size=args.label_resolution,
    )
    val_dataset = Seq_OVMDataset(
        args.test_dir,
        pix_file=args.pix_file,
        transform=torchvision.transforms.Compose([
            Stack(roll=True),
            ToTorchFormatTensor(div=True),
            GroupNormalize(mean_rgb, std_rgb)
        ]),
        n_views=network_config.num_views,
        resolution=args.input_resolution,
        label_res=args.label_resolution,
        use_mask=args.use_mask,
        is_train=False)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             pin_memory=True)
    mapper = VPNModel(network_config)
    mapper = nn.DataParallel(mapper.cuda())
    if args.weights:
        if os.path.isfile(args.weights):
            print(("=> loading checkpoint '{}'".format(args.weights)))
            checkpoint = torch.load(args.weights)
            args.start_epoch = checkpoint['epoch']
            mapper.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message used to format args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.weights, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.weights)))
    web_path = os.path.join(args.visualize, args.store_name)
    # NOTE(review): size_average is deprecated in modern PyTorch; kept for
    # compatibility with the torch version this project pins.
    criterion = nn.NLLLoss(weight=None, size_average=True)
    eval(val_loader, mapper, criterion, web_path)
    # (removed a duplicate re-assignment of web_path that had no effect)
def main():
    """Driver: optional wandb setup, data/model construction, then train/test."""
    args = prepare(parser.parse_args())

    use_wandb = args.wandb
    if use_wandb:
        wandb.init(project=args.project_name)
        wandb.run.name = args.subproject_name
        wandb.config.update(args)

    train_loader, index_loader, val_loader, test_loader = dataset_manager(args)
    model, optimizer, scheduler, criterion, hash_center = model_manager(args)

    if use_wandb:
        wandb.watch(model)

    if args.train:
        train(args, train_loader, index_loader, val_loader, model,
              optimizer, scheduler, criterion, hash_center)

    if args.test:
        # T=0: evaluation threshold/temperature as used by the project.
        mAP = evaluation(args, index_loader, test_loader, model, T=0)
        print("mAP : {:.3f}".format(mAP))
def main():
    """Pin fixed options, set up seeding and distributed mode, launch workers."""
    global args
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.available_gpus

    # Options this entry point fixes rather than exposing on the CLI.
    fixed_options = {
        'consensus_type': 'avg',
        'pretrain': 'imagenet',
        'tune_from': None,
        'img_feature_dim': 256,
        'loss_type': 'nll',
        'evaluate': False,
    }
    for key, value in fixed_options.items():
        setattr(args, key, value)

    if args.seed is not None:
        # Deterministic mode: reproducible but slower.
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])
    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if not args.multiprocessing_distributed:
        # Single process: run the worker inline.
        main_worker(args.gpu, ngpus_per_node, args)
    else:
        # One process per GPU on this node; scale world_size accordingly
        # and launch via torch.multiprocessing.
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker,
                 nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args))
batch_size=args.batch_size, shuffle=False, num_workers=args.workers) model = ActionModifiers(test_set, args) if args.gpu: model = model.cuda() #TODO implement gpu option properly everywhere evaluator = Evaluator(test_set, model) checkpoint = torch.load(args.load) model.load_state_dict(checkpoint['net']) print('loaded model from', os.path.basename(args.load)) v2a_ant, v2a_all, a2v_ant, a2v_all, v2action = test( model, test_loader, test_set, evaluator) print('Video-to-Adverb Antonym: %.3f' % v2a_ant) print('Video-to-Adverb All: %.3f' % v2a_all) print('Adverb-to-Video Antonym: %.3f' % a2v_ant) print('Adverb-to-Video All: %.3f' % a2v_all) print('Video-to-Action: %.3f' % v2action) if __name__ == '__main__': args = parser.parse_args() args.batch_size = 1 if args.modality == 'both': args.modality = ['rgb', 'flow'] else: args.modality = [args.modality] main(args)
def main():
    """Train or test an MFF gesture model on SHGD / Jester.

    Handles dataset selection, optional resume and ImageNet-style
    pretraining (with first-conv channel conversion for IR/D/IRD
    modalities), data loaders, loss/optimizer setup and the epoch loop.
    """
    global args, best_prec1
    best_prec1 = 0  # BUG FIX: was unbound when not resuming -> NameError later
    args = parser.parse_args()
    check_rootfolders()
    if not args.test:
        categories, args.train_list, args.val_list, args.root_path, prefix = \
            datasets_video.return_dataset(args.dataset, args.modality)
    else:
        categories, args.test_list, args.root_path, prefix = \
            datasets_video.return_dataset('SHGDTuples', args.modality)
    num_class = len(categories)
    args.store_name = '_'.join([
        args.dataset, args.modality, args.arch,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = MFF(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                dataset=args.dataset)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously printed args.test instead of the
            # checkpoint path that was loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.pretrained:
        # Name of the first conv weight differs per backbone.
        if args.arch == 'squeezenet1_1':
            name = 'module.base_model.0.weight'
        else:
            name = 'module.base_model.features.0.0.weight'
        if os.path.isfile(args.pretrained):
            pretrained_dict = torch.load(args.pretrained)
            pretrained_state_dict = pretrained_dict['state_dict']
            # Drop the final classifier weights (class counts may differ).
            pretrained_state_dict = {
                k: v
                for k, v in pretrained_state_dict.items()
                if 'module.consensus.classifier.3.' not in k
            }
            model_dict = model.state_dict()
            weight_conv_t = pretrained_state_dict[name]
            # BUG FIX: the channel-conversion / load code below used to run
            # even when no pretrained file was found, raising NameError on
            # weight_conv_t and model_dict. It is now inside the isfile branch.
            if args.modality == 'IRD':
                # 3-channel RGB conv -> 2 channels: collapse input channels,
                # then duplicate for the two IRD planes.
                weight_conv_t = weight_conv_t.sum(1)
                weight_conv_t = weight_conv_t.unsqueeze(1)
                weight_conv_t = weight_conv_t.mean(1)
                weight_conv_t = torch.stack((weight_conv_t, weight_conv_t), 1)
                pretrained_state_dict[name] = weight_conv_t
                model_dict.update(pretrained_state_dict)
                print("Converted the first conv layer to 2 channels.")
            if args.modality == 'IR' or args.modality == 'D':
                # 3-channel RGB conv -> 1 channel.
                weight_conv_t = weight_conv_t.sum(1)
                weight_conv_t = weight_conv_t.unsqueeze(1)
                weight_conv_t = weight_conv_t.mean(1)
                weight_conv_t = weight_conv_t.unsqueeze(1)
                pretrained_state_dict[name] = weight_conv_t
                model_dict.update(pretrained_state_dict)
                print("Converted the first conv layer to 1 channel.")
            model.load_state_dict(model_dict)
            print("=> loaded pretrained model checkpoint '{}'".format(
                args.pretrained))
        else:
            print(("=> no pretrained model checkpoint found at '{}'".format(
                args.pretrained)))

    print(model)
    # Count trainable parameters.
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params_num = sum([np.prod(p.size()) for p in model_parameters])
    print('Total number of parameters:' + str(params_num))

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    if args.dataset == 'SHGD':
        from SHGD import DataSet
    if args.dataset == 'jester':
        from Jester import DataSet

    if not args.test:
        train_loader = torch.utils.data.DataLoader(
            DataSet(args.root_path,
                    args.train_list,
                    num_segments=args.num_segments,
                    modality=args.modality,
                    image_tmpl=prefix,
                    dataset=args.dataset,
                    transform=torchvision.transforms.Compose([
                        train_augmentation,
                        Stack(),
                        ToTorchFormatTensor(),
                        normalize,
                    ])),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=False)
        val_loader = torch.utils.data.DataLoader(
            DataSet(args.root_path,
                    args.val_list,
                    num_segments=args.num_segments,
                    modality=args.modality,
                    image_tmpl=prefix,
                    dataset=args.dataset,
                    random_shift=False,
                    transform=torchvision.transforms.Compose([
                        GroupScale(int(scale_size)),
                        GroupCenterCrop(crop_size),
                        Stack(),
                        ToTorchFormatTensor(),
                        normalize,
                    ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=False)
    else:
        test_loader = torch.utils.data.DataLoader(
            DataSet(args.root_path,
                    args.test_list,
                    num_segments=args.num_segments,
                    modality=args.modality,
                    image_tmpl=prefix,
                    dataset=args.dataset,
                    random_shift=False,
                    test_mode=True,
                    transform=torchvision.transforms.Compose([
                        GroupScale(int(scale_size)),
                        GroupCenterCrop(crop_size),
                        Stack(),
                        ToTorchFormatTensor(),
                        normalize,
                    ])),
            batch_size=1,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=False)

    if args.test:
        if not args.resume:
            print('Please give a path to a trained model for testing.')
            sys.exit()
        else:
            test(args.start_epoch, test_loader, model, args)
        return

    # Loss. For 13-class SHGD, down-weight the over-represented classes
    # (per the original note: No 4420, Hand up 2280, Hand down 2190 vs
    # ~228 samples for the others — mapping to indices 3/4/5 assumed,
    # TODO confirm against the category list).
    # BUG FIX: this special case was previously an unreachable `elif`
    # behind the generic 'nll' branch (both tested loss_type == 'nll').
    if args.loss_type == 'nll' and num_class == 13:
        weights = [1, 1, 1, 1 / 10, 1 / 10, 1 / 20, 1, 1, 1, 1, 1, 1, 1]
        class_weights = torch.Tensor(weights).cuda()
        criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
    elif args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             num_class)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train ECOfull on Kinetics-style data with PaddlePaddle fluid.

    Builds the static train/eval programs, restores weights (from a pickled
    program state by default), then runs the epoch loop with periodic
    scalar logging and validation.
    """
    global args, best_acc
    args = parser.parse_args()
    writer = LogWriter(args.log)
    # writer = None
    cfg = parse_config('config.txt')
    print_configs(cfg, 'TRAIN')
    main_program = fluid.default_main_program()
    start_program = fluid.default_startup_program()
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    with fluid.program_guard(main_program, start_program):
        # data placeholders
        input = fluid.data(name='data', shape=[-1, 3, 224, 224],
                           dtype='float32')
        label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
        print(f'label shape:{label.shape}')

        model = ECOfull(input, num_segments=args.num_segments)
        net_out = model()
        cost = fluid.layers.softmax_with_cross_entropy(net_out, label)
        avg_cost = fluid.layers.mean(cost)
        acc = fluid.layers.accuracy(net_out, label)

        # test program: clone BEFORE the optimizer mutates main_program
        eval_program = main_program.clone(for_test=True)

        # optimizer
        fluid.optimizer.SGD(args.lr).minimize(avg_cost)

        reader = KineticsReader('eco', 'train', cfg).create_reader()
        feeder = fluid.DataFeeder([input, label], place)
        # validation reader
        val_reader = KineticsReader('eco', 'valid', cfg).create_reader()

        # initialize parameters
        exe = fluid.Executor(place=place)
        exe.run(start_program)
        train_exe = fluid.Executor(place=place)
        if 0:  # flip to resume from a saved fluid checkpoint instead
            # fluid.io.load(train_exe, 'models/', filename='eco_full.pdparams')
            fluid.io.load(main_program, 'models/eco_full_best', train_exe)
        else:
            # pre-trained program state.
            # FIX: use a context manager so the file is closed even on error.
            with open('program_state_dict.pkl', 'rb') as f:
                state_dict = pickle.load(f)
            fluid.io.set_program_state(main_program, state_dict)

        step = 0
        best_acc = read_best_acc()
        for i in range(args.epochs):
            for index, data in enumerate(reader()):
                avg_cost_, acc_ = train_exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost.name, acc.name])
                if (index + 1) % args.print_freq == 0:
                    # FIX: `if not writer is None` -> idiomatic `is not None`.
                    if writer is not None:
                        writer.add_scalar(tag='train/loss', step=step,
                                          value=avg_cost_[0])
                        writer.add_scalar(tag='train/acc', step=step,
                                          value=acc_[0])
                    print(f'epoch:{i+1} step:{index + 1} '
                          f'avg loss:{avg_cost_[0]} acc:{acc_[0]}')
                    step += 1
            if (i + 1) % args.eval_freq == 0:
                fetch_list = [avg_cost.name, acc.name]
                validate(val_reader, feeder, place, eval_program, fetch_list,
                         epoch=i, writer=writer)
def main():
    """Train (or evaluate) a GSM/TSN-style VideoModel on something-v1 / diving48.

    Sets up the experiment directory, dataset lists, model, loaders,
    warmup-cosine LR schedule and the train/validate/checkpoint loop.
    """
    global args, best_prec1
    # BUG FIX: best_prec1 was only assigned when resuming from a checkpoint,
    # so a fresh run hit a NameError at the first `prec1 > best_prec1`.
    best_prec1 = 0
    # (removed the unused local `finetuning = False`)
    args = parser.parse_args()
    check_rootfolders()
    if args.dataset == 'something-v1':
        num_class = 174
        args.rgb_prefix = ''
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'diving48':
        num_class = 48
        args.rgb_prefix = 'frames'
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    if not args.resume:
        # Refuse to clobber an existing run directory.
        if os.path.exists(model_dir):
            print('Dir {} exists!!!'.format(model_dir))
            sys.exit()
        else:
            os.makedirs(model_dir)
            os.makedirs(os.path.join(model_dir, args.root_log))
    writer = SummaryWriter(model_dir)

    args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset)

    if 'something' in args.dataset:
        # Label transformation for left/right categories: horizontal flips
        # swap these class pairs.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None

    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=target_transforms)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path,
                     args.train_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     image_tmpl=args.rgb_prefix + rgb_read_format,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=(args.arch in ['BNInception',
                                                   'InceptionV3'])),
                         ToTorchFormatTensor(div=(args.arch not in [
                             'BNInception', 'InceptionV3'
                         ])),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path,
                     args.val_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     image_tmpl=args.rgb_prefix + rgb_read_format,
                     random_shift=False,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=(args.arch in ['BNInception',
                                                   'InceptionV3'])),
                         ToTorchFormatTensor(div=(args.arch not in [
                             'BNInception', 'InceptionV3'
                         ])),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Warmup + cosine schedule; fast-forward it when resuming mid-run.
    lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
        optimizer=optimizer,
        milestones=[args.warmup, args.epochs],
        warmup_iters=args.warmup,
        min_ratio=1e-7)
    if args.resume:
        for epoch in range(0, args.start_epoch):
            lr_scheduler_clr.step()

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        train_prec1 = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            log_training,
                            writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion, (epoch + 1) * len(train_loader),
                             log_training,
                             writer=writer,
                             epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
        else:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': train_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
def main():
    """Evaluate a VPN mapper on OVM data and emit a static HTML results index."""
    global args, web_path, best_prec1
    # NOTE(review): adding arguments inside main() only works if this runs
    # before any other parse_args() call; kept as-is.
    parser.add_argument('--test-views', type=int, default=94)
    parser.add_argument('--view-bias', type=int, default=8)
    best_prec1 = 0
    args = parser.parse_args()
    network_config = Foo(
        encoder=args.encoder,
        decoder=args.decoder,
        fc_dim=args.fc_dim,
        num_views=args.n_views,
        num_class=args.num_class,
        transform_type=args.transform_type,
        output_size=args.label_resolution,
    )
    val_dataset = OVMDataset(
        args.data_root,
        args.eval_list,
        transform=torchvision.transforms.Compose([
            Stack(roll=True),
            ToTorchFormatTensor(div=True),
            GroupNormalize(mean_rgb, std_rgb)
        ]),
        num_views=network_config.num_views,
        input_size=args.input_resolution,
        label_size=args.segSize,
        use_mask=args.use_mask,
        use_depth=args.use_depth,
        is_train=False)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             num_workers=args.num_workers,
                                             shuffle=False,
                                             pin_memory=True)
    mapper = VPNModel(network_config)
    mapper = nn.DataParallel(mapper.cuda())
    if args.weights:
        if os.path.isfile(args.weights):
            print(("=> loading checkpoint '{}'".format(args.weights)))
            checkpoint = torch.load(args.weights)
            args.start_epoch = checkpoint['epoch']
            mapper.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message used to format args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.weights, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.weights)))

    # NOTE(review): size_average is deprecated in modern PyTorch; kept for
    # compatibility with the torch version this project pins.
    criterion = nn.NLLLoss(weight=None, size_average=True)
    eval(val_loader, mapper, criterion)

    web_path = os.path.join(args.visualize, args.store_name)
    # Idiom fix: replaced `if isdir: pass / else: makedirs` with a guard.
    if not os.path.isdir(web_path):
        os.makedirs(web_path)

    # Build a static HTML index: one row per printed step with every input
    # view followed by the prediction and ground-truth images.
    with dominate.document(title=web_path) as web:
        for step in range(len(val_loader)):
            if step % args.print_freq == 0:
                h2('Step {}'.format(step * args.batch_size))
                with table(border=1, style='table-layout: fixed;'):
                    with tr():
                        for i in range(args.test_views):
                            path = 'Step-{}-{}.png'.format(
                                step * args.batch_size, i)
                            with td(style='word-wrap: break-word;',
                                    halign='center',
                                    valign='top'):
                                img(style='width:128px', src=path)
                        path = 'Step-{}-pred.png'.format(step * args.batch_size)
                        with td(style='word-wrap: break-word;',
                                halign='center',
                                valign='top'):
                            img(style='width:128px', src=path)
                        path = 'Step-{}-gt.png'.format(step * args.batch_size)
                        with td(style='word-wrap: break-word;',
                                halign='center',
                                valign='top'):
                            img(style='width:128px', src=path)
    with open(os.path.join(web_path, 'index.html'), 'w') as fp:
        fp.write(web.render())
def main():
    """Train TwoStream / TSN / C3D action models on the Something dataset.

    Selects the model by --model, builds the matching dataset/transform
    pipeline, then runs the train/validate/checkpoint loop.
    """
    global args, best_prec1
    # BUG FIX: best_prec1 was only assigned when resuming, so a fresh run
    # hit a NameError at the first `prec1 > best_prec1`.
    best_prec1 = 0
    args = parser.parse_args()
    assert len(args.train_id) > 0
    check_rootfolders(args.train_id)
    # tensorboard writer (falsy when tf is unavailable)
    summary_w = tf and tf.summary.FileWriter(
        os.path.join('results', args.train_id, args.root_log))
    categories, args.train_list, args.val_list, args.root_path, prefix = \
        return_something_path(args.modality)
    num_class = len(categories)
    args.store_name = '_'.join([args.model, args.modality, args.arch])
    print('storing name: ' + args.store_name)

    # policies == -1 marks models without per-layer optimizer policies (C3D).
    policies = -1
    if args.model == 'TwoStream':
        model = TwoStream(num_class,
                          args.modality,
                          base_model=args.arch,
                          dropout=args.dropout,
                          crop_num=1,
                          partial_bn=not args.no_partialbn)
        policies = model.get_optim_policies()
    elif args.model == 'TSN':
        model = TSN(num_class,
                    args.num_segments,
                    args.modality,
                    base_model=args.arch,
                    dropout=args.dropout,
                    crop_num=1,
                    partial_bn=not args.no_partialbn)
        policies = model.get_optim_policies()
    elif args.model == 'C3D':
        model = C3D()
        model_dict = model.state_dict()
        pretrained_dict = torch.load('./model_zoo/c3d.pickle')
        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)
        print('c3d pretrained model loaded~')
    else:
        print('error!')
        exit()

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: message previously formatted args.evaluate instead of
            # the checkpoint path that was loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    if args.modality == 'RGB' and args.model == 'C3D':
        data_length = 16  # clip length

    if args.model == 'TwoStream':
        datasettrain = TwoStreamDataSet(
            args.root_path,
            args.train_list,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))
        datasetval = TwoStreamDataSet(
            args.root_path,
            args.val_list,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))
    elif args.model == 'TSN':
        datasettrain = TSNDataSet(
            args.root_path,
            args.train_list,
            args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))
        datasetval = TSNDataSet(
            args.root_path,
            args.val_list,
            args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))
    elif args.model == 'C3D':
        datasettrain = C3DDataSet(
            args.root_path,
            args.train_list,
            1,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(div=(
                    args.arch not in ['BNInception', 'InceptionV3', 'C3D'])),
                normalize,
            ]))
        datasetval = C3DDataSet(
            args.root_path,
            args.val_list,
            1,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(div=(
                    args.arch not in ['BNInception', 'InceptionV3', 'C3D'])),
                normalize,
            ]))

    trainvidnum = len(datasettrain)  # (removed the unused valvidnum)
    train_loader = torch.utils.data.DataLoader(datasettrain,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(datasetval,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()
    if policies != -1:
        for group in policies:
            print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.
                   format(group['name'], len(group['params']),
                          group['lr_mult'], group['decay_mult'])))
        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps, args.factor,
                             policies != -1)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, trainvidnum,
              summary_w)
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * trainvidnum, summary_w)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Entry point for TSN training.

    Builds the TSN model for the selected dataset, constructs train/val
    data loaders, then runs the train/validate loop, checkpointing the
    best top-1 model.

    Relies on module-level globals: ``parser`` (argparse parser),
    ``best_prec1`` (best top-1 accuracy so far), and the helpers
    ``train``/``validate``/``save_checkpoint``/``adjust_learning_rate``.
    """
    torch.set_printoptions(precision=6)
    global args, best_prec1
    args = parser.parse_args()
    # Map the dataset name to its number of classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    # Build the TSN model:
    #   num_class           -- number of output classes
    #   args.num_segments   -- how many snippets each video is split into
    #                          (K in the TSN paper, default 3)
    #   args.modality       -- input type (RGB, Flow, RGBDiff, ...)
    #   args.arch           -- backbone (resnet101, BNInception, ...)
    #   args.consensus_type -- how snippet scores are fused (e.g. avg)
    #   args.dropout        -- dropout rate
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    # Multi-GPU training wrapper.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    # Optionally resume from a checkpoint: args.resume is either None or the
    # path of a saved .pth file; load_state_dict restores the parameters.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the message previously interpolated args.evaluate;
            # report the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    # TSNDataSet is a torch.utils.data.Dataset returning (tensor, int label)
    # pairs; DataLoader batches them (batch_size / shuffle as usual).
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=3,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=3,
        pin_memory=True)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    '''
    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    '''
    # try Adam instead.
    optimizer = torch.optim.Adam(policies, args.lr)
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    for epoch in range(args.start_epoch, args.epochs):
        # args.lr_steps lists the epochs at which the LR is scaled
        # (by 0.1 by default inside adjust_learning_rate).
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set every args.eval_freq epochs
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Visualization entry point: restore a TSN checkpoint and run one
    validation pass that writes attention-map visualizations and a log.

    Uses module-level globals ``parser``, ``best_prec1`` and
    ``class_to_name`` (index -> readable class name), plus the project
    ``validate`` helper.
    """
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    # Build the idx -> class-name map; the two dataset families ship
    # differently formatted index files.
    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        # One class name per line; spaces become dashes.
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        # Lines look like "<1-based id> <name>"; shift ids to 0-based.
        class_to_name = {
            int(line.strip().split(' ')[0]) - 1: line.strip().split(' ')[1]
            for line in content
        }
    # Persist the options used for this run next to the results.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    # lstm_out_type only applies to (conv-)LSTM consensus.
    if not (args.consensus_type == 'lstm'
            or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: report the path actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        # rev_normalize undoes normalization so frames can be rendered.
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        # NOTE(review): rev_normalize is not defined on this branch, so an
        # RGBDiff run would hit a NameError at the validate() call below —
        # confirm RGBDiff is never used with this script.
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
    # Optional temporal perturbation of the evaluated frame sequence.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length * args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length * args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')
    # shuffle=True so each visualization run samples different clips.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix +
            "{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
            args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
def main():
    """Train/evaluate a TemporalModel (GST/STM/TMP/TSM/ORI/I3D) on video.

    Resolves the dataset, builds the backbone selected by ``args.arch``,
    optionally resumes from ``args.resume_rgb``, then runs the usual
    train/validate/checkpoint loop with TensorBoard logging.

    Uses module-level globals ``parser``/``best_prec1`` and the project
    helpers ``train``/``validate``/``save_checkpoint``/
    ``get_optim_policies``/``get_augmentation``.
    """
    global args, best_prec1
    args = parser.parse_args()
    if args.dataset == 'something-v1':
        num_class = 174
    elif args.dataset == 'diving48':
        num_class = 48
    elif args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'skating2':
        num_class = 63
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    args.train_list, args.val_list, args.root_path, args.rgb_prefix = \
        datasets_video.return_dataset(args.dataset)
    if 'something' in args.dataset:
        # Label transformation for left/right categories: horizontally
        # flipped clips must swap the paired left/right class labels.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None
    # Fresh run: wipe any stale experiment directory before recreating it.
    if not args.resume_rgb:
        if os.path.exists(model_dir):
            print('Dir {} exists!!! it will be removed'.format(model_dir))
            shutil.rmtree(model_dir)
        os.makedirs(model_dir)
        os.makedirs(os.path.join(model_dir, args.root_log))
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['flow', 'RGBDiff']:
        data_length = 5

    def _build_model(model_name, backbone=None, **extra):
        # Helper: build a TemporalModel with the arguments shared by every
        # arch variant below (backbone defaults to args.arch).
        return TemporalModel(num_class,
                             args.num_segments,
                             model=model_name,
                             backbone=backbone if backbone is not None else args.arch,
                             alpha=args.alpha,
                             beta=args.beta,
                             dropout=args.dropout,
                             target_transforms=target_transforms,
                             resi=args.resi,
                             **extra)

    if args.resume_rgb:
        if args.modality == 'RGB':
            # Dispatch on a substring of the arch name; first match wins.
            if 'gst' in args.arch:
                model = _build_model('GST')
            elif 'stm' in args.arch:
                model = _build_model('STM')
            elif 'tmp' in args.arch:
                model = _build_model('TMP')
            elif 'tsm' in args.arch:
                model = _build_model('TSM')
            elif 'ori' in args.arch:
                model = _build_model('ORI')
            elif 'I3D' in args.arch:
                print("!!!!!!!!!!!!!!!!!!!!!!!\n\n")
                model = _build_model('I3D')
            else:
                model = _build_model('ORI')
        if os.path.isfile(args.resume_rgb):
            print(("=> loading checkpoint '{}'".format(args.resume_rgb)))
            checkpoint = torch.load(args.resume_rgb)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            original_checkpoint = checkpoint['state_dict']
            print(("(epoch {} ) best_prec1 : {} ".format(
                checkpoint['epoch'], best_prec1)))
            # Strip the 'module.' prefix that DataParallel added on save.
            original_checkpoint = {
                k[7:]: v
                for k, v in original_checkpoint.items()
            }
            model.load_state_dict(original_checkpoint)
            print(("=> loaded checkpoint '{}' (epoch {} ) best_prec1 : {} ".
                   format(args.resume_rgb, checkpoint['epoch'], best_prec1)))
        else:
            raise ValueError("=> no checkpoint found at '{}'".format(
                args.resume_rgb))
    else:
        if args.modality == 'flow':
            # NOTE(review): only I3D handles flow here; any other arch
            # leaves `model` undefined — confirm flow is I3D-only.
            if 'I3D' in args.arch:
                model = _build_model('I3D',
                                     modality='flow',
                                     new_length=data_length)
        elif args.modality == 'RGB':
            if 'gst' in args.arch:
                model = _build_model('GST')
            elif 'stm' in args.arch:
                model = _build_model('STM')
            elif 'tmp' in args.arch:
                model = _build_model('TMP')
            elif 'tsm' in args.arch:
                model = _build_model('TSM')
            elif 'ori' in args.arch:
                model = _build_model('ORI')
            elif 'I3D' in args.arch:
                model = _build_model('I3D')
            else:
                model = _build_model('ORI', backbone=args.arch + '_ori')
    cudnn.benchmark = True
    writer = SummaryWriter(model_dir)
    # Data loading code
    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = get_augmentation(mode='train')
    val_trans = get_augmentation(mode='val')
    normalize = GroupNormalize(input_mean, input_std)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    # diving48 keeps train/test frames under separate sub-directories.
    if args.dataset == 'diving48':
        args.root_path = args.root_path + '/train'
    train_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path,
                     args.train_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     image_tmpl=args.rgb_prefix,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                         ToTorchFormatTensor(
                             div=(args.arch not in ['BNInception', 'InceptionV3'])),
                         normalize,
                     ]),
                     dataset=args.dataset),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    print("trainloader.type = {}".format(type(train_loader)))
    if args.dataset == 'diving48':
        # Replace the '/train' suffix added above with '/test'.
        args.root_path = args.root_path[:-6] + '/test'
    val_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path,
                     args.val_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     image_tmpl=args.rgb_prefix,
                     random_shift=False,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                         ToTorchFormatTensor(
                             div=(args.arch not in ['BNInception', 'InceptionV3'])),
                         normalize,
                     ]),
                     dataset=args.dataset),
        batch_size=1,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.evaluate:
        log_test = open('test_not.csv', 'w')
        validate(val_loader, model, criterion, log_test)
        # BUG FIX: os.remove() takes a path, not a file object; close the
        # handle first, then delete the throwaway log file by name.
        log_test.close()
        os.remove('test_not.csv')
        return
    if args.lr_scheduler == 'cos_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
            optimizer=optimizer,
            milestones=[args.warmup, args.epochs],
            warmup_iters=args.warmup,
            min_ratio=1e-7)
    elif args.lr_scheduler == 'lr_step_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupStepLR(
            optimizer=optimizer,
            milestones=[args.warmup] +
            [args.epochs - 30, args.epochs - 10, args.epochs],
            warmup_iters=args.warmup)
    elif args.lr_scheduler == 'lr_step':
        lr_scheduler_clr = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, args.lr_steps, 0.1)
    if args.resume_rgb:
        # Fast-forward the scheduler to the resumed epoch so the learning
        # rate matches where training left off.
        for epoch in range(0, args.start_epoch):
            optimizer.step()
            lr_scheduler_clr.step()
    # 'a' (append) so a resumed run extends the existing log.
    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        # train for one epoch
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              log_training,
              writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set every args.eval_freq epochs
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion,
                             log_training,
                             writer=writer,
                             epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
            print('best_prec1: {}'.format(best_prec1))
        else:
            # Non-eval epochs still checkpoint (never flagged as best).
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
def main():
    """Train a View Parsing Network (VPN) for top-down semantic mapping.

    Builds the OVM train/val datasets and loaders, wraps the VPN model in
    DataParallel, optionally resumes from a checkpoint, then trains with
    Adam + NLL loss, evaluating and checkpointing every ``args.ckpt_freq``
    epochs.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    network_config = Foo(
        encoder=args.encoder,
        decoder=args.decoder,
        fc_dim=args.fc_dim,
        output_size=args.label_resolution,
        num_views=args.n_views,
        num_class=args.num_class,
        transform_type=args.transform_type,
    )
    train_dataset = OVMDataset(args.data_root,
                               args.train_list,
                               transform=torchvision.transforms.Compose([
                                   Stack(roll=True),
                                   ToTorchFormatTensor(div=True),
                                   GroupNormalize(mean_rgb, std_rgb)
                               ]),
                               num_views=network_config.num_views,
                               input_size=args.input_resolution,
                               label_size=args.label_resolution,
                               use_mask=args.use_mask,
                               use_depth=args.use_depth)
    val_dataset = OVMDataset(args.data_root,
                             args.eval_list,
                             transform=torchvision.transforms.Compose([
                                 Stack(roll=True),
                                 ToTorchFormatTensor(div=True),
                                 GroupNormalize(mean_rgb, std_rgb)
                             ]),
                             num_views=network_config.num_views,
                             input_size=args.input_resolution,
                             label_size=args.label_resolution,
                             use_mask=args.use_mask,
                             use_depth=args.use_depth)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers,
                                               shuffle=True,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             num_workers=args.num_workers,
                                             shuffle=False,
                                             pin_memory=True)
    mapper = VPNModel(network_config)
    mapper = nn.DataParallel(mapper.cuda())
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            mapper.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: report the resumed path (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    # NOTE(review): NLLLoss expects log-probabilities — presumably VPNModel
    # ends in log_softmax; confirm in the model definition.
    criterion = nn.NLLLoss(weight=None, size_average=True)
    optimizer = optim.Adam(mapper.parameters(),
                           lr=args.start_lr,
                           betas=(0.95, 0.999))
    if not os.path.isdir(args.log_root):
        os.mkdir(args.log_root)
    log_train = open(
        os.path.join(args.log_root, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        train(train_loader, mapper, criterion, optimizer, epoch, log_train)
        if (epoch + 1) % args.ckpt_freq == 0 or epoch == args.epochs - 1:
            # `eval` here is a project validation helper that shadows the
            # builtin eval().
            prec1 = eval(val_loader, mapper, criterion, log_train, epoch)
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': network_config.encoder,
                    'state_dict': mapper.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train/evaluate an STSNN action-recognition model.

    Resolves the dataset file lists, builds the STSNN model and data
    loaders, then runs the train/validate/checkpoint loop. Uses the
    module-level ``parser``/``best_prec1`` globals and the helpers
    ``train``/``validate``/``save_checkpoint``/``adjust_learning_rate``.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()
    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)
    args.store_name = '_'.join([
        'STSNN', args.dataset, args.modality, args.arch,
        'group%d' % args.num_segments,
        '%df1c' % args.num_motion
    ])
    print('storing name: ' + args.store_name)
    model = STSNN(num_class,
                  args.num_segments,
                  args.modality,
                  base_model=args.arch,
                  consensus_type=args.consensus_type,
                  dropout=args.dropout,
                  num_motion=args.num_motion,
                  img_feature_dim=args.img_feature_dim,
                  partial_bn=not args.no_partialbn,
                  dataset=args.dataset)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the message previously interpolated args.evaluate;
            # report the checkpoint actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    print(model)
    cudnn.benchmark = True
    # Data loading code
    # BUG FIX: the original condition was
    #   (args.modality != 'RGBDiff') | (args.modality != 'RGBFlow')
    # which is always true (the two inequalities cannot both be false), so
    # the IdentityTransform branch was unreachable. The intent — matching
    # the sibling scripts — is to skip mean/std normalization for the
    # difference/flow-stacked modalities.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                             isRGBFlow=(args.modality == 'RGBFlow')),
                       ToTorchFormatTensor(
                           div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=False)
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                             isRGBFlow=(args.modality == 'RGBFlow')),
                       ToTorchFormatTensor(
                           div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)
        # evaluate on validation set every args.eval_freq epochs
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train a Temporal Binding Network (TBN) over multiple modalities.

    Builds per-modality transforms (RGB/Flow/RGBDiff/Spec), the TBN data
    loaders, a per-stream SGD parameter-group optimizer with a MultiStepLR
    schedule, then trains with optional validation, checkpointing and
    numpy stats export.

    Uses module-level globals ``parser``, ``best_prec1``,
    ``experiment_dir`` and ``summaryWriter``, plus the project helpers
    ``train``/``validate``/``save_checkpoint``/``training_labels``.
    """
    global args, best_prec1, train_list, experiment_dir, best_loss
    args = parser.parse_args()
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'epic':
        # EPIC-Kitchens predicts (verb, noun) pairs.
        num_class = (125, 352)
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = TBN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                midfusion=args.midfusion)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    data_length = model.new_length
    train_augmentation = model.get_augmentation()
    # Resume training from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # Checkpoint was saved from a DataParallel wrapper; strip the
            # leading 'module.' from every key before loading.
            state_dict_new = OrderedDict()
            for k, v in checkpoint['state_dict'].items():
                state_dict_new[k.split('.', 1)[1]] = v
            model.load_state_dict(state_dict_new)
            # BUG FIX: report the resumed path (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    # Load pretrained weights for each stream
    if args.pretrained_flow_weights:
        print('Initialize Flow stream from Kinetics')
        pretrained = os.path.join('pretrained/kinetics_tsn_flow.pth.tar')
        state_dict = torch.load(pretrained)
        for k, v in state_dict.items():
            state_dict[k] = torch.squeeze(v, dim=0)
        base_model = getattr(model, 'flow')
        base_model.load_state_dict(state_dict, strict=False)
    # Freeze stream weights (leaves only fusion and classification trainable)
    if args.freeze:
        model.freeze_fn('modalities')
    # Freeze batch normalisation layers except the first
    if args.partialbn:
        model.freeze_fn('partialbn_parameters')
    model = torch.nn.DataParallel(model, device_ids=args.gpus).to(device)
    cudnn.benchmark = True
    # Data loading code: one normalization/transform per visual modality;
    # Spec (audio spectrogram) gets its own minimal pipeline.
    normalize = {}
    for m in args.modality:
        if (m != 'Spec'):
            if (m != 'RGBDiff'):
                normalize[m] = GroupNormalize(input_mean[m], input_std[m])
            else:
                normalize[m] = IdentityTransform()
    image_tmpl = {}
    train_transform = {}
    val_transform = {}
    for m in args.modality:
        if (m != 'Spec'):
            # Image-name templates per modality.
            if m in ['RGB', 'RGBDiff']:
                image_tmpl[m] = "img_{:010d}.jpg"
            elif m == 'Flow':
                image_tmpl[m] = args.flow_prefix + "{}_{:010d}.jpg"
            # Train/val transformation pipelines
            # (augmentation + normalization) per modality.
            train_transform[m] = torchvision.transforms.Compose([
                train_augmentation[m],
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize[m],
            ])
            val_transform[m] = torchvision.transforms.Compose([
                GroupScale(int(scale_size[m])),
                GroupCenterCrop(crop_size[m]),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize[m],
            ])
        else:
            # Spectrograms: stack and convert only, no scaling (div=False).
            train_transform[m] = torchvision.transforms.Compose([
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=False),
            ])
            val_transform[m] = torchvision.transforms.Compose([
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=False),
            ])
    if args.train_list is None:
        # No explicit train list: train on the full default training set.
        train_loader = torch.utils.data.DataLoader(
            TBNDataSet(args.dataset,
                       training_labels(),
                       data_length,
                       args.modality,
                       image_tmpl,
                       visual_path=args.visual_path,
                       audio_path=args.audio_path,
                       num_segments=args.num_segments,
                       transform=train_transform,
                       fps=args.fps,
                       resampling_rate=args.resampling_rate),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
    else:
        train_loader = torch.utils.data.DataLoader(
            TBNDataSet(args.dataset,
                       args.train_list,
                       data_length,
                       args.modality,
                       image_tmpl,
                       visual_path=args.visual_path,
                       audio_path=args.audio_path,
                       num_segments=args.num_segments,
                       transform=train_transform,
                       fps=args.fps,
                       resampling_rate=args.resampling_rate),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
    if args.train_list is not None:
        # Only build a validation loader when a held-out split exists
        # (training on the full set leaves nothing to validate on).
        val_loader = torch.utils.data.DataLoader(
            TBNDataSet(args.dataset,
                       args.val_list,
                       data_length,
                       args.modality,
                       image_tmpl,
                       visual_path=args.visual_path,
                       audio_path=args.audio_path,
                       num_segments=args.num_segments,
                       mode='val',
                       transform=val_transform,
                       fps=args.fps,
                       resampling_rate=args.resampling_rate),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if len(args.modality) > 1:
        # One parameter group per stream; flow gets a smaller fixed LR.
        param_groups = [
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.rgb.parameters())
            },
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.flow.parameters()),
                'lr': 0.001
            },
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.spec.parameters())
            },
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.fusion_classification_net.parameters())
            },
        ]
    else:
        param_groups = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(param_groups,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer, args.lr_steps, gamma=0.1)
    if args.evaluate:
        validate(val_loader, model, criterion, device)
        return
    # Pre-allocate per-epoch stats arrays when stats export is requested.
    if args.save_stats:
        if args.dataset != 'epic':
            stats_dict = {
                'train_loss': np.zeros((args.epochs, )),
                'val_loss': np.zeros((args.epochs, )),
                'train_acc': np.zeros((args.epochs, )),
                'val_acc': np.zeros((args.epochs, ))
            }
        elif args.dataset == 'epic':
            if args.train_list is not None:
                stats_dict = {
                    'train_loss': np.zeros((args.epochs, )),
                    'train_verb_loss': np.zeros((args.epochs, )),
                    'train_noun_loss': np.zeros((args.epochs, )),
                    'train_acc': np.zeros((args.epochs, )),
                    'train_verb_acc': np.zeros((args.epochs, )),
                    'train_noun_acc': np.zeros((args.epochs, )),
                    'val_loss': np.zeros((args.epochs, )),
                    'val_verb_loss': np.zeros((args.epochs, )),
                    'val_noun_loss': np.zeros((args.epochs, )),
                    'val_acc': np.zeros((args.epochs, )),
                    'val_verb_acc': np.zeros((args.epochs, )),
                    'val_noun_acc': np.zeros((args.epochs, ))
                }
            else:
                stats_dict = {
                    'train_loss': np.zeros((args.epochs, )),
                    'train_verb_loss': np.zeros((args.epochs, )),
                    'train_noun_loss': np.zeros((args.epochs, )),
                    'train_acc': np.zeros((args.epochs, )),
                    'train_verb_acc': np.zeros((args.epochs, )),
                    'train_noun_acc': np.zeros((args.epochs, ))
                }
    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): scheduler.step() before train() is the pre-1.1
        # PyTorch ordering; on newer versions the LR decays one epoch
        # early — confirm the pinned torch version before reordering.
        scheduler.step()
        # train for one epoch
        training_metrics = train(train_loader, model, criterion, optimizer,
                                 epoch, device)
        if args.save_stats:
            for k, v in training_metrics.items():
                stats_dict[k][epoch] = v
        # evaluate on validation set
        if args.train_list is not None:
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                test_metrics = validate(val_loader, model, criterion, device)
                if args.save_stats:
                    for k, v in test_metrics.items():
                        stats_dict[k][epoch] = v
                prec1 = test_metrics['val_acc']
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
        else:
            # No validation set
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': training_metrics['train_acc'],
                }, False)
    summaryWriter.close()
    if args.save_stats:
        save_stats_dir = os.path.join('stats', experiment_dir)
        if not os.path.exists(save_stats_dir):
            os.makedirs(save_stats_dir)
        with open(os.path.join(save_stats_dir, 'training_stats.npz'),
                  'wb') as f:
            np.savez(f, **stats_dict)
def main():
    """Train/evaluate a TSN action-recognition model.

    Reads all configuration from the module-level argparse ``parser``,
    reports model complexity via ptflops, optionally resumes from a
    checkpoint, then runs the train/validate loop, tracking the best
    top-1 precision in the module global ``best_prec1``.
    """
    # do some pre_process, such as ignore warning
    pre_process()
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'streetdance245':
        num_class = 245
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing parameters and per-layer optimizer policies
    # before DataParallel hides the model behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # calculate flops; input of model is (batch_size, n_seg*c, h, w)
    from ptflops import get_model_complexity_info
    macs, params = get_model_complexity_info(model, (1, 9, 224, 224),
                                             as_strings=True,
                                             print_per_layer_stat=True,
                                             verbose=True)
    print('{:<30} {:<8}'.format('Computational complexity: ', macs))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
    set_break()  # deliberate debugging stop, kept from the original flow

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff normalizes internally, so skip it here.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound,
        # producing a confusing NameError below.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.train_list, num_segments=args.num_segments,
        new_length=data_length, modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR uint8 input; everything else RGB/255.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "", args.val_list, num_segments=args.num_segments,
        new_length=data_length, modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    print('best_prec1:', best_prec1)
def main(): global args, best_prec1, best_prec5 args = parser.parse_args() args.store_name = '_'.join(['part_iSLR',args.train_mode,\ 'class'+str(args.num_class)]) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus create_path(args.root_model) # get model model = islr_model(args.num_class,train_mode=args.train_mode) model = torch.nn.DataParallel(model).cuda() # restore model if args.val_resume: if osp.isfile(args.val_resume): checkpoint = torch.load(args.val_resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] best_prec5 = checkpoint['best_prec5'] model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {})\n \ best_prec1: {:.3f}\n \ best_prec5: {:.3f}" .format(args.evaluate, checkpoint['epoch'],\ best_prec1,best_prec5))) else: print(("=> no checkpoint found at '{}'".format(args.val_resume))) cudnn.benchmark = True # Data loading code scale_size = 256 crop_size = 224 input_mean = [0.485, 0.456, 0.406] input_std = [0.229, 0.224, 0.225] normalize = GroupNormalize(input_mean,input_std) val_loader = torch.utils.data.DataLoader( iSLR_Dataset(args.val_file, args=args, transform=torchvision.transforms.Compose([ GroupScale((crop_size,crop_size)), # GroupScale(int(scale_size)), # GroupCenterCrop(crop_size), Stack(roll=False), ToTorchFormatTensor(div=True), normalize, ]) ), batch_size=args.batch_size,shuffle=False, num_workers=args.workers,pin_memory=True, # collate_fn=collate ) # define loss function (criterion) criterion = torch.nn.CrossEntropyLoss().cuda() prec1,prec5 = validate(val_loader, model, criterion, 0 // args.eval_freq)
def main():
    """Train/evaluate an I3D model on the configured video dataset.

    Resolves the class count from ``args.dataset``, builds the model via
    ``getattr(i3d, args.arch)``, optionally resumes from a checkpoint,
    then runs the train/validate loop, tracking ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_classes = 101
    elif args.dataset == 'hmdb51':
        num_classes = 51
    elif args.dataset == 'kinetics':
        num_classes = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = getattr(i3d, args.arch)(modality=args.modality,
                                    num_classes=num_classes,
                                    dropout_ratio=args.dropout)

    # Standard 224 -> 256 scale ratio for the resize before cropping.
    crop_size = args.input_size
    scale_size = args.input_size * 256 // 224
    input_mean = [0.485, 0.456, 0.406]  # ImageNet statistics
    input_std = [0.229, 0.224, 0.225]
    if args.modality == 'Flow':
        # Flow frames are single-channel; use a scalar mean/std.
        input_mean = [0.5]
        input_std = [np.mean(input_std)]
    train_augmentation = get_augmentation(args.modality, args.input_size)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_loader = torch.utils.data.DataLoader(I3DDataSet(
        args.root_path, args.train_list, clip_length=args.clip_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality == "RGB"
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            ToNumpyNDArray(),
            ToTorchFormatTensor(),
            GroupNormalize(input_mean, input_std),
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(I3DDataSet(
        args.root_path, args.val_list, clip_length=args.clip_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality == "RGB"
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            ToNumpyNDArray(),
            ToTorchFormatTensor(),
            GroupNormalize(input_mean, input_std),
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    optimizer = torch.optim.SGD(params=model.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train/evaluate a TSN model on frames stored under ``UCF-Frames``.

    Resolves the class count from ``args.dataset``, builds the model,
    optionally resumes from a checkpoint, then runs the train/validate
    loop, tracking the module global ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing parameters and per-layer optimizer policies
    # before DataParallel hides the model behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff normalizes internally, so skip it here.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound,
        # producing a confusing NameError below.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames", args.train_list, num_segments=args.num_segments,
        new_length=data_length, modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR uint8 input; everything else RGB/255.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames", args.val_list, num_segments=args.num_segments,
        new_length=data_length, modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main(): global args args = parser.parse_args() print("------------------------------------") print("Environment Versions:") print("- Python: {}".format(sys.version)) print("- PyTorch: {}".format(torch.__version__)) print("- TorchVison: {}".format(torchvision.__version__)) args_dict = args.__dict__ print("------------------------------------") print(args.arch+" Configurations:") for key in args_dict.keys(): print("- {}: {}".format(key, args_dict[key])) print("------------------------------------") if args.dataset == 'ucf101': num_class = 101 rgb_read_format = "{:06d}.jpg" # Format for THUMOS14 videos # rgb_read_format = "{:05d}.jpg" elif args.dataset == 'hmdb51': num_class = 51 rgb_read_format = "{:05d}.jpg" elif args.dataset == 'kinetics': num_class = 400 rgb_read_format = "{:04d}.jpg" elif args.dataset == 'something': num_class = 174 rgb_read_format = "{:04d}.jpg" else: raise ValueError('Unknown dataset '+args.dataset) model = TSN(num_class, args.num_segments, args.pretrained_parts, args.modality, base_model=args.arch, consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn) crop_size = model.crop_size scale_size = model.scale_size input_mean = model.input_mean input_std = model.input_std if _CUDA: model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() # CUDA print_model(model) if not _CUDA: model = torch.nn.DataParallel(model) # CPU print("pretrained_parts: ", args.pretrained_parts) if args.resume: if os.path.isfile(args.resume): print(("=> loading checkpoint '{}'".format(args.resume))) if _CUDA: checkpoint = torch.load(args.resume) # CUDA else: checkpoint = torch.load(args.resume, map_location='cpu') # CPU # if not checkpoint['lr']: if "lr" not in checkpoint.keys(): args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ") args.lr = float(args.lr) else: args.lr = checkpoint['lr'] args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] 
model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})" .format(args.resume, checkpoint['epoch'], args.lr))) else: print(("=> no checkpoint found at '{}'".format(args.resume))) else: print("Please specify the checkpoint to pretrained model") return cudnn.benchmark = True # Data loading code if args.modality != 'RGBDiff': #input_mean = [0,0,0] #for debugging normalize = GroupNormalize(input_mean, input_std) else: normalize = IdentityTransform() if args.modality == 'RGB': data_length = 1 elif args.modality in ['Flow', 'RGBDiff']: data_length = 5 end = time.time() # data_loader = torch.utils.data.DataLoader( dataset = TSNDataSet("", args.val_list, num_segments=args.num_segments, new_length=data_length, modality=args.modality, image_tmpl=args.rgb_prefix+rgb_read_format if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+rgb_read_format, random_shift=False, transform=torchvision.transforms.Compose([ GroupScale(int(scale_size)), GroupCenterCrop(crop_size), Stack(roll=True), ToTorchFormatTensor(div=False), #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')), #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')), normalize, ]), test_mode=True, window_size=_WINDOW_SIZE, window_stride=_WINDOW_STRIDE); data_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True, collate_fn=collate_fn) # criterion = torch.nn.CrossEntropyLoss().cuda() # predict(data_loader, model, criterion, 0) predict(dataset, model, criterion=None, iter=0) # profile_model(model) elapsed_time = time.time() - end print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time)) return
from __future__ import absolute_import from __future__ import division from __future__ import print_function import torch.nn as nn import torch import logging import numpy as np from contrib import adf from opts import parser FLAGS = parser.parse_args() def keep_variance(x, min_variance): return x + min_variance def finitialize_msra(modules, small=False): logging.info("Initializing MSRA") for layer in modules: if isinstance(layer, adf.Conv2d) or isinstance(layer, adf.Linear): # convolution: bias=0, weight=msra nn.init.kaiming_normal_(layer.weight) if small: layer.weight.data.mul_(0.001) if layer.bias is not None: nn.init.constant_(layer.bias, 0) def finitialize_xavier(modules, small=False): logging.info("Initializing Xavier")
def main():
    """Train/evaluate a temporal-grounding model on Charades-STA.

    Sets up logging and TensorBoard, builds train/test DataLoaders,
    initializes query-word embeddings from GloVe, optionally resumes,
    and runs a staged training loop tracking best R@1/R@5 in module
    globals.
    """
    global args, logger, writer, dataset_configs
    global best_top1_epoch, best_top5_epoch, best_top1, best_top5, best_top1_top5, best_top5_top1
    # Dataset defaults are merged into the parser before parsing.
    dataset_configs = get_and_save_args(parser)
    parser.set_defaults(**dataset_configs)
    args = parser.parse_args()

    # ================== GPU setting ===============
    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    """copy codes and creat dir for saving models and logs"""
    if not os.path.isdir(args.snapshot_pref):
        os.makedirs(args.snapshot_pref)

    logger = Prepare_logger(args)
    logger.info('\ncreating folder: ' + args.snapshot_pref)

    # TensorBoard writer / recorder only needed when training.
    if not args.evaluate:
        writer = SummaryWriter(args.snapshot_pref)
        recorder = Recorder(args.snapshot_pref)
        recorder.writeopt(args)

    logger.info('\nruntime args\n\n{}\n'.format(json.dumps(vars(args), indent=4)))

    """prepare dataset and model"""
    # word2idx = json.load(open('./data/dataset/TACoS/TACoS_word2id_glove_lower.json', 'r'))
    # train_dataset = TACoS(args, split='train')
    # test_dataset = TACoS(args, split='test')
    word2idx = json.load(open('./data/dataset/Charades/Charades_word2id.json', 'r'))
    train_dataset = CharadesSTA(args, split='train')
    test_dataset = CharadesSTA(args, split='test')

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=collate_data,
        num_workers=8,
        pin_memory=True
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=args.test_batch_size,
        shuffle=False,
        collate_fn=collate_data,
        num_workers=8,
        pin_memory=True
    )

    vocab_size = len(word2idx)
    # Local copies: the staged setup below may shrink lr / n_epoch.
    lr = args.lr
    n_epoch = args.n_epoch

    main_model = mainModel(vocab_size, args, hidden_dim=512, embed_dim=300,
                           bidirection=True, graph_node_features=1024)

    # Initialize the query embedding from cached GloVe weights, or
    # regenerate them from the vocabulary on first run.
    if os.path.exists(args.glove_weights):
        logger.info("Loading glove weights")
        main_model.query_encoder.embedding.weight.data.copy_(torch.load(args.glove_weights))
    else:
        logger.info("Generating glove weights")
        main_model.query_encoder.embedding.weight.data.copy_(glove_init(word2idx))

    main_model = nn.DataParallel(main_model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            logger.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            pretrained_dict = checkpoint['state_dict']
            # only resume part of model paramete
            model_dict = main_model.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            model_dict.update(pretrained_dict)
            main_model.load_state_dict(model_dict)
            # main_model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): this message formats args.evaluate, but the
            # checkpoint loaded is args.resume — likely a copy-paste slip.
            logger.info(("=> loaded checkpoint '{}' (epoch {})"
                         .format(args.evaluate, checkpoint['epoch'])))
        else:
            logger.info(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.evaluate:
        topks, accuracy_topks = evaluate(main_model, test_dataloader, word2idx, False)
        for ind, topk in enumerate(topks):
            print("R@{}: {:.1f}\n".format(topk, accuracy_topks[ind] * 100))
        return

    # Three-stage schedule: stage 1 freezes the IoU/mix heads, stage 2
    # trains only those heads at lr/100, stage 3 fine-tunes everything
    # at lr/10000.
    learned_params = None
    if args.is_first_stage:
        for name, value in main_model.named_parameters():
            if 'iou_scores' in name or 'mix_fc' in name:
                value.requires_grad = False
        learned_params = filter(lambda p: p.requires_grad, main_model.parameters())
        n_epoch = 10
    elif args.is_second_stage:
        head_params = main_model.module.fcos.head.iou_scores.parameters()
        fc_params = main_model.module.fcos.head.mix_fc.parameters()
        learned_params = list(head_params) + list(fc_params)
        lr /= 100
    elif args.is_third_stage:
        learned_params = main_model.parameters()
        lr /= 10000
    optimizer = torch.optim.Adam(learned_params, lr)

    # NOTE(review): the loop bound uses the (possibly reduced) local
    # n_epoch while the last-epoch test below uses args.n_epoch — confirm
    # this asymmetry is intentional.
    for epoch in range(args.start_epoch, n_epoch):
        train_loss = train_epoch(main_model, train_dataloader, optimizer, epoch)
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.n_epoch - 1:
            val_loss, topks, accuracy_topks = validate_epoch(
                main_model, test_dataloader, epoch, word2idx, False
            )
            for ind, topk in enumerate(topks):
                writer.add_scalar('test_result/Recall@top{}'.format(topk), accuracy_topks[ind]*100, epoch)

            # Track and checkpoint the best R@1.
            is_best_top1 = (accuracy_topks[0]*100) > best_top1
            best_top1 = max((accuracy_topks[0]*100), best_top1)
            if is_best_top1:
                best_top1_epoch = epoch
                best_top1_top5 = accuracy_topks[1]*100
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': main_model.state_dict(),
                'loss': val_loss,
                'top1': accuracy_topks[0]*100,
                'top5': accuracy_topks[1]*100,
            }, is_best_top1, epoch=epoch, top1=accuracy_topks[0]*100, top5=accuracy_topks[1]*100)

            # Track and checkpoint the best R@5 independently.
            is_best_top5 = (accuracy_topks[1]*100) > best_top5
            best_top5 = max((accuracy_topks[1]*100), best_top5)
            if is_best_top5:
                best_top5_epoch = epoch
                best_top5_top1 = accuracy_topks[0] * 100
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': main_model.state_dict(),
                'loss': val_loss,
                'top1': accuracy_topks[0]*100,
                'top5': accuracy_topks[1]*100,
            }, is_best_top5, epoch=epoch, top1=accuracy_topks[0]*100, top5=accuracy_topks[1]*100)

            writer.add_scalar('test_result/Best_Recall@top1', best_top1, epoch)
            writer.add_scalar('test_result/Best_Recall@top5', best_top5, epoch)
            logger.info(
                "R@1: {:.2f}, R@5: {:.2f}, epoch: {}\n".format(
                    accuracy_topks[0] * 100, accuracy_topks[1] * 100, epoch)
            )
            logger.info(
                "Current best top1: R@1: {:.2f}, R@5: {:.2f}, epoch: {} \n".format(
                    best_top1, best_top1_top5, best_top1_epoch)
            )
            logger.info(
                "Current best top5: R@1: {:.2f}, R@5: {:.2f}, epoch: {} \n".format(
                    best_top5_top1, best_top5, best_top5_epoch)
            )
def main():
    """Train/evaluate a TSM backbone plus Transformer decoder on YouCook.

    Builds the experiment store name from the configuration, constructs
    the (possibly pruned/concatenated) TSN model and a Transformer
    decoder, handles resume/fine-tune weight surgery, then runs the
    train/validate loop checkpointing both model and decoder.
    """
    global args, best_prec1
    global crop_size
    args = parser.parse_args()
    num_class, train_list, val_list, args.root_path, prefix = dataset_config.return_dataset(
        args.dataset, args.modality)
    # NOTE(review): the dataset-provided class count is immediately
    # overridden to 1 — confirm this single-output setup is intentional.
    num_class = 1
    if args.train_list == "":
        args.train_list = train_list
    if args.val_list == "":
        args.val_list = val_list

    # Compose a unique store name encoding the full configuration.
    full_arch_name = args.arch
    if args.shift:
        full_arch_name += '_shift{}_{}'.format(args.shift_div, args.shift_place)
    if args.concat != "":
        full_arch_name += '_concat{}'.format(args.concat)
    if args.temporal_pool:
        full_arch_name += '_tpool'
    args.store_name += '_'.join([
        'TSM', args.dataset, args.modality, full_arch_name,
        args.consensus_type, 'lr%.5f' % args.lr,
        'dropout%.2f' % args.dropout, 'wd%.5f' % args.weight_decay,
        'batch%d' % args.batch_size, 'segment%d' % args.num_segments,
        'e{}'.format(args.epochs)
    ])
    if args.data_fuse:
        args.store_name += '_fuse'
    if args.extra_temporal_modeling:
        args.store_name += '_extra'
    if args.tune_from is not None:
        args.store_name += '_finetune'
    if args.pretrain != 'imagenet':
        args.store_name += '_{}'.format(args.pretrain)
    if args.lr_type != 'step':
        args.store_name += '_{}'.format(args.lr_type)
    if args.dense_sample:
        args.store_name += '_dense'
    if args.non_local > 0:
        args.store_name += '_nl'
    if args.clipnums:
        args.store_name += "_clip{}".format(args.clipnums)
    if args.suffix is not None:
        args.store_name += '_{}'.format(args.suffix)
    print('storing name: ' + args.store_name)

    check_rootfolders()

    # Pre-compute channel-pruning statistics from the fine-tune weights.
    if args.prune in ['input', 'inout'] and args.tune_from:
        sd = torch.load(args.tune_from)
        sd = sd['state_dict']
        sd = input_dim_L2distance(sd, args.shift_div)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                new_length=2 if args.data_fuse else None,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                pretrain=args.pretrain,
                is_shift=args.shift,
                shift_div=args.shift_div,
                shift_place=args.shift_place,
                fc_lr5=not (args.tune_from and args.dataset in args.tune_from),
                temporal_pool=args.temporal_pool,
                non_local=args.non_local,
                concat=args.concat,
                extra_temporal_modeling=args.extra_temporal_modeling,
                prune_list=[prune_conv1in_list, prune_conv1out_list],
                is_prune=args.prune,
                )
    print(model)
    #summary(model, torch.zeros((16, 24, 224, 224)))
    #exit(1)

    # NOTE(review): crop_size is computed per-dataset and then
    # unconditionally overwritten to 256 below — the branch is dead.
    if args.dataset == 'ucf101':
        #twice sample & full resolution
        twice_sample = True
        crop_size = model.scale_size  #256 x 256
    else:
        twice_sample = False
        crop_size = model.crop_size  #224 x 224
    crop_size = 256
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies(args.concat)
    #print(type(policies))
    #print(policies)
    #exit()
    train_augmentation = model.get_augmentation(
        flip=False if 'something' in args.dataset or 'jester' in args.dataset
        or 'nvgesture' in args.dataset else True)

    model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if args.temporal_pool:  # early temporal pool so that we can load the state_dict
            make_temporal_pool(model.module.base_model, args.num_segments)
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): `optimizer` is not defined until after the
            # tune_from block below — resuming here raises NameError.
            optimizer.load_state_dict(checkpoint['optimizer'])
            # NOTE(review): message formats args.evaluate but the loaded
            # checkpoint is args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.tune_from:
        print(("=> fine-tuning from '{}'".format(args.tune_from)))
        tune_from_list = args.tune_from.split(',')
        sd = torch.load(tune_from_list[0])
        sd = sd['state_dict']
        model_dict = model.state_dict()
        # Reconcile key names that differ by '.net' / '.prune' wrappers.
        replace_dict = []
        for k, v in sd.items():
            if k not in model_dict and k.replace('.net', '') in model_dict:
                print('=> Load after remove .net: ', k)
                replace_dict.append((k, k.replace('.net', '')))
        for k, v in model_dict.items():
            if k not in sd and k.replace('.net', '') in sd:
                print('=> Load after adding .net: ', k)
                replace_dict.append((k.replace('.net', ''), k))
        for k, v in model_dict.items():
            if k not in sd and k.replace('.prune', '') in sd:
                print('=> Load after adding .prune: ', k)
                replace_dict.append((k.replace('.prune', ''), k))
        # Reshape pretrained tensors to the pruned/concatenated layout.
        if args.prune in ['input', 'inout']:
            sd = adjust_para_shape_prunein(sd, model_dict)
        if args.prune in ['output', 'inout']:
            sd = adjust_para_shape_pruneout(sd, model_dict)
        if args.concat != "" and "concat" not in tune_from_list[0]:
            sd = adjust_para_shape_concat(sd, model_dict)
        for k, k_new in replace_dict:
            sd[k_new] = sd.pop(k)
        keys1 = set(list(sd.keys()))
        keys2 = set(list(model_dict.keys()))
        set_diff = (keys1 - keys2) | (keys2 - keys1)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        if args.dataset not in tune_from_list[0]:
            # new dataset
            print('=> New dataset, do not load fc weights')
            sd = {k: v for k, v in sd.items() if 'fc' not in k}
        if args.modality != 'Flow' and 'Flow' in tune_from_list[0]:
            sd = {k: v for k, v in sd.items() if 'conv1.weight' not in k}
        #print(sd.keys())
        #print("*"*50)
        #print(model_dict.keys())
        for k, v in list(sd.items()):
            if k not in model_dict:
                sd.pop(k)
        # NOTE(review): unconditional pop raises KeyError if the key is
        # absent from the fine-tune weights — confirm it always exists.
        sd.pop("module.base_model.embedding.weight")
        model_dict.update(sd)
        model.load_state_dict(model_dict)

    if args.temporal_pool and not args.resume:
        make_temporal_pool(model.module.base_model, args.num_segments)

    # Caption decoder, trained jointly with the backbone at 5x lr.
    decoder = TransformerModel().cuda()
    if args.decoder_resume:
        decoder_chkpoint = torch.load(args.decoder_resume)
        decoder.load_state_dict(decoder_chkpoint["state_dict"])
    print("decoder parameters = ", decoder.parameters())
    policies.append({
        "params": decoder.parameters(),
        "lr_mult": 5,
        "decay_mult": 1,
        "name": "Attndecoder_weight"
    })

    cudnn.benchmark = True

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data loading code.  RGBDiff normalizes internally, so skip it here.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality in ['RGB']:
        data_length = 1
    elif args.modality in ['Depth']:
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    '''
    dataRoot = r"/home/share/YouCook/downloadVideo"
    for dirPath, dirnames, filenames in os.walk(dataRoot):
        for filename in filenames:
            print(os.path.join(dirPath,filename) +"is {}".format("exist" if os.path.isfile(os.path.join(dirPath,filename))else "NON"))
            train_data = torchvision.io.read_video(os.path.join(dirPath,filename),start_pts=0,end_pts=1001, )
            tmp = torchvision.io.read_video_timestamps(os.path.join(dirPath,filename),)
            print(tmp)
            print(len(tmp[0]))
            print(train_data[0].size())
            exit()
    exit()
    '''
    '''
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ]), dense_sample=args.dense_sample, data_fuse = args.data_fuse),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True,
        drop_last=True)  # prevent something not % n_GPU

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ]), dense_sample=args.dense_sample, twice_sample=twice_sample, data_fuse = args.data_fuse),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    '''
    #global trainDataloader, valDataloader, train_loader, val_loader
    trainDataloader = YouCookDataSetRcg(args.root_path, args.train_list, train=True, inputsize=crop_size, hasPreprocess = False,
                                        clipnums=args.clipnums,
                                        hasWordIndex = True,)
    valDataloader = YouCookDataSetRcg(args.root_path, args.val_list, val=True, inputsize=crop_size, hasPreprocess = False,
                                      #clipnums=args.clipnums,
                                      hasWordIndex = True,)
    #print(trainDataloader._getMode())
    #print(valDataloader._getMode())
    #exit()
    train_loader = torch.utils.data.DataLoader(trainDataloader,
                                               #shuffle=True,
                                               )
    val_loader = torch.utils.data.DataLoader(valDataloader)
    # Mapping used to decode predicted word indices back to tokens.
    index2wordDict = trainDataloader.getIndex2wordDict()
    #print(train_loader is val_loader)
    #print(trainDataloader._getMode())
    #print(valDataloader._getMode())
    #print(trainDataloader._getMode())
    #print(valDataloader._getMode())
    #print(len(train_loader))
    #exit()

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.NLLLoss().cuda()
    elif args.loss_type == "MSELoss":
        criterion = torch.nn.MSELoss().cuda()
    elif args.loss_type == "BCELoss":
        #print("BCELoss")
        criterion = torch.nn.BCELoss().cuda()
    elif args.loss_type == "CrossEntropyLoss":
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, args.store_name, 'log.csv'), 'w')
    with open(os.path.join(args.root_log, args.store_name, 'args.txt'), 'w') as f:
        f.write(str(args))
    #print(os.path.join(args.root_log, args.store_name, 'args.txt'))
    #exit()
    tf_writer = SummaryWriter(
        log_dir=os.path.join(args.root_log, args.store_name))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_type, args.lr_steps)
        #print("265")
        # train for one epoch
        ######
        #print(trainDataloader._getMode())
        #print(valDataloader._getMode())
        train(train_loader, model, decoder, criterion, optimizer, epoch,
              log_training, tf_writer, index2wordDict)
        ######
        #print("268")
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, decoder, criterion, epoch,
                             log_training, tf_writer,
                             index2wordDict=index2wordDict)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            tf_writer.add_scalar('acc/test_top1_best', best_prec1, epoch)

            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            #print(output_best)
            log_training.write(output_best + '\n')
            log_training.flush()

            # Checkpoint backbone and decoder separately.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': decoder.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, filename="decoder")
        else:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, False)
            # NOTE(review): `is_best` is unbound on this path until a
            # validation epoch has run — this can raise NameError early on.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': decoder.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, filename="decoder")
        #break
    print("test pass")
def main():
    """Inspect a VideoModel: print its parameter count, then (dead code, see
    NOTE below) visualize first-layer kernels and activations with matplotlib.

    Reads CLI options from the module-level ``parser`` and mutates the module
    global ``args``. No return value; output is prints and matplotlib windows.
    """
    global args
    args = parser.parse_args()
    train_videofolder, val_videofolder, args.root_path, _ = return_dataset(args.dataset)
    # Hard-coded for Something-Something (174 classes) — presumably
    # intentional for this inspection script; confirm before reuse.
    num_class = 174
    rgb_prefix = ''
    rgb_read_format = "{:05d}.jpg"
    model = VideoModel(num_class=num_class, modality=args.modality,
                       num_segments=args.num_segments, base_model=args.arch,
                       consensus_type=args.consensus_type, dropout=args.dropout,
                       partial_bn=not args.no_partialbn, gsm=args.gsm,
                       target_transform=None)
    # Replace the consensus module so the raw per-segment output is exposed.
    model.consensus = Identity()
    print("parameters", sum(p.numel() for p in model.parameters()))
    print(model)
    # NOTE(review): unconditional exit — every statement below this line is
    # unreachable. Looks like leftover debugging; remove or gate behind a
    # flag if the visualization path is still wanted.
    sys.exit(1)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = model.cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): this message formats args.evaluate, but the file
            # actually loaded is args.resume — should print args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    normalize = GroupNormalize(input_mean, input_std)
    dataset = VideoDataset(args.root_path, train_videofolder, num_segments=8,
                           new_length=1, modality="RGB",
                           image_tmpl=rgb_prefix + rgb_read_format,
                           transform=torchvision.transforms.Compose([
                               train_augmentation,
                               # BNInception/InceptionV3 expect BGR, 0-255
                               # inputs, hence roll/div toggles.
                               Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                               ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                               normalize
                           ]))

    def normalize_output(img):
        # Rescale a tensor to [0, 1] for display.
        img = img - img.min()
        img = img / img.max()
        return img

    # Run a single sample through the model (assumes dataset[0] yields
    # (tensor, label) — TODO confirm against VideoDataset).
    data = dataset[0][0].unsqueeze_(0).cuda()
    output = model(data)
    #print(model)
    #.exit(1)
    # Plot some images
    idx = torch.randint(0, output.size(0), ())
    #pred = normalize_output(output[idx, 0])
    img = data[idx, 0]
    #fig, axarr = plt.subplots(1, 2)
    plt.imshow(img.cpu().detach().numpy())
    #axarr[1].imshow(pred.cpu().detach().numpy())

    # Visualize feature maps: capture the first conv layer's output via a
    # forward hook, keyed by name in `activation`.
    activation = {}

    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model.base_model.conv1_7x7_s2.register_forward_hook(get_activation('conv1'))
    data, _ = dataset[0]
    data.unsqueeze_(0)
    output = model(data.cuda())
    kernels = model.base_model.conv1_7x7_s2.weight.cpu().detach()
    # "-40" trims the subplot count — presumably to keep the figure small;
    # verify it matches the layer's channel count (e.g. 64 -> 24 plots).
    fig, axarr = plt.subplots(kernels.size(0) - 40, figsize=(15, 15))
    for idx in range(kernels.size(0) - 40):
        axarr[idx].imshow(np.transpose(kernels[idx].squeeze(), (1, 2, 0)))
    act = activation['conv1'].squeeze()
    fig, axarr = plt.subplots(act.size(0), figsize=(15, 15))
    for idx in range(act.size(0)):
        # Only the first 3 channels are shown so imshow gets an RGB-shaped array.
        axarr[idx].imshow(np.transpose(act[idx][:3].cpu(), (1, 2, 0)))
    plt.tight_layout()
    plt.show()
def main():
    """Train (or evaluate) a TSM/TSN model selected by command-line args.

    Reads configuration from the module-level ``parser``, builds the model
    and train/val dataloaders, optionally resumes from a checkpoint or
    fine-tunes from a pretrained one, then runs the epoch loop, validating
    every ``args.eval_freq`` epochs and saving the best-top-1 checkpoint.

    Side effects: mutates module globals ``args`` and ``best_prec1``,
    creates folders/files under ``args.root_log/args.store_name``, and
    writes checkpoints via ``save_checkpoint``.

    Raises:
        ValueError: on an unknown ``args.loss_type`` or ``args.modality``.
    """
    global args, best_prec1
    args = parser.parse_args()
    num_class, args.train_list, args.val_list, args.root_path, prefix = dataset_config.return_dataset(
        args.dataset, args.modality)

    # Build a unique run name encoding architecture/training options so logs
    # and checkpoints from different configurations do not collide.
    full_arch_name = args.arch
    if args.shift:
        full_arch_name += '_shift{}_{}'.format(args.shift_div, args.shift_place)
    if args.temporal_pool:
        full_arch_name += '_tpool'
    args.store_name = '_'.join([
        'TSM', args.dataset, args.modality, full_arch_name,
        args.consensus_type, 'segment%d' % args.num_segments,
        'e{}'.format(args.epochs)
    ])
    if args.pretrain != 'imagenet':
        args.store_name += '_{}'.format(args.pretrain)
    if args.lr_type != 'step':
        args.store_name += '_{}'.format(args.lr_type)
    if args.dense_sample:
        args.store_name += '_dense'
    if args.non_local > 0:
        args.store_name += '_nl'
    if args.suffix is not None:
        args.store_name += '_{}'.format(args.suffix)
    print('storing name: ' + args.store_name)

    check_rootfolders()

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                pretrain=args.pretrain,
                is_shift=args.shift,
                shift_div=args.shift_div,
                shift_place=args.shift_place,
                fc_lr5=not (args.tune_from and args.dataset in args.tune_from),
                temporal_pool=args.temporal_pool,
                non_local=args.non_local)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    # Horizontal flip is disabled for direction-sensitive datasets
    # (something-something / jester), where flipping changes the label.
    train_augmentation = model.get_augmentation(
        flip=False if 'something' in args.dataset or 'jester' in args.dataset else True)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.resume:
        if args.temporal_pool:  # early temporal pool so that we can load the state_dict
            make_temporal_pool(model.module.base_model, args.num_segments)
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # BUGFIX: previously formatted args.evaluate here, printing the
            # wrong value; the file actually loaded is args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.tune_from:
        print(("=> fine-tuning from '{}'".format(args.tune_from)))
        sd = torch.load(args.tune_from)
        sd = sd['state_dict']
        model_dict = model.state_dict()
        replace_dict = []
        # Reconcile ".net"-wrapped key names between the checkpoint and the
        # current model (TSM wraps shifted convs in a ".net" submodule).
        for k, v in sd.items():
            if k not in model_dict and k.replace('.net', '') in model_dict:
                print('=> Load after remove .net: ', k)
                replace_dict.append((k, k.replace('.net', '')))
        for k, v in model_dict.items():
            if k not in sd and k.replace('.net', '') in sd:
                print('=> Load after adding .net: ', k)
                replace_dict.append((k.replace('.net', ''), k))
        for k, k_new in replace_dict:
            sd[k_new] = sd.pop(k)
        keys1 = set(list(sd.keys()))
        keys2 = set(list(model_dict.keys()))
        set_diff = (keys1 - keys2) | (keys2 - keys1)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        if args.dataset not in args.tune_from:  # new dataset
            print('=> New dataset, do not load fc weights')
            sd = {k: v for k, v in sd.items() if 'fc' not in k}
        if args.modality == 'Flow' and 'Flow' not in args.tune_from:
            # RGB conv1 weights have 3 input channels; Flow needs different ones.
            sd = {k: v for k, v in sd.items() if 'conv1.weight' not in k}
        model_dict.update(sd)
        model.load_state_dict(model_dict)

    if args.temporal_pool and not args.resume:
        make_temporal_pool(model.module.base_model, args.num_segments)

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff normalizes implicitly via frame differencing.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail loudly instead of hitting a NameError on data_length below.
        raise ValueError("Unknown modality " + args.modality)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path, args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR, 0-255 input.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]),
            dense_sample=args.dense_sample),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True,
        drop_last=True)  # prevent something not % n_GPU

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path, args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]),
            dense_sample=args.dense_sample),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, args.store_name, 'log.csv'), 'w')
    with open(os.path.join(args.root_log, args.store_name, 'args.txt'), 'w') as f:
        f.write(str(args))

    tf_writer = SummaryWriter(
        log_dir=os.path.join(args.root_log, args.store_name))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_type, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training,
              tf_writer)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch,
                             log_training, tf_writer)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            tf_writer.add_scalar('acc/test_top1_best', best_prec1, epoch)

            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            print(output_best)
            log_training.write(output_best + '\n')
            log_training.flush()

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    # Release log resources (previously leaked until interpreter exit).
    log_training.close()
    tf_writer.close()
def main():
    """Train (or evaluate) a TSN model on UCF101 / HMDB51 / Kinetics.

    Reads configuration from the module-level ``parser``, builds the model
    and train/val dataloaders, optionally resumes from a checkpoint, then
    runs the SGD epoch loop, validating every ``args.eval_freq`` epochs and
    saving the best-top-1 checkpoint via ``save_checkpoint``.

    Side effects: mutates module globals ``args`` and ``best_prec1``.

    Raises:
        ValueError: on an unknown dataset, modality, or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: previously formatted args.evaluate here, printing the
            # wrong value; the file actually loaded is args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff normalizes implicitly via frame differencing.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail loudly instead of hitting a NameError on data_length below.
        raise ValueError("Unknown modality " + args.modality)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
                   else args.flow_prefix + "{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception expects BGR, 0-255 input.
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
                   else args.flow_prefix + "{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            # validate() takes the global iteration count here, not the epoch.
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)