def init_model(num_classes, new_length, args):
    """Build a TSN model from the command-line configuration.

    Args:
        num_classes: Number of output classes, forwarded to ``TSN``.
        new_length: Forwarded to ``TSN`` as its ``new_length`` keyword.
        args: Namespace providing ``num_segments``, ``modality``, ``arch`` and
            ``consensus_type``.

    Returns:
        The constructed ``TSN`` instance (``dropout=0.5``, ``partial_bn=False``).
    """
    model = TSN(num_classes,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                new_length=new_length,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    # The original also copied crop/scale/mean/std attributes into locals and
    # built a GroupScale/GroupCenterCrop pipeline, none of which were used or
    # returned; that dead code has been dropped.
    # NOTE(review): the results of these two calls were never used either.
    # They are kept only in case the methods have side effects on the model --
    # confirm against the TSN implementation and remove if they are pure.
    model.get_optim_policies()
    model.get_augmentation()

    return model
# Fine-tuning setup: re-initialise the consensus head, wrap the model for
# multi-GPU use and optionally restore a checkpoint.
# Expects `model`, `args` (and `best_prec1` for resuming) to be in scope.

# Give every Linear layer inside the consensus module fresh parameters.
# The in-place `*_` initialisers replace the deprecated un-suffixed aliases.
for module in model.consensus.modules():
    if isinstance(module, torch.nn.Linear):
        torch.nn.init.xavier_uniform_(module.weight.data)
        torch.nn.init.normal_(module.bias.data)

print("MODEL CREATED, STARTING FINETUNING")
print(model.crop_size, model.scale_size, model.input_mean, model.input_std)

# These must be read before the DataParallel wrapper hides the attributes.
crop_size = model.crop_size
scale_size = model.scale_size
input_mean = model.input_mean
input_std = model.input_std
train_augmentation = model.get_augmentation()
print("crop", crop_size, "scale", scale_size)
policies = model.get_optim_policies()

model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

if args.resume:
    if os.path.isfile(args.resume):
        print(("=> loading checkpoint '{}'".format(args.resume)))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        # Report the checkpoint path; the original printed args.evaluate here.
        print(("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch'])))
    else:
        print(("=> no checkpoint found at '{}'".format(args.resume)))

print(model)
def main():
    """Run a single visualisation pass of a TSN model over the validation list.

    Parses the command line (registering ``--class_index`` first), builds the
    class-index -> class-name table, writes the parsed options to
    ``opts.json`` under ``args.result_path``, constructs the model with
    ``get_att_maps=True`` (optionally restoring ``args.resume``), builds the
    validation loader and calls ``validate`` once.

    Raises:
        ValueError: If ``args.dataset`` or ``args.modality`` is not recognised.
    """
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    num_classes_by_dataset = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'something': 174,
    }
    if args.dataset not in num_classes_by_dataset:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = num_classes_by_dataset[args.dataset]

    # The class-index file is read the same way for every dataset; only the
    # per-line parsing differs.
    with open(args.class_index, 'r') as f:
        content = f.readlines()
    if args.dataset == 'something':
        img_prefix = ''
        # One class name per line; the line number is the class id.
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        # "<1-based id> <name>" per line; ids are shifted to be 0-based.
        class_to_name = {}
        for line in content:
            fields = line.strip().split(' ')
            class_to_name[int(fields[0]) - 1] = fields[1]

    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    if args.consensus_type not in ('lstm', 'conv_lstm'):
        args.lstm_out_type = None

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    # Read before the DataParallel wrapper hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    # NOTE(review): neither result is used in this script; kept in case the
    # calls have side effects on the model -- confirm and remove.
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path; the original printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
        # Nothing was normalised, so nothing needs undoing. The original left
        # rev_normalize unbound here and crashed at the validate() call.
        rev_normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
    else:
        raise ValueError('Unknown modality {}'.format(args.modality))

    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length * args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length * args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')

    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = img_prefix + "{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"

    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=args.arch == 'BNInception'),
        ToTorchFormatTensor(div=args.arch != 'BNInception'),
        normalize,
    ])
    val_dataset = TSNDataSet("",
                             args.val_list,
                             num_segments=args.num_segments,
                             new_length=data_length,
                             modality=args.modality,
                             image_tmpl=image_tmpl,
                             random_shift=False,
                             temp_transform=val_temp_transform,
                             transform=val_transform)
    # NOTE(review): the validation loader shuffles, presumably so that the
    # visualised samples vary between runs -- confirm this is intended.
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
def main():
    """Evaluate a 4-class ECO-based TSN on the validation list.

    Builds the model, loads the weights stored at ``args.model_path``
    (initialising any parameter the checkpoint lacks), then prints the top-1
    prediction next to the target for every validation sample.

    Reads the module-level ``args``; it is not parsed here.
    """
    global args, best_prec1
    num_class = 4
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    # Read before the DataParallel wrapper hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers accept an iterable of dicts, one per parameter group. Each
    # dict holds a "params" list plus per-group optimisation options. Here the
    # groups are only printed for inspection further down.
    policies = model.get_optim_policies()
    # NOTE(review): unused in this script; kept in case the call has side
    # effects on the model -- confirm and remove.
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    model_dict = model.state_dict()
    print("pretrained_parts: ", args.pretrained_parts)

    model_dir = args.model_path
    new_state_dict = torch.load(model_dir)['state_dict']

    # Keys the model expects but the checkpoint does not provide.
    un_init_dict_keys = [k for k in model_dict.keys() if k not in new_state_dict]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    for k in un_init_dict_keys:
        # Start from zeros, then pick an initialiser from the key name.
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1

    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=True),
        ToTorchFormatTensor(div=False),
        normalize,
    ])
    val_dataset = TSNDataSet("",
                             args.val_list,
                             num_segments=args.num_segments,
                             new_length=data_length,
                             modality='RGB',
                             image_tmpl=rgb_read_format,
                             random_shift=False,
                             transform=val_transform)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    model.eval()

    # Pure inference: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for frames, target in val_loader:
            target = target.cuda()
            output = model(frames)
            _, pred = output.data.topk(1, 1, True, True)
            print(pred, target)
    print('done')
def main():
    """Train (or just evaluate) a TSN model and plot/save learning curves.

    Parses the command line, builds the model and data loaders, optionally
    resumes from ``args.resume``, then either runs one validation pass
    (``args.evaluate``) or trains from ``args.start_epoch`` to ``args.epochs``.
    During training the accuracy/loss histories are plotted, and both the
    figure and the histories are written under ``./<args.snapshot_pref>/``.

    Raises:
        ValueError: On an unknown dataset, modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    num_classes_by_dataset = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'myDataset': 12,
    }
    if args.dataset not in num_classes_by_dataset:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = num_classes_by_dataset[args.dataset]

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Read before the DataParallel wrapper hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path; the original printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError('Unknown modality {}'.format(args.modality))

    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"

    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    _fig, axs = plt.subplots(4, 1, figsize=(10, 5))

    history_dir = "./%s" % args.snapshot_pref
    if args.start_epoch == 0:
        train_acc = []
        train_loss = []
        val_acc = []
        val_loss = []
        epochs = []
        val_epochs = []
    else:
        # Resuming: continue the curves saved by the previous run.
        train_acc = np.load("%s/train_acc.npy" % history_dir).tolist()
        train_loss = np.load("%s/train_loss.npy" % history_dir).tolist()
        val_acc = np.load("%s/val_acc.npy" % history_dir).tolist()
        val_loss = np.load("%s/val_loss.npy" % history_dir).tolist()
        epochs = np.load("%s/epochs.npy" % history_dir).tolist()
        val_epochs = np.load("%s/val_epochs.npy" % history_dir).tolist()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        acc, loss = train(train_loader, model, criterion, optimizer, epoch)
        train_acc.append(acc)
        train_loss.append(loss)
        epochs.append(epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, v_loss = validate(val_loader, model, criterion,
                                     (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            val_acc.append(prec1)
            val_loss.append(v_loss)
            val_epochs.append(epoch)

        # NOTE(review): the original indentation was lost. Plotting and saving
        # are assumed to run once per epoch (not only on evaluation epochs),
        # since the legend is attached at `epoch == 0` -- confirm.
        axs[0].plot(val_epochs, val_loss, c='b', marker='.', label='val_loss')
        axs[1].plot(val_epochs, val_acc, c='r', marker='.', label='val_acc')
        axs[2].plot(epochs, train_loss, c='b', marker='.', label='train_loss')
        axs[3].plot(epochs, train_acc, c='r', marker='.', label='train_acc')
        plt.title('TSN_' + args.snapshot_pref)
        if epoch == 0:
            for ax in axs[:4]:
                ax.legend(loc='best')
        plt.pause(0.000001)

        # exist_ok avoids the check-then-create race of the original.
        os.makedirs(args.snapshot_pref, exist_ok=True)
        plt.savefig('%s/%s.jpg' % (history_dir, str(epoch).zfill(5)))
        np.save("%s/train_acc.npy" % history_dir, train_acc)
        np.save("%s/train_loss.npy" % history_dir, train_loss)
        np.save("%s/val_acc.npy" % history_dir, val_acc)
        np.save("%s/val_loss.npy" % history_dir, val_loss)
        np.save("%s/val_epochs.npy" % history_dir, val_epochs)
        np.save("%s/epochs.npy" % history_dir, epochs)
def main():
    """Train (or just evaluate) a TRN/TSN model with the Adam optimiser.

    Parses the command line, resolves the dataset through
    ``datasets_video.return_dataset``, builds the model and data loaders,
    optionally resumes from ``args.resume``, then either runs one validation
    pass (``args.evaluate``) or trains from ``args.start_epoch`` to
    ``args.epochs``. Training progress goes to ``<store_name>_adam.csv`` under
    ``args.root_log``.

    Raises:
        ValueError: On an unknown modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()
    print("args args args")
    print(args)
    check_rootfolders()

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read before the DataParallel wrapper hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path; the original printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError('Unknown modality {}'.format(args.modality))

    inception_family = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=inception_family),
                       ToTorchFormatTensor(div=not inception_family),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=inception_family),
                       ToTorchFormatTensor(div=not inception_family),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.Adam(policies,
                                 lr=args.lr,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_path = os.path.join(args.root_log, '%s_adam.csv' % args.store_name)
    # The context manager closes (and flushes) the log even if training
    # raises; the original never closed the handle.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
else: print('Extracting frames using ffmpeg...') frames = extract_frames(args.video_file, args.test_segments) # ---core process :from frames-> prediction------------------------------- # Make video prediction. data = transform(frames) # torch.Size([240, 299, 299]) Jprint('frames:{} * {}'.format(len(frames), frames[0].size), ' transformed into:', data.size()) input = data.view(-1, 3, data.size(1), data.size(2)).unsqueeze(0).cuda() Jprint('input:', input.size()) #torch.Size([1, 80, 3, 299, 299]) (1, C/3, 3, H, W) #---way1 to parameters---- print('*' * 20, 'TSN:') Tools.parameter_desc(net.base_model) policy = net.get_optim_policies() #----way2 to parameters--- toal_params = 0 for p in policy: print( '-' * 20, '{} ( num: {})'.format(p['name'], len(p['params'])), '-' * 20, ) for param in p['params']: toal_params += param.size().numel() print(param.size(), param.size().numel()) print('*' * 20, 'total parameters: {:,}'.format(toal_params)) #------- gc.collect()
def main():
    """Train (or just evaluate) a TSN model, reporting to a ``vis.Visualizer``.

    Parses the command line, builds the model (single ``.cuda()`` call, no
    DataParallel wrapper) and the data loaders, optionally resumes from
    ``args.resume``, then either runs one validation pass (``args.evaluate``)
    or trains from ``args.start_epoch`` to ``args.epochs``, checkpointing after
    every evaluation.

    Raises:
        ValueError: On an unknown dataset, modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    num_classes_by_dataset = {'ucf101': 101, 'hmdb51': 51, 'kinetics': 400}
    if args.dataset not in num_classes_by_dataset:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = num_classes_by_dataset[args.dataset]

    # Author's note on the configuration used:
    #   consensus_type = avg, base_model = resnet_101, dropout = 0.5
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Author's note: 224
    crop_size = model.crop_size
    # Author's note: 256/224
    scale_size = model.scale_size
    # Author's note: mean/std differ per modality.
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    # Author's note: the augmentation consists of GroupMultiScaleCrop and
    # GroupRandomHorizontalFlip. GroupMultiScaleCrop crops at fixed positions
    # and finally resizes to 224 using bilinear interpolation.
    train_augmentation = model.get_augmentation()

    print(args.gpus)
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path; the original printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError('Unknown modality {}'.format(args.modality))

    # Author's note: `roll` is needed depending on whether the network was
    # trained on BGR or RGB input.
    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="im{}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="im{}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Show the optimisation policy of every parameter group.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # Author's note: the learning rate is generally 1e-3 here.
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run validation once and stop.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    viz = vis.Visualizer()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            # The keys must match what the resume branch above reads back:
            # the original stored the weights under 'test_crops' and the
            # latest (not the best) precision under 'best_prec1'.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train (or just evaluate) a TRN model configured from the command line.

    Defines and parses the argument parser locally, forces
    ``consensus_type`` to ``"TRN"``, restricts the visible GPUs to
    ``--gpu``, points the log/model/output roots at the logger directory,
    builds the model and data loaders, optionally resumes from
    ``args.resume``, then either runs one validation pass (``args.evaluate``)
    or trains from ``args.start_epoch`` to ``args.epochs``.

    Raises:
        AssertionError: If fewer than two GPU ids are given in ``--gpu``.
        ValueError: On an unknown modality or loss type.
    """
    logger.auto_set_dir()
    global args, best_prec1
    import argparse

    parser = argparse.ArgumentParser(
        description="PyTorch implementation of Temporal Segment Networks")
    parser.add_argument('--dataset', type=str, default="something",
                        choices=['something', 'jester', 'moments'])
    parser.add_argument('--modality', type=str, default="RGB",
                        choices=['RGB', 'Flow'])
    parser.add_argument('--train_list', type=str, default="")
    parser.add_argument('--val_list', type=str, default="")
    parser.add_argument('--root_path', type=str, default="")
    parser.add_argument('--store_name', type=str, default="")

    # ========================= Model Configs ==========================
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--num_segments', type=int, default=3)
    parser.add_argument('--consensus_type', type=str, default='avg')
    parser.add_argument('--k', type=int, default=3)
    # Help texts use %(default)s so they cannot drift from the real default
    # (several of the original strings quoted a different value).
    parser.add_argument('--dropout', '--do', default=0.8, type=float,
                        metavar='DO',
                        help='dropout ratio (default: %(default)s)')
    parser.add_argument('--loss_type', type=str, default="nll",
                        choices=['nll'])
    parser.add_argument('--img_feature_dim', default=256, type=int,
                        help="the feature dimension for each frame")

    # ========================= Learning Configs ==========================
    parser.add_argument('--epochs', default=120, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-b', '--batch_size', default=128, type=int,
                        metavar='N',
                        help='mini-batch size (default: %(default)s)')
    parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--lr_steps', default=[50, 100], type=float,
                        nargs="+", metavar='LRSteps',
                        help='epochs to decay learning rate by 10')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float,
                        metavar='W', help='weight decay (default: 5e-4)')
    parser.add_argument('--clip-gradient', '--gd', default=20, type=float,
                        metavar='W',
                        help='gradient norm clipping (default: %(default)s)')
    parser.add_argument('--no_partialbn', '--npb', default=False,
                        action="store_true")

    # ========================= Monitor Configs ==========================
    parser.add_argument('--print-freq', '-p', default=20, type=int,
                        metavar='N',
                        help='print frequency (default: %(default)s)')
    parser.add_argument('--eval-freq', '-ef', default=5, type=int,
                        metavar='N',
                        help='evaluation frequency (default: 5)')

    # ========================= Runtime Configs ==========================
    parser.add_argument('-j', '--workers', default=30, type=int, metavar='N',
                        help='number of data loading workers '
                             '(default: %(default)s)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e', '--evaluate', dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--snapshot_pref', type=str, default="")
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--gpu', type=str, default='4')
    parser.add_argument('--flow_prefix', default="", type=str)
    parser.add_argument('--root_log', type=str, default='log')
    parser.add_argument('--root_model', type=str, default='model')
    parser.add_argument('--root_output', type=str, default='output')

    args = parser.parse_args()
    # This entry point always trains TRN, whatever --consensus_type says.
    args.consensus_type = "TRN"

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device_ids = [int(gpu_id) for gpu_id in args.gpu.split(',')]
    assert len(device_ids) > 1, "TRN must run with GPU_num > 1"

    # All artefacts go into the directory chosen by the logger.
    args.root_log = logger.get_logger_dir()
    args.root_model = logger.get_logger_dir()
    args.root_output = logger.get_logger_dir()

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read before the DataParallel wrapper hides the attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    if torch.cuda.device_count() > 1:
        # TODO: pass device_ids=[int(id) for id in args.gpu.split(',')]
        model = torch.nn.DataParallel(model)

    if torch.cuda.is_available():
        model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path; the original printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError('Unknown modality {}'.format(args.modality))

    inception_family = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=inception_family),
                       ToTorchFormatTensor(div=not inception_family),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=inception_family),
                       ToTorchFormatTensor(div=not inception_family),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        logger.info(
            'group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'],
                group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    # The context manager closes (and flushes) the log even if training
    # raises; the original never closed the handle.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
def main():
    """Train or evaluate a TSN model with optional temporal frame transforms.

    Parses the command line into the global ``args``, writes the options to
    ``<result_path>/opts.json``, builds the model and the train/val loaders,
    and then either runs a single validation pass (``args.evaluate``) or
    trains from ``args.start_epoch`` to ``args.epochs``, validating every
    ``args.eval_freq`` epochs and saving a checkpoint after each validation.

    Raises:
        ValueError: for an unknown dataset, modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Frame files of the 'something' dataset carry no 'image_' prefix.
    img_prefix = '' if args.dataset == 'something' else 'image_'

    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    # The LSTM output type only applies to the recurrent consensus modules.
    if args.consensus_type not in ('lstm', 'conv_lstm'):
        args.lstm_out_type = None

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                dataset=args.dataset)

    # Read everything needed from the bare model before it is wrapped.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the path that was actually loaded, not the evaluate flag.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    # File-name template shared by the train and validation datasets.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = img_prefix + "{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"

    frames_per_clip = data_length * args.num_segments

    if args.train_reverse:
        train_temp_transform = ReverseFrames(size=frames_per_clip)
    elif args.train_shuffle:
        train_temp_transform = ShuffleFrames(size=frames_per_clip)
    else:
        train_temp_transform = IdentityTransform()

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=image_tmpl,
            temp_transform=train_temp_transform,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ]),
            contrastive_mode=args.contrastive_mode),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=frames_per_clip)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=frames_per_clip)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=image_tmpl,
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type != 'nll':
        raise ValueError("Unknown loss type")
    if args.contrastive_mode:
        criterion = ContrastiveLoss(m1=args.contras_m1,
                                    m2=args.contras_m2).cuda()
    else:
        criterion = torch.nn.CrossEntropyLoss().cuda()
    # Validation always uses cross entropy, whatever the training loss is.
    val_criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # The loggers are passed around as file paths, not as open handles.
    if args.evaluate:
        print('evaluating')
        val_logger = os.path.join(args.result_path, 'test.log')
        validate(val_loader, model, val_criterion, 0, val_logger=val_logger)
        return

    train_logger = os.path.join(args.result_path, 'train.log')
    val_logger = os.path.join(args.result_path, 'val.log')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        if args.contrastive_mode:
            train_contrastive(train_loader,
                              model,
                              criterion,
                              optimizer,
                              epoch,
                              train_logger=train_logger,
                              args=args)
        else:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  train_logger=train_logger)

        # Blank line separating epochs in the training log.
        with open(train_logger, 'a') as f:
            f.write('\n')

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             val_criterion, (epoch + 1) * len(train_loader),
                             val_logger=val_logger)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train TRN on 'thumos', then fine-tune a few-shot head on 'thumos-fs'.

    Phase 1 trains (or, with ``args.evaluate``, only validates) a TSN model
    on the 'thumos' dataset with the RGB modality. Phase 2 runs ten rounds
    in which the consensus classifier of the same model is replaced by a
    fresh 14-way head and the model is trained on the 'thumos-fs' dataset,
    checkpointing to ``fs_checkpoint.pth.tar``.

    Raises:
        ValueError: for an unknown modality or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    # Dataset and modality are pinned for this experiment, overriding the CLI.
    args.dataset = "thumos"
    args.modality = "RGB"

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset('thumos', args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read everything needed from the bare model before it is wrapped.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the path that was actually loaded, not the evaluate flag.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    def build_loaders(image_tmpl):
        """Return (train_loader, val_loader) for the lists currently in args."""
        inception_like = ['BNInception', 'InceptionV3']
        train = torch.utils.data.DataLoader(TSNDataSet(
            args.root_path,
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=image_tmpl,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in inception_like)),
                ToTorchFormatTensor(div=(args.arch not in inception_like)),
                normalize,
            ])),
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=args.workers,
                                            pin_memory=True)
        val = torch.utils.data.DataLoader(TSNDataSet(
            args.root_path,
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=image_tmpl,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in inception_like)),
                ToTorchFormatTensor(div=(args.arch not in inception_like)),
                normalize,
            ])),
                                          batch_size=args.batch_size,
                                          shuffle=False,
                                          num_workers=args.workers,
                                          pin_memory=True)
        return train, val

    train_loader, val_loader = build_loaders(prefix)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # The context manager closes the log even if training raises.
    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)

    ###########################################################################
    # Everything below trains the few-shot model.
    ###########################################################################
    for _ in range(10):
        print("TRAINING FEW-SHOT MODEL")
        num_fs_class = 14  # number of few shot classes

        # load few-shot dataset
        (categories, args.train_list, args.val_list, args.root_path,
         prefix) = datasets_video.return_dataset('thumos-fs', args.modality)

        # Replace the classifier with a fresh head for the 14-class task.
        # fs_model is the same object as model, not a copy.
        fs_model = model
        fs_model.module.consensus.classifier = nn.Sequential(
            nn.ReLU(), nn.Linear(in_features=768, out_features=512,
                                 bias=True), nn.ReLU(),
            nn.Linear(in_features=512, out_features=num_fs_class,
                      bias=True)).cuda()

        train_loader, val_loader = build_loaders(prefix)

        # NOTE(review): this looks unreachable, because the evaluate-only
        # path above has already returned; kept to preserve the control flow.
        if args.evaluate:
            validate(val_loader, fs_model, criterion, 0)
            return

        # Re-collect the parameter groups: the groups gathered before the
        # head was replaced still reference the old classifier's parameters,
        # so an optimizer built from them would never update the new head.
        policies = fs_model.module.get_optim_policies()

        for group in policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        args.store_name = '_'.join([
            'fs_TRN', args.dataset, args.modality, args.arch,
            args.consensus_type,
            'segment%d' % args.num_segments
        ])
        print('storing name: ' + args.store_name)

        best_prec1 = 0

        # Mode 'w' means each round overwrites the previous round's log.
        fs_log_path = os.path.join(args.root_log, '%s.csv' % "fs-logging")
        with open(fs_log_path, 'w') as log_fs_training:
            for epoch in range(args.start_epoch, args.epochs):
                torch.cuda.empty_cache()
                adjust_learning_rate(optimizer, epoch, args.lr_steps)

                # train for one epoch
                train(train_loader, fs_model, criterion, optimizer, epoch,
                      log_fs_training)

                # evaluate on validation set
                if ((epoch + 1) % args.eval_freq == 0
                        or epoch == args.epochs - 1):
                    prec1 = validate(val_loader, fs_model, criterion,
                                     (epoch + 1) * len(train_loader),
                                     log_fs_training)

                    # remember best prec@1 and save checkpoint
                    is_best = prec1 > best_prec1
                    best_prec1 = max(prec1, best_prec1)
                    save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'arch': args.arch,
                            'state_dict': fs_model.state_dict(),
                            'best_prec1': best_prec1,
                        },
                        is_best,
                        filename='fs_checkpoint.pth.tar')
def main():
    """Train or evaluate a TRN model and record the git state of the run.

    Builds the model and loaders for ``args.dataset`` / ``args.modality``.
    With ``args.evaluate`` it runs one validation pass and returns. Otherwise
    it resolves an experiment name (from ``args.exp_name`` or from an
    ``experiment: <name>`` subject line of the current commit), creates
    ``<root_log>/<exp_name>/<timestamp>/``, stores the git log and diff there
    and trains, validating every ``args.eval_freq`` epochs.

    Raises:
        ValueError: for an unknown modality or loss type.
        SystemExit: with status 1 when no experiment name can be determined.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)
    print("num_class: " + str(num_class))

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read everything needed from the bare model before it is wrapped.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the path that was actually loaded, not the evaluate flag.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # In this variant Flow, like RGB, is read one frame at a time.
    if args.modality == 'Flow' or args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    inception_like = ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in inception_like)),
            ToTorchFormatTensor(div=(args.arch not in inception_like)),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # Dump the validation-loader configuration for debugging.
    print("Creating val_loader:")
    print("args.root_path: " + str(args.root_path))
    print("args.val_list: " + str(args.val_list))
    print("args.num_segments: " + str(args.num_segments))
    print("data_length: " + str(data_length))
    print("modality: " + str(args.modality))
    print("prefix: " + str(prefix))
    print("scale_size: " + str(int(scale_size)))
    print("crop_size: " + str(crop_size))
    print("args.arch: " + str(args.arch))
    print("args.batch_size: " + str(args.batch_size))
    print("args.workers: " + str(args.workers))
    print("")

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in inception_like)),
            ToTorchFormatTensor(div=(args.arch not in inception_like)),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # Snapshot the repository state. The --pretty format puts the short hash
    # on line 0, the author on line 1 and the commit subject on line 2.
    git_log_output = subprocess.run(
        [
            'git', 'log', '-n1',
            '--pretty=format:commit: %h%nauthor: %an%n%s%n%b'
        ],
        stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')
    git_diff_output = subprocess.run(
        ['git', 'diff'], stdout=subprocess.PIPE).stdout.decode('utf-8')

    if args.exp_name == '':
        # git prints fewer than three lines when the command fails (e.g.
        # outside a repository); treat that as "no experiment subject".
        subject = git_log_output[2] if len(git_log_output) > 2 else ''
        exp_name_match = re.match(r'experiment: *(.+)', subject)
        if exp_name_match is None:
            print(
                'Experiment name required:\n'
                '  current commit subject does not specify an experiment, and\n'
                '  --experiment_name was not specified')
            # This is an error condition, so exit with a non-zero status.
            sys.exit(1)
        args.exp_name = exp_name_match.group(1)
    print(f'experiment name: {args.exp_name}')

    # Named 'timestamp' so it cannot shadow a module called 'time'.
    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    exp_dir_path = os.path.join(args.root_log, args.exp_name, timestamp)
    log_file_path = os.path.join(exp_dir_path, f'{args.store_name}.csv')
    print("log_file_path:")
    print(log_file_path)
    os.makedirs(exp_dir_path)

    # store information about git status
    git_info_path = os.path.join(exp_dir_path, 'experiment_info.txt')

    # The context manager closes the training log even if training raises.
    with open(log_file_path, 'w') as log_training:
        with open(git_info_path, 'w') as f:
            f.write('\n'.join(git_log_output))
            f.write('\n\n' + ('=' * 80) + '\n')
            f.write(git_diff_output)

        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best, timestamp)
def main():
    """Validate an RGB model and a Flow model together (two-stream setup).

    Pins the process to GPU 0, builds one TSN per modality, optionally loads
    ``args.resume_rgb`` / ``args.resume_flow`` checkpoints, builds one
    validation loader per stream and hands both to ``validate``.

    Raises:
        ValueError: for an unknown dataset or loss type.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                "RGB",
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    model2 = TSN(num_class,
                 args.num_segments,
                 "Flow",
                 base_model=args.arch,
                 consensus_type=args.consensus_type,
                 dropout=args.dropout,
                 partial_bn=not args.no_partialbn)

    # Read the preprocessing attributes of each stream from its own model
    # before wrapping, so the Flow loader is not built with RGB values.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    flow_crop_size = model2.crop_size
    flow_scale_size = model2.scale_size
    flow_input_mean = model2.input_mean
    flow_input_std = model2.input_std
    policies = model.get_optim_policies()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model2 = torch.nn.DataParallel(model2, device_ids=args.gpus).cuda()

    if args.resume_rgb:
        if os.path.isfile(args.resume_rgb):
            print(("=> loading checkpoint '{}'".format(args.resume_rgb)))
            checkpoint = torch.load(args.resume_rgb)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}'".format(args.resume_rgb)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume_rgb)))

    if args.resume_flow:
        if os.path.isfile(args.resume_flow):
            print(("=> loading checkpoint '{}'".format(args.resume_flow)))
            checkpoint = torch.load(args.resume_flow)
            # These overwrite the values taken from the RGB checkpoint.
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model2.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}'".format(args.resume_flow)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume_flow)))

    cudnn.benchmark = True

    def build_val_loader(list_file, new_length, modality, scale, crop, norm):
        """Return a deterministic (unshuffled) validation loader."""
        return torch.utils.data.DataLoader(TSNDataSet(
            "",
            list_file,
            num_segments=args.num_segments,
            new_length=new_length,
            modality=modality,
            image_tmpl="frame{:06d}.jpg",
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale)),
                GroupCenterCrop(crop),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                norm,
            ])),
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.workers,
                                           pin_memory=True)

    # Data loading code
    val_loader = build_val_loader(args.val_list, 1, "RGB", scale_size,
                                  crop_size,
                                  GroupNormalize(input_mean, input_std))
    val_loader2 = build_val_loader(
        args.val_list2, 5, "Flow", flow_scale_size, flow_crop_size,
        GroupNormalize(flow_input_mean, flow_input_std))

    # define loss function (criterion)
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    validate(val_loader, val_loader2, model, model2, criterion)
def main():
    """Train or evaluate the key/value/query consensus variant of TSN.

    Parses the command line into the global ``args``, writes the options to
    ``<result_path>/opts.json``, builds the model, datasets and loaders and
    then either runs one validation pass that writes
    ``results_epoch<N>.json`` (``args.evaluate``) or trains, validating every
    ``args.eval_freq`` epochs and checkpointing after each validation.

    Also publishes ``num_train_dataset``, ``num_val_dataset`` and ``writer``
    as module globals.

    Raises:
        ValueError: for an unknown modality, loss type or (when training)
            optimizer.
    """
    global args, best_prec1, num_train_dataset, num_val_dataset, writer
    args = parser.parse_args()

    _fill_in_None_args()
    _join_result_path()
    check_rootfolders()

    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality,
                                             args.root_path, args.file_type)
    num_class = len(categories)

    args.store_name = '_'.join([
        args.consensus_type, args.dataset, args.modality, args.arch,
        args.consensus_type, 'segment%d' % args.num_segments,
        'key%d' % args.key_dim, 'value%d' % args.value_dim,
        'query%d' % args.query_dim,
        'queryUpdateby%s' % args.query_update_method,
        'NoSoftmax%s' % args.no_softmax_on_p,
        'hopMethod%s' % args.hop_method
    ])
    print('storing name: ' + args.store_name)

    model = TSN(
        num_class,
        args.num_segments,
        args.modality,
        base_model=args.arch,
        consensus_type=args.consensus_type,
        dropout=args.dropout,
        key_dim=args.key_dim,
        value_dim=args.value_dim,
        query_dim=args.query_dim,
        query_update_method=args.query_update_method,
        partial_bn=not args.no_partialbn,
        freezeBN_Eval=args.freezeBN_Eval,
        freezeBN_Require_Grad_True=args.freezeBN_Require_Grad_True,
        num_hop=args.hop,
        hop_method=args.hop_method,
        num_CNNs=args.num_CNNs,
        no_softmax_on_p=args.no_softmax_on_p,
        freezeBackbone=args.freezeBackbone,
        CustomPolicy=args.CustomPolicy,
        sorting=args.sorting,
        MultiStageLoss=args.MultiStageLoss,
        MultiStageLoss_MLP=args.MultiStageLoss_MLP,
        how_to_get_query=args.how_to_get_query,
        only_query=args.only_query,
        CC=args.CC,
        channel=args.channel,
        memory_dim=args.memory_dim,
        image_resolution=args.image_resolution,
        how_many_objects=args.how_many_objects,
        Each_Embedding=args.Each_Embedding,
        Curriculum=args.Curriculum,
        Curriculum_dim=args.Curriculum_dim,
        lr_steps=args.lr_steps,
    )

    # Read everything needed from the bare model before it is wrapped.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the path that was actually loaded, not the evaluate flag.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    inception_like = ['BNInception', 'InceptionV3']

    # transform1 holds the image-level augmentation, and transform2 the
    # stacking, tensor conversion and normalisation.
    train_data = TSNDataSet(
        args.root_path,
        args.train_list,
        args.file_type,
        num_segments=args.num_segments,
        MoreAug_Rotation=args.MoreAug_Rotation,
        MoreAug_ColorJitter=args.MoreAug_ColorJitter,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        phase='train',
        transform1=torchvision.transforms.Compose([
            train_augmentation,
        ]),
        transform2=torchvision.transforms.Compose([
            Stack(roll=(args.arch in inception_like)),
            ToTorchFormatTensor(div=(args.arch not in inception_like)),
            normalize,
        ]),
        image_resolution=args.image_resolution)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False,
                                               drop_last=True)

    val_data = TSNDataSet(
        args.root_path,
        args.val_list,
        args.file_type,
        num_segments=args.num_segments,
        MoreAug_Rotation=args.MoreAug_Rotation,
        MoreAug_ColorJitter=args.MoreAug_ColorJitter,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        phase='test',
        transform1=torchvision.transforms.Compose(
            [GroupScale(int(scale_size)),
             GroupCenterCrop(crop_size)]),
        transform2=torchvision.transforms.Compose([
            Stack(roll=(args.arch in inception_like)),
            ToTorchFormatTensor(div=(args.arch not in inception_like)),
            normalize,
        ]),
        image_resolution=args.image_resolution)
    # NOTE: drop_last=True also drops the final partial validation batch.
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=False,
                                             drop_last=True)

    num_train_dataset = len(train_data)
    num_val_dataset = len(val_data)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # reduce=False returns one loss value per sample, not the batch mean.
        criterion = torch.nn.CrossEntropyLoss(reduce=False).cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(policies,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        # Only training needs an optimizer, so the error is deferred and
        # evaluate-only runs keep working.
        optimizer = None

    if args.evaluate:
        json_file_path = os.path.join(
            args.result_path, 'results_epoch%d.json' % args.evaluation_epoch)
        validate(val_loader,
                 model,
                 criterion,
                 0,
                 json_file=json_file_path,
                 idx2class=categories,
                 epoch=args.evaluation_epoch)
        return

    if optimizer is None:
        raise ValueError('Unknown optimizer ' + str(args.optimizer))

    writer = SummaryWriter(args.result_path)
    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    try:
        # The log is opened in append mode so resumed runs extend it.
        with open(log_path, 'a') as log_training:
            for epoch in range(args.start_epoch, args.epochs):
                adjust_learning_rate(optimizer, epoch, args.lr_steps)

                # train for one epoch
                train(train_loader, model, criterion, optimizer, epoch,
                      log_training)

                # evaluate on validation set
                if ((epoch + 1) % args.eval_freq == 0
                        or epoch == args.epochs - 1):
                    json_file_path = os.path.join(
                        args.result_path,
                        'results_epoch%d.json' % (epoch + 1))
                    # The iteration counter is counted in training samples,
                    # not in batches.
                    prec1 = validate(val_loader,
                                     model,
                                     criterion,
                                     (epoch + 1) * num_train_dataset,
                                     log=log_training,
                                     json_file=json_file_path,
                                     idx2class=categories,
                                     epoch=epoch)

                    # remember best prec@1 and save checkpoint
                    is_best = prec1 > best_prec1
                    best_prec1 = max(prec1, best_prec1)
                    save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'arch': args.arch,
                            'state_dict': model.state_dict(),
                            'best_prec1': best_prec1,
                        }, is_best)
    finally:
        # Close the summary writer even if training raised.
        writer.close()
def main():
    """Train or evaluate a two-class TSN model on a single GPU.

    Ensures the record directory ``<record_path><modality>`` exists, builds
    the model and loaders, and then either runs one validation pass
    (``args.evaluate``) or trains for ``args.epochs`` epochs. Validation runs
    after every epoch; whenever it yields a new best prec@1 the per-video
    predictions are pickled into the record directory. A checkpoint is saved
    after every epoch.

    Raises:
        ValueError: for an unknown modality.
    """
    global args, best_prec1
    args = parser.parse_args()

    # record_path is used as a raw string prefix, so it is concatenated and
    # not joined. exist_ok avoids the check-then-create race and also
    # creates missing parent directories.
    record_dir = args.record_path + args.modality.lower()
    os.makedirs(record_dir, exist_ok=True)

    num_class = 2

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # The model is moved to the GPU directly, without a DataParallel wrapper.
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the path that was actually loaded, not the evaluate flag.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    # File-name template shared by the train and validation datasets.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "frame_{:06d}.jpg"
    else:
        image_tmpl = "{:06d}.jpg"

    train_set = TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=image_tmpl,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_set = TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=image_tmpl,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    preds_path = (record_dir + '/' + args.snapshot_pref +
                  args.modality.lower() + '_video_preds.pickle')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1, pred_dict = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            # Keep the predictions of the best epoch so far.
            with open(preds_path, 'wb') as f:
                pickle.dump(pred_dict, f)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
def main():
    """Train (or only evaluate) a TRN/TSN model on a ``datasets_video`` dataset.

    Parses the module-level ``parser``, resolves category names, list files,
    root path and frame-name template through
    ``datasets_video.return_dataset``, builds the network and data loaders,
    optionally resumes from ``args.resume`` and runs the epoch loop.
    Validation and checkpointing happen every ``args.eval_freq`` epochs and
    on the last epoch.  Training progress is written to
    ``<args.root_log>/<args.store_name>.csv``.

    Raises:
        ValueError: if ``args.modality`` or ``args.loss_type`` is unsupported.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    (categories, args.train_list, args.val_list, args.test_list,
     args.root_path, prefix) = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read everything needed from the bare model before DataParallel wraps it.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the file that was actually loaded (this used to print
            # args.evaluate, which is a flag, not a path).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # Four types of input modalities for two-stream ConvNets (one spatial
    # stream, one temporal stream): a single RGB image, stacked RGB
    # difference, stacked optical flow field, and stacked warped optical flow
    # field.  The spatial stream operates on a single RGB image; the temporal
    # stream takes a stack of consecutive optical flow fields as input.
    # A single RGB image usually encodes static appearance at a specific time
    # point and lacks contextual information about previous and next frames.
    # The RGB difference between two consecutive frames describes the
    # appearance change, which may correspond to the motion-salient region.
    # Optical flow fields may not concentrate on the human action; the warped
    # optical flow suppresses background motion and concentrates motion on
    # the actor.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of a later NameError.
        raise ValueError('Unknown modality ' + args.modality)

    # BNInception (Batch-Normalization-Inception) and InceptionV3 use the
    # roll=True / div=False variants of Stack and ToTorchFormatTensor.
    inception_style = args.arch in ['BNInception', 'InceptionV3']

    # Division between train and val set
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path, args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=inception_style),
                ToTorchFormatTensor(div=not inception_style),
                normalize,
            ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path, args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=inception_style),
                ToTorchFormatTensor(div=not inception_style),
                normalize,
            ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    # The with-block guarantees the log is flushed and closed even when
    # training is interrupted by an exception.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
def main():
    """Train or test an STModeling TSN network, depending on ``args.run_for``.

    Uses the module-level ``args`` (it is not parsed here).

    * ``'train'``: builds the train/validation loaders and runs the epoch
      loop, validating and checkpointing every ``args.eval_freq`` epochs and
      on the last epoch.  With ``args.evaluate`` set, only one validation
      pass is run.
    * ``'test'``: loads the weights from ``args.root_weights`` and runs
      ``test`` on the test list.

    Raises:
        ValueError: if ``args.run_for``, ``args.modality`` or (when training)
            ``args.loss_type`` is unsupported.
    """
    global best_prec1
    check_rootfolders()

    if args.run_for == 'train':
        (categories, args.train_list, args.val_list, args.root_path,
         prefix) = datasets_video.return_dataset(args.dataset, args.modality)
    elif args.run_for == 'test':
        (categories, args.test_list, args.root_path,
         prefix) = datasets_video.return_data(args.dataset, args.modality)
    else:
        # Fail with a clear message instead of a NameError on `categories`.
        raise ValueError('Unknown run_for mode {!r}'.format(args.run_for))
    num_class = len(categories)

    args.store_name = '_'.join([
        'STModeling', args.dataset, args.modality, args.arch,
        args.consensus_type, 'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    # Read everything needed from the bare model before DataParallel wraps it.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the file that was actually loaded (this used to print
            # args.evaluate, which is a flag, not a path).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # The earlier test `(m != 'RGBDiff') | (m != 'RGBFlow')` was true for
    # every modality, so the identity branch could never run.
    # NOTE(review): checkpoints trained before this fix always saw
    # mean/std-normalised input, including for RGBDiff and RGBFlow.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    else:
        # Fail with a clear message instead of a later NameError.
        raise ValueError('Unknown modality ' + args.modality)

    # NOTE(review): the dataset root is hard-coded and args.root_path (set
    # above) is not used by the loaders; kept as-is, but defined only once.
    data_root = "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data"
    inception_style = args.arch in ['BNInception', 'InceptionV3']

    def make_loader(list_file, leading_transforms, shuffle, **dataset_kwargs):
        """Build a DataLoader over ``list_file`` with the shared tail transforms."""
        transform = torchvision.transforms.Compose(
            list(leading_transforms) + [
                Stack(roll=inception_style,
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(div=not inception_style),
                normalize,
            ])
        dataset = TSNDataSet(
            data_root, list_file,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            transform=transform,
            **dataset_kwargs)
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size, shuffle=shuffle,
            num_workers=args.workers, pin_memory=False)

    def make_eval_loader(list_file):
        """Build the deterministic (scale + centre-crop) loader for val/test."""
        return make_loader(
            list_file,
            [GroupScale(int(scale_size)), GroupCenterCrop(crop_size)],
            shuffle=False,
            random_shift=False)

    if args.run_for == 'train':
        train_loader = make_loader(args.train_list, [train_augmentation],
                                   shuffle=True)
        val_loader = make_eval_loader(args.val_list)

        # define loss function (criterion) and optimizer
        if args.loss_type == 'nll':
            criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            raise ValueError("Unknown loss type")

        for group in policies:
            print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']),
                group['lr_mult'], group['decay_mult']))

        use_dndf = args.consensus_type == 'DNDF'
        if use_dndf:
            # DNDF consensus trains with Adam over all trainable parameters
            # and skips the step-wise learning-rate schedule below.
            params = [p for p in model.parameters() if p.requires_grad]
            optimizer = torch.optim.Adam(params, lr=args.lr,
                                         weight_decay=1e-5)
        else:
            optimizer = torch.optim.SGD(policies,
                                        args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)

        if args.evaluate:
            validate(val_loader, model, criterion, 0)
            return

        log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
        # The with-block guarantees the log is closed even when training is
        # interrupted by an exception.
        with open(log_path, 'w') as log_training:
            for epoch in range(args.start_epoch, args.epochs):
                if not use_dndf:
                    adjust_learning_rate(optimizer, epoch, args.lr_steps)

                # train for one epoch
                train(train_loader, model, criterion, optimizer, epoch,
                      log_training)

                # evaluate on validation set
                if ((epoch + 1) % args.eval_freq == 0
                        or epoch == args.epochs - 1):
                    prec1 = validate(val_loader, model, criterion,
                                     (epoch + 1) * len(train_loader),
                                     log_training)

                    # remember best prec@1 and save checkpoint
                    is_best = prec1 > best_prec1
                    best_prec1 = max(prec1, best_prec1)
                    save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'arch': args.arch,
                            'state_dict': model.state_dict(),
                            'best_prec1': best_prec1,
                        }, is_best)

    elif args.run_for == 'test':
        print("=> loading checkpoint '{}'".format(args.root_weights))
        checkpoint = torch.load(args.root_weights)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda().eval()
        print("=> loaded checkpoint ")

        test_loader = make_eval_loader(args.test_list)
        test(test_loader, model, categories)
def main():
    """Fine-tune a pretrained TRN model with a two-output BCE-with-logits head.

    Parses the module-level ``parser``, builds a TSN network with two
    outputs, initialises it from the Something-v2 TRN-multiscale checkpoint
    under ``pretrain/`` (minus the index-3 layer of every fusion-scale head),
    optionally resumes from ``args.resume`` and trains for ``args.epochs``
    epochs.  Validation is disabled in this variant.  A weight snapshot is
    written before every epoch and once more after the last one.

    Raises:
        ValueError: if ``args.modality`` or ``args.loss_type`` is unsupported.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    (categories, args.train_list, args.val_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    # The head is fixed to two outputs regardless of len(categories).
    model = TSN(2, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    checkpoint = torch.load(
        'pretrain/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar',
        map_location='cpu')
    # Strip the first dotted component of every key (presumably the
    # 'module.' prefix added by DataParallel -- confirm against the file).
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    # Drop layer 3 of each of the seven fusion-scale heads so that
    # load_state_dict(strict=False) keeps their fresh initialisation
    # (presumably the classifiers whose shape depends on the class count).
    # pop() tolerates a checkpoint that lacks some of these keys.
    for scale_idx in range(7):
        for suffix in ('weight', 'bias'):
            base_dict.pop(
                'consensus.fc_fusion_scales.{}.3.{}'.format(scale_idx, suffix),
                None)
    model.load_state_dict(base_dict, strict=False)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the file that was actually loaded (this used to print
            # args.evaluate, which is a flag, not a path).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of a later NameError.
        raise ValueError('Unknown modality ' + args.modality)

    inception_style = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path, args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=inception_style),
                ToTorchFormatTensor(div=not inception_style),
                normalize,
            ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # No validation loader is built in this variant; crop_size and
    # scale_size above are only needed if validation is re-enabled.

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # Despite the 'nll' option name, this variant trains with
        # BCE-with-logits; output 0 is weighted 1.2, output 1 is weighted 1.0.
        weight = torch.ones([2]).cuda()
        weight[0] = 1.2
        pos_weight = torch.ones([2]).cuda()
        criterion = torch.nn.BCEWithLogitsLoss(
            weight=weight, pos_weight=pos_weight).cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    # NOTE(review): the initial learning rate is hard-coded to 1e-4 here
    # rather than taken from args.lr.
    optimizer = torch.optim.SGD(policies,
                                0.0001,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    def save_snapshot(tag):
        """Write the current weights under both historical file names."""
        state = model.state_dict()
        torch.save(state, 'checkpoint_bce_20_w12_{}.pth.tar'.format(tag))
        torch.save(state, 'checkpoint_bce_20_w12_{}.pth'.format(tag))

    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    # The with-block guarantees the log is closed even when training is
    # interrupted by an exception.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # Snapshot <epoch> holds the weights as they are *before* this
            # epoch is trained.
            save_snapshot(epoch)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

        # Without this final snapshot the weights produced by the last
        # epoch would never be written to disk.
        if args.start_epoch < args.epochs:
            save_snapshot(args.epochs)
def main():
    """Build a TSN model, restore a checkpoint and run ``train_inputs`` on it.

    Registers the extra ``--class_index`` option on the module-level
    ``parser``, parses the arguments, fills the global ``class_to_name``
    mapping from the class-index file, dumps the options to
    ``<args.result_path>/opts.json``, constructs the network with
    ``get_att_maps=True`` and finally calls ``train_inputs``, logging to
    ``<args.result_path>/vis_feature.log``.

    Raises:
        ValueError: if ``args.dataset`` is not one of the known datasets.
    """
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    classes_per_dataset = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'something': 174,
    }
    if args.dataset not in classes_per_dataset:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = classes_per_dataset[args.dataset]

    with open(args.class_index, 'r') as f:
        content = f.readlines()
    if args.dataset == 'something':
        # One class name per line; the line number is the class index.
        class_to_name = {idx: line.strip().replace(' ', '-')
                         for idx, line in enumerate(content)}
    else:
        # Lines look like "<1-based index> <name>"; store 0-based indices.
        class_to_name = {}
        for line in content:
            fields = line.strip().split(' ')
            class_to_name[int(fields[0]) - 1] = fields[1]

    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    # lstm_out_type is only passed through for the LSTM-based consensus types.
    if args.consensus_type not in ('lstm', 'conv_lstm'):
        args.lstm_out_type = None

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the file that was actually loaded (this used to print
            # args.evaluate, which is a flag, not a path).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
        # Nothing was normalised, so nothing needs to be reversed.  Without
        # this assignment the train_inputs() call below raised NameError
        # for RGBDiff.
        rev_normalize = IdentityTransform()

    print('training inputs...')
    val_logger = os.path.join(args.result_path, 'vis_feature.log')
    train_inputs(model, 0, val_logger=val_logger, rev_normalize=rev_normalize)
def main():
    """Train a multi-label TSN model (BCE-with-logits) on the movie datasets.

    Parses the module-level ``parser``, builds the network and the
    ``TSNDataSetMovie`` loaders, optionally resumes the weights from
    ``args.resume`` and runs the epoch loop.  Every ``args.eval_freq`` epochs
    (and on the last epoch) the model is validated, a checkpoint is saved,
    the validation output matrix is stored with ``np.save`` and a summary
    line is appended to ``args.record_path``.

    Raises:
        ValueError: if ``args.dataset`` or ``args.modality`` is unsupported.
    """
    global args, best_prec1
    args = parser.parse_args()

    classes_per_dataset = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'movie': 21,
    }
    if args.dataset not in classes_per_dataset:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class = classes_per_dataset[args.dataset]

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Read everything needed from the bare model before DataParallel wraps it.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # best_prec1 is deliberately not restored in this variant.
            model.load_state_dict(checkpoint['state_dict'])
            # Report the file that was actually loaded (this used to print
            # args.evaluate, which is a flag, not a path).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of a later NameError.
        raise ValueError('Unknown modality ' + args.modality)

    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "frame_{:04d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSetMovie(
            "", args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=image_tmpl,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSetMovie(
            "", args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=image_tmpl,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        # Validation uses half the training batch size; clamp to 1 so that
        # --batch_size 1 does not produce an invalid batch size of 0.
        batch_size=max(1, args.batch_size // 2), shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.BCEWithLogitsLoss().cuda()

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    zero_time = time.time()
    best_map = 0
    print('Start training...')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        start_time = time.time()
        trainloss = train(train_loader, model, criterion, optimizer, epoch)
        print('Traing loss %4f Epoch %d' % (trainloss, epoch))

        # Everything below needs the validation results, so it only runs on
        # epochs where validate() was actually called.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            valloss, mAP, wAP, output_mtx = validate(val_loader, model,
                                                     criterion)
            end_time = time.time()
            epoch_time = end_time - start_time
            total_time = end_time - zero_time
            print('Total time used: %s Epoch %d time uesd: %s' %
                  (str(datetime.timedelta(seconds=int(total_time))), epoch,
                   str(datetime.timedelta(seconds=int(epoch_time)))))
            print(
                'Train loss: {0:.4f} val loss: {1:.4f} mAP: {2:.4f} wAP: {3:.4f}'
                .format(trainloss, valloss, mAP, wAP))

            # remember the best mAP and save checkpoint
            is_best = mAP > best_map
            best_map = max(mAP, best_map)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)

            # The output matrix is stored under "<epoch><args.result_path>".
            npy_name = str(epoch) + args.result_path
            np.save(npy_name, output_mtx)

            with open(args.record_path, 'a') as record_file:
                record_file.write(
                    'Epoch:[{0}]'
                    'Train loss: {1:.4f} val loss: {2:.4f} map: {3:.4f}\n'.
                    format(epoch + 1, trainloss, valloss, mAP))

    print('************ Done!... ************')
class TRN(): def __init__(self, num_segments, modality, lr =0.001, loss_type = 'nll', # cross entropy weight_decay=5e-4, #weight_decay: L2 penalty #default lr_steps =[30, 60], # epochs to decay learning rate by 10 momentum= 0.9, gpus= None, clip_gradient =20, new_length=None, base_model="resnet50", dropout=0.7, img_feature_dim=256, #The dimensionality of the features used for relational reasoning. partial_bn=True, consensus_type= 'TRN', # MTRN dataset = 'epic', batch_size= 1, workers= 2, resume = None, # pretained model (path) epochs= None, start_epoch = None, # ifprintmodel= 0, # print the model structure print_freq =1, eval_freq =1, ): self.num_segments= num_segments self.modality= modality self.base_model= base_model self.new_length= new_length self.img_feature_dim= img_feature_dim self.consensus_type= consensus_type self.dataset= dataset self.resume = resume self.epochs = epochs self.start_epoch= start_epoch self.lr= lr self.loss_type= loss_type self.weight_decay = weight_decay self.lr_steps= lr_steps self.momentum= momentum self.partial_bn= partial_bn self.dropout= dropout self.batch_size = batch_size self.workers= workers self.gpus= gpus self.eval_freq= eval_freq self.print_freq= print_freq self.num_class, self.train_list, self.val_list, self.root_path, self.prefix = datasets_video.return_dataset(self.dataset, self.modality) self.store_name = '_'.join(['TRN', self.dataset, self.modality, self.base_model, self.consensus_type, 'segment%d'% self.num_segments, 'K%d'% self.new_length]) self.best_prec1= 0 self.clip_gradient= clip_gradient self.model = TSN(self.num_class, self.num_segments, self.modality, new_length= self.new_length, base_model= self.base_model, consensus_type= self.consensus_type, dropout=self.dropout, img_feature_dim= self.img_feature_dim, partial_bn= self.partial_bn) self.crop_size = self.model.crop_size self.scale_size = self.model.scale_size self.input_mean = self.model.input_mean self.input_std = self.model.input_std self.model_policies = 
self.model.get_optim_policies() self.augmentation= self.model.get_augmentation() print('we have {} GPUs found'.format(torch.cuda.device_count())) self.model = torch.nn.DataParallel(self.model #, device_ids=self.gpus ).cuda() print(f''' +-------------------------------------------------------+ num_class : {self.num_class} modality : {self.modality} base_model : {self.base_model} new_length : {self.new_length} consensus_type : {self.consensus_type} img_feature_dim : {self.img_feature_dim} resume : {self.resume} epochs : {self.epochs } start_epoch : {self.start_epoch } lr : {self.lr } loss_type : {self.loss_type } weight_decay : {self.weight_decay } lr_steps : {self.lr_steps } momentum : {self.momentum } partial_bn : {self.partial_bn} clip_gradient : {self.clip_gradient } dropout : {self.dropout} batch_size : {self.batch_size} workers : {self.workers} gpus : {self.gpus } ( no use now) eval_freq : {self.eval_freq } print_freq : {self.print_freq } crop_size : {self.crop_size} scale_size : {self.scale_size} +-------------------------------------------------------+ construct a network named : {self.store_name}''') #---- checkpoint------load model ---- if self.resume: if os.path.isfile(self.resume): print(("=> loading checkpoint '{}'".format(self.resume))) checkpoint = torch.load(self.resume) self.start_epoch = checkpoint['epoch'] self.best_prec1 = checkpoint['best_prec1'] self.model.load_state_dict(checkpoint['state_dict']) print(("=> loaded checkpoint '{}' (epoch {}) (epochs={})" .format(self.resume, checkpoint['epoch'], self.epochs))) else: print(("=> no checkpoint found at '{}'".format(self.resume))) cudnn.benchmark = True # Data loading code if self.modality != 'RGBDiff': self.normalize = GroupNormalize(self.input_mean, self.input_std) else: self.normalize = IdentityTransform() #------- define loss function (criterion) and optimizer------- if self.loss_type == 'nll': self.criterion = torch.nn.CrossEntropyLoss().cuda() else: raise ValueError("Unknown loss type") 
#---------=========describe parameters:========= ---------------------- print('*'*20,'TSN parameters:') Tools.parameter_desc(self.model, ifprint= ifprintmodel) #------parameter way2----- print('-'*30) for group in self.model_policies: print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format( group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))) toal_params=0 for p in self.model_policies: # print('-'*10,'{} ( num: {})'.format(p['name'],len(p['params']))) for i, param in enumerate(p['params']): toal_params+= param.size().numel() # if i< 5 : # print(param.size(), param.size().numel()) # elif i==5 : # print('...') print('*'*20, 'count from policies, total parameters: {:,}'.format(toal_params)) print('TRN initialised \n') def __call__(self, input_pickle): ''' input_list: pickle file ,like ' epic_kitchens/val_02.pkl' ''' end= time.time() DS= EpicDataSet( pickle_file= input_pickle, num_segments= self.num_segments, new_length= self.new_length, modality=self.modality, transform=torchvision.transforms.Compose([ GroupScale(int(self.scale_size)), #hw GroupCenterCrop(self.crop_size), #hw Stack(roll=(self.base_model in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(self.base_model not in ['BNInception','InceptionV3'])), self.normalize, ]), random_shift=False, test_mode= True ) test_loader = torch.utils.data.DataLoader( DS, batch_size= self.batch_size, shuffle=False, num_workers=self.workers, pin_memory=True) time1= time.time()- end print('testmode: load {} actions ({} batchs) '.format(len(DS) ,len(test_loader) )) logits= self.validate(self.model, test_loader, self.criterion)[0] print('cost time: {:.1f} mins, {:.1f} mins'.format(time1,(time.time()- end)/60)) return logits #--------- training: ------------------------------------------------------ def do_training(self, ifprint=1): # initial logging file model= self.model self.log_training = os.path.join('log/{}.txt'.format(self.store_name)) with open(self.log_training, 'w') as log: 
log.write('initial logfile. {} \n'.format(time.strftime(" %d.%B %H:%M:%S"))) self.logging("Start Training {}epochs (start from epoch {}) at: {}".format(self.epochs, self.start_epoch ,time.strftime(" %d.%B %H:%M:%S"))) #---------load data---------------------------------------------------- print('======batch_size=', self.batch_size ) transform_train= torchvision.transforms.Compose([ self.augmentation, # GroupMultiScaleCrop+ GroupRandomHorizontalFlip Stack(roll=(self.base_model in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(self.base_model not in ['BNInception','InceptionV3'])), self.normalize, ]) train_loader = torch.utils.data.DataLoader( # TSNDataSet(self.root_path, self.train_list, num_segments=self.num_segments, # new_length=self.new_length,modality=self.modality,image_tmpl=self.prefix, # transform=transform_train, # fromEpic= 1 # ), EpicDataSet( pickle_file= self.train_list, num_segments= self.num_segments, new_length= self.new_length, modality=self.modality, transform= transform_train, random_shift=True, test_mode= False ), batch_size=self.batch_size, shuffle=True, num_workers=self.workers, pin_memory=True) transform_val= torchvision.transforms.Compose([ GroupScale(int(self.scale_size)), #hw GroupCenterCrop(self.crop_size), #hw Stack(roll=(self.base_model in ['BNInception','InceptionV3'])), ToTorchFormatTensor(div=(self.base_model not in ['BNInception','InceptionV3'])), self.normalize, ]) val_loader = torch.utils.data.DataLoader( # TSNDataSet(self.root_path, self.val_list, num_segments=self.num_segments, # new_length= self.new_length,modality=self.modality,image_tmpl=self.prefix, # random_shift=False, # transform= transform_val, # fromEpic= 1 # ), EpicDataSet( pickle_file= self.val_list, num_segments= self.num_segments, new_length= self.new_length, modality=self.modality, transform= transform_val, random_shift=False, test_mode= True ), batch_size=self.batch_size, shuffle=False, num_workers=self.workers, pin_memory=True) optimizer = 
torch.optim.SGD(self.model_policies, #---params self.lr, momentum= self.momentum, weight_decay=self.weight_decay) val_monitor=[] end= time.time() for epoch in range(self.start_epoch, self.epochs): print('\n','-'*10,'epoch:{}, lr {}'.format(epoch, self.lr),'-'*10, time.strftime(" %d.%B %H:%M:%S")) self.adjust_learning_rate(optimizer, epoch, self.lr_steps) # train for one epoch tr_loss= self.train(train_loader, model, self.criterion, optimizer, epoch, self.log_training, ifprint) #----------------validation ---------------------------- # evaluate on validation set val= ((epoch- self.start_epoch +1) % self.eval_freq == 0 or epoch== self.epochs- 1 ) if (val): print('-'*5,'begin evaluating.. ') val_prec1, val_prec5, val_loss = self.validate( model, val_loader, self.criterion, epoch, self.log_training) val_monitor.append((epoch, (time.time()-end)/60, tr_loss.item(), val_loss.item(), val_prec1.item(), val_prec5.item()))# get single item from tensor is_best = val_prec1 > self.best_prec1 self.best_prec1 = max(val_prec1, self.best_prec1) #---- checkpoint------save-------- print('-'*5,'save checkpoint.. 
') self.save_checkpoint({ 'epoch': epoch + 1, 'arch': self.base_model, 'state_dict': model.state_dict(), 'best_prec1': self.best_prec1, }, is_best) print('-'*30, '\n') #--------------log loss-------------------------------- print('*'*10, 'training completed!') print('Best Prec@1: %.3f'%(self.best_prec1)) df= pd.DataFrame(val_monitor, columns=['epoch','epochtime(min)','train_loss','val_loss','val_prec1','val_prec5']) print(df) # ---------------------------------------------------------------------------------------------------------------- def train(self, train_loader, model, criterion, optimizer, epoch, outputfile, ifprint): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() if not self.partial_bn: model.module.partialBN(False) else: model.module.partialBN(True) # switch to train mode model.train() # num_segments*new_length* channel (rgb 3, flow 2)* crop_size if ifprint: print('begin train(), train_loader len:{}'.format( len(train_loader))) tr_end=time.time() for i, (input, target) in enumerate(train_loader): if i==0 and ifprint: print('input size: {} (batchsize ,num_segments*new_length* channel (rgb 3, flow 2), [crop_size])'.format(input.size())) # measure data loading time data_time.update(time.time() - tr_end) ## target = target.cuda(async=True) #async is now a reserved word in Python >= 3.7 so use non_blocking instead. 
target = target.cuda(non_blocking=True) input_var = torch.autograd.Variable(input) target_var = torch.autograd.Variable(target) # compute output output = model(input_var) loss = criterion(output, target_var) # measure accuracy and record loss prec1, prec5 = self.accuracy(output.data, target, topk=(1,5)) losses.update(loss.data, input.size(0)) top1.update(prec1, input.size(0)) top5.update(prec5, input.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() if self.clip_gradient is not None: total_norm = clip_grad_norm(model.parameters(), self.clip_gradient) ##torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_. if total_norm > self.clip_gradient: print("clipping gradient: {} with coef {}".format(total_norm, self.clip_gradient / total_norm)) optimizer.step() # measure elapsed time batcht0= time.time() - tr_end batch_time.update(batcht0) idx= epoch * len(train_loader) + i+1 op= 'epoch:{:^5d}, batch: {:2d} /{}, lr: {:.5f}, time(min): {:.1f}, loss: {:.4f}, prec1: {:.2f}, prec5: {:.2f}'.format( epoch, i, len(train_loader), optimizer.param_groups[-1]['lr'], batcht0/60, loss.data , prec1, prec5 ) if outputfile: self.logging(op) if (ifprint and idx % self.print_freq == 0) or (i== len(train_loader)-1) : print(op) return losses.avg def validate(self, model, val_loader, criterion, iter= None, outputfile=None): # batch_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # switch to evaluate mode: same as model.train(mode=False) model.eval() val_end = time.time() logits=[] for i, (input, target) in enumerate(val_loader): if i==0 : print('input size: {} (batchsize ,num_segments*new_length* channel (rgb 3, flow 2), [crop_size])'.format(input.size())) #print('validating enumerate batch=', i ) target = target.cuda(non_blocking=True) input_var = torch.autograd.Variable(input, volatile=True) target_var = torch.autograd.Variable(target, volatile=True) # compute output output = 
model(input_var) #print('###{}: input batchs*size: {}*{} output size:{}'.format(i, len(val_loader), input.size(),output.size())) loss = criterion(output, target_var) # measure accuracy and record loss prec1, prec5 = self.accuracy(output.data, target, topk=(1,5)) losses.update(loss.data, input.size(0)) top1.update(prec1, input.size(0)) #input.size(0)= batch_size top5.update(prec5, input.size(0)) if not outputfile:# testing logits.extend(output.tolist()) val_time= time.time() - val_end op = ( (('-'*60 +'(epoch{}), validating'.format(iter)) if outputfile else 'testing') + 'Results: time(min): {:.1f}, Loss {loss.avg:.5f}, Prec1 {top1.avg:.3f}( best:{best_p:.3f} ), Prec5 {top5.avg:.3f}' .format( val_time/60, loss=losses, top1=top1, best_p= self.best_prec1, top5=top5)) print(op) if outputfile: # validating self.logging(op) return top1.avg, top5.avg, losses.avg else: # testing return logits, top1.avg, top5.avg, losses.avg def save_checkpoint(self, state, is_best, filename='checkpoint.pth.tar'): torch.save(state, '%s/%s_checkpoint.pth.tar' % ('model', self.store_name)) if is_best: shutil.copyfile('%s/%s_checkpoint.pth.tar' % ('model', self.store_name),'%s/%s_best.pth.tar' % ('model', self.store_name)) def adjust_learning_rate(self, optimizer, epoch, lr_steps): """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" decay = 0.1 ** (sum(epoch >= np.array(lr_steps))) lr = self.lr * decay if epoch in np.array(lr_steps).astype(int): text= 'learning_rate decayed({}*{}) to {}'.format(self.lr, decay, lr) print(text) self.logging(text) decay = self.weight_decay for param_group in optimizer.param_groups: param_group['lr'] = lr * param_group['lr_mult'] param_group['weight_decay'] = decay * param_group['decay_mult'] def accuracy(self, output, target, topk=(1,)): """Computes the precision@k for the specified values of k""" maxk = max(topk) batch_size = target.size(0) _, pred = output.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, 
-1).expand_as(pred)) res = [] for k in topk: correct_k = correct[:k].view(-1).float().sum(0) res.append(correct_k.mul_(100.0 / batch_size)) return res def logging(self, text): with open(self.log_training, 'a') as log: log.write(text + '\n') log.flush()
def main():
    """Train, or only evaluate, a TSN model configured from the command line.

    Steps, in order:

    1. Parse the module-level ``parser`` and print the environment versions
       and every parsed option.
    2. Build the TSN network, wrap it in ``DataParallel`` and optionally
       restore a checkpoint from ``args.resume``.
    3. Build the training and validation loaders.
    4. Either run a single validation pass (``args.evaluate``) and save its
       scores, or run the train / validate / checkpoint loop.

    Side effects: rebinds the module globals ``args`` and ``best_prec1``.

    Raises:
        ValueError: for an unknown dataset, architecture, modality or loss
            type.
    """
    global args, best_prec1
    args = parser.parse_args()

    separator = "------------------------------------"
    print(separator)
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    print(separator)
    print(args.arch + " Configurations:")
    for key, value in vars(args).items():
        print("- {}: {}".format(key, value))
    print(separator)
    print(args.mode)

    # dataset name -> (number of classes, frame file-name template)
    dataset_specs = {
        'ucf101': (101, "{:05d}.jpg"),
        'hmdb51': (51, "{:05d}.jpg"),
        'kinetics': (400, "{:05d}.jpg"),
        'something': (174, "{:05d}.jpg"),
        'somethingv2': (174, "img_{:05d}.jpg"),
        'NTU_RGBD': (120, "{:05d}.jpg"),
        'tinykinetics': (150, "{:05d}.jpg"),
    }
    if args.dataset not in dataset_specs:
        raise ValueError('Unknown dataset ' + args.dataset)
    num_class, rgb_read_format = dataset_specs[args.dataset]

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                non_local=args.non_local)

    crop_size = model.crop_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Per-parameter-group optimizer options: each entry is a dict with a
    # 'params' list plus the 'name', 'lr_mult' and 'decay_mult' keys that are
    # printed below and handed to SGD.
    policies = model.get_optim_policies(args.dataset)
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Input-tensor layout depends on the backbone family. An unsupported
    # architecture used to surface later as a NameError on div/roll; fail
    # here with a clear message instead.
    if args.arch in ("resnet50", "resnet34"):
        div, roll = False, True
    elif args.arch[:3] == "TCM":
        div, roll = True, False
    else:
        raise ValueError('Unsupported architecture ' + args.arch)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Every supported modality reads one frame per segment in this variant.
    if args.modality in ('RGB', 'Flow', 'RGBDiff'):
        data_length = 1
    else:
        raise ValueError('Unknown modality ' + args.modality)

    image_tmpl = (args.rgb_prefix + rgb_read_format
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + rgb_read_format)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   mode=args.mode,
                   image_tmpl=image_tmpl,
                   img_start_idx=args.img_start_idx,
                   transform=torchvision.transforms.Compose([
                       GroupScale((240, 320)),
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   mode=args.mode,
                   image_tmpl=image_tmpl,
                   img_start_idx=args.img_start_idx,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale((240, 320)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    output_list = []
    if args.evaluate:
        prec1, score_tensor = validate(val_loader, model, criterion,
                                       temperature=100)
        output_list.append(score_tensor)
        # Report the file name that is actually written; the message used to
        # name 'score_inf5.pt' while the scores went to 'score.pt'.
        score_file = 'score.pt'
        save_validation_score(output_list, filename=score_file)
        print("validation score saved in {}".format('/'.join(
            (args.val_output_folder, score_file))))
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch; train() returns the temperature that the next
        # validation pass is called with
        temperature = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, score_tensor = validate(val_loader, model, criterion,
                                           temperature=temperature)
            output_list.append(score_tensor)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)

            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            print(output_best)

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    # save validation score
    # NOTE(review): the collapsed source does not show whether this ran once
    # after the loop or after every validation pass; the file written at the
    # end of training holds the same list either way.
    save_validation_score(output_list)
    print("validation score saved in {}".format('/'.join(
        (args.val_output_folder, 'score.pt'))))
def main():
    """Build a TSN model and its data loaders, then train or evaluate it.

    The function has three parts: model construction, data preparation and
    training. With ``args.evaluate`` set it runs one validation pass and
    returns. Otherwise it trains for ``args.epochs`` epochs; this variant
    does not validate or save checkpoints inside the epoch loop.

    Side effects: rebinds the module globals ``args`` and, when a checkpoint
    is restored, ``best_prec1``.

    Raises:
        ValueError: for an unknown dataset or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of action classes per dataset. UCF101 has 13,320 clips in 101
    # classes; HMDB51 has 6,766 clips in 51 classes collected from sources
    # such as movies and web videos.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Part 1: model.
    #   num_class           - number of target classes
    #   args.num_segments   - how many segments each video is split into
    #                         (K in the TSN paper)
    #   args.modality       - input type, e.g. RGB frames or optical Flow
    #   args.arch           - backbone, e.g. resnet101 or BNInception
    #   args.consensus_type - how per-snippet predictions are fused, e.g. avg
    #   args.dropout        - dropout ratio
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Preprocessing parameters exposed by the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    # Optimizer parameter groups (printed below and handed to SGD).
    policies = model.get_optim_policies()
    # Training-time data augmentation transform.
    train_augmentation = model.get_augmentation()

    # Multi-GPU data-parallel training.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # args.resume is either empty or the path of a saved checkpoint to
    # continue training from.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # Copy the saved weights into the network.
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (not the evaluate flag).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Part 2: data.
    if args.modality != 'RGBDiff':
        # RGB and Flow inputs are normalized with the model's mean/std.
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff inputs are passed through unchanged.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    image_tmpl = ("img_{:05d}.jpg"
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + "{}_{:05d}.jpg")
    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    # TSNDataSet reads the raw frames; DataLoader batches its samples.
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Validation data: fixed sampling, scale + center crop, no augmentation.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # Part 3: training (loss, optimizer, epoch loop).
    if args.loss_type == 'nll':
        # CrossEntropyLoss applies log-softmax followed by negative
        # log-likelihood, so the network outputs raw scores with no final
        # softmax layer of its own.
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # Apply the step-wise learning-rate schedule for this epoch.
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
def main():
    """Train a TSN model on per-frame data, logging progress through ``log.l``.

    Parses options with ``Parse_args()``, builds the TSN network and the
    ``PerFrameData`` train/val loaders, optionally restores a checkpoint, and
    runs the train / validate / checkpoint loop. When ``args.evaluate`` is set
    a validation pass is run first and training then continues; there is no
    early return in this variant.

    Side effects: rebinds the module globals ``args`` and ``best_prec1``.

    Raises:
        ValueError: for an unknown dataset or loss type.
    """
    global args, best_prec1
    args = Parse_args()

    log.l.info('Input command:\n ===========> python ' + ' '.join(sys.argv) +
               ' ===========>')

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'mm':
        num_class = 500
    elif args.dataset == 'thumos14':
        num_class = 21
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    log.l.info(
        '============= prepare the model and model\'s parameters ============='
    )

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        log.l.info(
            '============== train from checkpoint (finetune mode) ================='
        )
        if os.path.isfile(args.resume):
            log.l.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Log the checkpoint path (not the evaluate flag).
            log.l.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            log.l.info(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    log.l.info('============== Now, loading data ... ==============\n')
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    image_tmpl = ("img_{:05d}.jpg"
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + "{}_{:05d}.jpg")
    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        PerFrameData(args.frames_root, args.train_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     data_gap=args.data_gap,
                     test_mode=False,
                     image_tmpl=image_tmpl,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=roll),
                         ToTorchFormatTensor(div=div),
                         normalize,
                     ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.data_workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        PerFrameData(args.frames_root, args.val_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     data_gap=args.data_gap,
                     test_mode=True,
                     image_tmpl=image_tmpl,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=roll),
                         ToTorchFormatTensor(div=div),
                         normalize,
                     ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.data_workers, pin_memory=True)

    log.l.info(
        '================= Now, define loss function and optimizer =============='
    )
    # The loss is unweighted; a per-class weight tensor used to be built here
    # but was never passed to the criterion, so it has been dropped.
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        log.l.info(
            ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'],
                group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        # Validate once before training; execution falls through to the
        # training loop afterwards.
        log.l.info('Need val the data first...')
        validate(val_loader, model, criterion, 0)

    log.l.info(
        '\n\n===================> TRAIN and VAL begins <===================\n')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train a TSN model whose segment count is temporal x spatial segments.

    The class count comes directly from ``args.num_class``. The network is
    built with ``args.num_segments * args.num_spacial_segments`` segments, the
    training pipeline takes one random tile of a 2x2 grid per frame, and the
    validation pipeline takes all four tiles of the 2x2 grid.

    With ``args.evaluate`` set, one validation pass is run and the function
    returns. Otherwise the train / validate / checkpoint loop runs.

    Side effects: rebinds the module globals ``args`` and ``best_prec1``.

    Raises:
        ValueError: for an unknown loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    num_class = args.num_class

    model = TSN(num_class,
                args.num_segments * args.num_spacial_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            # When fine-tuning from a checkpoint rather than resuming it, the
            # next two assignments are the ones to disable.
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (not the evaluate flag).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    image_tmpl = ("-{:04d}.jpg"
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + "_{}-{:04d}.jpg")
    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list,
                   train_val_test='train',
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       # one tile out of a 2x2 grid
                       GroupMbyNRandomCrop(1, 2, 2, 720, 1280),
                       ImgColorJitter(brightness=0.4, contrast=0,
                                      saturation=0, hue=0),
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # NOTE(review): the validation set is built from args.train_list with
    # train_val_test='val'; presumably TSNDataSet selects the split from that
    # flag. Confirm this is intended and not a leftover for args.val_list.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list,
                   train_val_test='val',
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       # all tiles of a 2x2 grid
                       GroupMbyNCrop(2, 2, 720, 1280),
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # This criterion expects a class index (0 to C-1)
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint; ">=" so that a later
            # epoch tying the best score (e.g. an early 100%) replaces it
            is_best = prec1 >= best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Load a TSN checkpoint and run ``test`` over the configured test list.

    Options come from ``options().parse_args()``. The checkpoint at
    ``args.checkpoint`` supplies the weights, which are loaded into the
    ``DataParallel``-wrapped network before the test loader is built.

    Raises:
        ValueError: if ``args.dataset`` is not one of the known datasets.
    """
    parser = options()
    args = parser.parse_args()

    # dataset name -> number of classes
    class_counts = {
        'ucf101': 101,
        'hmdb51': 51,
        'kinetics': 400,
        'saag01': 2,
    }
    num_class = class_counts.get(args.dataset)
    if num_class is None:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Frames stacked per segment for each modality.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    net = TSN(num_class,
              args.num_segments,
              args.modality,
              base_model=args.arch,
              consensus_type=args.consensus_type,
              dropout=0.5,
              partial_bn=False)

    # Preprocessing parameters exposed by the network.
    crop_size = net.crop_size
    scale_size = net.scale_size
    input_mean = net.input_mean
    input_size = net.input_size
    input_std = net.input_std

    # These three objects are built here but not used further down.
    policies = net.get_optim_policies()
    train_augmentation = net.get_augmentation()
    cropping = torchvision.transforms.Compose([
        GroupScale(scale_size),
        GroupCenterCrop(input_size),
    ])

    ckpt = torch.load(args.checkpoint)
    start_epoch = ckpt['epoch']
    best_prec1 = ckpt['best_prec1']
    weights = ckpt['state_dict']

    model = torch.nn.DataParallel(net, device_ids=args.gpus).cuda()
    model.load_state_dict(weights)

    uses_rgb_frames = args.modality in ["RGB", "RGBDiff"]
    if uses_rgb_frames:
        frame_tmpl = args.img_prefix + "_{:05d}" + args.ext
    else:
        frame_tmpl = args.flow_prefix + "_{}_{:05d}" + args.ext

    is_bninception = args.arch == 'BNInception'
    test_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=is_bninception),
        ToTorchFormatTensor(div=not is_bninception),
        GroupNormalize(input_mean, input_std),
    ])

    test_set = TSNDataSet("", args.test_list,
                          num_segments=args.num_segments,
                          new_length=data_length,
                          modality=args.modality,
                          image_tmpl=frame_tmpl,
                          random_shift=False,
                          transform=test_transform,
                          custom_prefix=args.custom_prefix)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              drop_last=True)

    ### Test ###
    test(model, test_loader, args)
def _init_missing_weights(model_dict, new_state_dict):
    """Fill ``new_state_dict`` in place for every model key it lacks.

    Keys containing 'weight' are set to 1 when they also contain 'bn' and to
    Xavier-uniform values otherwise; keys containing 'bias' are set to 0; any
    other key is left as a zero tensor.
    """
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    for k in un_init_dict_keys:
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")


def main():
    """Train, or only evaluate, an ECO / C3DRes18 model built through TSN.

    Steps, in order:

    1. Parse the module-level ``parser`` and print the environment versions
       and every parsed option.
    2. Build the network, seed its weights from the architecture-specific
       ``init_*`` helper and initialize whatever that helper did not cover.
    3. Optionally restore a checkpoint from ``args.resume``.
    4. Build the loaders, loss and SGD optimizer, then either validate once
       (``args.evaluate``) or run the train / validate / checkpoint loop.

    Side effects: rebinds the module globals ``args`` and ``best_prec1``.

    Raises:
        ValueError: for an unknown dataset, architecture or loss type.
    """
    global args, best_prec1
    args = parser.parse_args()

    separator = "------------------------------------"
    print(separator)
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    print(separator)
    print(args.arch + " Configurations:")
    for key, value in vars(args).items():
        print("- {}: {}".format(key, value))
    print(separator)

    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        # This branch used to leave rgb_read_format unset, which crashed when
        # the loaders were built.
        # NOTE(review): the five-digit template is assumed to match the
        # hmdb51 frame names, as for ucf101/kinetics - confirm.
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Per-parameter-group optimizer options: each entry is a dict with a
    # 'params' list plus the 'name', 'lr_mult' and 'decay_mult' keys that are
    # printed below and handed to SGD.
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    # Seed weights from the architecture-specific initializer. Any other
    # architecture used to fail below with a NameError on new_state_dict;
    # reject it here with a clear message instead.
    if args.arch == "ECO":
        new_state_dict = init_ECO(model_dict)
    elif args.arch == "ECOfull":
        new_state_dict = init_ECOfull(model_dict)
    elif args.arch == "C3DRes18":
        new_state_dict = init_C3DRes18(model_dict)
    else:
        raise ValueError('Unsupported architecture ' + args.arch)

    _init_missing_weights(model_dict, new_state_dict)

    model.load_state_dict(new_state_dict)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    image_tmpl = (args.rgb_prefix + rgb_read_format
                  if args.modality in ["RGB", "RGBDiff"]
                  else args.flow_prefix + rgb_read_format)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train or evaluate a TSN model configured from the command line.

    The function parses the module-level ``parser``, builds the network,
    optionally restores a checkpoint from ``args.resume``, creates the
    training and validation data loaders and then either runs one validation
    pass (when ``args.evaluate`` is set) or the full training loop. A
    checkpoint is written after every validation pass.

    Raises:
        ValueError: If ``args.dataset``, ``args.modality`` or
            ``args.loss_type`` holds an unsupported value.
    """
    global args, best_prec1
    torch.set_printoptions(precision=6)
    args = parser.parse_args()

    # Load the arguments and derive the number of classes from the dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Author's notes (translated): build the model. Its inputs are
    #   - num_class: number of target classes;
    #   - args.num_segments: how many segments each video is split into
    #     (K in the TSN paper);
    #   - args.modality: input type, e.g. RGB frames or optical flow;
    #   - args.arch: backbone network, e.g. resnet101 or BNInception;
    #   - args.consensus_type: how per-snippet predictions are fused (e.g. avg);
    #   - args.dropout: dropout ratio.
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Collect the model-specific preprocessing settings from the bare model
    # before it is wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Multi-GPU training.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Author's notes (translated): args.resume selects whether training
    # continues from a saved checkpoint. It is either unset or the path of a
    # saved model file; the checkpoint restores the epoch counter, the best
    # precision so far and the network weights.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the original printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    # Author's notes (translated): TSNDataSet reads the raw data and is then
    # wrapped in a DataLoader, which assembles the samples and labels into
    # batches for the model.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_transform = torchvision.transforms.Compose([
        train_augmentation,
        Stack(roll=is_bninception),
        ToTorchFormatTensor(div=not is_bninception),
        normalize,
    ])
    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=is_bninception),
        ToTorchFormatTensor(div=not is_bninception),
        normalize,
    ])

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=train_transform),
        batch_size=args.batch_size, shuffle=True,
        num_workers=3, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=val_transform),
        batch_size=args.batch_size, shuffle=False,
        num_workers=3, pin_memory=True)

    # Author's notes (translated): the last part trains the model. It defines
    # the loss and the optimizer, adjusts the learning rate at the epochs
    # listed in args.lr_steps, trains for one epoch at a time and validates
    # and checkpoints according to args.eval_freq.

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    # Adam is used here in place of SGD with momentum and weight decay.
    optimizer = torch.optim.Adam(policies, args.lr)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train or evaluate the segment-based video model and plot its history.

    The function parses the module-level ``parser``, resolves the dataset
    lists through ``datasets_video.return_dataset``, builds the model,
    optionally restores a checkpoint from ``args.resume`` and creates the data
    loaders. With ``args.evaluate`` set it runs one validation pass and
    returns; otherwise it trains, validates every ``args.eval_freq`` epochs,
    saves a checkpoint after each validation and finally passes the collected
    metrics to ``plot_utils.plot_statistics``.

    Raises:
        ValueError: If ``args.modality`` or ``args.loss_type`` holds an
            unsupported value.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'STModeling', args.dataset, args.modality, args.arch,
        args.consensus_type, 'segment%d' % args.num_segments,
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    # Collect the preprocessing settings from the bare model before it is
    # wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the original printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    print(model)
    cudnn.benchmark = True

    # Data loading code
    # NOTE(review): this condition is always true (a value cannot equal both
    # 'RGBDiff' and 'RGBFlow'), so GroupNormalize is applied to every modality
    # and the IdentityTransform branch is unreachable. The behaviour is kept
    # as-is because changing it would alter the preprocessing that existing
    # checkpoints were trained with - confirm the intent before tightening it
    # to ``not in ('RGBDiff', 'RGBFlow')``.
    if args.modality != 'RGBDiff' or args.modality != 'RGBFlow':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    uses_inception = args.arch in ['BNInception', 'InceptionV3']
    is_rgb_flow = args.modality == 'RGBFlow'

    train_transform = torchvision.transforms.Compose([
        train_augmentation,
        Stack(roll=uses_inception, isRGBFlow=is_rgb_flow),
        ToTorchFormatTensor(div=not uses_inception),
        normalize,
    ])
    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=uses_inception, isRGBFlow=is_rgb_flow),
        ToTorchFormatTensor(div=not uses_inception),
        normalize,
    ])

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   transform=train_transform),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=False)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   random_shift=False,
                   transform=val_transform),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=False)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # The DNDF consensus replaces SGD with Adam over all trainable parameters.
    if args.consensus_type == 'DNDF':
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    history = {
        'accuracy': [],
        'val_accuracy': [],
        'loss': [],
        'val_loss': [],
    }
    model_details = {
        'backbone': args.arch,
        'transformer_arch': args.consensus_type,
        'lr': args.lr,
        'batch_size': args.batch_size,
    }

    log_path = os.path.join(args.root_log, '%s.csv' % args.store_name)
    # The context manager closes the log file even if training raises.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            # The learning-rate schedule is skipped for the DNDF/Adam setup.
            if not args.consensus_type == 'DNDF':
                adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            acc, loss = train(train_loader, model, criterion, optimizer,
                              epoch, log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1, val_loss = validate(val_loader, model, criterion,
                                           (epoch + 1) * len(train_loader),
                                           log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint({
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

                # Record metrics only when validation ran, so the four series
                # stay aligned and never reuse a stale or undefined value.
                history['accuracy'].append(acc)
                history['loss'].append(loss)
                history['val_accuracy'].append(prec1)
                history['val_loss'].append(val_loss)

    plot_utils.plot_statistics(history, model_details)
def main():
    """Train or evaluate a TSN model on frames stored under ``UCF-Frames``.

    The function parses the module-level ``parser``, builds the network,
    optionally restores a checkpoint from ``args.resume`` and creates the
    training and validation loaders. With ``args.evaluate`` set it runs one
    validation pass and returns; otherwise it trains, validating and saving a
    checkpoint every ``args.eval_freq`` epochs and after the final epoch.

    Raises:
        ValueError: If ``args.dataset``, ``args.modality`` or
            ``args.loss_type`` holds an unsupported value.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Collect the preprocessing settings from the bare model before it is
    # wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the original printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality ' + args.modality)

    # Both loaders share the frame file-name template and the stacking flags.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "{:06d}.jpg"
    else:
        image_tmpl = args.flow_prefix + "{}_{:05d}.jpg"
    is_bninception = args.arch == 'BNInception'

    train_transform = torchvision.transforms.Compose([
        train_augmentation,
        Stack(roll=is_bninception),
        ToTorchFormatTensor(div=not is_bninception),
        normalize,
    ])
    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=is_bninception),
        ToTorchFormatTensor(div=not is_bninception),
        normalize,
    ])

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("UCF-Frames", args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=train_transform),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("UCF-Frames", args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=val_transform),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
def main():
    """Train or evaluate a TSN model configured from the command line.

    The function parses the module-level ``parser``, builds the network,
    optionally restores a checkpoint from ``args.resume`` and creates the
    training and validation loaders. With ``args.evaluate`` set it runs one
    validation pass and returns; otherwise it trains, validating and saving a
    checkpoint every ``args.eval_freq`` epochs and after the final epoch.

    Raises:
        ValueError: If ``args.dataset``, ``args.modality`` or
            ``args.loss_type`` holds an unsupported value.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Collect the preprocessing settings from the bare model before it is
    # wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path (the original printed args.evaluate).
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown modality '+args.modality)

    # Arguments shared by the training and validation datasets.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = args.flow_prefix+"{}_{:05d}.jpg"
    dataset_kwargs = dict(num_segments=args.num_segments,
                          new_length=data_length,
                          modality=args.modality,
                          image_tmpl=image_tmpl)
    loader_kwargs = dict(batch_size=args.batch_size,
                         num_workers=args.workers,
                         pin_memory=True)
    roll = args.arch == 'BNInception'
    div = args.arch != 'BNInception'

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ]),
                   **dataset_kwargs),
        shuffle=True, **loader_kwargs)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=roll),
                       ToTorchFormatTensor(div=div),
                       normalize,
                   ]),
                   **dataset_kwargs),
        shuffle=False, **loader_kwargs)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']),
            group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)