def delete(self, id):
    """Delete the video identified by *id*.

    Responses:
        401 when the caller is not authenticated,
        404 when no video with this id exists,
        403 when the video belongs to a different user,
        204 (empty body) on successful deletion.
    """
    # NOTE(review): strict `!= True` kept on purpose — is_authentified() may
    # return a non-bool sentinel; confirm its contract before relaxing this.
    if is_authentified() != True:
        return {"message": "Unauthorized"}, 401
    result = VideoModel.get_video_by_id(id)
    if not result:
        return {'message': 'Not found'}, 404
    # Only the owner of the video may delete it.
    if not is_user_connected(result.user_id):
        return {"message": "Forbidden"}, 403
    VideoModel.delete_video_by_id(id)
    # Bug fix: the original `return 204` made flask-restful serialize the
    # integer 204 as the JSON body with HTTP status 200. Return an empty
    # body with an explicit 204 No Content status instead.
    return '', 204
def get(self):
    """List all videos with pagination.

    Query parameters:
        page: 1-based page index (string from the query; defaults to 1).
        perPage: number of items per page (defaults to 100).

    Returns 200 with {'data', 'pager': {'current', 'total'}} or 404 when
    the requested page is empty.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('page', help='This field cannot be blank', required=False)
    parser.add_argument('perPage', help='This field cannot be blank', required=False)
    # Renamed from `json` to avoid shadowing the stdlib module name.
    params = parser.parse_args()
    datum = VideoModel.return_all()
    # Query args arrive as strings (or None when absent); normalize once.
    page = int(params['page']) if params['page'] is not None else 1
    per_page = int(params['perPage']) if params['perPage'] is not None else 100
    results = paging(datum, page, per_page)
    total_page = number_page(datum, per_page)
    if not results:
        return {'message': 'Not found'}, 404
    return {
        'message': 'OK',
        'data': results,
        'pager': {
            # Bug fix: 'current' previously echoed the raw query string,
            # so its type disagreed with the int 'total'; always an int now.
            'current': page,
            'total': total_page
        }
    }, 200
def post(self):
    """Create a new video from the uploaded 'source' file.

    Responses:
        401 when the caller is not authenticated,
        400/2001 when the upload is not a video,
        400/2002 when persisting the record fails,
        201 on success.
    """
    data = parser.parse_args()
    if is_authentified() is None:
        return {"message": "Unauthorized"}, 401
    user_id = actual_user_id()
    # import_data validates/stores the uploaded file; None means "not a video".
    if VideoCreate.import_data(self, request.files['source']) is None:
        return {
            'message': 'Bad Request',
            'code': 2001,
            'data': 'Not a video'
        }, 400
    try:
        new_video = VideoModel(name=data["name"],
                               duration=0,
                               user_id=user_id,
                               source=app.config['VIDEO_FOLDER'] + data["name"],
                               created_at=datetime.datetime.now(),
                               view=0,
                               enabled=True)
        data_user = UserModel.get_user_by_id(user_id)
        new_video.save_to_db()
        # Hand-off files consumed by the front-end upload pipeline.
        # NOTE(review): relative paths assume a specific working directory
        # and repo layout — confirm they resolve in deployment.
        with open('../../newFront/myyoutubeapp/assets/uploads/mail', 'w') as f:
            f.write(data_user.email)
        with open('../../newFront/myyoutubeapp/assets/uploads/name', 'w') as f:
            f.write(data["name"])
        # print('algolia')
        # algolia.send_data_to_algolia()
        return {'message': 'OK', 'data': {'video': "video"}}, 201
    # Bug fix: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so only runtime errors map to 400.
    except Exception:
        return {
            'message': 'Bad Request',
            'code': 2002,
            'data': 'failed to save video'
        }, 400
def get(self, id):
    """Return the metadata of the video identified by *id*.

    Responses: 200 with the video record, 404 when the id is unknown.
    """
    result = VideoModel.get_video_by_id(id)
    if not result:
        return {'message': 'Not found'}, 404
    data = {
        'id': result.id,
        'name': result.name,
        'source': result.source,
        'created_at': str(result.created_at),
        'view': result.view,
        'enabled': result.enabled,
        # Bug fix: 'user' previously repeated the video id (result.id);
        # the owning user is stored in user_id (same field the delete
        # handler checks for ownership).
        'user': result.user_id
    }
    return {'message': 'OK', 'data': data}
def main():
    """Train a VideoModel on something-v1 or diving48.

    Reads CLI options from the module-level ``parser``, builds the train and
    validation loaders, optionally resumes from a checkpoint, then runs the
    epoch loop with periodic validation and checkpointing under
    ``experiments/<dataset>/<arch>/<consensus>-<modality>/<run_iter>``.
    """
    finetuning = False  # NOTE(review): assigned but never used in this function
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()
    # Dataset-specific settings: class count and frame-filename template.
    if args.dataset == 'something-v1':
        num_class = 174
        args.rgb_prefix = ''
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'diving48':
        num_class = 48
        args.rgb_prefix = 'frames'
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    if not args.resume:
        # Refuse to clobber an existing run directory for a fresh run.
        if os.path.exists(model_dir):
            print('Dir {} exists!!!'.format(model_dir))
            sys.exit()
        else:
            os.makedirs(model_dir)
            os.makedirs(os.path.join(model_dir, args.root_log))
    writer = SummaryWriter(model_dir)
    args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset)
    if 'something' in args.dataset:
        # label transformation for left/right categories
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None
    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)
    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=target_transforms)
    # Capture preprocessing metadata before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): this logs args.evaluate; args.resume was
            # probably intended — log message only, behavior unaffected.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    # NOTE(review): data_length is unbound for any other modality value.
    train_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format,
        random_shift=False,  # deterministic segment sampling for validation
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
        optimizer=optimizer,
        milestones=[args.warmup, args.epochs],
        warmup_iters=args.warmup,
        min_ratio=1e-7)
    if args.resume:
        # Fast-forward the scheduler to the resumed epoch.
        for epoch in range(0, args.start_epoch):
            lr_scheduler_clr.step()
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        train_prec1 = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            log_training,
                            writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             writer=writer, epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
        else:
            # Non-eval epochs checkpoint with the training accuracy instead.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': train_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
def main():
    """Debug/visualization script: inspect a VideoModel's parameters, first
    conv kernels and activations on one sample.

    NOTE(review): the unconditional sys.exit(1) right after printing the
    model makes everything below it dead code in the current state —
    presumably a leftover debugging switch; confirm before relying on the
    visualization part.
    """
    global args
    args = parser.parse_args()
    train_videofolder, val_videofolder, args.root_path, _ = return_dataset(args.dataset)
    # Hard-coded for something-v1 style frame layout.
    num_class = 174
    rgb_prefix = ''
    rgb_read_format = "{:05d}.jpg"
    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=None)
    # Replace the consensus module so the network emits per-segment features.
    model.consensus = Identity()
    print("parameters", sum(p.numel() for p in model.parameters()))
    print(model)
    sys.exit(1)  # NOTE(review): everything below is currently unreachable
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = model.cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    normalize = GroupNormalize(input_mean, input_std)
    dataset = VideoDataset(args.root_path,
                           train_videofolder,
                           num_segments=8,
                           new_length=1,
                           modality="RGB",
                           image_tmpl=rgb_prefix + rgb_read_format,
                           transform=torchvision.transforms.Compose([
                               train_augmentation,
                               Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                               ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                               normalize
                           ]))

    def normalize_output(img):
        # Min-max normalize an image tensor into [0, 1] for display.
        img = img - img.min()
        img = img / img.max()
        return img

    # Run a single sample through the model.
    data = dataset[0][0].unsqueeze_(0).cuda()
    output = model(data)
    #print(model)
    #.exit(1)
    # Plot some images
    idx = torch.randint(0, output.size(0), ())
    #pred = normalize_output(output[idx, 0])
    img = data[idx, 0]
    #fig, axarr = plt.subplots(1, 2)
    plt.imshow(img.cpu().detach().numpy())
    #axarr[1].imshow(pred.cpu().detach().numpy())
    # Visualize feature maps
    activation = {}

    def get_activation(name):
        # Forward hook that stashes the layer output under `name`.
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    model.base_model.conv1_7x7_s2.register_forward_hook(get_activation('conv1'))
    data, _ = dataset[0]
    data.unsqueeze_(0)
    output = model(data.cuda())
    # First-conv kernels; "-40" trims the subplot count to keep the figure
    # manageable (the remaining kernels are simply not shown).
    kernels = model.base_model.conv1_7x7_s2.weight.cpu().detach()
    fig, axarr = plt.subplots(kernels.size(0) - 40, figsize=(15, 15))
    for idx in range(kernels.size(0) - 40):
        axarr[idx].imshow(np.transpose(kernels[idx].squeeze(), (1, 2, 0)))
    # Captured conv1 activations; only the first 3 channels are displayed.
    act = activation['conv1'].squeeze()
    fig, axarr = plt.subplots(act.size(0), figsize=(15, 15))
    for idx in range(act.size(0)):
        axarr[idx].imshow(np.transpose(act[idx][:3].cpu(), (1, 2, 0)))
    plt.tight_layout()
    plt.show()
#use_attn=args.use_attn, n_attn=args.n_attn, use_attn_frame=args.use_attn_frame, #verbose=args.verbose) net = VideoModel( num_class, args.baseline_type, args.frame_aggregation, args.modality, train_segments=args.test_segments if args.baseline_type == 'video' else 1, val_segments=args.test_segments if args.baseline_type == 'video' else 1, base_model=args.arch, add_fc=args.add_fc, fc_dim=args.fc_dim, share_params=args.share_params, dropout_i=args.dropout_i, dropout_v=args.dropout_v, use_bn=args.use_bn, partial_bn=False, n_rnn=args.n_rnn, rnn_cell=args.rnn_cell, n_directions=args.n_directions, n_ts=args.n_ts, use_attn=args.use_attn, n_attn=args.n_attn, use_attn_frame=args.use_attn_frame, verbose=args.verbose, in_channel=3, out_channel=64, window_size=window_size, num_joint=25, num_person=2) checkpoint = torch.load(args.weights)
# Test-script setup: resolve dataset lists, build the network, load weights.
args.train_list, args.val_list, args.root_path, prefix = dataset_video.return_dataset(args.dataset)
# Dataset-specific class count and frame-filename template.
if args.dataset == 'something-v1':
    num_class = 174
    args.rgb_prefix = ''
    rgb_read_format = "{:05d}.jpg"
elif args.dataset == 'diving48':
    num_class = 48
    args.rgb_prefix = 'frames'
    rgb_read_format = "{:05d}.jpg"
else:
    raise ValueError('Unknown dataset '+args.dataset)
net = VideoModel(num_class=num_class,
                 num_segments=args.test_segments,
                 modality=args.modality,
                 base_model=args.arch,
                 consensus_type=args.crop_fusion_type,
                 gsm=args.gsm)
checkpoint = torch.load(args.weights)
print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))
# Strip the leading "module." that DataParallel prepends to parameter names.
base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}
# strict=False tolerates missing/extra keys between checkpoint and model.
net.load_state_dict(base_dict, strict=False)
# Choose the test-time cropping strategy: single center crop or 10-crop
# oversampling.
if args.test_crops == 1:
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    # NOTE(review): this chunk is truncated here — the Compose call (and any
    # following branches) continues beyond this excerpt.
    cropping = torchvision.transforms.Compose([
        GroupOverSample(net.input_size, net.scale_size)
def main():
    """Train a VideoModel with fixed 8-segment RGB loaders.

    Reads CLI options from the module-level ``parser``, builds the train and
    validation loaders, optionally resumes from a checkpoint, then runs the
    epoch loop with periodic validation and checkpointing under
    ``experiments/<dataset>/<arch>/<consensus>-<modality>/<run_iter>``.
    """
    global args
    global best_prec1
    args = parser.parse_args()
    check_rootfolders()
    # Dataset-specific settings: class count and frame-filename template.
    if args.dataset == 'something-v1':
        num_class = 174
        rgb_prefix = ''
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'diving48':
        num_class = 48
        rgb_prefix = 'frames'
        rgb_read_format = "{:05d}.jpg"
    else:
        # Bug fix: the exception was constructed but never raised, so an
        # unknown dataset fell through with num_class/prefix unbound.
        raise ValueError("Unknown dataset" + args.dataset)
    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             f"{args.run_iter}")
    if not args.resume:
        if os.path.exists(model_dir):
            # Unlike the sibling trainer, this variant only warns and
            # reuses an existing directory instead of aborting.
            print(f"Dir {model_dir} already exists!")
        else:
            os.makedirs(model_dir)
            os.makedirs(os.path.join(model_dir, args.root_log))
    writer = SummaryWriter(model_dir)
    #print("Adding stuff to", model_dir)
    #.add_scalar("LOSS", 2, 10)
    writer.flush()
    #sys.exit(1)
    train_videofolder, val_videofolder, args.root_path, _ = return_dataset(
        args.dataset)
    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=None)
    print("parameters", sum(p.numel() for p in model.parameters()))
    # Capture preprocessing metadata before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model).cuda()
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            # Bug fix: this log line printed args.evaluate instead of the
            # checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    cudnn.benchmark = True
    normalize = GroupNormalize(input_mean, input_std)
    train_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        train_videofolder,
        num_segments=8,
        new_length=1,
        modality="RGB",
        image_tmpl=rgb_prefix + rgb_read_format,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # NOTE(review): the val loader reuses train_augmentation, skips
    # normalization and shuffles with a hard-coded batch size — all unusual
    # for validation; confirm whether this is intentional before changing.
    val_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        val_videofolder,
        num_segments=8,
        new_length=1,
        modality="RGB",
        image_tmpl=rgb_prefix + rgb_read_format,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3']))
        ])),
                                             batch_size=16,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    #for group in policies:
    #    print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
    #        group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
        optimizer=optimizer,
        milestones=[args.warmup, args.epochs],
        warmup_iters=args.warmup,
        min_ratio=1e-7)
    if args.resume:
        # Fast-forward the scheduler to the resumed epoch.
        for epoch in range(0, args.start_epoch):
            lr_scheduler_clr.step()
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    # NOTE(review): args.store_name is not assigned in this function — it
    # must come from the parser defaults; verify before running.
    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
        writer.flush()
        train_prec1 = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            log_training,
                            writer=writer)
        lr_scheduler_clr.step()
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             writer=writer, epoch=epoch)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
        else:
            # Non-eval epochs checkpoint with the training accuracy instead.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': train_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
def main():
    """Evaluate a trained model on the validation set and plot t-SNE
    embeddings of per-segment features for a few selected classes.

    NOTE(review): several values are hard-coded for a specific diving48
    experiment (checkpoint path, num_class=174 in the constructor despite
    num_class = 48 above, dataset name passed literally) — clearly a
    one-off debugging configuration; confirm before reuse.
    """
    parser = argparse.ArgumentParser(
        description="TRN testing on the full validation set")
    parser.add_argument('dataset', type=str,
                        choices=['something-v1', 'diving48'])
    parser.add_argument('modality', type=str,
                        choices=['RGB', 'Flow', 'RGBDiff'])
    parser.add_argument('weights', type=str)
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--save_scores', default=False, action="store_true")
    parser.add_argument('--test_segments', type=int, default=8)
    parser.add_argument('--max_num', type=int, default=-1)
    parser.add_argument('--test_crops', type=int, default=1)
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--crop_fusion_type', type=str, default='avg')
    parser.add_argument('-j', '--workers', default=4, type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--gpus', nargs='+', type=int, default=None)
    parser.add_argument('--num_clips', type=int, default=1,
                        help='Number of clips sampled from a video')
    parser.add_argument('--softmax', type=int, default=0)
    parser.add_argument('--gsm', default=False, action="store_true")
    args = parser.parse_args()
    # Dataset name is hard-coded; the positional `dataset` arg is ignored.
    train_videofolder, args.val_list, args.root_path, _ = return_dataset(
        "diving48")
    #train_videofolder, val_videofolder, _, _ = return_dataset("diving48")
    #num_class = 174
    num_class = 48  # NOTE(review): unused — the constructor passes 174 below
    net = VideoModel(num_class=174,
                     modality="RGB",
                     num_segments=8,
                     base_model="BNInception",
                     consensus_type="avg",
                     gsm=True,
                     target_transform=None)
    print("parameters", sum(p.numel() for p in net.parameters()))
    # Hard-coded checkpoint; strict=False tolerates key mismatches.
    checkpoint = torch.load(
        "experiments/diving48/BNInception/avg-RGB/13/log_best.pth.tar")
    net.load_state_dict(checkpoint['state_dict'], strict=False)
    #print(("=> loaded checkpoint '{}' (epoch {})".format(args.evaluate, checkpoint['epoch'])))
    args.rgb_prefix = ''  # NOTE(review): immediately overwritten below
    args.rgb_read_format = "{:05d}.jpg"
    args.rgb_prefix = 'frames'
    #rgb_read_format = "{:05d}.jpg"
    cropping = torchvision.transforms.Compose([
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
    data_loader = torch.utils.data.DataLoader(VideoDataset(
        args.root_path,
        args.val_list,
        num_segments=args.test_segments,
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + args.rgb_read_format,
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(net.input_mean, net.input_std),
        ]),
        num_clips=args.num_clips),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)
    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))
    net = torch.nn.DataParallel(net.cuda())
    net.eval()
    data_gen = enumerate(data_loader)
    # Collect labels to build a (currently unused) subset sampler.
    targets = [video.label for video in data_loader.dataset.video_list]
    targets = torch.tensor(targets)
    target_idx = (targets < 50).nonzero()
    sampler = torch.utils.data.sampler.SubsetRandomSampler(target_idx)
    # Strip consensus and classifier so the network outputs raw features.
    net.consensus = Identity()
    net.new_fc = Identity()
    net = net.cuda()
    embs = None
    targets = None
    net.eval()
    data_gen = enumerate(data_loader)
    #print(next(data_gen))
    # Accumulate features for samples of classes 2, 3 and 5 only.
    for i, (input, target) in data_gen:
        if target == 5 or target == 3 or target == 2:
            out = net(input.cuda())
            temp = out.cpu().detach()
            del out
            if embs is None and targets is None:
                embs = temp
                targets = target
            else:
                embs = torch.cat((embs, temp))
                targets = torch.cat((targets, target))
    print(embs.shape)
    embs = tsne(embs)
    import seaborn as sns
    N = len(np.unique(targets))
    palette = sns.color_palette("bright", N)
    sns.scatterplot(embs[:, 0],
                    embs[:, 1],
                    hue=targets,
                    legend='full',
                    palette=palette)
    plt.show()
    sys.exit(1)  # NOTE(review): everything below is currently unreachable
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    cmap = plt.cm.jet
    # extract all colors from the .jet map
    cmaplist = [cmap(i) for i in range(cmap.N)]
    # create the new map
    cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)
    # define the bins and normalize
    bounds = np.linspace(0, N, N + 1)
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
    # make the scatter
    scat = ax.scatter(embs[:, 0],
                      embs[:, 1],
                      s=10,
                      rasterized=True,
                      c=targets,
                      cmap=cmap)
    # create the colorbar
    #cb = plt.colorbar(scat, spacing='proportional',ticks=bounds)
    #cb.set_label('Custom cbar')
    ax.set_title('Discrete color mappings')
    plt.show()
    sys.exit(1)
    # NOTE(review): `dataset` is not defined in this function — this dead
    # tail references a variable from an older version of the script.
    thumbnails, labels = zip(*dataset)
    embeddings = torch.cat(thumbnails).view(len(thumbnails), -1)
    #embeddings, labels, thumbnails = (zip(embeddings, labels, thumbnails))
    print(embeddings.shape)
    #embeddings = torch.stack(embeddings)
    embeddings = tsne(embeddings)
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    N = len(np.unique(labels))
    cmap = plt.cm.jet
    # extract all colors from the .jet map
    cmaplist = [cmap(i) for i in range(cmap.N)]
    # create the new map
    cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)
    # define the bins and normalize
    bounds = np.linspace(0, N, N + 1)
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
    # make the scatter
    scat = ax.scatter(embeddings[:, 0],
                      embeddings[:, 1],
                      c=labels,
                      cmap=cmap)
    # create the colorbar
    #cb = plt.colorbar(scat, spacing='proportional',ticks=bounds)
    #cb.set_label('Custom cbar')
    ax.set_title('Discrete color mappings')
    plt.show()
# single class if len(num_class_str) < 1: raise Exception("Must specify a number of classes to train") else: num_class = [] for num in num_class_str: num_class.append(int(num)) criterion = torch.nn.CrossEntropyLoss().cuda() #=== Load the network ===# print(Fore.CYAN + 'preparing the model......') verb_net = VideoModel(num_class, args.baseline_type, args.frame_aggregation, args.modality, train_segments=args.test_segments if args.baseline_type == 'video' else 1, val_segments=args.test_segments if args.baseline_type == 'video' else 1, base_model=args.arch, add_fc=args.add_fc, fc_dim=args.fc_dim, share_params=args.share_params, dropout_i=args.dropout_i, dropout_v=args.dropout_v, use_bn=args.use_bn, partial_bn=False, n_rnn=args.n_rnn, rnn_cell=args.rnn_cell, n_directions=args.n_directions, n_ts=args.n_ts, use_attn=args.use_attn, n_attn=args.n_attn, use_attn_frame=args.use_attn_frame, verbose=args.verbose, before_softmax=False) verb_checkpoint = torch.load(args.weights) verb_base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(verb_checkpoint['state_dict'].items())} verb_net.load_state_dict(verb_base_dict) verb_net = torch.nn.DataParallel(verb_net.cuda()) verb_net.eval() if args.noun_weights is not None: noun_net = VideoModel(num_class, args.baseline_type, args.frame_aggregation, args.modality, train_segments=args.test_segments if args.baseline_type == 'video' else 1, val_segments=args.test_segments if args.baseline_type == 'video' else 1,
def main():
    """Train a video model with optional domain adaptation (source/target).

    Reads CLI options from the module-level ``parser``; supports
    discrepancy-based and adversarial DA, adaptive normalization, class- and
    domain-weighted losses, resumable checkpoints, and per-epoch attention
    logging. Writes train/val (or test) logs under
    ``args.exp_path + args.modality``.
    """
    global args, best_prec1, writer
    args = parser.parse_args()
    print(Fore.GREEN + 'Baseline:', args.baseline_type)
    print(Fore.GREEN + 'Frame aggregation method:', args.frame_aggregation)
    print(Fore.GREEN + 'target data usage:', args.use_target)
    if args.use_target == 'none':
        print(Fore.GREEN + 'no Domain Adaptation')
    else:
        if args.dis_DA != 'none':
            print(
                Fore.GREEN +
                'Apply the discrepancy-based Domain Adaptation approach:',
                args.dis_DA)
            # One placement flag per layer: input + add_fc hidden + output.
            if len(args.place_dis) != args.add_fc + 2:
                raise ValueError(
                    Back.RED + 'len(place_dis) should be equal to add_fc + 2')
        if args.adv_DA != 'none':
            print(
                Fore.GREEN +
                'Apply the adversarial-based Domain Adaptation approach:',
                args.adv_DA)
        if args.use_bn != 'none':
            print(Fore.GREEN + 'Apply the adaptive normalization approach:',
                  args.use_bn)
    # determine the categories
    # Class file format: "<id> <name...>" — keep only the name part.
    class_names = [
        line.strip().split(' ', 1)[1] for line in open(args.class_file)
    ]
    num_class = len(class_names)
    #=== check the folder existence ===#
    path_exp = args.exp_path + args.modality + '/'
    if not os.path.isdir(path_exp):
        os.makedirs(path_exp)
    if args.tensorboard:
        writer = SummaryWriter(path_exp + '/tensorboard')  # for tensorboardX
    #=== initialize the model ===#
    print(Fore.CYAN + 'preparing the model......')
    model = VideoModel(
        num_class,
        args.baseline_type,
        args.frame_aggregation,
        args.modality,
        train_segments=args.num_segments,
        val_segments=args.val_segments,
        base_model=args.arch,
        path_pretrained=args.pretrained,
        add_fc=args.add_fc,
        fc_dim=args.fc_dim,
        dropout_i=args.dropout_i,
        dropout_v=args.dropout_v,
        partial_bn=not args.no_partialbn,
        # DA-specific options are disabled when no target data is used.
        use_bn=args.use_bn if args.use_target != 'none' else 'none',
        ens_DA=args.ens_DA if args.use_target != 'none' else 'none',
        n_rnn=args.n_rnn,
        rnn_cell=args.rnn_cell,
        n_directions=args.n_directions,
        n_ts=args.n_ts,
        use_attn=args.use_attn,
        n_attn=args.n_attn,
        use_attn_frame=args.use_attn_frame,
        verbose=args.verbose,
        share_params=args.share_params)
    model = torch.nn.DataParallel(model, args.gpus).cuda()
    if args.optimizer == 'SGD':
        print(Fore.YELLOW + 'using SGD')
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    elif args.optimizer == 'Adam':
        print(Fore.YELLOW + 'using Adam')
        optimizer = torch.optim.Adam(model.parameters(),
                                     args.lr,
                                     weight_decay=args.weight_decay)
    else:
        print(Back.RED + 'optimizer not support or specified!!!')
        exit()
    #=== check point ===#
    start_epoch = 1
    print(Fore.CYAN + 'checking the checkpoint......')
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
            if args.resume_hp:
                print("=> loaded checkpoint hyper-parameters")
                optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print(Back.RED +
                  "=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    #--- open log files ---#
    # Resumed runs append to the existing logs; fresh runs truncate them.
    if not args.evaluate:
        if args.resume:
            train_file = open(path_exp + 'train.log', 'a')
            train_short_file = open(path_exp + 'train_short.log', 'a')
            val_file = open(path_exp + 'val.log', 'a')
            val_short_file = open(path_exp + 'val_short.log', 'a')
            train_file.write('========== start: ' + str(start_epoch) +
                             '\n')  # separation line
            train_short_file.write('========== start: ' + str(start_epoch) +
                                   '\n')
            val_file.write('========== start: ' + str(start_epoch) + '\n')
            val_short_file.write('========== start: ' + str(start_epoch) +
                                 '\n')
        else:
            train_short_file = open(path_exp + 'train_short.log', 'w')
            val_short_file = open(path_exp + 'val_short.log', 'w')
            train_file = open(path_exp + 'train.log', 'w')
            val_file = open(path_exp + 'val.log', 'w')
        val_best_file = open(args.save_best_log, 'a')
    else:
        test_short_file = open(path_exp + 'test_short.log', 'w')
        test_file = open(path_exp + 'test.log', 'w')
    #=== Data loading ===#
    print(Fore.CYAN + 'loading data......')
    if args.use_opencv:
        print("use opencv functions")
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff', 'RGBDiff2', 'RGBDiffplus']:
        data_length = 5
    # calculate the number of videos to load for training in each list
    # ==> make sure the iteration of source & target are same
    num_source = sum(1 for i in open(args.train_source_list))
    num_target = sum(1 for i in open(args.train_target_list))
    num_val = sum(1 for i in open(args.val_list))
    num_iter_source = num_source / args.batch_size[0]
    num_iter_target = num_target / args.batch_size[1]
    num_max_iter = max(num_iter_source, num_iter_target)
    # Optionally oversample the smaller list so both loaders yield the same
    # number of iterations per epoch.
    num_source_train = round(
        num_max_iter *
        args.batch_size[0]) if args.copy_list[0] == 'Y' else num_source
    num_target_train = round(
        num_max_iter *
        args.batch_size[1]) if args.copy_list[1] == 'Y' else num_target
    # calculate the weight for each class
    class_id_list = [
        int(line.strip().split(' ')[2])
        for line in open(args.train_source_list)
    ]
    class_id, class_data_counts = np.unique(np.array(class_id_list),
                                            return_counts=True)
    class_freq = (class_data_counts / class_data_counts.sum()).tolist()
    weight_source_class = torch.ones(num_class).cuda()
    weight_domain_loss = torch.Tensor([1, 1]).cuda()
    # Inverse-frequency weighting for the class loss, and inverse-size
    # weighting for the source/target domain loss.
    if args.weighted_class_loss == 'Y':
        weight_source_class = 1 / torch.Tensor(class_freq).cuda()
    if args.weighted_class_loss_DA == 'Y':
        weight_domain_loss = torch.Tensor(
            [1 / num_source_train, 1 / num_target_train]).cuda()
    # data loading (always need to load the testing data)
    val_segments = args.val_segments if args.val_segments > 0 else args.num_segments
    val_set = TSNDataSet(
        "",
        args.val_list,
        num_dataload=num_val,
        num_segments=val_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.t7" if args.modality in [
            "RGB", "RGBDiff", "RGBDiff2", "RGBDiffplus"
        ] else args.flow_prefix + "{}_{:05d}.t7",
        random_shift=False,
        test_mode=True,
    )
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size[2],
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    if not args.evaluate:
        source_set = TSNDataSet(
            "",
            args.train_source_list,
            num_dataload=num_source_train,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="img_{:05d}.t7" if args.modality in [
                "RGB", "RGBDiff", "RGBDiff2", "RGBDiffplus"
            ] else args.flow_prefix + "{}_{:05d}.t7",
            random_shift=False,
            test_mode=True,
        )
        source_sampler = torch.utils.data.sampler.RandomSampler(source_set)
        source_loader = torch.utils.data.DataLoader(
            source_set,
            batch_size=args.batch_size[0],
            shuffle=False,
            sampler=source_sampler,
            num_workers=args.workers,
            pin_memory=True)
        target_set = TSNDataSet(
            "",
            args.train_target_list,
            num_dataload=num_target_train,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="img_{:05d}.t7" if args.modality in [
                "RGB", "RGBDiff", "RGBDiff2", "RGBDiffplus"
            ] else args.flow_prefix + "{}_{:05d}.t7",
            random_shift=False,
            test_mode=True,
        )
        target_sampler = torch.utils.data.sampler.RandomSampler(target_set)
        target_loader = torch.utils.data.DataLoader(
            target_set,
            batch_size=args.batch_size[1],
            shuffle=False,
            sampler=target_sampler,
            num_workers=args.workers,
            pin_memory=True)
    # --- Optimizer ---#
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss(
            weight=weight_source_class).cuda()
        criterion_domain = torch.nn.CrossEntropyLoss(
            weight=weight_domain_loss).cuda()
    else:
        raise ValueError("Unknown loss type")
    if args.evaluate:
        print(Fore.CYAN + 'evaluation only......')
        prec1 = validate(val_loader, model, criterion, num_class, 0,
                         test_file)
        test_short_file.write('%.3f\n' % prec1)
        return
    #=== Training ===#
    start_train = time.time()
    print(Fore.CYAN + 'start training......')
    beta = args.beta
    gamma = args.gamma
    mu = args.mu
    loss_c_current = 999  # random large number
    loss_c_previous = 999  # random large number
    attn_source_all = torch.Tensor()
    attn_target_all = torch.Tensor()
    for epoch in range(start_epoch, args.epochs + 1):
        ## schedule for parameters
        # alpha ramps from 0 toward tanh-like saturation unless fixed by CLI.
        alpha = 2 / (1 + math.exp(-1 * (epoch) / args.epochs)
                     ) - 1 if args.alpha < 0 else args.alpha
        ## schedule for learning rate
        if args.lr_adaptive == 'loss':
            adjust_learning_rate_loss(optimizer, args.lr_decay,
                                      loss_c_current, loss_c_previous, '>')
        elif args.lr_adaptive == 'none' and epoch in args.lr_steps:
            adjust_learning_rate(optimizer, args.lr_decay)
        # train for one epoch
        loss_c, attn_epoch_source, attn_epoch_target = train(
            num_class, source_loader, target_loader, model, criterion,
            criterion_domain, optimizer, epoch, train_file, train_short_file,
            alpha, beta, gamma, mu)
        if args.save_attention >= 0:
            attn_source_all = torch.cat(
                (attn_source_all,
                 attn_epoch_source.unsqueeze(0)))  # save the attention values
            attn_target_all = torch.cat(
                (attn_target_all,
                 attn_epoch_target.unsqueeze(0)))  # save the attention values
        # update the recorded loss_c
        loss_c_previous = loss_c_current
        loss_c_current = loss_c
        # evaluate on validation set
        if epoch % args.eval_freq == 0 or epoch == args.epochs:
            prec1 = validate(val_loader, model, criterion, num_class, epoch,
                             val_file)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            line_update = ' ==> updating the best accuracy' if is_best else ''
            line_best = "Best score {} vs current score {}".format(
                best_prec1, prec1) + line_update
            print(Fore.YELLOW + line_best)
            val_short_file.write('%.3f\n' % prec1)
            best_prec1 = max(prec1, best_prec1)
            if args.tensorboard:
                writer.add_text('Best_Accuracy', str(best_prec1), epoch)
            if args.save_model:
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'best_prec1': best_prec1,
                        'prec1': prec1,
                    }, is_best, path_exp)
    end_train = time.time()
    print(Fore.CYAN + 'total training time:', end_train - start_train)
    val_best_file.write('%.3f\n' % best_prec1)
    # --- write the total time to log files ---#
    line_time = 'total time: {:.3f} '.format(end_train - start_train)
    # NOTE(review): in evaluate mode the function returned earlier, so the
    # `else` branches below (test_file writes/closes) are dead code here.
    if not args.evaluate:
        train_file.write(line_time)
        train_short_file.write(line_time)
        val_file.write(line_time)
        val_short_file.write(line_time)
    else:
        test_file.write(line_time)
        test_short_file.write(line_time)
    #--- close log files ---#
    if not args.evaluate:
        train_file.close()
        train_short_file.close()
        val_file.close()
        val_short_file.close()
    else:
        test_file.close()
        test_short_file.close()
    if args.tensorboard:
        writer.close()
    if args.save_attention >= 0:
        np.savetxt('attn_source_' + str(args.save_attention) + '.log',
                   attn_source_all.cpu().detach().numpy(),
                   fmt="%s")
        np.savetxt('attn_target_' + str(args.save_attention) + '.log',
                   attn_target_all.cpu().detach().numpy(),
                   fmt="%s")